sqlframe 1.1.2__tar.gz → 1.2.0__tar.gz
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
- {sqlframe-1.1.2 → sqlframe-1.2.0}/PKG-INFO +1 -1
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/duckdb.md +1 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/postgres.md +1 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/_version.py +2 -2
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/catalog.py +6 -1
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/dataframe.py +4 -3
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/mixins/catalog_mixins.py +11 -9
- sqlframe-1.2.0/sqlframe/base/mixins/dataframe_mixins.py +63 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/normalize.py +0 -1
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/catalog.py +3 -1
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/catalog.py +2 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/dataframe.py +3 -1
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/catalog.py +1 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/dataframe.py +3 -1
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/catalog.py +3 -1
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe.egg-info/PKG-INFO +1 -1
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe.egg-info/SOURCES.txt +3 -0
- sqlframe-1.2.0/tests/integration/engines/duck/test_duckdb_dataframe.py +79 -0
- sqlframe-1.2.0/tests/integration/engines/postgres/test_postgres_dataframe.py +64 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/test_int_functions.py +0 -2
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/test_dataframe.py +20 -1
- {sqlframe-1.1.2 → sqlframe-1.2.0}/.github/CODEOWNERS +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/.github/workflows/main.workflow.yaml +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/.github/workflows/publish.workflow.yaml +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/.gitignore +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/.pre-commit-config.yaml +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/.readthedocs.yaml +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/LICENSE +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/Makefile +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/README.md +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/blogs/images/but_wait_theres_more.gif +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/blogs/images/cake.gif +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/blogs/images/you_get_pyspark_api.gif +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/blogs/sqlframe_universal_dataframe_api.md +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/bigquery.md +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/docs/bigquery.md +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/docs/duckdb.md +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/docs/images/SF.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/docs/images/favicon.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/docs/images/favicon_old.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/docs/postgres.md +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/images/SF.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/images/favicon.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/images/favicon_old.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/index.md +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/requirements.txt +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/standalone.md +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/stylesheets/extra.css +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/mkdocs.yml +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/pytest.ini +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/renovate.json +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/setup.cfg +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/setup.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/LICENSE +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/_typing.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/column.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/decorators.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/exceptions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/function_alternatives.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/functions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/group.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/mixins/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/mixins/readwriter_mixins.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/operations.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/readerwriter.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/transforms.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/util.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/window.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/column.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/dataframe.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/functions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/functions.pyi +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/group.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/readwriter.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/window.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/column.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/functions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/functions.pyi +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/group.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/readwriter.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/window.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/column.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/functions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/functions.pyi +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/group.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/readwriter.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/window.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/catalog.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/column.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/dataframe.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/functions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/group.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/readwriter.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/window.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/catalog.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/column.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/dataframe.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/functions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/group.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/readwriter.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/window.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/column.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/dataframe.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/functions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/group.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/readwriter.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/window.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/catalog.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/column.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/dataframe.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/functions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/group.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/readwriter.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/window.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe.egg-info/dependency_links.txt +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe.egg-info/requires.txt +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe.egg-info/top_level.txt +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/common_fixtures.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/conftest.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/fixtures/employee.csv +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/fixtures/employee.json +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/fixtures/employee.parquet +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/fixtures/employee_extra_line.csv +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/bigquery/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/bigquery/test_bigquery_catalog.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/bigquery/test_bigquery_session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/duck/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/duck/test_duckdb_catalog.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/duck/test_duckdb_reader.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/duck/test_duckdb_session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/postgres/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/postgres/test_postgres_catalog.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/postgres/test_postgres_session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/redshift/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/redshift/test_redshift_catalog.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/redshift/test_redshift_session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/snowflake/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/snowflake/test_snowflake_catalog.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/snowflake/test_snowflake_session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/spark/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/spark/test_spark_catalog.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/test_engine_dataframe.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/test_engine_reader.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/test_engine_session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/test_engine_writer.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/fixtures.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/test_int_dataframe.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/test_int_dataframe_stats.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/test_int_grouped_data.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/test_int_session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/fixtures.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/test_column.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/test_dataframe_writer.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/test_functions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/test_session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/test_session_case_sensitivity.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/test_types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/test_window.py +0 -0
--- sqlframe-1.1.2/docs/duckdb.md
+++ sqlframe-1.2.0/docs/duckdb.md
@@ -171,6 +171,7 @@ df_store = session.createDataFrame(
 * [na](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.na.html)
 * [orderBy](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.orderBy.html)
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
+* [printSchema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.printSchema.html)
 * [replace](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.replace.html)
 * [select](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.select.html)
 * [show](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html)
--- sqlframe-1.1.2/docs/postgres.md
+++ sqlframe-1.2.0/docs/postgres.md
@@ -174,6 +174,7 @@ df_store = session.createDataFrame(
 * [na](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.na.html)
 * [orderBy](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.orderBy.html)
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
+* [printSchema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.printSchema.html)
 * [replace](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.replace.html)
 * [select](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.select.html)
 * [show](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html)
--- sqlframe-1.1.2/sqlframe/base/catalog.py
+++ sqlframe-1.2.0/sqlframe/base/catalog.py
@@ -26,6 +26,9 @@ else:
 class _BaseCatalog(t.Generic[SESSION, DF]):
     """User-facing catalog API, accessible through `SparkSession.catalog`."""

+    TEMP_CATALOG_FILTER: t.Optional[exp.Expression] = None
+    TEMP_SCHEMA_FILTER: t.Optional[exp.Expression] = None
+
     def __init__(self, sparkSession: SESSION, schema: t.Optional[MappingSchema] = None) -> None:
         """Create a new Catalog that wraps the underlying JVM object."""
         self.session = sparkSession
@@ -569,7 +572,9 @@ class _BaseCatalog(t.Generic[SESSION, DF]):
         """
         raise NotImplementedError

-    def listColumns(self, tableName: str, dbName: t.Optional[str] = None) -> t.List[Column]:
+    def listColumns(
+        self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
+    ) -> t.List[Column]:
         """Returns a t.List of columns for the given table/view in the specified database.

         .. versionadded:: 2.0.0
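Together with the mixin changes later in the diff, these hunks let each engine opt temporary objects into `listColumns`: the base catalog declares engine-overridable `TEMP_CATALOG_FILTER`/`TEMP_SCHEMA_FILTER` expressions (both `None` by default), and `listColumns` gains an `include_temp` flag. A minimal usage sketch against DuckDB, assuming `DuckDBSession(conn=...)` accepts an existing connection as in the project docs; the table and column names are illustrative:

```python
import duckdb

from sqlframe.duckdb import DuckDBSession

# Temp tables live in DuckDB's `temp` catalog, so a plain
# information_schema lookup scoped to the current catalog would miss them.
conn = duckdb.connect()
conn.execute("CREATE TEMP TABLE t1 (id INTEGER, name VARCHAR)")

session = DuckDBSession(conn=conn)
for column in session.catalog.listColumns("t1", include_temp=True):
    print(column.name, column.dataType)
```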
--- sqlframe-1.1.2/sqlframe/base/dataframe.py
+++ sqlframe-1.2.0/sqlframe/base/dataframe.py
@@ -1093,15 +1093,16 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
     @operation(Operation.SELECT)
     def withColumn(self, colName: str, col: Column) -> Self:
         col = self._ensure_and_normalize_col(col)
+        col_name = self._ensure_and_normalize_col(colName).alias_or_name
         existing_col_names = self.expression.named_selects
         existing_col_index = (
-            existing_col_names.index(colName) if colName in existing_col_names else None
+            existing_col_names.index(col_name) if col_name in existing_col_names else None
         )
         if existing_col_index:
             expression = self.expression.copy()
-            expression.expressions[existing_col_index] = col.alias(colName).expression
+            expression.expressions[existing_col_index] = col.alias(col_name).expression
             return self.copy(expression=expression)
-        return self.copy().select(col.alias(colName), append=True)
+        return self.select.__wrapped__(self, col.alias(col_name), append=True)  # type: ignore

     @operation(Operation.SELECT)
     def withColumnRenamed(self, existing: str, new: str) -> Self:
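Two fixes in one hunk: the target column name is now normalized (so `withColumn("fName", ...)` matches an existing `fname` select), and the append path calls `select.__wrapped__` directly so the `@operation` decorator is not re-entered for the internal call. A hedged sketch of the resulting behavior, mirroring the standalone unit tests later in this diff (session setup and column names are illustrative, assuming the package-level exports):

```python
from sqlframe.standalone import StandaloneSession
from sqlframe.standalone import functions as F

session = StandaloneSession()
df = session.createDataFrame([(1, "Jack", 37)], ["id", "name", "age"])

# Replaces the existing `name` column in place: "NAME" normalizes to "name",
# so no duplicate select is appended.
df = df.withColumn("NAME", F.col("age").cast("string"))

# Appending a brand-new column goes through select.__wrapped__, which is what
# makes chained withColumn calls that reference an earlier DataFrame work
# (see test_with_column_dual_expression below).
df = df.withColumn("age_copy", F.col("age"))
print(df.columns)  # expected: ['id', 'name', 'age', 'age_copy']
```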
--- sqlframe-1.1.2/sqlframe/base/mixins/catalog_mixins.py
+++ sqlframe-1.2.0/sqlframe/base/mixins/catalog_mixins.py
@@ -315,7 +315,9 @@ class ListTablesFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF]

 class ListColumnsFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF]):
     @normalize(["tableName", "dbName"])
-    def listColumns(self, tableName: str, dbName: t.Optional[str] = None) -> t.List[Column]:
+    def listColumns(
+        self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
+    ) -> t.List[Column]:
         """Returns a t.List of columns for the given table/view in the specified database.

         .. versionadded:: 2.0.0
@@ -385,12 +387,6 @@ class ListColumnsFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF
                 "catalog",
                 exp.parse_identifier(self.currentCatalog(), dialect=self.session.input_dialect),
             )
-        # if self.QUALIFY_INFO_SCHEMA_WITH_DATABASE:
-        #     if not table.db:
-        #         raise ValueError("dbName must be specified when listing columns from INFORMATION_SCHEMA")
-        #     source_table = f"{table.db}.INFORMATION_SCHEMA.COLUMNS"
-        # else:
-        #     source_table = "INFORMATION_SCHEMA.COLUMNS"
         source_table = self._get_info_schema_table("columns", database=table.db)
         select = (
             exp.select(
@@ -402,9 +398,15 @@ class ListColumnsFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF
             .where(exp.column("table_name").eq(table.name))
         )
         if table.db:
-            select = select.where(exp.column("table_schema").eq(table.db))
+            schema_filter: exp.Expression = exp.column("table_schema").eq(table.db)
+            if include_temp and self.TEMP_SCHEMA_FILTER:
+                schema_filter = exp.Or(this=schema_filter, expression=self.TEMP_SCHEMA_FILTER)
+            select = select.where(schema_filter)
         if table.catalog:
-            select = select.where(exp.column("table_catalog").eq(table.catalog))
+            catalog_filter: exp.Expression = exp.column("table_catalog").eq(table.catalog)
+            if include_temp and self.TEMP_CATALOG_FILTER:
+                catalog_filter = exp.Or(this=catalog_filter, expression=self.TEMP_CATALOG_FILTER)
+            select = select.where(catalog_filter)
         results = self.session._fetch_rows(select)
         return [
             Column(
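Because the schema and catalog predicates are ordinary sqlglot expressions, the `include_temp` branch simply ORs the engine's temp filter onto the usual equality check. A self-contained sqlglot sketch of what the Postgres variant would render (the `public` schema value and the surrounding query are illustrative; the filter values come from the hunks below):

```python
from sqlglot import exp

schema_filter: exp.Expression = exp.column("table_schema").eq("public")
# Mirrors PostgresCatalog.TEMP_SCHEMA_FILTER from this diff
temp_schema_filter = exp.column("table_schema").like("pg_temp_%")
combined = exp.Or(this=schema_filter, expression=temp_schema_filter)

select = exp.select("column_name").from_("information_schema.columns").where(combined)
print(select.sql(dialect="postgres"))
# SELECT column_name FROM information_schema.columns
# WHERE table_schema = 'public' OR table_schema LIKE 'pg_temp_%'
```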
--- /dev/null
+++ sqlframe-1.2.0/sqlframe/base/mixins/dataframe_mixins.py
@@ -0,0 +1,63 @@
+import typing as t
+
+from sqlglot import exp
+
+from sqlframe.base.catalog import Column
+from sqlframe.base.dataframe import (
+    GROUP_DATA,
+    NA,
+    SESSION,
+    STAT,
+    WRITER,
+    _BaseDataFrame,
+)
+
+
+class PrintSchemaFromTempObjectsMixin(
+    _BaseDataFrame, t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]
+):
+    def _get_columns_from_temp_object(self) -> t.List[Column]:
+        table = exp.to_table(self.session._random_id)
+        self.session._execute(
+            exp.Create(
+                this=table,
+                kind="VIEW",
+                replace=True,
+                properties=exp.Properties(expressions=[exp.TemporaryProperty()]),
+                expression=self.expression,
+            )
+        )
+        return self.session.catalog.listColumns(
+            table.sql(dialect=self.session.input_dialect), include_temp=True
+        )
+
+    def printSchema(self, level: t.Optional[int] = None) -> None:
+        def print_schema(
+            column_name: str, column_type: exp.DataType, nullable: bool, current_level: int
+        ):
+            if level and current_level >= level:
+                return
+            if current_level > 0:
+                print(" | " * current_level, end="")
+            print(
+                f" |-- {column_name}: {column_type.sql(self.session.output_dialect).lower()} (nullable = {str(nullable).lower()})"
+            )
+            if column_type.this == exp.DataType.Type.STRUCT:
+                for column_def in column_type.expressions:
+                    print_schema(column_def.name, column_def.args["kind"], True, current_level + 1)
+            if column_type.this == exp.DataType.Type.ARRAY:
+                for data_type in column_type.expressions:
+                    print_schema("element", data_type, True, current_level + 1)
+            if column_type.this == exp.DataType.Type.MAP:
+                print_schema("key", column_type.expressions[0], True, current_level + 1)
+                print_schema("value", column_type.expressions[1], True, current_level + 1)
+
+        columns = self._get_columns_from_temp_object()
+        print("root")
+        for column in columns:
+            print_schema(
+                column.name,
+                exp.DataType.build(column.dataType, dialect=self.session.output_dialect),
+                column.nullable,
+                0,
+            )
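The mixin works by materializing the DataFrame's SQL as a temporary view under a random name, then reading the column types back via `listColumns(..., include_temp=True)` and pretty-printing them recursively. A usage sketch against DuckDB; the data is illustrative and the expected output is inferred from the integration tests added later in this diff:

```python
import duckdb

from sqlframe.duckdb import DuckDBSession

session = DuckDBSession(conn=duckdb.connect())
df = session.createDataFrame(
    [(1, "Jack", [1, 2, 3])],
    ["employee_id", "fname", "scores"],
)
df.printSchema()
# Expected to resemble (types per the DuckDB integration test below):
# root
#  |-- employee_id: bigint (nullable = true)
#  |-- fname: text (nullable = true)
#  |-- scores: bigint[] (nullable = true)
#  |  |-- element: bigint (nullable = true)
```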
--- sqlframe-1.1.2/sqlframe/base/normalize.py
+++ sqlframe-1.2.0/sqlframe/base/normalize.py
@@ -20,7 +20,6 @@ def normalize(session: SESSION, expression_context: exp.Select, expr: t.List[NOR
     expr = ensure_list(expr)
     expressions = _ensure_expressions(expr)
     for expression in expressions:
-        # normalize_identifiers(expression, session.input_dialect)
         identifiers = expression.find_all(exp.Identifier)
         for identifier in identifiers:
             identifier.transform(session.input_dialect.normalize_identifier)
--- sqlframe-1.1.2/sqlframe/bigquery/catalog.py
+++ sqlframe-1.2.0/sqlframe/bigquery/catalog.py
@@ -46,7 +46,9 @@ class BigQueryCatalog(
         return to_schema(self.session.default_dataset).db

     @normalize(["tableName", "dbName"])
-    def listColumns(self, tableName: str, dbName: t.Optional[str] = None) -> t.List[Column]:
+    def listColumns(
+        self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
+    ) -> t.List[Column]:
         """Returns a t.List of columns for the given table/view in the specified database.

         .. versionadded:: 2.0.0
--- sqlframe-1.1.2/sqlframe/duckdb/catalog.py
+++ sqlframe-1.2.0/sqlframe/duckdb/catalog.py
@@ -36,6 +36,8 @@ class DuckDBCatalog(
     ListColumnsFromInfoSchemaMixin["DuckDBSession", "DuckDBDataFrame"],
     _BaseCatalog["DuckDBSession", "DuckDBDataFrame"],
 ):
+    TEMP_CATALOG_FILTER = exp.column("table_catalog").eq("temp")
+
     def listFunctions(
         self, dbName: t.Optional[str] = None, pattern: t.Optional[str] = None
     ) -> t.List[Function]:
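The `temp` literal reflects where DuckDB keeps temporary objects: they are attached as a separate `temp` database, so their `table_catalog` in `information_schema` is `temp` regardless of the current catalog. A quick standalone check (the table name is illustrative and the output shape is indicative, not from the diff):

```python
import duckdb

conn = duckdb.connect()
conn.execute("CREATE TEMP TABLE t1 (id INTEGER)")
rows = conn.execute(
    "SELECT table_catalog, table_schema, table_name "
    "FROM information_schema.tables WHERE table_name = 't1'"
).fetchall()
print(rows)  # expected to resemble [('temp', 'main', 't1')]
```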
--- sqlframe-1.1.2/sqlframe/duckdb/dataframe.py
+++ sqlframe-1.2.0/sqlframe/duckdb/dataframe.py
@@ -9,6 +9,7 @@ from sqlframe.base.dataframe import (
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
+from sqlframe.base.mixins.dataframe_mixins import PrintSchemaFromTempObjectsMixin
 from sqlframe.duckdb.group import DuckDBGroupedData

 if sys.version_info >= (3, 11):
@@ -34,13 +35,14 @@ class DuckDBDataFrameStatFunctions(_BaseDataFrameStatFunctions["DuckDBDataFrame"


 class DuckDBDataFrame(
+    PrintSchemaFromTempObjectsMixin,
     _BaseDataFrame[
         "DuckDBSession",
         "DuckDBDataFrameWriter",
         "DuckDBDataFrameNaFunctions",
         "DuckDBDataFrameStatFunctions",
         "DuckDBGroupedData",
-    ]
+    ],
 ):
     _na = DuckDBDataFrameNaFunctions
     _stat = DuckDBDataFrameStatFunctions
--- sqlframe-1.1.2/sqlframe/postgres/catalog.py
+++ sqlframe-1.2.0/sqlframe/postgres/catalog.py
@@ -34,6 +34,7 @@ class PostgresCatalog(
     _BaseCatalog["PostgresSession", "PostgresDataFrame"],
 ):
     CURRENT_CATALOG_EXPRESSION: exp.Expression = exp.column("current_catalog")
+    TEMP_SCHEMA_FILTER = exp.column("table_schema").like("pg_temp_%")

     def listFunctions(
         self, dbName: t.Optional[str] = None, pattern: t.Optional[str] = None
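Postgres, by contrast, keeps temporary objects in per-backend schemas named `pg_temp_<N>`, which is exactly what the `LIKE 'pg_temp_%'` filter targets. An illustrative check with psycopg2 (the DSN is a placeholder, not from the diff):

```python
import psycopg2

conn = psycopg2.connect("postgresql://user:pass@localhost:5432/db")  # placeholder DSN
with conn.cursor() as cur:
    cur.execute("CREATE TEMP TABLE t1 (id INTEGER)")
    cur.execute(
        "SELECT table_schema FROM information_schema.tables WHERE table_name = 't1'"
    )
    print(cur.fetchone())  # expected to resemble ('pg_temp_3',)
```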
--- sqlframe-1.1.2/sqlframe/postgres/dataframe.py
+++ sqlframe-1.2.0/sqlframe/postgres/dataframe.py
@@ -9,6 +9,7 @@ from sqlframe.base.dataframe import (
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
+from sqlframe.base.mixins.dataframe_mixins import PrintSchemaFromTempObjectsMixin
 from sqlframe.postgres.group import PostgresGroupedData

 if sys.version_info >= (3, 11):
@@ -33,13 +34,14 @@ class PostgresDataFrameStatFunctions(_BaseDataFrameStatFunctions["PostgresDataFr


 class PostgresDataFrame(
+    PrintSchemaFromTempObjectsMixin,
     _BaseDataFrame[
         "PostgresSession",
         "PostgresDataFrameWriter",
         "PostgresDataFrameNaFunctions",
         "PostgresDataFrameStatFunctions",
         "PostgresGroupedData",
-    ]
+    ],
 ):
     _na = PostgresDataFrameNaFunctions
     _stat = PostgresDataFrameStatFunctions
--- sqlframe-1.1.2/sqlframe/spark/catalog.py
+++ sqlframe-1.2.0/sqlframe/spark/catalog.py
@@ -468,7 +468,9 @@ class SparkCatalog(
         )
         return [Table(*x) for x in self._spark_catalog.listTables(dbName, pattern)]

-    def listColumns(self, tableName: str, dbName: t.Optional[str] = None) -> t.List[Column]:
+    def listColumns(
+        self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
+    ) -> t.List[Column]:
         """Returns a t.List of columns for the given table/view in the specified database.

         .. versionadded:: 2.0.0
--- sqlframe-1.1.2/sqlframe.egg-info/SOURCES.txt
+++ sqlframe-1.2.0/sqlframe.egg-info/SOURCES.txt
@@ -64,6 +64,7 @@ sqlframe/base/util.py
 sqlframe/base/window.py
 sqlframe/base/mixins/__init__.py
 sqlframe/base/mixins/catalog_mixins.py
+sqlframe/base/mixins/dataframe_mixins.py
 sqlframe/base/mixins/readwriter_mixins.py
 sqlframe/bigquery/__init__.py
 sqlframe/bigquery/catalog.py
@@ -163,10 +164,12 @@ tests/integration/engines/bigquery/test_bigquery_catalog.py
 tests/integration/engines/bigquery/test_bigquery_session.py
 tests/integration/engines/duck/__init__.py
 tests/integration/engines/duck/test_duckdb_catalog.py
+tests/integration/engines/duck/test_duckdb_dataframe.py
 tests/integration/engines/duck/test_duckdb_reader.py
 tests/integration/engines/duck/test_duckdb_session.py
 tests/integration/engines/postgres/__init__.py
 tests/integration/engines/postgres/test_postgres_catalog.py
+tests/integration/engines/postgres/test_postgres_dataframe.py
 tests/integration/engines/postgres/test_postgres_session.py
 tests/integration/engines/redshift/__init__.py
 tests/integration/engines/redshift/test_redshift_catalog.py
--- /dev/null
+++ sqlframe-1.2.0/tests/integration/engines/duck/test_duckdb_dataframe.py
@@ -0,0 +1,79 @@
+import datetime
+
+from sqlframe.base.types import Row
+from sqlframe.duckdb import DuckDBDataFrame, DuckDBSession
+
+pytest_plugins = ["tests.integration.fixtures"]
+
+
+def test_print_schema_basic(duckdb_employee: DuckDBDataFrame, capsys):
+    duckdb_employee.printSchema()
+    captured = capsys.readouterr()
+    assert (
+        captured.out.strip()
+        == """
+root
+ |-- employee_id: int (nullable = true)
+ |-- fname: text (nullable = true)
+ |-- lname: text (nullable = true)
+ |-- age: int (nullable = true)
+ |-- store_id: int (nullable = true)""".strip()
+    )
+
+
+def test_print_schema_nested(duckdb_session: DuckDBSession, capsys):
+    df = duckdb_session.createDataFrame(
+        [
+            (
+                1,
+                2.0,
+                "foo",
+                {"a": 1},
+                [Row(a=1, b=2)],
+                [1, 2, 3],
+                Row(a=1),
+                datetime.date(2022, 1, 1),
+                datetime.datetime(2022, 1, 1, 0, 0, 0),
+                datetime.datetime(2022, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
+                True,
+            )
+        ],
+        [
+            "bigint_col",
+            "double_col",
+            "string_col",
+            "map<string,bigint>_col",
+            "array<struct<a:bigint,b:bigint>>",
+            "array<bigint>_col",
+            "struct<a:bigint>_col",
+            "date_col",
+            "timestamp_col",
+            "timestamptz_col",
+            "boolean_col",
+        ],
+    )
+    df.printSchema()
+    captured = capsys.readouterr()
+    assert (
+        captured.out.strip()
+        == """
+root
+ |-- bigint_col: bigint (nullable = true)
+ |-- double_col: double (nullable = true)
+ |-- string_col: text (nullable = true)
+ |-- map<string,bigint>_col: map(text, bigint) (nullable = true)
+ |  |-- key: text (nullable = true)
+ |  |-- value: bigint (nullable = true)
+ |-- array<struct<a:bigint,b:bigint>>: struct(a bigint, b bigint)[] (nullable = true)
+ |  |-- element: struct(a bigint, b bigint) (nullable = true)
+ |  |  |-- a: bigint (nullable = true)
+ |  |  |-- b: bigint (nullable = true)
+ |-- array<bigint>_col: bigint[] (nullable = true)
+ |  |-- element: bigint (nullable = true)
+ |-- struct<a:bigint>_col: struct(a bigint) (nullable = true)
+ |  |-- a: bigint (nullable = true)
+ |-- date_col: date (nullable = true)
+ |-- timestamp_col: timestamp (nullable = true)
+ |-- timestamptz_col: timestamptz (nullable = true)
+ |-- boolean_col: boolean (nullable = true)""".strip()
+    )
--- /dev/null
+++ sqlframe-1.2.0/tests/integration/engines/postgres/test_postgres_dataframe.py
@@ -0,0 +1,64 @@
+import datetime
+
+from sqlframe.base.types import Row
+from sqlframe.duckdb import DuckDBDataFrame, DuckDBSession
+
+pytest_plugins = ["tests.integration.fixtures"]
+
+
+def test_print_schema_basic(postgres_employee: DuckDBDataFrame, capsys):
+    postgres_employee.printSchema()
+    captured = capsys.readouterr()
+    assert (
+        captured.out.strip()
+        == """
+root
+ |-- employee_id: int (nullable = true)
+ |-- fname: text (nullable = true)
+ |-- lname: text (nullable = true)
+ |-- age: int (nullable = true)
+ |-- store_id: int (nullable = true)""".strip()
+    )
+
+
+def test_print_schema_nested(postgres_session: DuckDBSession, capsys):
+    df = postgres_session.createDataFrame(
+        [
+            (
+                1,
+                2.0,
+                "foo",
+                [1, 2, 3],
+                datetime.date(2022, 1, 1),
+                datetime.datetime(2022, 1, 1, 0, 0, 0),
+                datetime.datetime(2022, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
+                True,
+            )
+        ],
+        [
+            "bigint_col",
+            "double_col",
+            "string_col",
+            "array<bigint>_col",
+            "date_col",
+            "timestamp_col",
+            "timestamptz_col",
+            "boolean_col",
+        ],
+    )
+    df.printSchema()
+    captured = capsys.readouterr()
+    # array does not include type
+    assert (
+        captured.out.strip()
+        == """
+root
+ |-- bigint_col: bigint (nullable = true)
+ |-- double_col: double precision (nullable = true)
+ |-- string_col: text (nullable = true)
+ |-- array<bigint>_col: array (nullable = true)
+ |-- date_col: date (nullable = true)
+ |-- timestamp_col: timestamp (nullable = true)
+ |-- timestamptz_col: timestamptz (nullable = true)
+ |-- boolean_col: boolean (nullable = true)""".strip()
+    )
--- sqlframe-1.1.2/tests/integration/engines/test_int_functions.py
+++ sqlframe-1.2.0/tests/integration/engines/test_int_functions.py
@@ -6,7 +6,6 @@ import typing as t
 from collections import Counter

 import pytest
-from pyspark.sql import DataFrame
 from pyspark.sql import SparkSession as PySparkSession
 from sqlglot import exp

@@ -175,7 +174,6 @@ def test_col(get_session_and_func, arg):
 )
 def test_typeof(get_session_and_func, get_types, arg, expected):
     session, typeof = get_session_and_func("typeof")
-    types = get_types(session)
     # If we just pass a struct in for values then Spark will automatically explode the struct into columns
     # it won't do this though if there is another column so that is why we include an ignore column
     df = session.createDataFrame([(1, arg)], schema=["ignore_col", "col"])
--- sqlframe-1.1.2/tests/unit/standalone/test_dataframe.py
+++ sqlframe-1.2.0/tests/unit/standalone/test_dataframe.py
@@ -48,7 +48,7 @@ def test_persist_storagelevel(standalone_employee: StandaloneDataFrame, compare_


 def test_with_column_duplicate_alias(standalone_employee: StandaloneDataFrame):
-    df = standalone_employee.withColumn("
+    df = standalone_employee.withColumn("fName", F.col("age").cast("string"))
     assert df.columns == ["employee_id", "fname", "lname", "age", "store_id"]
     # Make sure that the new columns is added with an alias to `fname`
     assert (
@@ -57,6 +57,25 @@ def test_with_column_duplicate_alias(standalone_employee: StandaloneDataFrame):
     )


+# https://github.com/eakmanrq/sqlframe/issues/19
+def test_with_column_dual_expression(standalone_employee: StandaloneDataFrame):
+    df1 = standalone_employee.withColumn("new_col1", standalone_employee.age)
+    df2 = df1.withColumn("new_col2", standalone_employee.store_id)
+    assert df2.columns == [
+        "employee_id",
+        "fname",
+        "lname",
+        "age",
+        "store_id",
+        "new_col1",
+        "new_col2",
+    ]
+    assert (
+        df2.sql(pretty=False)
+        == "SELECT `a1`.`employee_id` AS `employee_id`, CAST(`a1`.`fname` AS STRING) AS `fname`, CAST(`a1`.`lname` AS STRING) AS `lname`, `a1`.`age` AS `age`, `a1`.`store_id` AS `store_id`, `a1`.`age` AS `new_col1`, `a1`.`store_id` AS `new_col2` FROM VALUES (1, 'Jack', 'Shephard', 37, 1), (2, 'John', 'Locke', 65, 1), (3, 'Kate', 'Austen', 37, 2), (4, 'Claire', 'Littleton', 27, 2), (5, 'Hugo', 'Reyes', 29, 100) AS `a1`(`employee_id`, `fname`, `lname`, `age`, `store_id`)"
+    )
+
+
 def test_where_expr(standalone_employee: StandaloneDataFrame):
     df = standalone_employee.where("fname = 'Jack' AND age = 37")
     assert df.columns == ["employee_id", "fname", "lname", "age", "store_id"]