sqlframe 1.10.0__tar.gz → 1.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlframe-1.10.0 → sqlframe-1.12.0}/PKG-INFO +1 -1
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/bigquery.md +2 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/duckdb.md +1 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/postgres.md +1 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/snowflake.md +2 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/_version.py +2 -2
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/column.py +41 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/dataframe.py +77 -3
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/exceptions.py +12 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/function_alternatives.py +5 -7
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/functions.py +4 -2
- sqlframe-1.12.0/sqlframe/base/mixins/dataframe_mixins.py +54 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/types.py +12 -2
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/util.py +51 -0
- sqlframe-1.12.0/sqlframe/bigquery/dataframe.py +74 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/bigquery/functions.py +1 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/duckdb/dataframe.py +6 -15
- sqlframe-1.12.0/sqlframe/postgres/catalog.py +227 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/postgres/dataframe.py +6 -10
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/redshift/dataframe.py +3 -14
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/snowflake/dataframe.py +23 -13
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/spark/dataframe.py +25 -15
- sqlframe-1.12.0/sqlframe/testing/__init__.py +3 -0
- sqlframe-1.12.0/sqlframe/testing/utils.py +320 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe.egg-info/PKG-INFO +1 -1
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe.egg-info/SOURCES.txt +7 -0
- sqlframe-1.12.0/tests/integration/engines/bigquery/test_bigquery_dataframe.py +159 -0
- sqlframe-1.12.0/tests/integration/engines/duck/test_duckdb_dataframe.py +165 -0
- sqlframe-1.12.0/tests/integration/engines/postgres/test_postgres_dataframe.py +122 -0
- sqlframe-1.12.0/tests/integration/engines/snowflake/test_snowflake_dataframe.py +158 -0
- sqlframe-1.12.0/tests/integration/engines/spark/test_spark_dataframe.py +165 -0
- sqlframe-1.12.0/tests/integration/engines/test_engine_column.py +27 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/test_engine_dataframe.py +25 -19
- sqlframe-1.12.0/tests/integration/engines/test_int_testing.py +79 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/test_int_dataframe.py +10 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/unit/standalone/test_column.py +4 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/unit/standalone/test_dataframe.py +14 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/unit/standalone/test_functions.py +1 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/unit/standalone/test_types.py +9 -9
- sqlframe-1.12.0/tests/unit/test_util.py +73 -0
- sqlframe-1.10.0/sqlframe/base/mixins/dataframe_mixins.py +0 -63
- sqlframe-1.10.0/sqlframe/bigquery/dataframe.py +0 -54
- sqlframe-1.10.0/sqlframe/postgres/catalog.py +0 -107
- sqlframe-1.10.0/tests/integration/engines/duck/test_duckdb_dataframe.py +0 -79
- sqlframe-1.10.0/tests/integration/engines/postgres/test_postgres_dataframe.py +0 -64
- sqlframe-1.10.0/tests/unit/test_util.py +0 -26
- {sqlframe-1.10.0 → sqlframe-1.12.0}/.github/CODEOWNERS +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/.github/workflows/main.workflow.yaml +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/.github/workflows/publish.workflow.yaml +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/.gitignore +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/.pre-commit-config.yaml +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/.readthedocs.yaml +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/LICENSE +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/Makefile +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/README.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/blogs/add_chatgpt_support.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/blogs/images/add_chatgpt_support/adding_ai_to_meal.jpeg +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/blogs/images/add_chatgpt_support/hype_train.gif +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/blogs/images/add_chatgpt_support/marvin_paranoid_robot.gif +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/blogs/images/add_chatgpt_support/nonsense_sql.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/blogs/images/add_chatgpt_support/openai_full_rewrite.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/blogs/images/add_chatgpt_support/openai_replacing_cte_names.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/blogs/images/add_chatgpt_support/sqlglot_optimized_code.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/blogs/images/add_chatgpt_support/sunny_shake_head_no.gif +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/blogs/images/but_wait_theres_more.gif +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/blogs/images/cake.gif +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/blogs/images/you_get_pyspark_api.gif +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/blogs/sqlframe_universal_dataframe_api.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/configuration.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/docs/bigquery.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/docs/duckdb.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/docs/images/SF.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/docs/images/favicon.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/docs/images/favicon_old.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/docs/postgres.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/images/SF.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/images/favicon.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/images/favicon_old.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/index.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/requirements.txt +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/spark.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/standalone.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/docs/stylesheets/extra.css +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/mkdocs.yml +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/pytest.ini +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/renovate.json +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/setup.cfg +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/setup.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/LICENSE +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/_typing.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/decorators.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/mixins/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/mixins/catalog_mixins.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/mixins/readwriter_mixins.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/normalize.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/operations.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/readerwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/transforms.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/base/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/bigquery/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/bigquery/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/bigquery/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/bigquery/functions.pyi +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/bigquery/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/bigquery/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/bigquery/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/bigquery/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/bigquery/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/duckdb/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/duckdb/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/duckdb/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/duckdb/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/duckdb/functions.pyi +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/duckdb/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/duckdb/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/duckdb/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/duckdb/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/duckdb/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/postgres/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/postgres/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/postgres/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/postgres/functions.pyi +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/postgres/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/postgres/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/postgres/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/postgres/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/postgres/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/redshift/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/redshift/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/redshift/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/redshift/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/redshift/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/redshift/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/redshift/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/redshift/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/redshift/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/snowflake/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/snowflake/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/snowflake/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/snowflake/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/snowflake/functions.pyi +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/snowflake/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/snowflake/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/snowflake/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/snowflake/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/snowflake/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/spark/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/spark/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/spark/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/spark/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/spark/functions.pyi +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/spark/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/spark/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/spark/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/spark/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/spark/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/standalone/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/standalone/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/standalone/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/standalone/dataframe.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/standalone/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/standalone/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/standalone/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/standalone/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/standalone/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe/standalone/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe.egg-info/dependency_links.txt +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe.egg-info/requires.txt +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/sqlframe.egg-info/top_level.txt +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/common_fixtures.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/conftest.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/fixtures/employee.csv +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/fixtures/employee.json +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/fixtures/employee.parquet +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/fixtures/employee_extra_line.csv +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/bigquery/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/bigquery/test_bigquery_catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/bigquery/test_bigquery_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/duck/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/duck/test_duckdb_catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/duck/test_duckdb_reader.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/duck/test_duckdb_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/postgres/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/postgres/test_postgres_catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/postgres/test_postgres_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/redshift/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/redshift/test_redshift_catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/redshift/test_redshift_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/snowflake/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/snowflake/test_snowflake_catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/snowflake/test_snowflake_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/spark/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/spark/test_spark_catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/test_engine_reader.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/test_engine_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/test_engine_writer.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/engines/test_int_functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/fixtures.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/test_int_dataframe_stats.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/test_int_grouped_data.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/integration/test_int_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/unit/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/unit/standalone/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/unit/standalone/fixtures.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/unit/standalone/test_dataframe_writer.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/unit/standalone/test_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/unit/standalone/test_session_case_sensitivity.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.12.0}/tests/unit/standalone/test_window.py +0 -0

--- sqlframe-1.10.0/docs/bigquery.md
+++ sqlframe-1.12.0/docs/bigquery.md
@@ -217,8 +217,10 @@ See something that you would like to see supported? [Open an issue](https://gith
 * [na](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.na.html)
 * [orderBy](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.orderBy.html)
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
+* [printSchema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.printSchema.html)
 * [replace](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.replace.html)
 * [select](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.select.html)
+* [schema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.schema.html)
 * [show](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html)
 * Vertical Argument is not Supported
 * [sort](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.sort.html)

--- sqlframe-1.10.0/docs/duckdb.md
+++ sqlframe-1.12.0/docs/duckdb.md
@@ -192,6 +192,7 @@ See something that you would like to see supported? [Open an issue](https://gith
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
 * [printSchema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.printSchema.html)
 * [replace](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.replace.html)
+* [schema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.schema.html)
 * [select](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.select.html)
 * [show](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html)
 * Vertical Argument is not Supported

--- sqlframe-1.10.0/docs/postgres.md
+++ sqlframe-1.12.0/docs/postgres.md
@@ -203,6 +203,7 @@ See something that you would like to see supported? [Open an issue](https://gith
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
 * [printSchema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.printSchema.html)
 * [replace](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.replace.html)
+* [schema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.schema.html)
 * [select](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.select.html)
 * [show](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html)
 * Vertical Argument is not Supported

--- sqlframe-1.10.0/docs/snowflake.md
+++ sqlframe-1.12.0/docs/snowflake.md
@@ -212,7 +212,9 @@ See something that you would like to see supported? [Open an issue](https://gith
 * [na](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.na.html)
 * [orderBy](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.orderBy.html)
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
+* [printSchema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.printSchema.html)
 * [replace](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.replace.html)
+* [schema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.schema.html)
 * [select](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.select.html)
 * [show](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html)
 * Vertical Argument is not Supported
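
The doc updates above mark `printSchema` and `schema` as supported across the BigQuery, DuckDB, Postgres, and Snowflake pages. A minimal usage sketch, assuming the default in-memory DuckDB session (the column types in the output depend on the engine):

```python
from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()  # assumes the default in-memory DuckDB connection
df = session.createDataFrame([(1, "Jack")], ["id", "name"])

print(df.schema)  # a PySpark-style StructType, built from engine metadata
df.printSchema()  # prints a PySpark-style "root" tree (see dataframe.py below)
```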

--- sqlframe-1.10.0/sqlframe/base/column.py
+++ sqlframe-1.12.0/sqlframe/base/column.py
@@ -407,3 +407,44 @@ class Column:
         window_expression = window.expression.copy()
         window_expression.set("this", self.column_expression)
         return Column(window_expression)
+
+    def getItem(self, key: t.Any) -> Column:
+        """
+        An expression that gets an item at position ``ordinal`` out of a list,
+        or gets an item by key out of a dict.
+
+        .. versionadded:: 1.3.0
+
+        .. versionchanged:: 3.4.0
+            Supports Spark Connect.
+
+        Parameters
+        ----------
+        key
+            a literal value, or a :class:`Column` expression.
+            The result will only be true at a location if the item matches in the column.
+
+            .. deprecated:: 3.0.0
+                :class:`Column` as a parameter is deprecated.
+
+        Returns
+        -------
+        :class:`Column`
+            Column representing the item(s) got at position out of a list or by key out of a dict.
+
+        Examples
+        --------
+        >>> df = spark.createDataFrame([([1, 2], {"key": "value"})], ["l", "d"])
+        >>> df.select(df.l.getItem(0), df.d.getItem("key")).show()
+        +----+------+
+        |l[0]|d[key]|
+        +----+------+
+        |   1| value|
+        +----+------+
+        """
+        element_at = get_func_from_session("element_at")
+        lit = get_func_from_session("lit")
+        key = lit(key) if not isinstance(key, Column) else key
+        if isinstance(key.expression, exp.Literal) and key.expression.is_number:
+            key = key + lit(1)
+        return element_at(self, key)
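
A note on the implementation above: `getItem` is routed through `element_at`, which is 1-based, while `getItem` positions are 0-based, hence the `key + lit(1)` adjustment for numeric literal keys. A minimal sketch of the mapping, reusing the docstring's `df`:

```python
df.l.getItem(0)      # rewritten to element_at(l, 1): the first array element
df.d.getItem("key")  # non-numeric keys pass through to element_at unchanged
```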

--- sqlframe-1.10.0/sqlframe/base/dataframe.py
+++ sqlframe-1.12.0/sqlframe/base/dataframe.py
@@ -22,6 +22,7 @@ from sqlglot.optimizer.pushdown_projections import pushdown_projections
 from sqlglot.optimizer.qualify import qualify
 from sqlglot.optimizer.qualify_columns import quote_identifiers
 
+from sqlframe.base.catalog import Column as CatalogColumn
 from sqlframe.base.decorators import normalize
 from sqlframe.base.operations import Operation, operation
 from sqlframe.base.transforms import replace_id_value

--- sqlframe-1.10.0/sqlframe/base/dataframe.py
+++ sqlframe-1.12.0/sqlframe/base/dataframe.py
@@ -29,6 +30,7 @@ from sqlframe.base.util import (
     get_func_from_session,
     get_tables_from_expression_with_join,
     quote_preserving_alias_or_name,
+    sqlglot_to_spark,
     verify_openai_installed,
 )
 

--- sqlframe-1.10.0/sqlframe/base/dataframe.py
+++ sqlframe-1.12.0/sqlframe/base/dataframe.py
@@ -231,6 +233,10 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
     def __copy__(self):
         return self.copy()
 
+    @property
+    def _typed_columns(self) -> t.List[CatalogColumn]:
+        raise NotImplementedError
+
     @property
     def write(self) -> WRITER:
         return self.session._writer(self)

--- sqlframe-1.10.0/sqlframe/base/dataframe.py
+++ sqlframe-1.12.0/sqlframe/base/dataframe.py
@@ -293,7 +299,24 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         StructType([StructField('age', LongType(), True),
                     StructField('name', StringType(), True)])
         """
-
+        from sqlframe.base import types
+
+        try:
+            return types.StructType(
+                [
+                    types.StructField(
+                        c.name,
+                        sqlglot_to_spark(
+                            exp.DataType.build(c.dataType, dialect=self.session.output_dialect)
+                        ),
+                    )
+                    for c in self._typed_columns
+                ]
+            )
+        except NotImplementedError as e:
+            raise NotImplementedError(
+                "This engine does not support schema inference likely since it does not have an active connection."
+            ) from e
 
     def _replace_cte_names_with_hashes(self, expression: exp.Select):
         replacement_mapping = {}
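
With `_typed_columns` in place, `schema` can be computed for any connected engine: each column's engine-reported type string is parsed by sqlglot and converted to a PySpark-style type via the new `sqlglot_to_spark` helper (added to `util.py` later in this diff). A sketch matching the docstring context above:

```python
df = session.createDataFrame([(2, "Alice")], ["age", "name"])
df.schema
# StructType([StructField('age', LongType(), True),
#             StructField('name', StringType(), True)])
```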

--- sqlframe-1.10.0/sqlframe/base/dataframe.py
+++ sqlframe-1.12.0/sqlframe/base/dataframe.py
@@ -338,7 +361,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
 
         cols = self._ensure_list_of_columns(cols)
         normalize(self.session, expression or self.expression, cols)
-        return cols
+        return list(flatten([self._expand_star(col) for col in cols]))
 
     def _ensure_and_normalize_col(self, col):
         from sqlframe.base.column import Column

--- sqlframe-1.10.0/sqlframe/base/dataframe.py
+++ sqlframe-1.12.0/sqlframe/base/dataframe.py
@@ -491,6 +514,27 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
             select_expressions.append(expression_select_pair)  # type: ignore
         return select_expressions
 
+    def _expand_star(self, col: Column) -> t.List[Column]:
+        from sqlframe.base.column import Column
+
+        if isinstance(col.column_expression, exp.Star):
+            return self._get_outer_select_columns(self.expression)
+        elif (
+            isinstance(col.column_expression, exp.Column)
+            and isinstance(col.column_expression.this, exp.Star)
+            and col.column_expression.args.get("table")
+        ):
+            for cte in self.expression.ctes:
+                if cte.alias_or_name == col.column_expression.args["table"].this:
+                    return [
+                        Column.ensure_col(exp.column(x.column_alias_or_name, cte.alias_or_name))
+                        for x in self._get_outer_select_columns(cte)
+                    ]
+            raise ValueError(
+                f"Could not find table to expand star: {col.column_expression.args['table']}"
+            )
+        return [col]
+
     @t.overload
     def sql(
         self,
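
`_expand_star` is what lets `_ensure_and_normalize_cols` (changed above) return a flattened star expansion instead of the raw column list: a bare `*` expands to the outer select's columns, and a table-qualified `t.*` expands to the columns of the CTE named `t`. A hedged sketch, with `F` standing in for the engine's functions module:

```python
df.select("*")           # expanded to the full explicit column list
df.select(F.col("t.*"))  # expanded to the columns of CTE `t`,
                         # or ValueError if no CTE with that alias exists
```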

--- sqlframe-1.10.0/sqlframe/base/dataframe.py
+++ sqlframe-1.12.0/sqlframe/base/dataframe.py
@@ -1532,11 +1576,41 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         result = self.session._fetch_rows(sql)
         table = PrettyTable()
         if row := seq_get(result, 0):
-            table.field_names =
+            table.field_names = row._unique_field_names
         for row in result:
             table.add_row(list(row))
         print(table)
 
+    def printSchema(self, level: t.Optional[int] = None) -> None:
+        def print_schema(
+            column_name: str, column_type: exp.DataType, nullable: bool, current_level: int
+        ):
+            if level and current_level >= level:
+                return
+            if current_level > 0:
+                print(" | " * current_level, end="")
+            print(
+                f" |-- {column_name}: {column_type.sql(self.session.output_dialect).lower()} (nullable = {str(nullable).lower()})"
+            )
+            if column_type.this in (exp.DataType.Type.STRUCT, exp.DataType.Type.OBJECT):
+                for column_def in column_type.expressions:
+                    print_schema(column_def.name, column_def.args["kind"], True, current_level + 1)
+            if column_type.this == exp.DataType.Type.ARRAY:
+                for data_type in column_type.expressions:
+                    print_schema("element", data_type, True, current_level + 1)
+            if column_type.this == exp.DataType.Type.MAP:
+                print_schema("key", column_type.expressions[0], True, current_level + 1)
+                print_schema("value", column_type.expressions[1], True, current_level + 1)
+
+        print("root")
+        for column in self._typed_columns:
+            print_schema(
+                column.name,
+                exp.DataType.build(column.dataType, dialect=self.session.output_dialect),
+                column.nullable,
+                0,
+            )
+
     def toPandas(self) -> pd.DataFrame:
         sql_kwargs = dict(
             pretty=False, optimize=False, dialect=self.session.output_dialect, as_list=True
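
A sketch of the resulting `printSchema` output for a nested type; the type names below are illustrative only, since the actual strings come from `column_type.sql(...)` in the session's output dialect:

```python
df = session.createDataFrame([([1, 2], {"k": "v"})], ["l", "d"])
df.printSchema()
# root
#  |-- l: array (nullable = true)
#  |  |-- element: bigint (nullable = true)
#  |-- d: map (nullable = true)
#  |  |-- key: text (nullable = true)
#  |  |-- value: text (nullable = true)
```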

--- sqlframe-1.10.0/sqlframe/base/exceptions.py
+++ sqlframe-1.12.0/sqlframe/base/exceptions.py
@@ -12,3 +12,15 @@ class RowError(SQLFrameException):
 
 class TableSchemaError(SQLFrameException):
     pass
+
+
+class PandasDiffError(SQLFrameException):
+    pass
+
+
+class DataFrameDiffError(SQLFrameException):
+    pass
+
+
+class SchemaDiffError(SQLFrameException):
+    pass

--- sqlframe-1.10.0/sqlframe/base/function_alternatives.py
+++ sqlframe-1.12.0/sqlframe/base/function_alternatives.py
@@ -1135,13 +1135,11 @@ def array_intersect_using_intersection(col1: ColumnOrName, col2: ColumnOrName) -
 def element_at_using_brackets(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
     col_func = get_func_from_session("col")
     lit = get_func_from_session("lit")
-    #
-    if
-
-
-    return Column(
-        expression.Bracket(this=col_func(col).expression, expressions=[value_lit.expression])
-    )
+    # SQLGlot will auto add 1 to whatever we pass in for the brackets even though the value is already 1 based.
+    value = value if isinstance(value, Column) else lit(value)
+    if [x for x in value.expression.find_all(expression.Literal) if x.is_number]:
+        value = value - lit(1)
+    return Column(expression.Bracket(this=col_func(col).expression, expressions=[value.expression]))  # type: ignore
 
 
 def array_remove_using_filter(col: ColumnOrName, value: ColumnOrLiteral) -> Column:
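
The rewrite above keeps `element_at` semantics 1-based for bracket-syntax engines: per the diff's own comment, sqlglot adds 1 when generating brackets, so numeric literals are pre-decremented to compensate. Roughly:

```python
F.element_at(F.col("xs"), 2)
# builds Bracket(xs, 1) after the -1 adjustment; sqlglot then adds 1 back
# when generating bracket syntax, so the rendered SQL indexes the intended
# second element in a 1-based dialect such as DuckDB
```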

--- sqlframe-1.10.0/sqlframe/base/functions.py
+++ sqlframe-1.12.0/sqlframe/base/functions.py
@@ -1923,7 +1923,9 @@ def call_function(funcName: str, *cols: ColumnOrName) -> Column:
     cols = ensure_list(cols)  # type: ignore
     if len(cols) > 1:
         return Column.invoke_anonymous_function(cols[0], funcName, *cols[1:])
-
+    elif len(cols) == 1:
+        return Column.invoke_anonymous_function(cols[0], funcName)
+    return Column.invoke_anonymous_function(None, funcName)
 
 
 # @meta(unsupported_engines="*")
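
Per the hunk above, `call_function` previously returned only for the multi-argument case; the new branches make single- and zero-argument calls work. A hedged usage sketch (assuming the engine functions module re-exports the base functions, and that the named SQL functions exist in the target engine):

```python
F.call_function("concat", F.col("a"), F.col("b"))  # multi-arg: unchanged
F.call_function("lower", F.col("a"))               # single-arg: now handled
F.call_function("pi")                              # zero-arg: now handled
```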

--- sqlframe-1.10.0/sqlframe/base/functions.py
+++ sqlframe-1.12.0/sqlframe/base/functions.py
@@ -2028,7 +2030,7 @@ def character_length(str: ColumnOrName) -> Column:
     return Column.invoke_anonymous_function(str, "character_length")
 
 
-@meta(
+@meta()
 def contains(left: ColumnOrName, right: ColumnOrName) -> Column:
     return Column.invoke_anonymous_function(left, "contains", right)
 

--- /dev/null
+++ sqlframe-1.12.0/sqlframe/base/mixins/dataframe_mixins.py
@@ -0,0 +1,54 @@
+import logging
+import sys
+import typing as t
+
+from sqlglot import exp
+
+from sqlframe.base.catalog import Column
+from sqlframe.base.dataframe import (
+    GROUP_DATA,
+    NA,
+    SESSION,
+    STAT,
+    WRITER,
+    _BaseDataFrame,
+)
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+
+
+logger = logging.getLogger(__name__)
+
+
+class NoCachePersistSupportMixin(_BaseDataFrame, t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
+    def cache(self) -> Self:
+        logger.warning("This engine does not support caching. Ignoring cache() call.")
+        return self
+
+    def persist(self) -> Self:
+        logger.warning("This engine does not support persist. Ignoring persist() call.")
+        return self
+
+
+class TypedColumnsFromTempViewMixin(
+    _BaseDataFrame, t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]
+):
+    @property
+    def _typed_columns(self) -> t.List[Column]:
+        table = exp.to_table(self.session._random_id)
+        self.session._execute(
+            exp.Create(
+                this=table,
+                kind="VIEW",
+                replace=True,
+                properties=exp.Properties(expressions=[exp.TemporaryProperty()]),
+                expression=self.expression,
+            )
+        )
+
+        return self.session.catalog.listColumns(
+            table.sql(dialect=self.session.input_dialect), include_temp=True
+        )

--- sqlframe-1.10.0/sqlframe/base/types.py
+++ sqlframe-1.12.0/sqlframe/base/types.py
@@ -22,14 +22,14 @@ class DataType:
         return not self.__eq__(other)
 
     def __str__(self) -> str:
-        return self.
+        return self.simpleString()
 
     @classmethod
     def typeName(cls) -> str:
         return cls.__name__[:-4].lower()
 
     def simpleString(self) -> str:
-        return
+        return self.typeName()
 
     def jsonValue(self) -> t.Union[str, t.Dict[str, t.Any]]:
         return str(self)

--- sqlframe-1.10.0/sqlframe/base/types.py
+++ sqlframe-1.12.0/sqlframe/base/types.py
@@ -416,3 +416,13 @@ class Row(tuple):
             )
         else:
             return "<Row(%s)>" % ", ".join(repr(field) for field in self)
+
+    # SQLFrame Specific
+    @property
+    def _unique_field_names(self) -> t.List[str]:
+        fields = []
+        for i, field in enumerate(self.__fields__):
+            if field in fields:
+                field = field + "_" + str(i)
+            fields.append(field)
+        return fields
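
`_unique_field_names` exists so that `show()` (see the `dataframe.py` hunk above) can hand PrettyTable a duplicate-free header row, since PrettyTable rejects repeated column names. The dedup rule, illustrated:

```python
# Applied to a Row's __fields__:
#   ["a", "b"]      -> ["a", "b"]        (no change)
#   ["a", "b", "a"] -> ["a", "b", "a_2"] (duplicate suffixed with its index)
```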

--- sqlframe-1.10.0/sqlframe/base/util.py
+++ sqlframe-1.12.0/sqlframe/base/util.py
@@ -291,3 +291,54 @@ def quote_preserving_alias_or_name(col: t.Union[exp.Column, exp.Alias]) -> str:
         return col.sql(dialect=_BaseSession().input_dialect)
     # We may get things like `Null()` expression or maybe literals so we just return the alias or name in those cases
     return col.alias_or_name
+
+
+def sqlglot_to_spark(sqlglot_dtype: exp.DataType) -> types.DataType:
+    from sqlframe.base import types
+
+    primitive_mapping = {
+        exp.DataType.Type.VARCHAR: types.VarcharType,
+        exp.DataType.Type.CHAR: types.CharType,
+        exp.DataType.Type.TEXT: types.StringType,
+        exp.DataType.Type.BINARY: types.BinaryType,
+        exp.DataType.Type.BOOLEAN: types.BooleanType,
+        exp.DataType.Type.INT: types.IntegerType,
+        exp.DataType.Type.BIGINT: types.LongType,
+        exp.DataType.Type.SMALLINT: types.ShortType,
+        exp.DataType.Type.FLOAT: types.FloatType,
+        exp.DataType.Type.DOUBLE: types.DoubleType,
+        exp.DataType.Type.DECIMAL: types.DecimalType,
+        exp.DataType.Type.TIMESTAMP: types.TimestampType,
+        exp.DataType.Type.TIMESTAMPTZ: types.TimestampType,
+        exp.DataType.Type.TIMESTAMPLTZ: types.TimestampType,
+        exp.DataType.Type.TIMESTAMPNTZ: types.TimestampType,
+        exp.DataType.Type.DATE: types.DateType,
+    }
+    if sqlglot_dtype.this in primitive_mapping:
+        pyspark_class = primitive_mapping[sqlglot_dtype.this]
+        if issubclass(pyspark_class, types.DataTypeWithLength) and sqlglot_dtype.expressions:
+            return pyspark_class(length=int(sqlglot_dtype.expressions[0].this.this))
+        elif issubclass(pyspark_class, types.DecimalType) and sqlglot_dtype.expressions:
+            return pyspark_class(
+                precision=int(sqlglot_dtype.expressions[0].this.this),
+                scale=int(sqlglot_dtype.expressions[1].this.this),
+            )
+        return pyspark_class()
+    if sqlglot_dtype.this == exp.DataType.Type.ARRAY:
+        return types.ArrayType(sqlglot_to_spark(sqlglot_dtype.expressions[0]))
+    elif sqlglot_dtype.this == exp.DataType.Type.MAP:
+        return types.MapType(
+            sqlglot_to_spark(sqlglot_dtype.expressions[0]),
+            sqlglot_to_spark(sqlglot_dtype.expressions[1]),
+        )
+    elif sqlglot_dtype.this in (exp.DataType.Type.STRUCT, exp.DataType.Type.OBJECT):
+        return types.StructType(
+            [
+                types.StructField(
+                    name=field.this.alias_or_name,
+                    dataType=sqlglot_to_spark(field.args["kind"]),
+                )
+                for field in sqlglot_dtype.expressions
+            ]
+        )
+    raise NotImplementedError(f"Unsupported data type: {sqlglot_dtype}")
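
`sqlglot_to_spark` converts a parsed sqlglot type into sqlframe's PySpark-style types, recursing through arrays, maps, and structs. A small sketch (the result comments assume PySpark-style reprs and default nullability):

```python
from sqlglot import exp
from sqlframe.base.util import sqlglot_to_spark

sqlglot_to_spark(exp.DataType.build("DECIMAL(10, 2)"))  # DecimalType(10, 2)
sqlglot_to_spark(exp.DataType.build("ARRAY<BIGINT>"))   # ArrayType(LongType(), ...)
sqlglot_to_spark(exp.DataType.build("STRUCT<a INT>"))   # StructType([StructField('a', IntegerType(), ...)])
```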

--- /dev/null
+++ sqlframe-1.12.0/sqlframe/bigquery/dataframe.py
@@ -0,0 +1,74 @@
+from __future__ import annotations
+
+import logging
+import typing as t
+
+from sqlframe.base.catalog import Column as CatalogColumn
+from sqlframe.base.dataframe import (
+    _BaseDataFrame,
+    _BaseDataFrameNaFunctions,
+    _BaseDataFrameStatFunctions,
+)
+from sqlframe.base.mixins.dataframe_mixins import NoCachePersistSupportMixin
+from sqlframe.bigquery.group import BigQueryGroupedData
+
+if t.TYPE_CHECKING:
+    from sqlframe.bigquery.readwriter import BigQueryDataFrameWriter
+    from sqlframe.bigquery.session import BigQuerySession
+
+
+logger = logging.getLogger(__name__)
+
+
+class BigQueryDataFrameNaFunctions(_BaseDataFrameNaFunctions["BigQueryDataFrame"]):
+    pass
+
+
+class BigQueryDataFrameStatFunctions(_BaseDataFrameStatFunctions["BigQueryDataFrame"]):
+    pass
+
+
+class BigQueryDataFrame(
+    NoCachePersistSupportMixin,
+    _BaseDataFrame[
+        "BigQuerySession",
+        "BigQueryDataFrameWriter",
+        "BigQueryDataFrameNaFunctions",
+        "BigQueryDataFrameStatFunctions",
+        "BigQueryGroupedData",
+    ],
+):
+    _na = BigQueryDataFrameNaFunctions
+    _stat = BigQueryDataFrameStatFunctions
+    _group_data = BigQueryGroupedData
+
+    @property
+    def _typed_columns(self) -> t.List[CatalogColumn]:
+        from google.cloud import bigquery
+
+        def field_to_column(field: bigquery.SchemaField) -> CatalogColumn:
+            if field.field_type == "RECORD":
+                data_type = "STRUCT<"
+                for subfield in field.fields:
+                    column = field_to_column(subfield)
+                    data_type += f"{column.name} {column.dataType},"
+                data_type += ">"
+            elif field.field_type == "INTEGER":
+                data_type = "INT64"
+            else:
+                data_type = field.field_type
+            if field.mode == "REPEATED":
+                data_type = f"ARRAY<{data_type}>"
+            return CatalogColumn(
+                name=field.name,
+                dataType=data_type,
+                nullable=field.is_nullable,
+                description=None,
+                isPartition=False,
+                isBucket=False,
+            )
+
+        job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
+        sql = self.session._to_sql(self.expression)
+        query_job = self.session._client.query(sql, job_config=job_config)
+        return [field_to_column(field) for field in query_job.schema]
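
`BigQueryDataFrame._typed_columns` resolves types with a dry-run query job, reading the schema BigQuery computes for the statement without executing it. The same trick in isolation, using only the google-cloud-bigquery client (a sketch; field names and output are illustrative):

```python
from google.cloud import bigquery

client = bigquery.Client()
job = client.query(
    "SELECT 1 AS id, [1, 2] AS xs",
    job_config=bigquery.QueryJobConfig(dry_run=True, use_query_cache=False),
)
for field in job.schema:
    # e.g. id INTEGER NULLABLE / xs INTEGER REPEATED
    print(field.name, field.field_type, field.mode)
```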

--- sqlframe-1.10.0/sqlframe/bigquery/functions.py
+++ sqlframe-1.12.0/sqlframe/bigquery/functions.py
@@ -52,6 +52,7 @@ from sqlframe.base.function_alternatives import ( # noqa
     make_date_from_date_func as make_date,
     to_date_from_timestamp as to_date,
     last_day_with_cast as last_day,
+    sha1_force_sha1_and_to_hex as sha,
     sha1_force_sha1_and_to_hex as sha1,
     hash_from_farm_fingerprint as hash,
     base64_from_blob as base64,

--- sqlframe-1.10.0/sqlframe/duckdb/dataframe.py
+++ sqlframe-1.12.0/sqlframe/duckdb/dataframe.py
@@ -9,14 +9,12 @@ from sqlframe.base.dataframe import (
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
-from sqlframe.base.mixins.dataframe_mixins import
+from sqlframe.base.mixins.dataframe_mixins import (
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
+)
 from sqlframe.duckdb.group import DuckDBGroupedData
 
-if sys.version_info >= (3, 11):
-    from typing import Self
-else:
-    from typing_extensions import Self
-
 if t.TYPE_CHECKING:
     from sqlframe.duckdb.session import DuckDBSession  # noqa
     from sqlframe.duckdb.readwriter import DuckDBDataFrameWriter  # noqa

--- sqlframe-1.10.0/sqlframe/duckdb/dataframe.py
+++ sqlframe-1.12.0/sqlframe/duckdb/dataframe.py
@@ -35,7 +33,8 @@ class DuckDBDataFrameStatFunctions(_BaseDataFrameStatFunctions["DuckDBDataFrame"
 
 
 class DuckDBDataFrame(
-
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
     _BaseDataFrame[
         "DuckDBSession",
         "DuckDBDataFrameWriter",

--- sqlframe-1.10.0/sqlframe/duckdb/dataframe.py
+++ sqlframe-1.12.0/sqlframe/duckdb/dataframe.py
@@ -47,11 +46,3 @@ class DuckDBDataFrame(
     _na = DuckDBDataFrameNaFunctions
     _stat = DuckDBDataFrameStatFunctions
     _group_data = DuckDBGroupedData
-
-    def cache(self) -> Self:
-        logger.warning("DuckDB does not support caching. Ignoring cache() call.")
-        return self
-
-    def persist(self) -> Self:
-        logger.warning("DuckDB does not support persist. Ignoring persist() call.")
-        return self