sqlframe 1.10.0.tar.gz → 1.11.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlframe-1.10.0 → sqlframe-1.11.0}/PKG-INFO +1 -1
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/bigquery.md +2 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/duckdb.md +1 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/postgres.md +1 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/snowflake.md +2 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/_version.py +2 -2
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/dataframe.py +54 -1
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/exceptions.py +12 -0
- sqlframe-1.11.0/sqlframe/base/mixins/dataframe_mixins.py +54 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/types.py +2 -2
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/util.py +51 -0
- sqlframe-1.11.0/sqlframe/bigquery/dataframe.py +74 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/dataframe.py +6 -15
- sqlframe-1.11.0/sqlframe/postgres/catalog.py +227 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/dataframe.py +6 -10
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/dataframe.py +3 -14
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/dataframe.py +23 -13
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/dataframe.py +25 -15
- sqlframe-1.11.0/sqlframe/testing/__init__.py +3 -0
- sqlframe-1.11.0/sqlframe/testing/utils.py +320 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe.egg-info/PKG-INFO +1 -1
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe.egg-info/SOURCES.txt +6 -0
- sqlframe-1.11.0/tests/integration/engines/bigquery/test_bigquery_dataframe.py +159 -0
- sqlframe-1.11.0/tests/integration/engines/duck/test_duckdb_dataframe.py +165 -0
- sqlframe-1.11.0/tests/integration/engines/postgres/test_postgres_dataframe.py +122 -0
- sqlframe-1.11.0/tests/integration/engines/snowflake/test_snowflake_dataframe.py +158 -0
- sqlframe-1.11.0/tests/integration/engines/spark/test_spark_dataframe.py +165 -0
- sqlframe-1.11.0/tests/integration/engines/test_int_testing.py +79 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/test_types.py +9 -9
- sqlframe-1.11.0/tests/unit/test_util.py +73 -0
- sqlframe-1.10.0/sqlframe/base/mixins/dataframe_mixins.py +0 -63
- sqlframe-1.10.0/sqlframe/bigquery/dataframe.py +0 -54
- sqlframe-1.10.0/sqlframe/postgres/catalog.py +0 -107
- sqlframe-1.10.0/tests/integration/engines/duck/test_duckdb_dataframe.py +0 -79
- sqlframe-1.10.0/tests/integration/engines/postgres/test_postgres_dataframe.py +0 -64
- sqlframe-1.10.0/tests/unit/test_util.py +0 -26
- {sqlframe-1.10.0 → sqlframe-1.11.0}/.github/CODEOWNERS +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/.github/workflows/main.workflow.yaml +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/.github/workflows/publish.workflow.yaml +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/.gitignore +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/.pre-commit-config.yaml +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/.readthedocs.yaml +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/LICENSE +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/Makefile +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/README.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/add_chatgpt_support.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/add_chatgpt_support/adding_ai_to_meal.jpeg +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/add_chatgpt_support/hype_train.gif +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/add_chatgpt_support/marvin_paranoid_robot.gif +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/add_chatgpt_support/nonsense_sql.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/add_chatgpt_support/openai_full_rewrite.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/add_chatgpt_support/openai_replacing_cte_names.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/add_chatgpt_support/sqlglot_optimized_code.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/add_chatgpt_support/sunny_shake_head_no.gif +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/but_wait_theres_more.gif +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/cake.gif +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/you_get_pyspark_api.gif +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/sqlframe_universal_dataframe_api.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/configuration.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/docs/bigquery.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/docs/duckdb.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/docs/images/SF.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/docs/images/favicon.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/docs/images/favicon_old.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/docs/postgres.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/images/SF.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/images/favicon.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/images/favicon_old.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/index.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/requirements.txt +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/spark.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/standalone.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/stylesheets/extra.css +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/mkdocs.yml +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/pytest.ini +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/renovate.json +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/setup.cfg +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/setup.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/LICENSE +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/_typing.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/decorators.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/function_alternatives.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/mixins/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/mixins/catalog_mixins.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/mixins/readwriter_mixins.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/normalize.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/operations.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/readerwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/transforms.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/functions.pyi +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/functions.pyi +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/functions.pyi +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/functions.pyi +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/functions.pyi +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/dataframe.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe.egg-info/dependency_links.txt +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe.egg-info/requires.txt +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe.egg-info/top_level.txt +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/common_fixtures.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/conftest.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/fixtures/employee.csv +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/fixtures/employee.json +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/fixtures/employee.parquet +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/fixtures/employee_extra_line.csv +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/bigquery/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/bigquery/test_bigquery_catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/bigquery/test_bigquery_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/duck/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/duck/test_duckdb_catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/duck/test_duckdb_reader.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/duck/test_duckdb_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/postgres/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/postgres/test_postgres_catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/postgres/test_postgres_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/redshift/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/redshift/test_redshift_catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/redshift/test_redshift_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/snowflake/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/snowflake/test_snowflake_catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/snowflake/test_snowflake_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/spark/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/spark/test_spark_catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/test_engine_dataframe.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/test_engine_reader.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/test_engine_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/test_engine_writer.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/test_int_functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/fixtures.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/test_int_dataframe.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/test_int_dataframe_stats.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/test_int_grouped_data.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/test_int_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/fixtures.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/test_column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/test_dataframe.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/test_dataframe_writer.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/test_functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/test_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/test_session_case_sensitivity.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/test_window.py +0 -0
docs/bigquery.md

@@ -217,8 +217,10 @@ See something that you would like to see supported? [Open an issue](https://gith
 * [na](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.na.html)
 * [orderBy](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.orderBy.html)
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
+* [printSchema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.printSchema.html)
 * [replace](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.replace.html)
 * [select](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.select.html)
+* [schema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.schema.html)
 * [show](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html)
 * Vertical Argument is not Supported
 * [sort](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.sort.html)
docs/duckdb.md

@@ -192,6 +192,7 @@ See something that you would like to see supported? [Open an issue](https://gith
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
 * [printSchema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.printSchema.html)
 * [replace](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.replace.html)
+* [schema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.schema.html)
 * [select](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.select.html)
 * [show](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html)
 * Vertical Argument is not Supported
docs/postgres.md

@@ -203,6 +203,7 @@ See something that you would like to see supported? [Open an issue](https://gith
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
 * [printSchema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.printSchema.html)
 * [replace](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.replace.html)
+* [schema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.schema.html)
 * [select](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.select.html)
 * [show](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html)
 * Vertical Argument is not Supported
docs/snowflake.md

@@ -212,7 +212,9 @@ See something that you would like to see supported? [Open an issue](https://gith
 * [na](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.na.html)
 * [orderBy](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.orderBy.html)
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
+* [printSchema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.printSchema.html)
 * [replace](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.replace.html)
+* [schema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.schema.html)
 * [select](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.select.html)
 * [show](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html)
 * Vertical Argument is not Supported
sqlframe/base/dataframe.py

@@ -22,6 +22,7 @@ from sqlglot.optimizer.pushdown_projections import pushdown_projections
 from sqlglot.optimizer.qualify import qualify
 from sqlglot.optimizer.qualify_columns import quote_identifiers
 
+from sqlframe.base.catalog import Column as CatalogColumn
 from sqlframe.base.decorators import normalize
 from sqlframe.base.operations import Operation, operation
 from sqlframe.base.transforms import replace_id_value
sqlframe/base/dataframe.py

@@ -29,6 +30,7 @@ from sqlframe.base.util import (
     get_func_from_session,
     get_tables_from_expression_with_join,
     quote_preserving_alias_or_name,
+    sqlglot_to_spark,
     verify_openai_installed,
 )
 
sqlframe/base/dataframe.py

@@ -231,6 +233,10 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
     def __copy__(self):
         return self.copy()
 
+    @property
+    def _typed_columns(self) -> t.List[CatalogColumn]:
+        raise NotImplementedError
+
     @property
     def write(self) -> WRITER:
         return self.session._writer(self)
sqlframe/base/dataframe.py

@@ -293,7 +299,24 @@
         StructType([StructField('age', LongType(), True),
                     StructField('name', StringType(), True)])
         """
-
+        from sqlframe.base import types
+
+        try:
+            return types.StructType(
+                [
+                    types.StructField(
+                        c.name,
+                        sqlglot_to_spark(
+                            exp.DataType.build(c.dataType, dialect=self.session.output_dialect)
+                        ),
+                    )
+                    for c in self._typed_columns
+                ]
+            )
+        except NotImplementedError as e:
+            raise NotImplementedError(
+                "This engine does not support schema inference likely since it does not have an active connection."
+            ) from e
 
     def _replace_cte_names_with_hashes(self, expression: exp.Select):
         replacement_mapping = {}
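With an engine that can resolve `_typed_columns`, `DataFrame.schema` now returns a concrete `StructType` instead of raising. A minimal sketch using DuckDB (no credentials required); it assumes `createDataFrame` accepts PySpark-style rows plus column names, and the exact field types depend on the engine's output dialect:

```python
from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()  # in-memory DuckDB connection
df = session.createDataFrame([(14, "Tom"), (23, "Alice")], schema=["age", "name"])

# Schema is inferred from the engine's column metadata via sqlglot_to_spark().
print(df.schema)
# Roughly: StructType([StructField('age', ...), StructField('name', ...)])
```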
sqlframe/base/dataframe.py

@@ -1537,6 +1560,36 @@
                 table.add_row(list(row))
         print(table)
 
+    def printSchema(self, level: t.Optional[int] = None) -> None:
+        def print_schema(
+            column_name: str, column_type: exp.DataType, nullable: bool, current_level: int
+        ):
+            if level and current_level >= level:
+                return
+            if current_level > 0:
+                print(" | " * current_level, end="")
+            print(
+                f" |-- {column_name}: {column_type.sql(self.session.output_dialect).lower()} (nullable = {str(nullable).lower()})"
+            )
+            if column_type.this in (exp.DataType.Type.STRUCT, exp.DataType.Type.OBJECT):
+                for column_def in column_type.expressions:
+                    print_schema(column_def.name, column_def.args["kind"], True, current_level + 1)
+            if column_type.this == exp.DataType.Type.ARRAY:
+                for data_type in column_type.expressions:
+                    print_schema("element", data_type, True, current_level + 1)
+            if column_type.this == exp.DataType.Type.MAP:
+                print_schema("key", column_type.expressions[0], True, current_level + 1)
+                print_schema("value", column_type.expressions[1], True, current_level + 1)
+
+        print("root")
+        for column in self._typed_columns:
+            print_schema(
+                column.name,
+                exp.DataType.build(column.dataType, dialect=self.session.output_dialect),
+                column.nullable,
+                0,
+            )
+
     def toPandas(self) -> pd.DataFrame:
         sql_kwargs = dict(
             pretty=False, optimize=False, dialect=self.session.output_dialect, as_list=True
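`printSchema` mirrors PySpark's output: a `root` line followed by one ` |-- name: type (nullable = ...)` line per column, recursing into struct, array, and map children. Continuing the DuckDB sketch above, the output is approximately:

```python
df.printSchema()
# root
#  |-- age: bigint (nullable = true)
#  |-- name: text (nullable = true)
# Type spellings come from the engine's output dialect, so they differ per engine.
```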
sqlframe/base/exceptions.py

@@ -12,3 +12,15 @@ class RowError(SQLFrameException):
 
 class TableSchemaError(SQLFrameException):
     pass
+
+
+class PandasDiffError(SQLFrameException):
+    pass
+
+
+class DataFrameDiffError(SQLFrameException):
+    pass
+
+
+class SchemaDiffError(SQLFrameException):
+    pass
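These exception types line up with the new `sqlframe/testing` package listed in the file summary above; its body is not included in this excerpt, so the snippet below is only a hypothetical illustration of how a schema comparison might surface `SchemaDiffError` (the helper name and signature are illustrative, not the package's actual API):

```python
from sqlframe.base.exceptions import SchemaDiffError


def assert_same_schema(actual, expected) -> None:
    # Hypothetical helper: relies on the DataFrame.schema property added above
    # and on DataType equality (see the types.py hunk below).
    if actual.schema != expected.schema:
        raise SchemaDiffError(f"Schemas differ: {actual.schema} != {expected.schema}")
```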
sqlframe/base/mixins/dataframe_mixins.py (new file)

@@ -0,0 +1,54 @@
+import logging
+import sys
+import typing as t
+
+from sqlglot import exp
+
+from sqlframe.base.catalog import Column
+from sqlframe.base.dataframe import (
+    GROUP_DATA,
+    NA,
+    SESSION,
+    STAT,
+    WRITER,
+    _BaseDataFrame,
+)
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+
+
+logger = logging.getLogger(__name__)
+
+
+class NoCachePersistSupportMixin(_BaseDataFrame, t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
+    def cache(self) -> Self:
+        logger.warning("This engine does not support caching. Ignoring cache() call.")
+        return self
+
+    def persist(self) -> Self:
+        logger.warning("This engine does not support persist. Ignoring persist() call.")
+        return self
+
+
+class TypedColumnsFromTempViewMixin(
+    _BaseDataFrame, t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]
+):
+    @property
+    def _typed_columns(self) -> t.List[Column]:
+        table = exp.to_table(self.session._random_id)
+        self.session._execute(
+            exp.Create(
+                this=table,
+                kind="VIEW",
+                replace=True,
+                properties=exp.Properties(expressions=[exp.TemporaryProperty()]),
+                expression=self.expression,
+            )
+        )
+
+        return self.session.catalog.listColumns(
+            table.sql(dialect=self.session.input_dialect), include_temp=True
+        )
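The DuckDB, Postgres, and BigQuery DataFrames further down adopt these mixins. A small sketch of the `NoCachePersistSupportMixin` behavior using DuckDB (assuming `createDataFrame` accepts PySpark-style rows and column names):

```python
import logging

from sqlframe.duckdb import DuckDBSession

logging.basicConfig()  # so the mixin's warnings are visible
session = DuckDBSession()
df = session.createDataFrame([(1, "Jack")], schema=["id", "name"])

# Both calls log "This engine does not support ..." and return the frame unchanged.
assert df.cache() is df
assert df.persist() is df
```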
sqlframe/base/types.py

@@ -22,14 +22,14 @@ class DataType:
         return not self.__eq__(other)
 
     def __str__(self) -> str:
-        return self.
+        return self.simpleString()
 
     @classmethod
     def typeName(cls) -> str:
         return cls.__name__[:-4].lower()
 
     def simpleString(self) -> str:
-        return
+        return self.typeName()
 
     def jsonValue(self) -> t.Union[str, t.Dict[str, t.Any]]:
         return str(self)
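With this fix, `str()` delegates to `simpleString()`, which falls back to `typeName()` (the class name minus the `Type` suffix, lowercased). A quick check against types that do not override `simpleString()`:

```python
from sqlframe.base.types import BooleanType, StringType

# typeName() lowercases the class name and drops the trailing "Type".
assert BooleanType().typeName() == "boolean"
# __str__ now routes through simpleString().
assert str(StringType()) == StringType().simpleString()
```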
sqlframe/base/util.py

@@ -291,3 +291,54 @@ def quote_preserving_alias_or_name(col: t.Union[exp.Column, exp.Alias]) -> str:
         return col.sql(dialect=_BaseSession().input_dialect)
     # We may get things like `Null()` expression or maybe literals so we just return the alias or name in those cases
     return col.alias_or_name
+
+
+def sqlglot_to_spark(sqlglot_dtype: exp.DataType) -> types.DataType:
+    from sqlframe.base import types
+
+    primitive_mapping = {
+        exp.DataType.Type.VARCHAR: types.VarcharType,
+        exp.DataType.Type.CHAR: types.CharType,
+        exp.DataType.Type.TEXT: types.StringType,
+        exp.DataType.Type.BINARY: types.BinaryType,
+        exp.DataType.Type.BOOLEAN: types.BooleanType,
+        exp.DataType.Type.INT: types.IntegerType,
+        exp.DataType.Type.BIGINT: types.LongType,
+        exp.DataType.Type.SMALLINT: types.ShortType,
+        exp.DataType.Type.FLOAT: types.FloatType,
+        exp.DataType.Type.DOUBLE: types.DoubleType,
+        exp.DataType.Type.DECIMAL: types.DecimalType,
+        exp.DataType.Type.TIMESTAMP: types.TimestampType,
+        exp.DataType.Type.TIMESTAMPTZ: types.TimestampType,
+        exp.DataType.Type.TIMESTAMPLTZ: types.TimestampType,
+        exp.DataType.Type.TIMESTAMPNTZ: types.TimestampType,
+        exp.DataType.Type.DATE: types.DateType,
+    }
+    if sqlglot_dtype.this in primitive_mapping:
+        pyspark_class = primitive_mapping[sqlglot_dtype.this]
+        if issubclass(pyspark_class, types.DataTypeWithLength) and sqlglot_dtype.expressions:
+            return pyspark_class(length=int(sqlglot_dtype.expressions[0].this.this))
+        elif issubclass(pyspark_class, types.DecimalType) and sqlglot_dtype.expressions:
+            return pyspark_class(
+                precision=int(sqlglot_dtype.expressions[0].this.this),
+                scale=int(sqlglot_dtype.expressions[1].this.this),
+            )
+        return pyspark_class()
+    if sqlglot_dtype.this == exp.DataType.Type.ARRAY:
+        return types.ArrayType(sqlglot_to_spark(sqlglot_dtype.expressions[0]))
+    elif sqlglot_dtype.this == exp.DataType.Type.MAP:
+        return types.MapType(
+            sqlglot_to_spark(sqlglot_dtype.expressions[0]),
+            sqlglot_to_spark(sqlglot_dtype.expressions[1]),
+        )
+    elif sqlglot_dtype.this in (exp.DataType.Type.STRUCT, exp.DataType.Type.OBJECT):
+        return types.StructType(
+            [
+                types.StructField(
+                    name=field.this.alias_or_name,
+                    dataType=sqlglot_to_spark(field.args["kind"]),
+                )
+                for field in sqlglot_dtype.expressions
+            ]
+        )
+    raise NotImplementedError(f"Unsupported data type: {sqlglot_dtype}")
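`sqlglot_to_spark` converts a sqlglot `DataType` expression into sqlframe's PySpark-style types, recursing through ARRAY, MAP, and STRUCT. A small sketch driving it directly via `exp.DataType.build` (default sqlglot dialect assumed):

```python
from sqlglot import exp

from sqlframe.base import types
from sqlframe.base.util import sqlglot_to_spark

# Primitive mapping per the table above: BIGINT -> LongType, TEXT -> StringType.
assert isinstance(sqlglot_to_spark(exp.DataType.build("BIGINT")), types.LongType)
assert isinstance(sqlglot_to_spark(exp.DataType.build("TEXT")), types.StringType)
# Container types recurse into their element types.
assert isinstance(sqlglot_to_spark(exp.DataType.build("ARRAY<BIGINT>")), types.ArrayType)
```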
sqlframe/bigquery/dataframe.py (new file)

@@ -0,0 +1,74 @@
+from __future__ import annotations
+
+import logging
+import typing as t
+
+from sqlframe.base.catalog import Column as CatalogColumn
+from sqlframe.base.dataframe import (
+    _BaseDataFrame,
+    _BaseDataFrameNaFunctions,
+    _BaseDataFrameStatFunctions,
+)
+from sqlframe.base.mixins.dataframe_mixins import NoCachePersistSupportMixin
+from sqlframe.bigquery.group import BigQueryGroupedData
+
+if t.TYPE_CHECKING:
+    from sqlframe.bigquery.readwriter import BigQueryDataFrameWriter
+    from sqlframe.bigquery.session import BigQuerySession
+
+
+logger = logging.getLogger(__name__)
+
+
+class BigQueryDataFrameNaFunctions(_BaseDataFrameNaFunctions["BigQueryDataFrame"]):
+    pass
+
+
+class BigQueryDataFrameStatFunctions(_BaseDataFrameStatFunctions["BigQueryDataFrame"]):
+    pass
+
+
+class BigQueryDataFrame(
+    NoCachePersistSupportMixin,
+    _BaseDataFrame[
+        "BigQuerySession",
+        "BigQueryDataFrameWriter",
+        "BigQueryDataFrameNaFunctions",
+        "BigQueryDataFrameStatFunctions",
+        "BigQueryGroupedData",
+    ],
+):
+    _na = BigQueryDataFrameNaFunctions
+    _stat = BigQueryDataFrameStatFunctions
+    _group_data = BigQueryGroupedData
+
+    @property
+    def _typed_columns(self) -> t.List[CatalogColumn]:
+        from google.cloud import bigquery
+
+        def field_to_column(field: bigquery.SchemaField) -> CatalogColumn:
+            if field.field_type == "RECORD":
+                data_type = "STRUCT<"
+                for subfield in field.fields:
+                    column = field_to_column(subfield)
+                    data_type += f"{column.name} {column.dataType},"
+                data_type += ">"
+            elif field.field_type == "INTEGER":
+                data_type = "INT64"
+            else:
+                data_type = field.field_type
+            if field.mode == "REPEATED":
+                data_type = f"ARRAY<{data_type}>"
+            return CatalogColumn(
+                name=field.name,
+                dataType=data_type,
+                nullable=field.is_nullable,
+                description=None,
+                isPartition=False,
+                isBucket=False,
+            )
+
+        job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
+        sql = self.session._to_sql(self.expression)
+        query_job = self.session._client.query(sql, job_config=job_config)
+        return [field_to_column(field) for field in query_job.schema]
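BigQuery resolves `_typed_columns` by dry-running the generated SQL and reading the schema off the query job, so no bytes are scanned or billed. A hedged usage sketch; the table path is a placeholder, and it assumes default Google credentials plus a `session.table` method that mirrors `SparkSession.table`:

```python
from sqlframe.bigquery import BigQuerySession

session = BigQuerySession()  # assumes application-default credentials
df = session.table("my_dataset.my_table")  # placeholder table path

# Schema comes from a dry-run job (dry_run=True, use_query_cache=False above).
df.printSchema()
```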
sqlframe/duckdb/dataframe.py

@@ -9,14 +9,12 @@ from sqlframe.base.dataframe import (
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
-from sqlframe.base.mixins.dataframe_mixins import
+from sqlframe.base.mixins.dataframe_mixins import (
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
+)
 from sqlframe.duckdb.group import DuckDBGroupedData
 
-if sys.version_info >= (3, 11):
-    from typing import Self
-else:
-    from typing_extensions import Self
-
 if t.TYPE_CHECKING:
     from sqlframe.duckdb.session import DuckDBSession  # noqa
     from sqlframe.duckdb.readwriter import DuckDBDataFrameWriter  # noqa

@@ -35,7 +33,8 @@ class DuckDBDataFrameStatFunctions(_BaseDataFrameStatFunctions["DuckDBDataFrame"
 
 
 class DuckDBDataFrame(
-
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
     _BaseDataFrame[
         "DuckDBSession",
         "DuckDBDataFrameWriter",

@@ -47,11 +46,3 @@ class DuckDBDataFrame(
     _na = DuckDBDataFrameNaFunctions
     _stat = DuckDBDataFrameStatFunctions
     _group_data = DuckDBGroupedData
-
-    def cache(self) -> Self:
-        logger.warning("DuckDB does not support caching. Ignoring cache() call.")
-        return self
-
-    def persist(self) -> Self:
-        logger.warning("DuckDB does not support persist. Ignoring persist() call.")
-        return self
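DuckDB (like Postgres below) now gets `_typed_columns` from `TypedColumnsFromTempViewMixin`: the query is registered as a temporary view and its columns are read back through the catalog. A short sketch, assuming `session.sql` accepts a raw query as in PySpark:

```python
from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()
df = session.sql("SELECT 1 AS id, 'Jack' AS name")

# Under the hood: CREATE OR REPLACE TEMPORARY VIEW ... AS <query>, then
# catalog.listColumns(view_name, include_temp=True).
df.printSchema()
```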
sqlframe/postgres/catalog.py (new file)

@@ -0,0 +1,227 @@
+# This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
+
+from __future__ import annotations
+
+import fnmatch
+import typing as t
+
+from sqlglot import exp, parse_one
+
+from sqlframe.base.catalog import Column, Function, _BaseCatalog
+from sqlframe.base.decorators import normalize
+from sqlframe.base.mixins.catalog_mixins import (
+    GetCurrentCatalogFromFunctionMixin,
+    GetCurrentDatabaseFromFunctionMixin,
+    ListCatalogsFromInfoSchemaMixin,
+    ListDatabasesFromInfoSchemaMixin,
+    ListTablesFromInfoSchemaMixin,
+    SetCurrentDatabaseFromSearchPathMixin,
+)
+from sqlframe.base.util import to_schema
+
+if t.TYPE_CHECKING:
+    from sqlframe.postgres.session import PostgresSession  # noqa
+    from sqlframe.postgres.dataframe import PostgresDataFrame  # noqa
+
+
+class PostgresCatalog(
+    GetCurrentCatalogFromFunctionMixin["PostgresSession", "PostgresDataFrame"],
+    GetCurrentDatabaseFromFunctionMixin["PostgresSession", "PostgresDataFrame"],
+    ListDatabasesFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
+    ListCatalogsFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
+    SetCurrentDatabaseFromSearchPathMixin["PostgresSession", "PostgresDataFrame"],
+    ListTablesFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
+    _BaseCatalog["PostgresSession", "PostgresDataFrame"],
+):
+    CURRENT_CATALOG_EXPRESSION: exp.Expression = exp.column("current_catalog")
+    TEMP_SCHEMA_FILTER = exp.column("table_schema").like("pg_temp_%")
+
+    @normalize(["tableName", "dbName"])
+    def listColumns(
+        self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
+    ) -> t.List[Column]:
+        """Returns a t.List of columns for the given table/view in the specified database.
+
+        .. versionadded:: 2.0.0
+
+        Parameters
+        ----------
+        tableName : str
+            name of the table to t.List columns.
+
+            .. versionchanged:: 3.4.0
+               Allow ``tableName`` to be qualified with catalog name when ``dbName`` is None.
+
+        dbName : str, t.Optional
+            name of the database to find the table to t.List columns.
+
+        Returns
+        -------
+        t.List
+            A t.List of :class:`Column`.
+
+        Notes
+        -----
+        The order of arguments here is different from that of its JVM counterpart
+        because Python does not support method overloading.
+
+        If no database is specified, the current database and catalog
+        are used. This API includes all temporary views.
+
+        Examples
+        --------
+        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1")
+        >>> _ = spark.sql("CREATE TABLE tblA (name STRING, age INT) USING parquet")
+        >>> spark.catalog.t.listColumns("tblA")
+        [Column(name='name', description=None, dataType='string', nullable=True, ...
+        >>> _ = spark.sql("DROP TABLE tblA")
+        """
+        if df := self.session.temp_views.get(tableName):
+            return [
+                Column(
+                    name=x,
+                    description=None,
+                    dataType="",
+                    nullable=True,
+                    isPartition=False,
+                    isBucket=False,
+                )
+                for x in df.columns
+            ]
+
+        table = exp.to_table(tableName, dialect=self.session.input_dialect)
+        schema = to_schema(dbName, dialect=self.session.input_dialect) if dbName else None
+        if not table.db:
+            if schema and schema.db:
+                table.set("db", schema.args["db"])
+            else:
+                table.set(
+                    "db",
+                    exp.parse_identifier(
+                        self.currentDatabase(), dialect=self.session.input_dialect
+                    ),
+                )
+        if not table.catalog:
+            if schema and schema.catalog:
+                table.set("catalog", schema.args["catalog"])
+            else:
+                table.set(
+                    "catalog",
+                    exp.parse_identifier(self.currentCatalog(), dialect=self.session.input_dialect),
+                )
+        source_table = self._get_info_schema_table("columns", database=table.db)
+        select = parse_one(
+            f"""
+            SELECT
+                att.attname AS column_name,
+                pg_catalog.format_type(att.atttypid, NULL) AS data_type,
+                col.is_nullable
+            FROM
+                pg_catalog.pg_attribute att
+            JOIN
+                pg_catalog.pg_class cls ON cls.oid = att.attrelid
+            JOIN
+                pg_catalog.pg_namespace nsp ON nsp.oid = cls.relnamespace
+            JOIN
+                information_schema.columns col ON col.table_schema = nsp.nspname AND col.table_name = cls.relname AND col.column_name = att.attname
+            WHERE
+                cls.relname = '{table.name}' AND -- replace with your table name
+                att.attnum > 0 AND
+                NOT att.attisdropped
+            ORDER BY
+                att.attnum;
+            """,
+            dialect="postgres",
+        )
+        if table.db:
+            schema_filter: exp.Expression = exp.column("table_schema").eq(table.db)
+            if include_temp and self.TEMP_SCHEMA_FILTER:
+                schema_filter = exp.Or(this=schema_filter, expression=self.TEMP_SCHEMA_FILTER)
+            select = select.where(schema_filter)  # type: ignore
+        if table.catalog:
+            catalog_filter: exp.Expression = exp.column("table_catalog").eq(table.catalog)
+            if include_temp and self.TEMP_CATALOG_FILTER:
+                catalog_filter = exp.Or(this=catalog_filter, expression=self.TEMP_CATALOG_FILTER)
+            select = select.where(catalog_filter)  # type: ignore
+        results = self.session._fetch_rows(select)
+        return [
+            Column(
+                name=x["column_name"],
+                description=None,
+                dataType=x["data_type"],
+                nullable=x["is_nullable"] == "YES",
+                isPartition=False,
+                isBucket=False,
+            )
+            for x in results
+        ]
+
+    def listFunctions(
+        self, dbName: t.Optional[str] = None, pattern: t.Optional[str] = None
+    ) -> t.List[Function]:
+        """
+        Returns a t.List of functions registered in the specified database.
+
+        .. versionadded:: 3.4.0
+
+        Parameters
+        ----------
+        dbName : str
+            name of the database to t.List the functions.
+            ``dbName`` can be qualified with catalog name.
+        pattern : str
+            The pattern that the function name needs to match.
+
+            .. versionchanged: 3.5.0
+                Adds ``pattern`` argument.
+
+        Returns
+        -------
+        t.List
+            A t.List of :class:`Function`.
+
+        Notes
+        -----
+        If no database is specified, the current database and catalog
+        are used. This API includes all temporary functions.
+
+        Examples
+        --------
+        >>> spark.catalog.t.listFunctions()
+        [Function(name=...
+
+        >>> spark.catalog.t.listFunctions(pattern="to_*")
+        [Function(name=...
+
+        >>> spark.catalog.t.listFunctions(pattern="*not_existing_func*")
+        []
+        """
+        # SO: https://stackoverflow.com/questions/44143816/any-way-to-list-all-user-defined-postgresql-functions
+        query = parse_one(
+            """SELECT n.nspname as "namespace",
+                      p.proname as "name"
+               FROM pg_catalog.pg_proc p
+               LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
+               WHERE pg_catalog.pg_function_is_visible(p.oid)
+                 AND n.nspname <> 'pg_catalog'
+                 AND n.nspname <> 'information_schema'
+               ORDER BY 1, 2;
+            """,
+            dialect=self.session.input_dialect,
+        )
+        functions = self.session._fetch_rows(query)
+        catalog = self.currentCatalog()
+        results = [
+            Function(
+                name=x["name"],
+                catalog=catalog,
+                namespace=[x["namespace"]],
+                description=None,
+                className="",
+                isTemporary=False,
+            )
+            for x in functions
+        ]
+        if pattern:
+            results = [x for x in results if fnmatch.fnmatch(x.name, pattern)]
+        return results
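The new `PostgresCatalog` backs both `listColumns` (used by the temp-view mixin above) and `listFunctions`. A usage sketch; the psycopg2 connection details are placeholders, and the `conn=` parameter name follows the sqlframe Postgres docs:

```python
import psycopg2

from sqlframe.postgres import PostgresSession

conn = psycopg2.connect(  # placeholder DSN
    dbname="postgres", user="postgres", password="postgres", host="localhost", port="5432"
)
session = PostgresSession(conn=conn)

print(session.catalog.listColumns("public.my_table"))  # [Column(name=..., dataType=..., nullable=...), ...]
print(session.catalog.listFunctions(pattern="to_*"))   # fnmatch-style filtering, per listFunctions above
```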
sqlframe/postgres/dataframe.py

@@ -9,7 +9,10 @@ from sqlframe.base.dataframe import (
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
-from sqlframe.base.mixins.dataframe_mixins import
+from sqlframe.base.mixins.dataframe_mixins import (
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
+)
 from sqlframe.postgres.group import PostgresGroupedData
 
 if sys.version_info >= (3, 11):

@@ -34,7 +37,8 @@ class PostgresDataFrameStatFunctions(_BaseDataFrameStatFunctions["PostgresDataFr
 
 
 class PostgresDataFrame(
-
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
     _BaseDataFrame[
         "PostgresSession",
         "PostgresDataFrameWriter",

@@ -46,11 +50,3 @@ class PostgresDataFrame(
     _na = PostgresDataFrameNaFunctions
     _stat = PostgresDataFrameStatFunctions
     _group_data = PostgresGroupedData
-
-    def cache(self) -> Self:
-        logger.warning("Postgres does not support caching. Ignoring cache() call.")
-        return self
-
-    def persist(self) -> Self:
-        logger.warning("Postgres does not support persist. Ignoring persist() call.")
-        return self