sqlframe 2.4.0__tar.gz → 3.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlframe-2.4.0 → sqlframe-3.0.0}/PKG-INFO +57 -29
- {sqlframe-2.4.0 → sqlframe-3.0.0}/README.md +56 -28
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/bigquery.md +72 -16
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/configuration.md +47 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/duckdb.md +65 -16
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/postgres.md +68 -17
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/snowflake.md +78 -21
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/spark.md +58 -12
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/standalone.md +42 -12
- {sqlframe-2.4.0 → sqlframe-3.0.0}/setup.py +1 -0
- sqlframe-3.0.0/sqlframe/__init__.py +83 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/_version.py +2 -2
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/dataframe.py +11 -1
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/session.py +4 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/__init__.py +11 -2
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/session.py +1 -2
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/__init__.py +12 -3
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/dataframe.py +11 -5
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/session.py +1 -2
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/__init__.py +11 -2
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/session.py +1 -2
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/__init__.py +11 -2
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/session.py +1 -2
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/__init__.py +7 -1
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/session.py +1 -2
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/__init__.py +11 -2
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/session.py +1 -2
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/__init__.py +7 -1
- sqlframe-3.0.0/sqlframe/standalone/column.py +1 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/session.py +1 -2
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe.egg-info/PKG-INFO +57 -29
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe.egg-info/SOURCES.txt +17 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe.egg-info/requires.txt +1 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/conftest.py +1 -0
- sqlframe-3.0.0/tests/integration/engines/duck/test_duckdb_activate.py +37 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/duck/test_duckdb_dataframe.py +53 -0
- sqlframe-3.0.0/tests/integration/engines/postgres/test_postgres_activate.py +37 -0
- sqlframe-3.0.0/tests/unit/bigquery/test_activate.py +51 -0
- sqlframe-3.0.0/tests/unit/conftest.py +124 -0
- sqlframe-3.0.0/tests/unit/duck/test_activate.py +41 -0
- sqlframe-3.0.0/tests/unit/postgres/__init__.py +0 -0
- sqlframe-3.0.0/tests/unit/postgres/test_activate.py +41 -0
- sqlframe-3.0.0/tests/unit/redshift/__init__.py +0 -0
- sqlframe-3.0.0/tests/unit/redshift/test_activate.py +41 -0
- sqlframe-3.0.0/tests/unit/snowflake/__init__.py +0 -0
- sqlframe-3.0.0/tests/unit/snowflake/test_activate.py +41 -0
- sqlframe-3.0.0/tests/unit/spark/__init__.py +0 -0
- sqlframe-3.0.0/tests/unit/spark/test_activate.py +41 -0
- sqlframe-3.0.0/tests/unit/standalone/__init__.py +0 -0
- sqlframe-3.0.0/tests/unit/standalone/test_activate.py +41 -0
- sqlframe-3.0.0/tests/unit/test_activate.py +37 -0
- sqlframe-2.4.0/sqlframe/duckdb/column.py +0 -1
- {sqlframe-2.4.0 → sqlframe-3.0.0}/.github/CODEOWNERS +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/.github/workflows/main.workflow.yaml +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/.github/workflows/publish.workflow.yaml +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/.gitignore +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/.pre-commit-config.yaml +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/.readthedocs.yaml +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/LICENSE +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/Makefile +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/add_chatgpt_support.md +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/add_chatgpt_support/adding_ai_to_meal.jpeg +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/add_chatgpt_support/hype_train.gif +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/add_chatgpt_support/marvin_paranoid_robot.gif +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/add_chatgpt_support/nonsense_sql.png +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/add_chatgpt_support/openai_full_rewrite.png +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/add_chatgpt_support/openai_replacing_cte_names.png +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/add_chatgpt_support/sqlglot_optimized_code.png +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/add_chatgpt_support/sunny_shake_head_no.gif +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/but_wait_theres_more.gif +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/cake.gif +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/you_get_pyspark_api.gif +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/sqlframe_universal_dataframe_api.md +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/docs/bigquery.md +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/docs/duckdb.md +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/docs/images/SF.png +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/docs/images/favicon.png +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/docs/images/favicon_old.png +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/docs/postgres.md +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/images/SF.png +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/images/favicon.png +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/images/favicon_old.png +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/index.md +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/requirements.txt +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/stylesheets/extra.css +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/mkdocs.yml +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/pytest.ini +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/renovate.json +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/setup.cfg +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/LICENSE +0 -0
- {sqlframe-2.4.0/sqlframe → sqlframe-3.0.0/sqlframe/base}/__init__.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/_typing.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/catalog.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/column.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/decorators.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/exceptions.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/function_alternatives.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/functions.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/group.py +0 -0
- {sqlframe-2.4.0/sqlframe/base → sqlframe-3.0.0/sqlframe/base/mixins}/__init__.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/mixins/catalog_mixins.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/mixins/dataframe_mixins.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/mixins/readwriter_mixins.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/normalize.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/operations.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/readerwriter.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/transforms.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/types.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/udf.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/util.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/window.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/catalog.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/column.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/dataframe.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/functions.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/functions.pyi +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/group.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/readwriter.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/types.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/udf.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/window.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/catalog.py +0 -0
- {sqlframe-2.4.0/sqlframe/postgres → sqlframe-3.0.0/sqlframe/duckdb}/column.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/functions.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/functions.pyi +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/group.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/readwriter.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/types.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/udf.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/window.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/catalog.py +0 -0
- {sqlframe-2.4.0/sqlframe/redshift → sqlframe-3.0.0/sqlframe/postgres}/column.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/dataframe.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/functions.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/functions.pyi +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/group.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/readwriter.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/types.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/udf.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/window.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/catalog.py +0 -0
- {sqlframe-2.4.0/sqlframe/snowflake → sqlframe-3.0.0/sqlframe/redshift}/column.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/dataframe.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/functions.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/group.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/readwriter.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/types.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/udf.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/window.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/catalog.py +0 -0
- {sqlframe-2.4.0/sqlframe/spark → sqlframe-3.0.0/sqlframe/snowflake}/column.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/dataframe.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/functions.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/functions.pyi +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/group.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/readwriter.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/types.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/udf.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/window.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/catalog.py +0 -0
- {sqlframe-2.4.0/sqlframe/standalone → sqlframe-3.0.0/sqlframe/spark}/column.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/dataframe.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/functions.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/functions.pyi +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/group.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/readwriter.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/types.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/udf.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/window.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/catalog.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/dataframe.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/functions.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/group.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/readwriter.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/types.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/udf.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/window.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/testing/__init__.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/testing/utils.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe.egg-info/dependency_links.txt +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe.egg-info/top_level.txt +0 -0
- {sqlframe-2.4.0/sqlframe/base/mixins → sqlframe-3.0.0/tests}/__init__.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/common_fixtures.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/fixtures/employee.csv +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/fixtures/employee.json +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/fixtures/employee.parquet +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/fixtures/employee_extra_line.csv +0 -0
- {sqlframe-2.4.0/tests → sqlframe-3.0.0/tests/integration}/__init__.py +0 -0
- {sqlframe-2.4.0/tests/integration → sqlframe-3.0.0/tests/integration/engines}/__init__.py +0 -0
- {sqlframe-2.4.0/tests/integration/engines → sqlframe-3.0.0/tests/integration/engines/bigquery}/__init__.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/bigquery/test_bigquery_catalog.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/bigquery/test_bigquery_dataframe.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/bigquery/test_bigquery_session.py +0 -0
- {sqlframe-2.4.0/tests/integration/engines/bigquery → sqlframe-3.0.0/tests/integration/engines/duck}/__init__.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/duck/test_duckdb_catalog.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/duck/test_duckdb_reader.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/duck/test_duckdb_session.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/duck/test_duckdb_udf.py +0 -0
- {sqlframe-2.4.0/tests/integration/engines/duck → sqlframe-3.0.0/tests/integration/engines/postgres}/__init__.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/postgres/test_postgres_catalog.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/postgres/test_postgres_dataframe.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/postgres/test_postgres_session.py +0 -0
- {sqlframe-2.4.0/tests/integration/engines/postgres → sqlframe-3.0.0/tests/integration/engines/redshift}/__init__.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/redshift/test_redshift_catalog.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/redshift/test_redshift_session.py +0 -0
- {sqlframe-2.4.0/tests/integration/engines/redshift → sqlframe-3.0.0/tests/integration/engines/snowflake}/__init__.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/snowflake/test_snowflake_catalog.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/snowflake/test_snowflake_dataframe.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/snowflake/test_snowflake_session.py +0 -0
- {sqlframe-2.4.0/tests/integration/engines/snowflake → sqlframe-3.0.0/tests/integration/engines/spark}/__init__.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/spark/test_spark_catalog.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/spark/test_spark_dataframe.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/test_engine_column.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/test_engine_dataframe.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/test_engine_reader.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/test_engine_session.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/test_engine_writer.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/test_int_functions.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/test_int_testing.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/fixtures.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/test_int_dataframe.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/test_int_dataframe_stats.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/test_int_grouped_data.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/test_int_session.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/types.py +0 -0
- {sqlframe-2.4.0/tests/integration/engines/spark → sqlframe-3.0.0/tests/unit}/__init__.py +0 -0
- {sqlframe-2.4.0/tests/unit → sqlframe-3.0.0/tests/unit/bigquery}/__init__.py +0 -0
- {sqlframe-2.4.0/tests/unit/standalone → sqlframe-3.0.0/tests/unit/duck}/__init__.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/standalone/fixtures.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/standalone/test_column.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/standalone/test_dataframe.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/standalone/test_dataframe_writer.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/standalone/test_functions.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/standalone/test_session.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/standalone/test_session_case_sensitivity.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/standalone/test_types.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/standalone/test_window.py +0 -0
- {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/test_util.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sqlframe
-Version: 2.4.0
+Version: 3.0.0
 Summary: Turning PySpark Into a Universal DataFrame API
 Home-page: https://github.com/eakmanrq/sqlframe
 Author: Ryan Eakman
@@ -48,10 +48,10 @@ SQLFrame also has a "Standalone" session that be used to generate SQL without an
 
 SQLFrame is great for:
 
-* Users who want
+* Users who want a DataFrame API that leverages the full power of their engine to do the processing
+* Users who want to run PySpark code quickly locally without the overhead of starting a Spark session
 * Users who want a SQL representation of their DataFrame code for debugging or sharing with others
-
-* Users who want a DataFrame API that leverages the full power of their engine to do the processing
+* Users who want to run PySpark DataFrame code without the complexity of using Spark for processing
 
 ## Installation
 
@@ -75,44 +75,72 @@ See specific engine documentation for additional setup instructions.
 ## Configuration
 
 SQLFrame generates consistently accurate yet complex SQL for engine execution.
-However, when using df.sql(), it produces more human-readable SQL.
+However, when using df.sql(optimize=True), it produces more human-readable SQL.
 For details on how to configure this output and leverage OpenAI to enhance the SQL, see [Generated SQL Configuration](https://sqlframe.readthedocs.io/en/stable/configuration/#generated-sql).
 
 SQLFrame by default uses the Spark dialect for input and output.
 This can be changed to make SQLFrame feel more like a native DataFrame API for the engine you are using.
 See [Input and Output Dialect Configuration](https://sqlframe.readthedocs.io/en/stable/configuration/#input-and-output-dialect).
 
+## Activating SQLFrame
+
+SQLFrame can either replace pyspark imports or be used alongside them.
+To replace pyspark imports, use the [activate function](https://sqlframe.readthedocs.io/en/stable/configuration/#activating-sqlframe) to set the engine to use.
+
+```python
+from sqlframe import activate
+
+# Activate SQLFrame to run directly on DuckDB
+activate(engine="duckdb")
+
+from pyspark.sql import SparkSession
+session = SparkSession.builder.getOrCreate()
+```
+
+SQLFrame can also be directly imported which both maintains pyspark imports but also allows for a more engine-native DataFrame API:
+
+```python
+from sqlframe.duckdb import DuckDBSession
+
+session = DuckDBSession.builder.getOrCreate()
+```
+
 ## Example Usage
 
 ```python
-from sqlframe
-
-
+from sqlframe import activate
+
+# Activate SQLFrame to run directly on BigQuery
+activate(engine="bigquery")
+
+from pyspark.sql import SparkSession
+from pyspark.sql import functions as F
+from pyspark.sql import Window
 
-session =
+session = SparkSession.builder.getOrCreate()
 table_path = '"bigquery-public-data".samples.natality'
 # Top 5 years with the greatest year-over-year % change in new families with single child
 df = (
- … (20 lines removed, content not shown in this rendering)
+    session.table(table_path)
+    .where(F.col("ever_born") == 1)
+    .groupBy("year")
+    .agg(F.count("*").alias("num_single_child_families"))
+    .withColumn(
+        "last_year_num_single_child_families",
+        F.lag(F.col("num_single_child_families"), 1).over(Window.orderBy("year"))
+    )
+    .withColumn(
+        "percent_change",
+        (F.col("num_single_child_families") - F.col("last_year_num_single_child_families"))
+        / F.col("last_year_num_single_child_families")
+    )
+    .orderBy(F.abs(F.col("percent_change")).desc())
+    .select(
+        F.col("year").alias("year"),
+        F.format_number("num_single_child_families", 0).alias("new families single child"),
+        F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
+    )
+    .limit(5)
 )
 ```
 ```python
@@ -18,10 +18,10 @@ SQLFrame also has a "Standalone" session that be used to generate SQL without an
 
 SQLFrame is great for:
 
-* Users who want
+* Users who want a DataFrame API that leverages the full power of their engine to do the processing
+* Users who want to run PySpark code quickly locally without the overhead of starting a Spark session
 * Users who want a SQL representation of their DataFrame code for debugging or sharing with others
-
-* Users who want a DataFrame API that leverages the full power of their engine to do the processing
+* Users who want to run PySpark DataFrame code without the complexity of using Spark for processing
 
 ## Installation
 
@@ -45,44 +45,72 @@ See specific engine documentation for additional setup instructions.
 ## Configuration
 
 SQLFrame generates consistently accurate yet complex SQL for engine execution.
-However, when using df.sql(), it produces more human-readable SQL.
+However, when using df.sql(optimize=True), it produces more human-readable SQL.
 For details on how to configure this output and leverage OpenAI to enhance the SQL, see [Generated SQL Configuration](https://sqlframe.readthedocs.io/en/stable/configuration/#generated-sql).
 
 SQLFrame by default uses the Spark dialect for input and output.
 This can be changed to make SQLFrame feel more like a native DataFrame API for the engine you are using.
 See [Input and Output Dialect Configuration](https://sqlframe.readthedocs.io/en/stable/configuration/#input-and-output-dialect).
 
+## Activating SQLFrame
+
+SQLFrame can either replace pyspark imports or be used alongside them.
+To replace pyspark imports, use the [activate function](https://sqlframe.readthedocs.io/en/stable/configuration/#activating-sqlframe) to set the engine to use.
+
+```python
+from sqlframe import activate
+
+# Activate SQLFrame to run directly on DuckDB
+activate(engine="duckdb")
+
+from pyspark.sql import SparkSession
+session = SparkSession.builder.getOrCreate()
+```
+
+SQLFrame can also be directly imported which both maintains pyspark imports but also allows for a more engine-native DataFrame API:
+
+```python
+from sqlframe.duckdb import DuckDBSession
+
+session = DuckDBSession.builder.getOrCreate()
+```
+
 ## Example Usage
 
 ```python
-from sqlframe
-
-
+from sqlframe import activate
+
+# Activate SQLFrame to run directly on BigQuery
+activate(engine="bigquery")
+
+from pyspark.sql import SparkSession
+from pyspark.sql import functions as F
+from pyspark.sql import Window
 
-session =
+session = SparkSession.builder.getOrCreate()
 table_path = '"bigquery-public-data".samples.natality'
 # Top 5 years with the greatest year-over-year % change in new families with single child
 df = (
- … (20 lines removed, content not shown in this rendering)
+    session.table(table_path)
+    .where(F.col("ever_born") == 1)
+    .groupBy("year")
+    .agg(F.count("*").alias("num_single_child_families"))
+    .withColumn(
+        "last_year_num_single_child_families",
+        F.lag(F.col("num_single_child_families"), 1).over(Window.orderBy("year"))
+    )
+    .withColumn(
+        "percent_change",
+        (F.col("num_single_child_families") - F.col("last_year_num_single_child_families"))
+        / F.col("last_year_num_single_child_families")
+    )
+    .orderBy(F.abs(F.col("percent_change")).desc())
+    .select(
+        F.col("year").alias("year"),
+        F.format_number("num_single_child_families", 0).alias("new families single child"),
+        F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
+    )
+    .limit(5)
 )
 ```
 ```python
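The Configuration change in the two hunks above replaces `df.sql()` with `df.sql(optimize=True)` in the README prose. As a hedged illustration that is not part of this diff, a minimal sketch of what that call looks like with the dependency-free Standalone session; the sample rows and column names are made up for the example:

```python
# Minimal sketch (not from this diff): comparing df.sql() with df.sql(optimize=True),
# using the Standalone session so no engine connection is required.
from sqlframe.standalone import StandaloneSession
from sqlframe.standalone import functions as F

session = StandaloneSession()
df = (
    session.createDataFrame([(1, "Jack"), (2, "Jill")], schema=["id", "name"])
    .where(F.col("id") > 1)
    .select("name")
)

print(df.sql())               # SQL generated from the DataFrame operations as written
print(df.sql(optimize=True))  # the more human-readable SQL the README change refers to
```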
@@ -6,6 +6,46 @@
 pip install "sqlframe[bigquery]"
 ```
 
+## Enabling SQLFrame
+
+SQLFrame can be used in two ways:
+
+* Directly importing the `sqlframe.bigquery` package
+* Using the [activate](./configuration.md#activating-sqlframe) function to allow for continuing to use `pyspark.sql` but have it use SQLFrame behind the scenes.
+
+### Import
+
+If converting a PySpark pipeline, all `pyspark.sql` should be replaced with `sqlframe.bigquery`.
+In addition, many classes will have a `BigQuery` prefix.
+For example, `BigQueryDataFrame` instead of `DataFrame`.
+
+
+```python
+# PySpark import
+# from pyspark.sql import SparkSession
+# from pyspark.sql import functions as F
+# from pyspark.sql.dataframe import DataFrame
+# SQLFrame import
+from sqlframe.bigquery import BigQuerySession
+from sqlframe.bigquery import functions as F
+from sqlframe.bigquery import BigQueryDataFrame
+```
+
+### Activate
+
+If you would like to continue using `pyspark.sql` but have it use SQLFrame behind the scenes, you can use the [activate](./configuration.md#activating-sqlframe) function.
+
+```python
+from sqlframe import activate
+activate("bigquery", config={"default_dataset": "sqlframe.db1"})
+
+from pyspark.sql import SparkSession
+```
+
+`SparkSession` will now be a SQLFrame `BigQuerySession` object and everything will be run on BigQuery directly.
+
+See [activate configuration](./configuration.md#activating-sqlframe) for information on how to pass in a connection and config options.
+
 ## Creating a Session
 
 SQLFrame uses the [BigQuery DBAPI Connection](https://cloud.google.com/python/docs/reference/bigquery/latest/dbapi#class-googlecloudbigquerydbapiconnectionclientnone-bqstorageclientnone) to connect to BigQuery.
@@ -13,7 +53,7 @@ A BigQuerySession, which implements the PySpark Session API, can be created by p
 By default, SQLFrame will create a connection by inferring it from the environment (for example using gcloud auth).
 Regardless of approach, it is recommended to configure `default_dataset` in the `BigQuerySession` constructor in order to make it easier to use the catalog methods (see example below).
 
-=== "Without Providing Connection"
+=== "Import + Without Providing Connection"
 
 ```python
 from sqlframe.bigquery import BigQuerySession
@@ -21,7 +61,7 @@ Regardless of approach, it is recommended to configure `default_dataset` in the
 session = BigQuerySession(default_dataset="sqlframe.db1")
 ```
 
-=== "With Providing Connection"
+=== "Import + With Providing Connection"
 
 ```python
 import google.auth
@@ -43,23 +83,39 @@ Regardless of approach, it is recommended to configure `default_dataset` in the
 session = BigQuerySession(conn=conn, default_dataset="sqlframe.db1")
 ```
 
-
+=== "Activate + Without Providing Connection"
 
-
-
-
+```python
+from sqlframe import activate
+activate("bigquery", config={"default_dataset": "sqlframe.db1"})
+
+from pyspark.sql import SparkSession
+session = SparkSession.builder.getOrCreate()
+```
 
+=== "Activate + With Providing Connection"
 
-```python
- … (9 lines removed, content not shown in this rendering)
+```python
+import google.auth
+from google.api_core import client_info
+from google.oauth2 import service_account
+from google.cloud.bigquery.dbapi import connect
+from sqlframe import activate
+creds = service_account.Credentials.from_service_account_file("path/to/credentials.json")
+
+client = google.cloud.bigquery.Client(
+    project="my-project",
+    credentials=creds,
+    location="us-central1",
+    client_info=client_info.ClientInfo(user_agent="sqlframe"),
+)
+
+conn = connect(client=client)
+activate("bigquery", conn=conn, config={"default_dataset": "sqlframe.db1"})
+
+from pyspark.sql import SparkSession
+session = SparkSession.builder.getOrCreate()
+```
 
 ## Using BigQuery Unique Functions
 
@@ -24,6 +24,53 @@ In this configuration, you can use BigQuery syntax for elements such as date for
 
 SQLFrame supports multiple dialects, all of which can be specific as the `input_dialect` and `output_dialect`.
 
+## Activating SQLFrame
+
+SQLFrame can be activated in order to replace `pyspark` imports with `sqlframe` imports for the given engine.
+This allows you to use SQLFrame as a drop-in replacement for PySpark by just adding two lines of code.
+
+### Activate with Engine
+
+If you just provide an engine to `activate` then it will create a connection for that engine with default settings (if the engine supports it).
+
+```python
+
+from sqlframe import activate
+activate("duckdb")
+
+from pyspark.sql import SparkSession
+spark = SparkSession.builder.getOrCreate()
+# "spark" is not a SQLFrame DuckDBSession and will run directly on DuckDB
+```
+
+### Activate with Connection
+
+If you provide a connection to `activate` then it will use that connection for the engine.
+
+```python
+import duckdb
+from sqlframe import activate
+connection = duckdb.connect("file.duckdb")
+activate("duckdb", conn=connection)
+
+from pyspark.sql import SparkSession
+spark = SparkSession.builder.getOrCreate()
+# "spark" is a SQLFrame DuckDBSession and will run directly on DuckDB using `file.duckdb` for persistence
+```
+
+### Activate with Configuration
+
+If you provide a configuration to `activate` then it will use that configuration to create a connection for the engine.
+
+```python
+from sqlframe import activate
+activate("duckdb", config={"sqlframe.input.dialect": "duckdb"})
+
+from pyspark.sql import SparkSession
+spark = SparkSession.builder.getOrCreate()
+# "spark" is a SQLFrame DuckDBSession and will run directly on DuckDB with input dialect set to DuckDB
+```
+
 ## Generated SQL
 
 ### Pretty
@@ -6,6 +6,46 @@
 pip install "sqlframe[duckdb]"
 ```
 
+## Enabling SQLFrame
+
+SQLFrame can be used in two ways:
+
+* Directly importing the `sqlframe.duckdb` package
+* Using the [activate](./configuration.md#activating-sqlframe) function to allow for continuing to use `pyspark.sql` but have it use SQLFrame behind the scenes.
+
+### Import
+
+If converting a PySpark pipeline, all `pyspark.sql` should be replaced with `sqlframe.duckdb`.
+In addition, many classes will have a `DuckDB` prefix.
+For example, `DuckDBDataFrame` instead of `DataFrame`.
+
+
+```python
+# PySpark import
+# from pyspark.sql import SparkSession
+# from pyspark.sql import functions as F
+# from pyspark.sql.dataframe import DataFrame
+# SQLFrame import
+from sqlframe.duckdb import DuckDBSession
+from sqlframe.duckdb import functions as F
+from sqlframe.duckdb import DuckDBDataFrame
+```
+
+### Activate
+
+If you would like to continue using `pyspark.sql` but have it use SQLFrame behind the scenes, you can use the [activate](./configuration.md#activating-sqlframe) function.
+
+```python
+from sqlframe import activate
+activate("duckdb")
+
+from pyspark.sql import SparkSession
+```
+
+`SparkSession` will now be a SQLFrame `DuckDBSession` object and everything will be run on DuckDB directly.
+
+See [activate configuration](./configuration.md#activating-sqlframe) for information on how to pass in a connection and config options.
+
 ## Creating a Session
 
 SQLFrame uses the `duckdb` package to connect to DuckDB.
@@ -13,7 +53,7 @@ A DuckDBSession, which implements the PySpark Session API, can be created by pas
 By default, SQLFrame will create a connection to an in-memory database.
 
 
-=== "Without Providing Connection"
+=== "Import + Without Providing Connection"
 
 ```python
 from sqlframe.duckdb import DuckDBSession
@@ -21,7 +61,7 @@ By default, SQLFrame will create a connection to an in-memory database.
 session = DuckDBSession()
 ```
 
-=== "With Providing Connection"
+=== "Import + With Providing Connection"
 
 ```python
 import duckdb
@@ -30,23 +70,30 @@ By default, SQLFrame will create a connection to an in-memory database.
 conn = duckdb.connect(database=":memory:")
 session = DuckDBSession(conn=conn)
 ```
-## Imports
 
-
-In addition, many classes will have a `DuckDB` prefix.
-For example, `DuckDBDataFrame` instead of `DataFrame`.
+=== "Activate + Without Providing Connection"
 
+```python
+from sqlframe import activate
+activate("duckdb")
 
- … (10 lines removed, content not shown in this rendering)
+from pyspark.sql import SparkSession
+
+session = SparkSession.builder.getOrCreate()
+```
+
+=== "Activate + With Providing Connection"
+
+```python
+import duckdb
+from sqlframe import activate
+conn = duckdb.connect(database=":memory:")
+activate("duckdb", conn=conn)
+
+from pyspark.sql import SparkSession
+
+session = SparkSession.builder.getOrCreate()
+```
 
 ## Using DuckDB Unique Functions
 
@@ -202,6 +249,8 @@ See something that you would like to see supported? [Open an issue](https://gith
 * sql
   * SQLFrame Specific: Get the SQL representation of the WindowSpec
 * [stat](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.stat.html)
+* [toArrow](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.toArrow.html)
+  * SQLFrame Specific Argument: `batch_size` sets the number of rows to read per-batch and returns a `RecrodBatchReader`
 * [toDF](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.toDF.html)
 * [toPandas](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.toPandas.html)
 * [union](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.union.html)
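The new `toArrow` entry above documents a SQLFrame-specific `batch_size` argument that returns a record-batch reader rather than a fully materialized table. A minimal sketch, not part of this diff and with made-up sample data, of how that might be called on a DuckDB-backed DataFrame:

```python
# Hedged sketch of the toArrow usage described in the entry above; batch_size is the
# SQLFrame-specific argument said to return a pyarrow RecordBatchReader instead of a Table.
from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()
df = session.createDataFrame([(1, "a"), (2, "b"), (3, "c")], schema=["id", "val"])

table = df.toArrow()               # pyarrow.Table with all rows materialized
reader = df.toArrow(batch_size=2)  # read in batches of up to 2 rows
for batch in reader:
    print(batch.num_rows)
```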
@@ -6,26 +6,14 @@
 pip install "sqlframe[postgres]"
 ```
 
-##
+## Enabling SQLFrame
 
-SQLFrame
-A PostgresSession, which implements the PySpark Session API, is created by passing in a `psycopg2.Connection` object.
-
-```python
-from psycopg2 import connect
-from sqlframe.postgres import PostgresSession
+SQLFrame can be used in two ways:
 
-
-
-    user="postgres",
-    password="password",
-    host="localhost",
-    port="5432",
-)
-session = PostgresSession(conn=conn)
-```
+* Directly importing the `sqlframe.postgres` package
+* Using the [activate](./configuration.md#activating-sqlframe) function to allow for continuing to use `pyspark.sql` but have it use SQLFrame behind the scenes.
 
-
+### Import
 
 If converting a PySpark pipeline, all `pyspark.sql` should be replaced with `sqlframe.postgres`.
 In addition, many classes will have a `Postgres` prefix.
@@ -43,6 +31,69 @@ from sqlframe.postgres import functions as F
 from sqlframe.postgres import PostgresDataFrame
 ```
 
+### Activate
+
+If you would like to continue using `pyspark.sql` but have it use SQLFrame behind the scenes, you can use the [activate](./configuration.md#activating-sqlframe) function.
+
+```python
+from psycopg2 import connect
+from sqlframe import activate
+conn = connect(
+    dbname="postgres",
+    user="postgres",
+    password="password",
+    host="localhost",
+    port="5432",
+)
+activate("postgres", conn=conn)
+
+from pyspark.sql import SparkSession
+```
+
+`SparkSession` will now be a SQLFrame `PostgresSession` object and everything will be run on Postgres directly.
+
+See [activate configuration](./configuration.md#activating-sqlframe) for information on how to pass in a connection and config options.
+
+## Creating a Session
+
+SQLFrame uses the `psycopg2` package to connect to Postgres.
+A PostgresSession, which implements the PySpark Session API, is created by passing in a `psycopg2.Connection` object.
+
+=== "Import"
+
+```python
+from psycopg2 import connect
+from sqlframe.postgres import PostgresSession
+
+conn = connect(
+    dbname="postgres",
+    user="postgres",
+    password="password",
+    host="localhost",
+    port="5432",
+)
+session = PostgresSession(conn=conn)
+```
+
+=== "Activate"
+
+```python
+from sqlframe import activate
+
+conn = connect(
+    dbname="postgres",
+    user="postgres",
+    password="password",
+    host="localhost",
+    port="5432",
+)
+activate("postgres", conn=conn)
+
+from pyspark.sql import SparkSession
+session = SparkSession.builder.getOrCreate()
+```
+
+
 ## Using Postgres Unique Functions
 
 Postgres may have a function that isn't represented within the PySpark API.