sqlframe 2.0.0__tar.gz → 2.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlframe-2.0.0 → sqlframe-2.2.0}/PKG-INFO +2 -2
- {sqlframe-2.0.0 → sqlframe-2.2.0}/README.md +1 -1
- {sqlframe-2.0.0 → sqlframe-2.2.0}/blogs/add_chatgpt_support.md +1 -1
- {sqlframe-2.0.0 → sqlframe-2.2.0}/blogs/sqlframe_universal_dataframe_api.md +1 -1
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/bigquery.md +4 -2
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/configuration.md +1 -1
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/duckdb.md +3 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/postgres.md +2 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/snowflake.md +3 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/spark.md +2 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/standalone.md +2 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/setup.py +3 -3
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/_version.py +2 -2
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/_typing.py +1 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/dataframe.py +8 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/functions.py +10 -5
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/session.py +11 -64
- sqlframe-2.2.0/sqlframe/base/udf.py +36 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/bigquery/session.py +3 -0
- sqlframe-2.2.0/sqlframe/bigquery/udf.py +11 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/duckdb/session.py +3 -0
- sqlframe-2.2.0/sqlframe/duckdb/udf.py +19 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/postgres/session.py +3 -0
- sqlframe-2.2.0/sqlframe/postgres/udf.py +11 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/redshift/session.py +3 -0
- sqlframe-2.2.0/sqlframe/redshift/udf.py +11 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/snowflake/session.py +4 -0
- sqlframe-2.2.0/sqlframe/snowflake/udf.py +11 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/spark/session.py +3 -0
- sqlframe-2.2.0/sqlframe/spark/udf.py +34 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/standalone/session.py +3 -0
- sqlframe-2.2.0/sqlframe/standalone/udf.py +11 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe.egg-info/PKG-INFO +2 -2
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe.egg-info/SOURCES.txt +9 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe.egg-info/requires.txt +3 -3
- sqlframe-2.2.0/tests/integration/engines/duck/test_duckdb_udf.py +12 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/test_int_functions.py +10 -1
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/unit/standalone/test_dataframe.py +11 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/unit/standalone/test_functions.py +12 -3
- {sqlframe-2.0.0 → sqlframe-2.2.0}/.github/CODEOWNERS +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/.github/workflows/main.workflow.yaml +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/.github/workflows/publish.workflow.yaml +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/.gitignore +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/.pre-commit-config.yaml +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/.readthedocs.yaml +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/LICENSE +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/Makefile +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/blogs/images/add_chatgpt_support/adding_ai_to_meal.jpeg +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/blogs/images/add_chatgpt_support/hype_train.gif +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/blogs/images/add_chatgpt_support/marvin_paranoid_robot.gif +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/blogs/images/add_chatgpt_support/nonsense_sql.png +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/blogs/images/add_chatgpt_support/openai_full_rewrite.png +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/blogs/images/add_chatgpt_support/openai_replacing_cte_names.png +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/blogs/images/add_chatgpt_support/sqlglot_optimized_code.png +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/blogs/images/add_chatgpt_support/sunny_shake_head_no.gif +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/blogs/images/but_wait_theres_more.gif +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/blogs/images/cake.gif +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/blogs/images/you_get_pyspark_api.gif +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/docs/bigquery.md +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/docs/duckdb.md +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/docs/images/SF.png +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/docs/images/favicon.png +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/docs/images/favicon_old.png +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/docs/postgres.md +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/images/SF.png +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/images/favicon.png +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/images/favicon_old.png +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/index.md +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/requirements.txt +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/docs/stylesheets/extra.css +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/mkdocs.yml +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/pytest.ini +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/renovate.json +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/setup.cfg +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/LICENSE +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/catalog.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/column.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/decorators.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/exceptions.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/function_alternatives.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/group.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/mixins/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/mixins/catalog_mixins.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/mixins/dataframe_mixins.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/mixins/readwriter_mixins.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/normalize.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/operations.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/readerwriter.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/transforms.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/types.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/util.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/base/window.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/bigquery/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/bigquery/catalog.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/bigquery/column.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/bigquery/dataframe.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/bigquery/functions.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/bigquery/functions.pyi +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/bigquery/group.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/bigquery/readwriter.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/bigquery/types.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/bigquery/window.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/duckdb/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/duckdb/catalog.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/duckdb/column.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/duckdb/dataframe.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/duckdb/functions.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/duckdb/functions.pyi +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/duckdb/group.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/duckdb/readwriter.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/duckdb/types.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/duckdb/window.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/postgres/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/postgres/catalog.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/postgres/column.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/postgres/dataframe.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/postgres/functions.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/postgres/functions.pyi +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/postgres/group.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/postgres/readwriter.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/postgres/types.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/postgres/window.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/redshift/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/redshift/catalog.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/redshift/column.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/redshift/dataframe.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/redshift/functions.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/redshift/group.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/redshift/readwriter.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/redshift/types.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/redshift/window.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/snowflake/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/snowflake/catalog.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/snowflake/column.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/snowflake/dataframe.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/snowflake/functions.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/snowflake/functions.pyi +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/snowflake/group.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/snowflake/readwriter.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/snowflake/types.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/snowflake/window.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/spark/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/spark/catalog.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/spark/column.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/spark/dataframe.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/spark/functions.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/spark/functions.pyi +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/spark/group.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/spark/readwriter.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/spark/types.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/spark/window.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/standalone/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/standalone/catalog.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/standalone/column.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/standalone/dataframe.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/standalone/functions.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/standalone/group.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/standalone/readwriter.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/standalone/types.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/standalone/window.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/testing/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe/testing/utils.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe.egg-info/dependency_links.txt +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/sqlframe.egg-info/top_level.txt +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/common_fixtures.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/conftest.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/fixtures/employee.csv +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/fixtures/employee.json +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/fixtures/employee.parquet +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/fixtures/employee_extra_line.csv +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/bigquery/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/bigquery/test_bigquery_catalog.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/bigquery/test_bigquery_dataframe.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/bigquery/test_bigquery_session.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/duck/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/duck/test_duckdb_catalog.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/duck/test_duckdb_dataframe.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/duck/test_duckdb_reader.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/duck/test_duckdb_session.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/postgres/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/postgres/test_postgres_catalog.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/postgres/test_postgres_dataframe.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/postgres/test_postgres_session.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/redshift/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/redshift/test_redshift_catalog.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/redshift/test_redshift_session.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/snowflake/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/snowflake/test_snowflake_catalog.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/snowflake/test_snowflake_dataframe.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/snowflake/test_snowflake_session.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/spark/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/spark/test_spark_catalog.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/spark/test_spark_dataframe.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/test_engine_column.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/test_engine_dataframe.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/test_engine_reader.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/test_engine_session.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/test_engine_writer.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/engines/test_int_testing.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/fixtures.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/test_int_dataframe.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/test_int_dataframe_stats.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/test_int_grouped_data.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/integration/test_int_session.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/types.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/unit/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/unit/standalone/__init__.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/unit/standalone/fixtures.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/unit/standalone/test_column.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/unit/standalone/test_dataframe_writer.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/unit/standalone/test_session.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/unit/standalone/test_session_case_sensitivity.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/unit/standalone/test_types.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/unit/standalone/test_window.py +0 -0
- {sqlframe-2.0.0 → sqlframe-2.2.0}/tests/unit/test_util.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sqlframe
-Version: 2.0.0
+Version: 2.2.0
 Summary: Turning PySpark Into a Universal DataFrame API
 Home-page: https://github.com/eakmanrq/sqlframe
 Author: Ryan Eakman
@@ -90,7 +90,7 @@ from sqlframe.bigquery import functions as F
 from sqlframe.bigquery import Window
 
 session = BigQuerySession()
-table_path = "bigquery-public-data.samples.natality"
+table_path = '"bigquery-public-data".samples.natality'
 # Top 5 years with the greatest year-over-year % change in new families with single child
 df = (
     session.table(table_path)
@@ -60,7 +60,7 @@ from sqlframe.bigquery import functions as F
 from sqlframe.bigquery import Window
 
 session = BigQuerySession()
-table_path = "bigquery-public-data.samples.natality"
+table_path = '"bigquery-public-data".samples.natality'
 # Top 5 years with the greatest year-over-year % change in new families with single child
 df = (
     session.table(table_path)
@@ -47,7 +47,7 @@ from sqlframe.bigquery import functions as F
 from sqlframe.bigquery import Window
 
 session = BigQuerySession()
-table_path = "bigquery-public-data.samples.natality"
+table_path = '"bigquery-public-data".samples.natality'
 # Top 5 years with the greatest year-over-year % change in new families with single child
 df = (
     session.table(table_path)
@@ -39,7 +39,7 @@ from sqlframe.bigquery import Window
 
 # Unique to SQLFrame: Ability to connect directly to BigQuery
 session = BigQuerySession()
-table_path = "bigquery-public-data.samples.natality"
+table_path = '"bigquery-public-data".samples.natality'
 # Get the top 5 years with the greatest year-over-year % change in new families with a single child
 df = (
     session.table(table_path)
@@ -72,7 +72,7 @@ from sqlframe.bigquery import functions as F
 
 session = BigQuerySession(default_dataset="sqlframe.db1")
 (
-    session.table("bigquery-public-data.samples.natality")
+    session.table('"bigquery-public-data".samples.natality')
     .select(F.call_function("FARM_FINGERPRINT", F.col("source")).alias("source_hash"))
     .show()
 )
@@ -86,7 +86,7 @@ from sqlframe.bigquery import functions as F
 from sqlframe.bigquery import Window
 
 session = BigQuerySession(default_dataset="sqlframe.db1")
-table_path = "bigquery-public-data.samples.natality"
+table_path = '"bigquery-public-data".samples.natality'
 # Get columns in the table
 print(session.catalog.listColumns(table_path))
 # Get the top 5 years with the greatest year-over-year % change in new families with a single child
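The six documentation hunks above all make the same change: the BigQuery public-data project id in the example table path is now wrapped in double quotes. A minimal sketch of the updated usage (the rationale, keeping the hyphenated project id as a single quoted identifier when the path is parsed into SQL, is an inference rather than something stated in the diff):

```python
from sqlframe.bigquery import BigQuerySession
from sqlframe.bigquery import functions as F

session = BigQuerySession()
# The project id is quoted so "bigquery-public-data" stays one identifier.
table_path = '"bigquery-public-data".samples.natality'
session.table(table_path).select(F.col("year")).show(5)
```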
@@ -214,6 +214,8 @@ See something that you would like to see supported? [Open an issue](https://gith
 * [intersectAll](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.intersectAll.html)
 * [join](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.join.html)
 * [limit](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.limit.html)
+* lineage
+    * Get lineage for a specific column. [Returns a SQLGlot Node](https://sqlglot.com/sqlglot/lineage.html#Node). Can be used to get lineage SQL or HTML representation.
 * [na](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.na.html)
 * [orderBy](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.orderBy.html)
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
@@ -62,7 +62,7 @@ from sqlframe.bigquery import functions as F
 from sqlframe.bigquery import Window
 
 session = BigQuerySession()
-table_path = "bigquery-public-data.samples.natality"
+table_path = '"bigquery-public-data".samples.natality'
 # Top 5 years with the greatest year-over-year % change in new families with single child
 df = (
     session.table(table_path)
@@ -187,6 +187,8 @@ See something that you would like to see supported? [Open an issue](https://gith
 * [intersectAll](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.intersectAll.html)
 * [join](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.join.html)
 * [limit](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.limit.html)
+* lineage
+    * Get lineage for a specific column. [Returns a SQLGlot Node](https://sqlglot.com/sqlglot/lineage.html#Node). Can be used to get lineage SQL or HTML representation.
 * [na](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.na.html)
 * [orderBy](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.orderBy.html)
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
@@ -258,6 +260,7 @@ See something that you would like to see supported? [Open an issue](https://gith
 * [concat](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.concat.html)
     * Only works on strings (does not work on arrays)
 * [concat_ws](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.concat_ws.html)
+* [convert_timezone](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.convert_timezone.html)
 * [corr](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.corr.html)
 * [cos](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.cos.html)
 * [cot](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.cot.html)
@@ -198,6 +198,8 @@ See something that you would like to see supported? [Open an issue](https://gith
 * [intersectAll](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.intersectAll.html)
 * [join](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.join.html)
 * [limit](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.limit.html)
+* lineage
+    * Get lineage for a specific column. [Returns a SQLGlot Node](https://sqlglot.com/sqlglot/lineage.html#Node). Can be used to get lineage SQL or HTML representation.
 * [na](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.na.html)
 * [orderBy](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.orderBy.html)
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
@@ -209,6 +209,8 @@ See something that you would like to see supported? [Open an issue](https://gith
 * [intersectAll](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.intersectAll.html)
 * [join](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.join.html)
 * [limit](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.limit.html)
+* lineage
+    * Get lineage for a specific column. [Returns a SQLGlot Node](https://sqlglot.com/sqlglot/lineage.html#Node). Can be used to get lineage SQL or HTML representation.
 * [na](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.na.html)
 * [orderBy](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.orderBy.html)
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
@@ -286,6 +288,7 @@ See something that you would like to see supported? [Open an issue](https://gith
 * [concat](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.concat.html)
     * Can only concat strings not arrays
 * [concat_ws](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.concat_ws.html)
+* [convert_timezone](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.convert_timezone.html)
 * [corr](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.corr.html)
 * [cos](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.cos.html)
 * [cosh](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.cosh.html)
@@ -156,6 +156,8 @@ df.show(5)
 * [intersectAll](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.intersectAll.html)
 * [join](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.join.html)
 * [limit](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.limit.html)
+* lineage
+    * Get lineage for a specific column. [Returns a SQLGlot Node](https://sqlglot.com/sqlglot/lineage.html#Node). Can be used to get lineage SQL or HTML representation.
 * [na](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.na.html)
 * [orderBy](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.orderBy.html)
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
@@ -133,6 +133,8 @@ See something that you would like to see supported? [Open an issue](https://gith
 * [intersectAll](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.intersectAll.html)
 * [join](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.join.html)
 * [limit](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.limit.html)
+* lineage
+    * Get lineage for a specific column. [Returns a SQLGlot Node](https://sqlglot.com/sqlglot/lineage.html#Node). Can be used to get lineage SQL or HTML representation.
 * [na](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.na.html)
 * [orderBy](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.orderBy.html)
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
@@ -20,7 +20,7 @@ setup(
     python_requires=">=3.8",
     install_requires=[
         "prettytable<3.11.0",
-        "sqlglot>=24.0.0,<25.
+        "sqlglot>=24.0.0,<25.11",
         "typing_extensions>=4.8,<5",
     ],
     extras_require={
@@ -31,7 +31,7 @@ setup(
         "dev": [
             "duckdb>=0.9,<1.1",
             "mypy>=1.10.0,<1.12",
-            "openai>=1.30,<1.
+            "openai>=1.30,<1.41",
             "pandas>=2,<3",
             "pandas-stubs>=2,<3",
             "psycopg>=3.1,<4",
@@ -57,7 +57,7 @@ setup(
             "pandas>=2,<3",
         ],
         "openai": [
-            "openai>=1.30,<1.
+            "openai>=1.30,<1.41",
         ],
         "pandas": [
             "pandas>=2,<3",
@@ -24,6 +24,7 @@ OutputExpressionContainer = t.Union[exp.Select, exp.Create, exp.Insert]
 StorageLevel = str
 PathOrPaths = t.Union[str, t.List[str]]
 OptionalPrimitiveType = t.Optional[PrimitiveType]
+DataTypeOrString = t.Union[DataType, str]
 
 
 class UserDefinedFunctionLike(t.Protocol):
@@ -17,6 +17,7 @@ import sqlglot
 from prettytable import PrettyTable
 from sqlglot import Dialect
 from sqlglot import expressions as exp
+from sqlglot import lineage as sqlglot_lineage
 from sqlglot.helper import ensure_list, flatten, object_to_dict, seq_get
 from sqlglot.optimizer.pushdown_projections import pushdown_projections
 from sqlglot.optimizer.qualify import qualify
@@ -1613,6 +1614,13 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
             0,
         )
 
+    def lineage(self, col: ColumnOrName, optimize: bool = True) -> sqlglot_lineage.Node:
+        return sqlglot_lineage.lineage(
+            column=self._ensure_and_normalize_col(col).alias_or_name,
+            sql=self._get_expressions(optimize=optimize)[0],
+            schema=self.session.catalog._schema,
+        )
+
     def toPandas(self) -> pd.DataFrame:
         return self.session._fetchdf(self._get_expressions(optimize=False))
 
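A minimal usage sketch of the new `lineage` method added above (the example data is illustrative; the returned object is sqlglot's `lineage.Node`, whose exact attributes come from sqlglot rather than from this diff):

```python
from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()
df = session.createDataFrame([(1, "a")], schema=["id", "name"]).select("id")

# lineage() returns a sqlglot Node for the column; its source expression can be
# rendered back to SQL, and sqlglot can also render the node as an HTML graph.
node = df.lineage("id")
print(node.source.sql())
```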
@@ -2070,14 +2070,19 @@ def contains(left: ColumnOrName, right: ColumnOrName) -> Column:
     return Column.invoke_anonymous_function(left, "contains", right)
 
 
-@meta(unsupported_engines="
+@meta(unsupported_engines=["bigquery", "postgres"])
 def convert_timezone(
     sourceTz: t.Optional[Column], targetTz: Column, sourceTs: ColumnOrName
 ) -> Column:
-
-
-
-
+    to_timestamp = get_func_from_session("to_timestamp")
+
+    return Column(
+        expression.ConvertTimezone(
+            timestamp=to_timestamp(Column.ensure_col(sourceTs)).expression,
+            source_tz=sourceTz.expression if sourceTz else None,
+            target_tz=Column.ensure_col(targetTz).expression,
+        )
+    )
 
 
 @meta(unsupported_engines="postgres")
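A minimal usage sketch of the now-implemented `convert_timezone` (timestamps and timezones are illustrative; per the decorator above, the function is marked unsupported on BigQuery and Postgres):

```python
from sqlframe.duckdb import DuckDBSession
from sqlframe.duckdb import functions as F

session = DuckDBSession()
df = session.createDataFrame([("2024-01-01 12:00:00",)], schema=["ts"])

# Reinterpret the timestamp from UTC into America/Los_Angeles.
df.select(
    F.convert_timezone(F.lit("UTC"), F.lit("America/Los_Angeles"), "ts").alias("local_ts")
).show()
```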
@@ -27,6 +27,7 @@ from sqlframe.base.catalog import _BaseCatalog
 from sqlframe.base.dataframe import _BaseDataFrame
 from sqlframe.base.normalize import normalize_dict
 from sqlframe.base.readerwriter import _BaseDataFrameReader, _BaseDataFrameWriter
+from sqlframe.base.udf import _BaseUDFRegistration
 from sqlframe.base.util import (
     get_column_mapping_from_schema_input,
     normalize_string,
@@ -64,16 +65,18 @@ CATALOG = t.TypeVar("CATALOG", bound=_BaseCatalog)
 READER = t.TypeVar("READER", bound=_BaseDataFrameReader)
 WRITER = t.TypeVar("WRITER", bound=_BaseDataFrameWriter)
 DF = t.TypeVar("DF", bound=_BaseDataFrame)
+UDF_REGISTRATION = t.TypeVar("UDF_REGISTRATION", bound=_BaseUDFRegistration)
 
 _MISSING = "MISSING"
 
 
-class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
+class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN, UDF_REGISTRATION]):
     _instance = None
     _reader: t.Type[READER]
     _writer: t.Type[WRITER]
     _catalog: t.Type[CATALOG]
     _df: t.Type[DF]
+    _udf_registration: t.Type[UDF_REGISTRATION]
 
     SANITIZE_COLUMN_NAMES = False
 
@@ -81,7 +84,6 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
         self,
         conn: t.Optional[CONN] = None,
         schema: t.Optional[MappingSchema] = None,
-        case_sensitive: bool = False,
         *args,
         **kwargs,
     ):
@@ -91,11 +93,6 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
         self.execution_dialect: Dialect = Dialect.get_or_raise(
             self.builder.DEFAULT_EXECUTION_DIALECT
         )
-        self.case_sensitive: bool = case_sensitive
-        if self.case_sensitive:
-            self.input_dialect.NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE
-            self.output_dialect.NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE
-            self.execution_dialect.NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE
         self.known_ids: t.Set[str] = set()
         self.known_branch_ids: t.Set[str] = set()
         self.known_sequence_ids: t.Set[str] = set()
@@ -176,6 +173,13 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
     def _has_connection(self) -> bool:
         return hasattr(self, "_connection") and bool(self._connection)
 
+    @property
+    def udf(self) -> UDF_REGISTRATION:
+        return self._udf_registration(self)
+
+    def getActiveSession(self) -> Self:
+        return self
+
     def range(self, *args):
         start = 0
         step = 1
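A short sketch of how the two new session members behave (engine choice is illustrative):

```python
from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()

# getActiveSession mirrors the PySpark name but simply hands back this session.
assert session.getActiveSession() is session

# udf is a property that builds the engine-specific registration object on access,
# e.g. DuckDBUDFRegistration(session) for DuckDB.
registration = session.udf
```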
@@ -573,53 +577,10 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
             converted_values.append(cls._to_value(value))
         return _create_row(columns, converted_values)
 
-    @property
-    def _is_standalone(self) -> bool:
-        from sqlframe.standalone.session import StandaloneSession
-
-        return isinstance(self, StandaloneSession)
-
-    @property
-    def _is_duckdb(self) -> bool:
-        from sqlframe.duckdb.session import DuckDBSession
-
-        return isinstance(self, DuckDBSession)
-
-    @property
-    def _is_postgres(self) -> bool:
-        from sqlframe.postgres.session import PostgresSession
-
-        return isinstance(self, PostgresSession)
-
-    @property
-    def _is_spark(self) -> bool:
-        from sqlframe.spark.session import SparkSession
-
-        return isinstance(self, SparkSession)
-
-    @property
-    def _is_bigquery(self) -> bool:
-        from sqlframe.bigquery.session import BigQuerySession
-
-        return isinstance(self, BigQuerySession)
-
-    @property
-    def _is_redshift(self) -> bool:
-        from sqlframe.redshift.session import RedshiftSession
-
-        return isinstance(self, RedshiftSession)
-
-    @property
-    def _is_snowflake(self) -> bool:
-        from sqlframe.snowflake.session import SnowflakeSession
-
-        return isinstance(self, SnowflakeSession)
-
     class Builder:
         SQLFRAME_INPUT_DIALECT_KEY = "sqlframe.input.dialect"
         SQLFRAME_OUTPUT_DIALECT_KEY = "sqlframe.output.dialect"
         SQLFRAME_EXECUTION_DIALECT_KEY = "sqlframe.execution.dialect"
-        SQLFRAME_CASE_SENSITIVE_KEY = "spark.sql.caseSensitive"
         SQLFRAME_CONN_KEY = "sqlframe.conn"
         SQLFRAME_SCHEMA_KEY = "sqlframe.schema"
         DEFAULT_INPUT_DIALECT = "spark"
@@ -665,8 +626,6 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
                 self._session_kwargs["conn"] = value
             elif key == self.SQLFRAME_SCHEMA_KEY:
                 self._session_kwargs["schema"] = value
-            elif key == self.SQLFRAME_CASE_SENSITIVE_KEY:
-                self._session_kwargs["case_sensitive"] = value
             else:
                 self._session_kwargs[key] = value
         if map:
@@ -676,8 +635,6 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
                 self.output_dialect = map[self.SQLFRAME_OUTPUT_DIALECT_KEY]
             if self.SQLFRAME_EXECUTION_DIALECT_KEY in map:
                 self.execution_dialect = map[self.SQLFRAME_EXECUTION_DIALECT_KEY]
-            if self.SQLFRAME_CASE_SENSITIVE_KEY in map:
-                self._session_kwargs["case_sensitive"] = map[self.SQLFRAME_CASE_SENSITIVE_KEY]
             if self.SQLFRAME_CONN_KEY in map:
                 self._session_kwargs["conn"] = map[self.SQLFRAME_CONN_KEY]
             if self.SQLFRAME_SCHEMA_KEY in map:
@@ -700,15 +657,5 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
             self.session.execution_dialect = Dialect.get_or_raise(self.execution_dialect)
             if hasattr(self.session, "_connection") and not self.session._connection:
                 self.session._connection = self._conn
-            if self.session.case_sensitive:
-                self.session.input_dialect.NORMALIZATION_STRATEGY = (
-                    NormalizationStrategy.CASE_SENSITIVE
-                )
-                self.session.output_dialect.NORMALIZATION_STRATEGY = (
-                    NormalizationStrategy.CASE_SENSITIVE
-                )
-                self.session.execution_dialect.NORMALIZATION_STRATEGY = (
-                    NormalizationStrategy.CASE_SENSITIVE
-                )
 
     builder = Builder()
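Taken together, the session hunks above remove the `case_sensitive` constructor argument and the special handling of the `spark.sql.caseSensitive` builder key. A hedged before/after sketch (the builder call chain is assumed to mirror PySpark's; only the behavior visible in this diff is asserted):

```python
from sqlframe.duckdb import DuckDBSession

# In 2.0.x this key became a case_sensitive session kwarg and switched SQLGlot's
# NORMALIZATION_STRATEGY to CASE_SENSITIVE on the input/output/execution dialects.
# In 2.2.0 the dedicated branch is gone, so the key is passed through like any other
# config value and no longer changes identifier normalization.
session = (
    DuckDBSession.builder
    .config("spark.sql.caseSensitive", True)
    .getOrCreate()
)
```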
@@ -0,0 +1,36 @@
+# This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
+from __future__ import annotations
+
+import typing as t
+
+if t.TYPE_CHECKING:
+    from sqlframe.base._typing import DataTypeOrString, UserDefinedFunctionLike
+    from sqlframe.base.session import _BaseSession
+
+    SESSION = t.TypeVar("SESSION", bound=_BaseSession)
+else:
+    SESSION = t.TypeVar("SESSION")
+
+
+class _BaseUDFRegistration(t.Generic[SESSION]):
+    def __init__(self, sparkSession: SESSION):
+        self.sparkSession = sparkSession
+
+    def register(
+        self,
+        name: str,
+        f: t.Union[t.Callable[..., t.Any], UserDefinedFunctionLike],
+        returnType: t.Optional[DataTypeOrString] = None,
+    ) -> UserDefinedFunctionLike:
+        raise NotImplementedError
+
+    def registerJavaFunction(
+        self,
+        name: str,
+        javaClassName: str,
+        returnType: t.Optional[DataTypeOrString] = None,
+    ) -> None:
+        raise NotImplementedError
+
+    def registerJavaUDAF(self, name: str, javaClassName: str) -> None:
+        raise NotImplementedError
@@ -9,6 +9,7 @@ from sqlframe.bigquery.readwriter import (
     BigQueryDataFrameReader,
     BigQueryDataFrameWriter,
 )
+from sqlframe.bigquery.udf import BigQueryUDFRegistration
 
 if t.TYPE_CHECKING:
     from google.cloud.bigquery.client import Client as BigQueryClient
@@ -25,12 +26,14 @@ class BigQuerySession(
         BigQueryDataFrameWriter,
         BigQueryDataFrame,
         BigQueryConnection,
+        BigQueryUDFRegistration,
     ],
 ):
     _catalog = BigQueryCatalog
     _reader = BigQueryDataFrameReader
     _writer = BigQueryDataFrameWriter
     _df = BigQueryDataFrame
+    _udf_registration = BigQueryUDFRegistration
 
     QUALIFY_INFO_SCHEMA_WITH_DATABASE = True
     SANITIZE_COLUMN_NAMES = True
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+import typing as t
+
+from sqlframe.base.udf import _BaseUDFRegistration
+
+if t.TYPE_CHECKING:
+    from sqlframe.bigquery.session import BigQuerySession
+
+
+class BigQueryUDFRegistration(_BaseUDFRegistration["BigQuerySession"]): ...
@@ -11,6 +11,7 @@ from sqlframe.duckdb.readwriter import (
     DuckDBDataFrameReader,
     DuckDBDataFrameWriter,
 )
+from sqlframe.duckdb.udf import DuckDBUDFRegistration
 
 if t.TYPE_CHECKING:
     from duckdb import DuckDBPyConnection
@@ -26,12 +27,14 @@ class DuckDBSession(
         DuckDBDataFrameWriter,
         DuckDBDataFrame,
         DuckDBPyConnection,
+        DuckDBUDFRegistration,
     ]
 ):
     _catalog = DuckDBCatalog
     _reader = DuckDBDataFrameReader
     _writer = DuckDBDataFrameWriter
     _df = DuckDBDataFrame
+    _udf_registration = DuckDBUDFRegistration
 
     def __init__(self, conn: t.Optional[DuckDBPyConnection] = None, *args, **kwargs):
         import duckdb
@@ -0,0 +1,19 @@
+from __future__ import annotations
+
+import typing as t
+
+from sqlframe.base.udf import _BaseUDFRegistration
+
+if t.TYPE_CHECKING:
+    from sqlframe.base._typing import DataTypeOrString, UserDefinedFunctionLike
+    from sqlframe.duckdb.session import DuckDBSession
+
+
+class DuckDBUDFRegistration(_BaseUDFRegistration["DuckDBSession"]):
+    def register(  # type: ignore
+        self,
+        name: str,
+        f: t.Union[t.Callable[..., t.Any], UserDefinedFunctionLike],
+        returnType: t.Optional[DataTypeOrString] = None,
+    ) -> UserDefinedFunctionLike:
+        self.sparkSession._conn.create_function(name, f, return_type=returnType)  # type: ignore
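A hedged usage sketch of the DuckDB registration shown above (function and query are illustrative): `register` forwards the callable to DuckDB's `create_function`, which can infer parameter and return types from the Python annotations when `returnType` is omitted.

```python
from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()

def shout(s: str) -> str:
    return s + "!"

# Registered against the underlying DuckDB connection, then usable from SQL.
session.udf.register("shout", shout)
session.sql("SELECT shout('hello') AS greeting").show()
```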
@@ -11,6 +11,7 @@ from sqlframe.postgres.readwriter import (
     PostgresDataFrameReader,
     PostgresDataFrameWriter,
 )
+from sqlframe.postgres.udf import PostgresUDFRegistration
 
 if t.TYPE_CHECKING:
     from psycopg2.extensions import connection as psycopg2_connection
@@ -27,12 +28,14 @@ class PostgresSession(
         PostgresDataFrameWriter,
         PostgresDataFrame,
         psycopg2_connection,
+        PostgresUDFRegistration,
     ],
 ):
     _catalog = PostgresCatalog
     _reader = PostgresDataFrameReader
     _writer = PostgresDataFrameWriter
     _df = PostgresDataFrame
+    _udf_registration = PostgresUDFRegistration
 
     def __init__(self, conn: t.Optional[psycopg2_connection] = None):
         if not hasattr(self, "_conn"):
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+import typing as t
+
+from sqlframe.base.udf import _BaseUDFRegistration
+
+if t.TYPE_CHECKING:
+    from sqlframe.postgres.session import PostgresSession
+
+
+class PostgresUDFRegistration(_BaseUDFRegistration["PostgresSession"]): ...
@@ -10,6 +10,7 @@ from sqlframe.redshift.readwriter import (
     RedshiftDataFrameReader,
     RedshiftDataFrameWriter,
 )
+from sqlframe.redshift.udf import RedshiftUDFRegistration
 
 if t.TYPE_CHECKING:
     from redshift_connector.core import Connection as RedshiftConnection
@@ -24,12 +25,14 @@ class RedshiftSession(
         RedshiftDataFrameWriter,
         RedshiftDataFrame,
         RedshiftConnection,
+        RedshiftUDFRegistration,
     ],
 ):
     _catalog = RedshiftCatalog
     _reader = RedshiftDataFrameReader
     _writer = RedshiftDataFrameWriter
     _df = RedshiftDataFrame
+    _udf_registration = RedshiftUDFRegistration
 
     def __init__(self, conn: t.Optional[RedshiftConnection] = None):
         warnings.warn(
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+import typing as t
+
+from sqlframe.base.udf import _BaseUDFRegistration
+
+if t.TYPE_CHECKING:
+    from sqlframe.redshift.session import RedshiftSession
+
+
+class RedshiftUDFRegistration(_BaseUDFRegistration["RedshiftSession"]): ...
@@ -4,6 +4,8 @@ import json
 import typing as t
 import warnings
 
+from sqlframe.snowflake.udf import SnowflakeUDFRegistration
+
 try:
     from snowflake.connector.converter import SnowflakeConverter
 except ImportError:
@@ -50,12 +52,14 @@ class SnowflakeSession(
         SnowflakeDataFrameWriter,
         SnowflakeDataFrame,
         SnowflakeConnection,
+        SnowflakeUDFRegistration,
     ],
 ):
     _catalog = SnowflakeCatalog
     _reader = SnowflakeDataFrameReader
     _writer = SnowflakeDataFrameWriter
     _df = SnowflakeDataFrame
+    _udf_registration = SnowflakeUDFRegistration
 
     def __init__(self, conn: t.Optional[SnowflakeConnection] = None):
         import snowflake
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+import typing as t
+
+from sqlframe.base.udf import _BaseUDFRegistration
+
+if t.TYPE_CHECKING:
+    from sqlframe.snowflake.session import SnowflakeSession
+
+
+class SnowflakeUDFRegistration(_BaseUDFRegistration["SnowflakeSession"]): ...
@@ -14,6 +14,7 @@ from sqlframe.spark.readwriter import (
     SparkDataFrameWriter,
 )
 from sqlframe.spark.types import Row
+from sqlframe.spark.udf import SparkUDFRegistration
 
 if t.TYPE_CHECKING:
     import pandas as pd
@@ -32,12 +33,14 @@ class SparkSession(
         SparkDataFrameWriter,
         SparkDataFrame,
         PySparkSession,
+        SparkUDFRegistration,
     ],
 ):
     _catalog = SparkCatalog
     _reader = SparkDataFrameReader
     _writer = SparkDataFrameWriter
     _df = SparkDataFrame
+    _udf_registration = SparkUDFRegistration
 
     def __init__(self, conn: t.Optional[PySparkSession] = None, *args, **kwargs):
         from pyspark.sql.session import DataFrame, SparkSession