sqlframe 0.1.dev3__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe-1.0.0/.github/CODEOWNERS +1 -0
- sqlframe-1.0.0/.github/workflows/main.workflow.yaml +42 -0
- sqlframe-1.0.0/.github/workflows/publish.workflow.yaml +27 -0
- sqlframe-1.0.0/.readthedocs.yaml +13 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/Makefile +3 -3
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/PKG-INFO +28 -60
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/README.md +21 -29
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/blogs/sqlframe_universal_dataframe_api.md +6 -5
- {sqlframe-0.1.dev3/docs → sqlframe-1.0.0}/docs/bigquery.md +20 -13
- sqlframe-1.0.0/docs/requirements.txt +6 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/mkdocs.yml +2 -0
- sqlframe-1.0.0/pytest.ini +7 -0
- sqlframe-1.0.0/renovate.json +17 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/setup.py +32 -24
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/_version.py +2 -2
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/dataframe.py +2 -2
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/decorators.py +6 -6
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/mixins/readwriter_mixins.py +3 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/operations.py +7 -7
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/session.py +3 -3
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/transforms.py +3 -1
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/util.py +4 -3
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/bigquery/catalog.py +3 -2
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/bigquery/session.py +3 -2
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/duckdb/readwriter.py +1 -1
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe.egg-info/PKG-INFO +28 -60
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe.egg-info/SOURCES.txt +9 -3
- sqlframe-1.0.0/sqlframe.egg-info/requires.txt +54 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/common_fixtures.py +13 -10
- sqlframe-1.0.0/tests/conftest.py +29 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/bigquery/test_bigquery_session.py +1 -1
- {sqlframe-0.1.dev3/tests/integration/engines/duckdb → sqlframe-1.0.0/tests/integration/engines/duck}/test_duckdb_session.py +1 -1
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/postgres/test_postgres_session.py +2 -2
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/redshift/test_redshift_session.py +2 -2
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/test_engine_session.py +9 -2
- sqlframe-0.1.dev3/pytest.ini +0 -4
- sqlframe-0.1.dev3/sqlframe.egg-info/requires.txt +0 -42
- sqlframe-0.1.dev3/tests/conftest.py +0 -11
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/.gitignore +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/.pre-commit-config.yaml +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/LICENSE +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/blogs/images/but_wait_theres_more.gif +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/blogs/images/cake.gif +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/blogs/images/you_get_pyspark_api.gif +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0/docs}/docs/bigquery.md +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/docs/docs/duckdb.md +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/docs/docs/images/SF.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/docs/docs/images/favicon.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/docs/docs/images/favicon_old.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/docs/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/docs/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/docs/docs/postgres.md +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/docs/duckdb.md +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/docs/images/SF.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/docs/images/favicon.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/docs/images/favicon_old.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/docs/index.md +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/docs/postgres.md +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/docs/standalone.md +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/docs/stylesheets/extra.css +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/setup.cfg +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/LICENSE +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/_typing.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/column.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/exceptions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/function_alternatives.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/group.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/mixins/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/mixins/catalog_mixins.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/normalize.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/readerwriter.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/base/window.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/bigquery/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/bigquery/column.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/bigquery/dataframe.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/bigquery/functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/bigquery/functions.pyi +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/bigquery/group.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/bigquery/readwriter.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/bigquery/types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/bigquery/window.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/duckdb/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/duckdb/catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/duckdb/column.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/duckdb/dataframe.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/duckdb/functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/duckdb/functions.pyi +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/duckdb/group.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/duckdb/session.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/duckdb/types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/duckdb/window.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/postgres/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/postgres/catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/postgres/column.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/postgres/dataframe.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/postgres/functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/postgres/functions.pyi +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/postgres/group.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/postgres/readwriter.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/postgres/session.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/postgres/types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/postgres/window.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/redshift/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/redshift/catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/redshift/column.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/redshift/dataframe.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/redshift/functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/redshift/group.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/redshift/readwriter.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/redshift/session.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/redshift/types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/redshift/window.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/snowflake/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/snowflake/catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/snowflake/column.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/snowflake/dataframe.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/snowflake/functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/snowflake/group.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/snowflake/readwriter.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/snowflake/session.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/snowflake/types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/snowflake/window.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/spark/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/spark/catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/spark/column.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/spark/dataframe.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/spark/functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/spark/group.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/spark/readwriter.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/spark/session.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/spark/types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/spark/window.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/standalone/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/standalone/catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/standalone/column.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/standalone/dataframe.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/standalone/functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/standalone/group.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/standalone/readwriter.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/standalone/session.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/standalone/types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe/standalone/window.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe.egg-info/dependency_links.txt +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/sqlframe.egg-info/top_level.txt +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/fixtures/employee.csv +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/fixtures/employee.json +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/fixtures/employee.parquet +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/bigquery/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/bigquery/test_bigquery_catalog.py +0 -0
- {sqlframe-0.1.dev3/tests/integration/engines/duckdb → sqlframe-1.0.0/tests/integration/engines/duck}/__init__.py +0 -0
- {sqlframe-0.1.dev3/tests/integration/engines/duckdb → sqlframe-1.0.0/tests/integration/engines/duck}/test_duckdb_catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/postgres/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/postgres/test_postgres_catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/redshift/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/redshift/test_redshift_catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/snowflake/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/snowflake/test_snowflake_catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/snowflake/test_snowflake_session.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/spark/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/spark/test_spark_catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/test_engine_dataframe.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/test_engine_reader.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/test_engine_writer.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/engines/test_int_functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/fixtures.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/test_int_dataframe.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/test_int_dataframe_stats.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/test_int_grouped_data.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/integration/test_int_session.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/unit/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/unit/standalone/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/unit/standalone/fixtures.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/unit/standalone/test_column.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/unit/standalone/test_dataframe.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/unit/standalone/test_dataframe_writer.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/unit/standalone/test_functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/unit/standalone/test_session.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/unit/standalone/test_session_case_sensitivity.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/unit/standalone/test_types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.0.0}/tests/unit/standalone/test_window.py +0 -0
--- /dev/null
+++ sqlframe-1.0.0/.github/CODEOWNERS
@@ -0,0 +1 @@
+* @eakmanrq
--- /dev/null
+++ sqlframe-1.0.0/.github/workflows/main.workflow.yaml
@@ -0,0 +1,42 @@
+name: SQLFrame
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'sqlframe/**'
+      - 'tests/**'
+      - 'Makefile'
+      - 'setup.py'
+  pull_request:
+    types:
+      - synchronize
+      - opened
+    paths:
+      - 'sqlframe/**'
+      - 'tests/**'
+      - 'Makefile'
+      - 'setup.py'
+jobs:
+  run-tests:
+    runs-on: ubuntu-latest
+    env:
+      PYTEST_XDIST_AUTO_NUM_WORKERS: 4
+    strategy:
+      matrix:
+        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: make install-dev
+      - name: Run Style
+        run: make style
+      - name: Setup Postgres
+        uses: ikalnytskyi/action-setup-postgres@v6
+      - name: Run tests
+        run: make local-test
--- /dev/null
+++ sqlframe-1.0.0/.github/workflows/publish.workflow.yaml
@@ -0,0 +1,27 @@
+name: Publish
+on:
+  push:
+    tags:
+      - 'v[0-9]+.[0-9]+.[0-9]+'
+permissions:
+  contents: write
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    env:
+      TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
+      TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Publish
+        run: make publish
+      - name: Create release
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          tag: ${{ github.ref_name }}
+        run: |
+          gh release create "$tag" \
+            --repo="$GITHUB_REPOSITORY" \
+            --title="SQLFrame ${tag#v}" \
+            --generate-notes
--- sqlframe-0.1.dev3/Makefile
+++ sqlframe-1.0.0/Makefile
@@ -1,5 +1,5 @@
 install-dev:
-	pip install -e ".[dev]"
+	pip install -e ".[dev,duckdb,postgres,redshift,bigquery,snowflake,spark]"
 
 install-pre-commit:
 	pre-commit install
@@ -8,10 +8,10 @@ slow-test:
 	pytest -n auto tests
 
 fast-test:
-	pytest -n auto
+	pytest -n auto -m "fast"
 
 local-test:
-	pytest -n auto -m "local"
+	pytest -n auto -m "fast or local"
 
 bigquery-test:
 	pytest -n auto -m "bigquery"
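The new `fast` and `local` marker expressions only select tests if those markers are registered with pytest; the release adds a `pytest.ini` (see the file list above) for exactly that. A minimal sketch of equivalent registration done in a `conftest.py` instead, with the marker descriptions being hypothetical:

```python
# Hypothetical conftest.py registration of the markers selected by
# `pytest -n auto -m "fast or local"`; the real project registers them
# in the pytest.ini added in 1.0.0.
def pytest_configure(config):
    for marker in ("fast", "local", "bigquery"):
        config.addinivalue_line("markers", f"{marker}: {marker} test suite")
```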
--- sqlframe-0.1.dev3/PKG-INFO
+++ sqlframe-1.0.0/PKG-INFO
@@ -1,11 +1,12 @@
 Metadata-Version: 2.1
 Name: sqlframe
-Version: 0.1.dev3
-Summary: PySpark
+Version: 1.0.0
+Summary: Taking the Spark out of PySpark by converting to SQL
 Home-page: https://github.com/eakmanrq/sqlframe
 Author: Ryan Eakman
 Author-email: eakmanrq@gmail.com
 License: MIT
+Platform: UNKNOWN
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Science/Research
@@ -15,61 +16,30 @@ Classifier: Programming Language :: SQL
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: prettytable
-Requires-Dist: sqlglot
 Provides-Extra: bigquery
-Requires-Dist: google-cloud-bigquery[pandas]; extra == "bigquery"
-Requires-Dist: google-cloud-bigquery-storage; extra == "bigquery"
 Provides-Extra: dev
-
-Requires-Dist: mkdocs==1.4.2; extra == "dev"
-Requires-Dist: mkdocs-include-markdown-plugin==4.0.3; extra == "dev"
-Requires-Dist: mkdocs-material==9.0.5; extra == "dev"
-Requires-Dist: mkdocs-material-extensions==1.1.1; extra == "dev"
-Requires-Dist: mypy; extra == "dev"
-Requires-Dist: pandas; extra == "dev"
-Requires-Dist: pymdown-extensions; extra == "dev"
-Requires-Dist: psycopg; extra == "dev"
-Requires-Dist: pyarrow; extra == "dev"
-Requires-Dist: pyspark; extra == "dev"
-Requires-Dist: pytest; extra == "dev"
-Requires-Dist: pytest-postgresql; extra == "dev"
-Requires-Dist: pytest-xdist; extra == "dev"
-Requires-Dist: pre-commit; extra == "dev"
-Requires-Dist: ruff; extra == "dev"
-Requires-Dist: typing_extensions; extra == "dev"
-Requires-Dist: types-psycopg2; extra == "dev"
+Provides-Extra: docs
 Provides-Extra: duckdb
-Requires-Dist: duckdb; extra == "duckdb"
-Requires-Dist: pandas; extra == "duckdb"
 Provides-Extra: postgres
-Requires-Dist: psycopg2; extra == "postgres"
 Provides-Extra: redshift
-Requires-Dist: redshift_connector; extra == "redshift"
 Provides-Extra: snowflake
-Requires-Dist: snowflake-connector-python[pandas,secure-local-storage]; extra == "snowflake"
 Provides-Extra: spark
-
+License-File: LICENSE
 
 <div align="center">
-  <img src="docs/images/sqlframe_logo.png" alt="SQLFrame Logo" width="400"/>
+  <img src="https://sqlframe.readthedocs.io/en/latest/docs/images/sqlframe_logo.png" alt="SQLFrame Logo" width="400"/>
 </div>
 
-
-
 SQLFrame implements the PySpark DataFrame API in order to enable running transformation pipelines directly on database engines - no Spark clusters or dependencies required.
 
-
+SQLFrame currently supports the following engines (many more in development):
 
-
-
-* [
-* [DuckDB](docs/duckdb.md)
-* [Postgres](docs/postgres.md)
+* [BigQuery](https://sqlframe.readthedocs.io/en/latest/bigquery/)
+* [DuckDB](https://sqlframe.readthedocs.io/en/latest/duckdb)
+* [Postgres](https://sqlframe.readthedocs.io/en/latest/postgres)
 
 SQLFrame also has a "Standalone" session that be used to generate SQL without any connection to a database engine.
-* [Standalone](
+* [Standalone](https://sqlframe.readthedocs.io/en/latest/standalone)
 
 SQLFrame is great for:
 
@@ -101,7 +71,7 @@ from sqlframe.bigquery import Window
 
 session = BigQuerySession()
 table_path = "bigquery-public-data.samples.natality"
-#
+# Top 5 years with the greatest year-over-year % change in new families with single child
 df = (
     session.table(table_path)
     .where(F.col("ever_born") == 1)
@@ -118,17 +88,15 @@ df = (
     )
     .orderBy(F.abs(F.col("percent_change")).desc())
     .select(
-        F.col("year").alias("
-        F.format_number("num_single_child_families", 0).alias("
+        F.col("year").alias("year"),
+        F.format_number("num_single_child_families", 0).alias("new families single child"),
         F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
     )
     .limit(5)
 )
 ```
 ```python
-df.sql()
-```
-```sql
+>>> df.sql()
 WITH `t94228` AS (
   SELECT
     `natality`.`year` AS `year`,
@@ -147,7 +115,7 @@ WITH `t94228` AS (
 )
 SELECT
   `t39093`.`year` AS `year`,
-  FORMAT('%\'.0f', ROUND(CAST(`t39093`.`num_single_child_families` AS FLOAT64), 0)) AS `
+  FORMAT('%\'.0f', ROUND(CAST(`t39093`.`num_single_child_families` AS FLOAT64), 0)) AS `new families single child`,
   FORMAT('%\'.2f', ROUND(CAST((((`t39093`.`num_single_child_families` - `t39093`.`last_year_num_single_child_families`) / `t39093`.`last_year_num_single_child_families`) * 100) AS FLOAT64), 2)) AS `percent change`
 FROM `t39093` AS `t39093`
 ORDER BY
@@ -155,16 +123,16 @@ ORDER BY
 LIMIT 5
 ```
 ```python
-df.show()
-
-
-
-
-
-
-
-
-
-| 1975 | 868,985 | 10.92 |
-+------+-------------------------------------+----------------+
+>>> df.show()
++------+---------------------------+----------------+
+| year | new families single child | percent change |
++------+---------------------------+----------------+
+| 1989 |         1,650,246         |     25.02      |
+| 1974 |          783,448          |     14.49      |
+| 1977 |         1,057,379         |     11.38      |
+| 1985 |         1,308,476         |     11.15      |
+| 1975 |          868,985          |     10.92      |
++------+---------------------------+----------------+
 ```
+
+
--- sqlframe-0.1.dev3/README.md
+++ sqlframe-1.0.0/README.md
@@ -1,21 +1,17 @@
 <div align="center">
-  <img src="docs/images/sqlframe_logo.png" alt="SQLFrame Logo" width="400"/>
+  <img src="https://sqlframe.readthedocs.io/en/latest/docs/images/sqlframe_logo.png" alt="SQLFrame Logo" width="400"/>
 </div>
 
-![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/eakmanrq/sqlframe/main.workflow.yaml)
-
 SQLFrame implements the PySpark DataFrame API in order to enable running transformation pipelines directly on database engines - no Spark clusters or dependencies required.
 
-
-
-SQLFrame currently supports the following engines:
+SQLFrame currently supports the following engines (many more in development):
 
-* [BigQuery](
-* [DuckDB](
-* [Postgres](
+* [BigQuery](https://sqlframe.readthedocs.io/en/latest/bigquery/)
+* [DuckDB](https://sqlframe.readthedocs.io/en/latest/duckdb)
+* [Postgres](https://sqlframe.readthedocs.io/en/latest/postgres)
 
 SQLFrame also has a "Standalone" session that be used to generate SQL without any connection to a database engine.
-* [Standalone](
+* [Standalone](https://sqlframe.readthedocs.io/en/latest/standalone)
 
 SQLFrame is great for:
@@ -47,7 +43,7 @@ from sqlframe.bigquery import Window
 
 session = BigQuerySession()
 table_path = "bigquery-public-data.samples.natality"
-#
+# Top 5 years with the greatest year-over-year % change in new families with single child
 df = (
     session.table(table_path)
     .where(F.col("ever_born") == 1)
@@ -64,17 +60,15 @@ df = (
     )
     .orderBy(F.abs(F.col("percent_change")).desc())
     .select(
-        F.col("year").alias("
-        F.format_number("num_single_child_families", 0).alias("
+        F.col("year").alias("year"),
+        F.format_number("num_single_child_families", 0).alias("new families single child"),
         F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
     )
     .limit(5)
 )
 ```
 ```python
-df.sql()
-```
-```sql
+>>> df.sql()
 WITH `t94228` AS (
   SELECT
     `natality`.`year` AS `year`,
@@ -93,7 +87,7 @@ WITH `t94228` AS (
 )
 SELECT
   `t39093`.`year` AS `year`,
-  FORMAT('%\'.0f', ROUND(CAST(`t39093`.`num_single_child_families` AS FLOAT64), 0)) AS `
+  FORMAT('%\'.0f', ROUND(CAST(`t39093`.`num_single_child_families` AS FLOAT64), 0)) AS `new families single child`,
   FORMAT('%\'.2f', ROUND(CAST((((`t39093`.`num_single_child_families` - `t39093`.`last_year_num_single_child_families`) / `t39093`.`last_year_num_single_child_families`) * 100) AS FLOAT64), 2)) AS `percent change`
 FROM `t39093` AS `t39093`
 ORDER BY
@@ -101,16 +95,14 @@ ORDER BY
 LIMIT 5
 ```
 ```python
-df.show()
-
-
-
-
-
-
-
-
-
-| 1975 | 868,985 | 10.92 |
-+------+-------------------------------------+----------------+
+>>> df.show()
++------+---------------------------+----------------+
+| year | new families single child | percent change |
++------+---------------------------+----------------+
+| 1989 |         1,650,246         |     25.02      |
+| 1974 |          783,448          |     14.49      |
+| 1977 |         1,057,379         |     11.38      |
+| 1985 |         1,308,476         |     11.15      |
+| 1975 |          868,985          |     10.92      |
++------+---------------------------+----------------+
 ```
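The `df.sql()` / `df.show()` pair in the README is the core workflow: build the DataFrame lazily, then either emit SQL or execute it. A minimal sketch of the SQL-only path using the StandaloneSession named in the blog diff further down; the no-argument constructor and calling `session.table` without first registering a schema are assumptions, not documented behavior:

```python
# A minimal sketch, assuming StandaloneSession needs no constructor
# arguments, of generating SQL with no database connection at all.
from sqlframe.standalone import StandaloneSession
from sqlframe.standalone import functions as F

session = StandaloneSession()
df = (
    session.table("natality")
    .where(F.col("ever_born") == 1)
    .groupBy("year")
    .agg(F.count("*").alias("num_single_child_families"))
)
print(df.sql())  # prints the generated SQL instead of executing anything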
--- sqlframe-0.1.dev3/blogs/sqlframe_universal_dataframe_api.md
+++ sqlframe-1.0.0/blogs/sqlframe_universal_dataframe_api.md
@@ -9,7 +9,8 @@ SQL is the universal language that unites all data professionals, and it enables
 Despite its strengths, SQL often seems ill-suited for maintaining data pipelines.
 The language lacks support for abstracting common operations or unit testing specific segments of code, leading many to use Jinja as a makeshift solution.
 Jinja SQL is to SQL what Pig Latin is to English - can be fun in small doses but impossible to understand at scale.
-
+Furthermore, the repetitive nature of SQL, where columns must be specified repeatedly across operations, often leads to fatigue among data professionals.
+This results in data professionals responding to the siren song of `SELECT *`, only to be later found drowning in the sea of non-determinism.
 
 This has put data professionals in a tough spot: Do you write your pipelines in SQL to favor accessibility or Python to favor maintainability?
 Well, starting today, you no longer have to choose.
@@ -25,7 +26,7 @@ You can finally have your cake and eat it too.
   <img src="../docs/images/sqlframe_logo.png" alt="SQLFrame Logo" width="800"/>
 </div>
 
-SQLFrame revolutionizes how data professionals interact with SQL and PySpark DataFrames.
+[SQLFrame](https://github.com/eakmanrq/sqlframe) revolutionizes how data professionals interact with SQL and PySpark DataFrames.
 Unlike traditional PySpark, SQLFrame converts DataFrame operations directly into SQL, enabling real-time SQL script generation during development.
 Here's how it works:
 
@@ -133,7 +134,7 @@ Therefore not only does SQLFrame make your DataFrame pipeline more accessible, i
   <img src="images/you_get_pyspark_api.gif" alt="There is more" width="800"/>
 </div>
 
-SQLFrame currently supports BigQuery, DuckDB, and
-For those interested in experimenting with SQL generation for other engines, the
+SQLFrame currently supports [BigQuery](https://sqlframe.readthedocs.io/en/stable/bigquery/), [DuckDB](https://sqlframe.readthedocs.io/en/stable/duckdb/), and [Postgres](https://sqlframe.readthedocs.io/en/stable/postgres/), with Redshift, Snowflake, Spark, and Trino in development.
+For those interested in experimenting with SQL generation for other engines, the [StandaloneSession](https://sqlframe.readthedocs.io/en/stable/standalone/) provides a flexible testing ground.
 
-
+Checkout the [README](https://github.com/eakmanrq/sqlframe) for more information on how to get started with SQLFrame!
--- sqlframe-0.1.dev3/docs/bigquery.md
+++ sqlframe-1.0.0/docs/bigquery.md
@@ -60,27 +60,34 @@ print(session.catalog.listColumns(table_path))
     .where(F.col("ever_born") == 1)
     .groupBy("year")
     .agg(F.count("*").alias("num_single_child_families"))
-    .withColumn(
-
+    .withColumn(
+        "last_year_num_single_child_families",
+        F.lag(F.col("num_single_child_families"), 1).over(Window.orderBy("year"))
+    )
+    .withColumn(
+        "percent_change",
+        (F.col("num_single_child_families") - F.col("last_year_num_single_child_families"))
+        / F.col("last_year_num_single_child_families")
+    )
     .orderBy(F.abs(F.col("percent_change")).desc())
     .select(
-        F.col("year").alias("
-        F.format_number("num_single_child_families", 0).alias("
+        F.col("year").alias("year"),
+        F.format_number("num_single_child_families", 0).alias("new families single child"),
         F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
     )
     .limit(5)
     .show()
 )
 """
-
-| year |
-
-| 1989 |
-| 1974 |
-| 1977 |
-| 1985 |
-| 1975 |
-
++------+---------------------------+----------------+
+| year | new families single child | percent change |
++------+---------------------------+----------------+
+| 1989 |         1,650,246         |     25.02      |
+| 1974 |          783,448          |     14.49      |
+| 1977 |         1,057,379         |     11.38      |
+| 1985 |         1,308,476         |     11.15      |
+| 1975 |          868,985          |     10.92      |
++------+---------------------------+----------------+
 """
 ```
 
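The lag/percent-change logic is easy to sanity-check by hand: the 1975 row's 10.92 follows directly from the 1974 and 1975 counts in the output table. A plain-Python restatement of what the `F.lag` window plus the `percent_change` column compute:

```python
# Reproducing the 1975 row of the output table from the two yearly counts
# shown above: (868,985 - 783,448) / 783,448 * 100 = 10.92 (rounded).
counts = {1974: 783_448, 1975: 868_985}
years = sorted(counts)
for prev, cur in zip(years, years[1:]):
    pct = (counts[cur] - counts[prev]) / counts[prev] * 100
    print(cur, f"{pct:.2f}")  # 1975 10.92
```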
--- /dev/null
+++ sqlframe-1.0.0/renovate.json
@@ -0,0 +1,17 @@
+{
+  "$schema": "https://docs.renovatebot.com/renovate-schema.json",
+  "rangeStrategy": "widen",
+  "extends": [
+    "config:recommended"
+  ],
+  "ignoreDeps": [
+    "mkdocs-include-markdown-plugin",
+    "mkdocs",
+    "mkdocs-material",
+    "mkdocs-material-extensions",
+    "mkdocs-include-markdown-plugin"
+  ],
+  "ignorePaths": [
+    "docs/**"
+  ]
+}
--- sqlframe-0.1.dev3/setup.py
+++ sqlframe-1.0.0/setup.py
@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
 
 setup(
     name="sqlframe",
-    description="PySpark
+    description="Taking the Spark out of PySpark by converting to SQL",
     long_description=open("README.md").read(),
     long_description_content_type="text/markdown",
     url="https://github.com/eakmanrq/sqlframe",
@@ -19,49 +19,57 @@ setup(
     setup_requires=["setuptools_scm"],
     python_requires=">=3.8",
    install_requires=[
-        "prettytable",
-        "sqlglot",
+        "prettytable<3.11.0",
+        "sqlglot>=23.14.0,<23.18",
     ],
     extras_require={
         "bigquery": [
-            "google-cloud-bigquery[pandas]",
-            "google-cloud-bigquery-storage",
+            "google-cloud-bigquery[pandas]>=3,<4",
+            "google-cloud-bigquery-storage>=2,<3",
+            "pandas>=2,<3",
         ],
         "dev": [
-            "duckdb",
+            "duckdb>=0.9,<0.11",
+            "mypy>=1.10.0,<1.11",
+            "pandas>=2,<3",
+            "pandas-stubs>=2,<3",
+            "psycopg>=3.1,<4",
+            "pyarrow>=10,<17",
+            "pyspark>=2,<3.6",
+            "pytest>=8.2.0,<8.3",
+            "pytest-postgresql>=6,<7",
+            "pytest-xdist>=3.6,<3.7",
+            "pre-commit>=3.5;python_version=='3.8'",
+            "pre-commit>=3.7,<3.8;python_version>='3.9'",
+            "ruff>=0.4.4,<0.5",
+            "typing_extensions>=4.11,<5",
+            "types-psycopg2>=2.9,<3",
+        ],
+        "docs": [
             "mkdocs==1.4.2",
             "mkdocs-include-markdown-plugin==4.0.3",
             "mkdocs-material==9.0.5",
             "mkdocs-material-extensions==1.1.1",
-            "mypy",
-            "pandas",
             "pymdown-extensions",
-            "psycopg",
-            "pyarrow",
-            "pyspark",
-            "pytest",
-            "pytest-postgresql",
-            "pytest-xdist",
-            "pre-commit",
-            "ruff",
-            "typing_extensions",
-            "types-psycopg2",
         ],
         "duckdb": [
-            "duckdb",
-            "pandas",
+            "duckdb>=0.9,<0.11",
+            "pandas>=2,<3",
         ],
         "postgres": [
-            "psycopg2",
+            "pandas>=2,<3",
+            "psycopg2>=2.8,<3",
         ],
         "redshift": [
-            "redshift_connector",
+            "pandas>=2,<3",
+            "redshift_connector>=2.1.1,<2.2.0",
         ],
         "snowflake": [
-            "snowflake-connector-python[pandas,secure-local-storage]",
+            "pandas>=2,<3",
+            "snowflake-connector-python[pandas,secure-local-storage]>=3.10.0,<3.11",
         ],
         "spark": [
-            "pyspark",
+            "pyspark>=2,<3.6",
         ],
     },
     classifiers=[
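The split `pre-commit` pins above use PEP 508 environment markers, so pip resolves a different range depending on the Python version. A small sketch of how such markers evaluate, using the `packaging` library that pip relies on internally:

```python
# Evaluating the environment markers used in the pre-commit pins above.
from packaging.markers import Marker

print(Marker("python_version == '3.8'").evaluate())  # True only on Python 3.8
print(Marker("python_version >= '3.9'").evaluate())  # True on 3.9 and newer
```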
--- sqlframe-0.1.dev3/sqlframe/base/dataframe.py
+++ sqlframe-1.0.0/sqlframe/base/dataframe.py
@@ -662,7 +662,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         | 16|  Bob|    85|
         +---+-----+------+
         """
-        return self.join.__wrapped__(self, other, how="cross")
+        return self.join.__wrapped__(self, other, how="cross")  # type: ignore
 
     @operation(Operation.FROM)
     def join(
@@ -769,7 +769,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         new_df = self.copy(expression=join_expression)
         new_df.pending_join_hints.extend(self.pending_join_hints)
         new_df.pending_hints.extend(other_df.pending_hints)
-        new_df = new_df.select.__wrapped__(new_df, *select_column_names)
+        new_df = new_df.select.__wrapped__(new_df, *select_column_names)  # type: ignore
         return new_df
 
     @operation(Operation.ORDER_BY)
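Both `# type: ignore` additions sit on the `__wrapped__` pattern: `functools.wraps` stores the undecorated function on the wrapper, which lets internals like `crossJoin` call the raw method without re-triggering the `@operation` bookkeeping, but mypy cannot see that attribute on a bound method. A minimal sketch of the mechanism; the one-argument `operation` decorator and its body are placeholders, not the real implementation:

```python
import functools

def operation(func):
    @functools.wraps(func)  # wraps() exposes the original as wrapper.__wrapped__
    def wrapper(self, *args, **kwargs):
        # ... operation-ordering bookkeeping would happen here ...
        return func(self, *args, **kwargs)
    return wrapper

class DataFrame:
    @operation
    def join(self, other, how="inner"):
        return f"join({how})"

    def crossJoin(self, other):
        # Call the undecorated join, skipping the wrapper's bookkeeping;
        # mypy cannot type __wrapped__ here, hence the ignores above.
        return self.join.__wrapped__(self, other, how="cross")

print(DataFrame().crossJoin(None))  # join(cross)
```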
--- sqlframe-0.1.dev3/sqlframe/base/decorators.py
+++ sqlframe-1.0.0/sqlframe/base/decorators.py
@@ -11,7 +11,7 @@ if t.TYPE_CHECKING:
     from sqlframe.base.catalog import _BaseCatalog
 
 
-def normalize(normalize_kwargs: t.List[str]):
+def normalize(normalize_kwargs: t.List[str]) -> t.Callable[[t.Callable], t.Callable]:
     """
     Decorator used around DataFrame methods to indicate what type of operation is being performed from the
     ordered Operation enums. This is used to determine which operations should be performed on a CTE vs.
@@ -23,9 +23,9 @@ def normalize(normalize_kwargs: t.List[str]):
     in cases where there is overlap in names.
     """
 
-    def decorator(func: t.Callable):
+    def decorator(func: t.Callable) -> t.Callable:
         @functools.wraps(func)
-        def wrapper(self: _BaseCatalog, *args, **kwargs):
+        def wrapper(self: _BaseCatalog, *args, **kwargs) -> _BaseCatalog:
             kwargs.update(dict(zip(func.__code__.co_varnames[1:], args)))
             for kwarg in normalize_kwargs:
                 if kwarg in kwargs:
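The `co_varnames` zip in the wrapper folds positional arguments into `kwargs` by parameter name, so the normalization loop only has to look in one place. A standalone illustration; the function name and parameters are made up:

```python
# How the co_varnames zip maps positional args onto parameter names.
def list_tables(self, db_name=None, pattern=None):
    pass

args = ("my_db",)
kwargs = {}
kwargs.update(dict(zip(list_tables.__code__.co_varnames[1:], args)))
print(kwargs)  # {'db_name': 'my_db'}
```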
@@ -43,9 +43,9 @@ def normalize(normalize_kwargs: t.List[str]):
     return decorator
 
 
-def func_metadata(unsupported_engines: t.Optional[t.Union[str, t.List[str]]] = None):
-    def _metadata(func):
-        func.unsupported_engines = ensure_list(unsupported_engines) if unsupported_engines else []
+def func_metadata(unsupported_engines: t.Optional[t.Union[str, t.List[str]]] = None) -> t.Callable:
+    def _metadata(func: t.Callable) -> t.Callable:
+        func.unsupported_engines = ensure_list(unsupported_engines) if unsupported_engines else []  # type: ignore
         return func
 
     return _metadata
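`func_metadata` works by stamping an attribute onto the function object, which is exactly what mypy rejects (plain functions are not declared to carry `unsupported_engines`), hence the new `# type: ignore`. A hypothetical usage sketch, given the `func_metadata` defined above; the decorated function name is made up:

```python
# Hypothetical use of func_metadata from the hunk above.
@func_metadata(unsupported_engines="postgres")
def some_function(col):
    return col

assert some_function.unsupported_engines == ["postgres"]
```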
--- sqlframe-0.1.dev3/sqlframe/base/mixins/readwriter_mixins.py
+++ sqlframe-1.0.0/sqlframe/base/mixins/readwriter_mixins.py
@@ -108,6 +108,9 @@ class PandasWriterMixin(_BaseDataFrameWriter, t.Generic[SESSION, DF]):
                 raise NotImplementedError("Append mode is not supported for parquet.")
             pandas_df.to_parquet(path, **kwargs)
         elif format == "json":
+            # Pandas versions are inconsistent on how to handle True/False index so we just remove it
+            # since in all versions it will not result in an index column in the output.
+            del kwargs["index"]
             kwargs["mode"] = mode
             kwargs["orient"] = "records"
             pandas_df.to_json(path, lines=True, **kwargs)
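The JSON branch ends in pandas' JSON-lines writer; dropping the `index` kwarg is safe because `orient="records"` never writes an index column. A standalone illustration of that write path (the file name is arbitrary):

```python
import pandas as pd

# orient="records" + lines=True emits one JSON object per row and never
# writes the index, which is why the "index" kwarg can simply be deleted.
df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})
df.to_json("employees.json", orient="records", lines=True)
# resulting file contents:
# {"id":1,"name":"a"}
# {"id":2,"name":"b"}
```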