sqlframe 0.1.dev3__tar.gz → 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe-1.1.0/.github/CODEOWNERS +1 -0
- sqlframe-1.1.0/.github/workflows/main.workflow.yaml +42 -0
- sqlframe-1.1.0/.github/workflows/publish.workflow.yaml +27 -0
- sqlframe-1.1.0/.readthedocs.yaml +13 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/Makefile +3 -3
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/PKG-INFO +28 -60
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/README.md +21 -29
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/blogs/sqlframe_universal_dataframe_api.md +6 -5
- {sqlframe-0.1.dev3/docs → sqlframe-1.1.0}/docs/bigquery.md +20 -13
- sqlframe-1.1.0/docs/requirements.txt +6 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/mkdocs.yml +2 -0
- sqlframe-1.1.0/pytest.ini +7 -0
- sqlframe-1.1.0/renovate.json +17 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/setup.py +32 -24
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/_version.py +2 -2
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/catalog.py +2 -1
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/dataframe.py +9 -6
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/decorators.py +6 -6
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/mixins/readwriter_mixins.py +3 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/operations.py +7 -7
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/session.py +7 -15
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/transforms.py +3 -1
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/util.py +6 -3
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/bigquery/catalog.py +3 -2
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/bigquery/session.py +3 -2
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/duckdb/readwriter.py +18 -6
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe.egg-info/PKG-INFO +28 -60
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe.egg-info/SOURCES.txt +11 -3
- sqlframe-1.1.0/sqlframe.egg-info/requires.txt +54 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/common_fixtures.py +13 -10
- sqlframe-1.1.0/tests/conftest.py +29 -0
- sqlframe-1.1.0/tests/fixtures/employee_extra_line.csv +7 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/bigquery/test_bigquery_session.py +1 -1
- sqlframe-1.1.0/tests/integration/engines/duck/test_duckdb_reader.py +57 -0
- {sqlframe-0.1.dev3/tests/integration/engines/duckdb → sqlframe-1.1.0/tests/integration/engines/duck}/test_duckdb_session.py +1 -1
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/postgres/test_postgres_session.py +2 -2
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/redshift/test_redshift_session.py +2 -2
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/test_engine_session.py +9 -2
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/test_int_dataframe.py +11 -0
- sqlframe-1.1.0/tests/unit/standalone/test_dataframe.py +57 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/unit/standalone/test_dataframe_writer.py +6 -6
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/unit/standalone/test_session.py +1 -1
- sqlframe-0.1.dev3/pytest.ini +0 -4
- sqlframe-0.1.dev3/sqlframe.egg-info/requires.txt +0 -42
- sqlframe-0.1.dev3/tests/conftest.py +0 -11
- sqlframe-0.1.dev3/tests/unit/standalone/test_dataframe.py +0 -46
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/.gitignore +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/.pre-commit-config.yaml +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/LICENSE +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/blogs/images/but_wait_theres_more.gif +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/blogs/images/cake.gif +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/blogs/images/you_get_pyspark_api.gif +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0/docs}/docs/bigquery.md +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/docs/docs/duckdb.md +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/docs/docs/images/SF.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/docs/docs/images/favicon.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/docs/docs/images/favicon_old.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/docs/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/docs/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/docs/docs/postgres.md +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/docs/duckdb.md +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/docs/images/SF.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/docs/images/favicon.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/docs/images/favicon_old.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/docs/index.md +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/docs/postgres.md +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/docs/standalone.md +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/docs/stylesheets/extra.css +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/setup.cfg +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/LICENSE +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/_typing.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/column.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/exceptions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/function_alternatives.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/group.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/mixins/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/mixins/catalog_mixins.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/normalize.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/readerwriter.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/base/window.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/bigquery/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/bigquery/column.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/bigquery/dataframe.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/bigquery/functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/bigquery/functions.pyi +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/bigquery/group.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/bigquery/readwriter.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/bigquery/types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/bigquery/window.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/duckdb/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/duckdb/catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/duckdb/column.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/duckdb/dataframe.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/duckdb/functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/duckdb/functions.pyi +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/duckdb/group.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/duckdb/session.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/duckdb/types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/duckdb/window.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/postgres/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/postgres/catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/postgres/column.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/postgres/dataframe.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/postgres/functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/postgres/functions.pyi +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/postgres/group.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/postgres/readwriter.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/postgres/session.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/postgres/types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/postgres/window.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/redshift/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/redshift/catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/redshift/column.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/redshift/dataframe.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/redshift/functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/redshift/group.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/redshift/readwriter.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/redshift/session.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/redshift/types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/redshift/window.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/snowflake/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/snowflake/catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/snowflake/column.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/snowflake/dataframe.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/snowflake/functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/snowflake/group.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/snowflake/readwriter.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/snowflake/session.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/snowflake/types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/snowflake/window.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/spark/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/spark/catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/spark/column.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/spark/dataframe.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/spark/functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/spark/group.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/spark/readwriter.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/spark/session.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/spark/types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/spark/window.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/standalone/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/standalone/catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/standalone/column.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/standalone/dataframe.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/standalone/functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/standalone/group.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/standalone/readwriter.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/standalone/session.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/standalone/types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe/standalone/window.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe.egg-info/dependency_links.txt +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/sqlframe.egg-info/top_level.txt +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/fixtures/employee.csv +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/fixtures/employee.json +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/fixtures/employee.parquet +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/bigquery/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/bigquery/test_bigquery_catalog.py +0 -0
- {sqlframe-0.1.dev3/tests/integration/engines/duckdb → sqlframe-1.1.0/tests/integration/engines/duck}/__init__.py +0 -0
- {sqlframe-0.1.dev3/tests/integration/engines/duckdb → sqlframe-1.1.0/tests/integration/engines/duck}/test_duckdb_catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/postgres/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/postgres/test_postgres_catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/redshift/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/redshift/test_redshift_catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/snowflake/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/snowflake/test_snowflake_catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/snowflake/test_snowflake_session.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/spark/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/spark/test_spark_catalog.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/test_engine_dataframe.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/test_engine_reader.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/test_engine_writer.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/engines/test_int_functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/fixtures.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/test_int_dataframe_stats.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/test_int_grouped_data.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/integration/test_int_session.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/unit/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/unit/standalone/__init__.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/unit/standalone/fixtures.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/unit/standalone/test_column.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/unit/standalone/test_functions.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/unit/standalone/test_session_case_sensitivity.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/unit/standalone/test_types.py +0 -0
- {sqlframe-0.1.dev3 → sqlframe-1.1.0}/tests/unit/standalone/test_window.py +0 -0
--- /dev/null
+++ sqlframe-1.1.0/.github/CODEOWNERS
@@ -0,0 +1 @@
+* @eakmanrq
--- /dev/null
+++ sqlframe-1.1.0/.github/workflows/main.workflow.yaml
@@ -0,0 +1,42 @@
+name: SQLFrame
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'sqlframe/**'
+      - 'tests/**'
+      - 'Makefile'
+      - 'setup.py'
+  pull_request:
+    types:
+      - synchronize
+      - opened
+    paths:
+      - 'sqlframe/**'
+      - 'tests/**'
+      - 'Makefile'
+      - 'setup.py'
+jobs:
+  run-tests:
+    runs-on: ubuntu-latest
+    env:
+      PYTEST_XDIST_AUTO_NUM_WORKERS: 4
+    strategy:
+      matrix:
+        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: make install-dev
+      - name: Run Style
+        run: make style
+      - name: Setup Postgres
+        uses: ikalnytskyi/action-setup-postgres@v6
+      - name: Run tests
+        run: make local-test
--- /dev/null
+++ sqlframe-1.1.0/.github/workflows/publish.workflow.yaml
@@ -0,0 +1,27 @@
+name: Publish
+on:
+  push:
+    tags:
+      - 'v[0-9]+.[0-9]+.[0-9]+'
+permissions:
+  contents: write
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    env:
+      TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
+      TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Publish
+        run: make publish
+      - name: Create release
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          tag: ${{ github.ref_name }}
+        run: |
+          gh release create "$tag" \
+            --repo="$GITHUB_REPOSITORY" \
+            --title="SQLFrame ${tag#v}" \
+            --generate-notes
--- sqlframe-0.1.dev3/Makefile
+++ sqlframe-1.1.0/Makefile
@@ -1,5 +1,5 @@
 install-dev:
-	pip install -e ".[dev]"
+	pip install -e ".[dev,duckdb,postgres,redshift,bigquery,snowflake,spark]"
 
 install-pre-commit:
 	pre-commit install
@@ -8,10 +8,10 @@ slow-test:
 	pytest -n auto tests
 
 fast-test:
-	pytest -n auto
+	pytest -n auto -m "fast"
 
 local-test:
-	pytest -n auto -m "local"
+	pytest -n auto -m "fast or local"
 
 bigquery-test:
 	pytest -n auto -m "bigquery"
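The `fast` and `local` selections above rely on pytest markers, presumably declared in the new pytest.ini (listed in the file summary but not shown in this excerpt). A minimal sketch of how tests would opt into those markers, assuming the marker names match the `-m` expressions in the Makefile:

```python
# Sketch only: marker names are assumed from the Makefile targets above; the
# real marker declarations live in the new pytest.ini.
import pytest

@pytest.mark.fast
def test_pure_sql_generation():
    # Selected by both `make fast-test` (-m "fast") and `make local-test`
    # (-m "fast or local"), which is what CI runs.
    assert 1 + 1 == 2

@pytest.mark.local
def test_against_local_engine():
    # Selected by `make local-test` but not by `make fast-test`.
    assert True
```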
--- sqlframe-0.1.dev3/PKG-INFO
+++ sqlframe-1.1.0/PKG-INFO
@@ -1,11 +1,12 @@
 Metadata-Version: 2.1
 Name: sqlframe
-Version: 0.1.dev3
-Summary: PySpark
+Version: 1.1.0
+Summary: Taking the Spark out of PySpark by converting to SQL
 Home-page: https://github.com/eakmanrq/sqlframe
 Author: Ryan Eakman
 Author-email: eakmanrq@gmail.com
 License: MIT
+Platform: UNKNOWN
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Science/Research
@@ -15,61 +16,30 @@ Classifier: Programming Language :: SQL
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: prettytable
-Requires-Dist: sqlglot
 Provides-Extra: bigquery
-Requires-Dist: google-cloud-bigquery[pandas]; extra == "bigquery"
-Requires-Dist: google-cloud-bigquery-storage; extra == "bigquery"
 Provides-Extra: dev
-Requires-Dist: duckdb; extra == "dev"
-Requires-Dist: mkdocs==1.4.2; extra == "dev"
-Requires-Dist: mkdocs-include-markdown-plugin==4.0.3; extra == "dev"
-Requires-Dist: mkdocs-material==9.0.5; extra == "dev"
-Requires-Dist: mkdocs-material-extensions==1.1.1; extra == "dev"
-Requires-Dist: mypy; extra == "dev"
-Requires-Dist: pandas; extra == "dev"
-Requires-Dist: pymdown-extensions; extra == "dev"
-Requires-Dist: psycopg; extra == "dev"
-Requires-Dist: pyarrow; extra == "dev"
-Requires-Dist: pyspark; extra == "dev"
-Requires-Dist: pytest; extra == "dev"
-Requires-Dist: pytest-postgresql; extra == "dev"
-Requires-Dist: pytest-xdist; extra == "dev"
-Requires-Dist: pre-commit; extra == "dev"
-Requires-Dist: ruff; extra == "dev"
-Requires-Dist: typing_extensions; extra == "dev"
-Requires-Dist: types-psycopg2; extra == "dev"
+Provides-Extra: docs
 Provides-Extra: duckdb
-Requires-Dist: duckdb; extra == "duckdb"
-Requires-Dist: pandas; extra == "duckdb"
 Provides-Extra: postgres
-Requires-Dist: psycopg2; extra == "postgres"
 Provides-Extra: redshift
-Requires-Dist: redshift_connector; extra == "redshift"
 Provides-Extra: snowflake
-Requires-Dist: snowflake-connector-python[pandas,secure-local-storage]; extra == "snowflake"
 Provides-Extra: spark
-Requires-Dist: pyspark; extra == "spark"
+License-File: LICENSE
 
 <div align="center">
-<img src="docs/images/sqlframe_logo.png" alt="SQLFrame Logo" width="400"/>
+<img src="https://sqlframe.readthedocs.io/en/latest/docs/images/sqlframe_logo.png" alt="SQLFrame Logo" width="400"/>
 </div>
 
-![PyPI Version](https://img.shields.io/pypi/v/sqlframe)
-
 SQLFrame implements the PySpark DataFrame API in order to enable running transformation pipelines directly on database engines - no Spark clusters or dependencies required.
 
-SQLFrame currently supports the following engines:
+SQLFrame currently supports the following engines (many more in development):
 
-
-
-* [BigQuery](docs/bigquery.md)
-* [DuckDB](docs/duckdb.md)
-* [Postgres](docs/postgres.md)
+* [BigQuery](https://sqlframe.readthedocs.io/en/latest/bigquery/)
+* [DuckDB](https://sqlframe.readthedocs.io/en/latest/duckdb)
+* [Postgres](https://sqlframe.readthedocs.io/en/latest/postgres)
 
 SQLFrame also has a "Standalone" session that be used to generate SQL without any connection to a database engine.
-* [Standalone](docs/standalone.md)
+* [Standalone](https://sqlframe.readthedocs.io/en/latest/standalone)
 
 SQLFrame is great for:
 
@@ -101,7 +71,7 @@ from sqlframe.bigquery import Window
 
 session = BigQuerySession()
 table_path = "bigquery-public-data.samples.natality"
-#
+# Top 5 years with the greatest year-over-year % change in new families with single child
 df = (
     session.table(table_path)
     .where(F.col("ever_born") == 1)
@@ -118,17 +88,15 @@ df = (
     )
     .orderBy(F.abs(F.col("percent_change")).desc())
     .select(
-        F.col("year").alias("
-        F.format_number("num_single_child_families", 0).alias("
+        F.col("year").alias("year"),
+        F.format_number("num_single_child_families", 0).alias("new families single child"),
         F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
     )
     .limit(5)
 )
 ```
 ```python
-df.sql()
-```
-```sql
+>>> df.sql()
 WITH `t94228` AS (
   SELECT
     `natality`.`year` AS `year`,
@@ -147,7 +115,7 @@ WITH `t94228` AS (
 )
 SELECT
   `t39093`.`year` AS `year`,
-  FORMAT('%\'.0f', ROUND(CAST(`t39093`.`num_single_child_families` AS FLOAT64), 0)) AS `
+  FORMAT('%\'.0f', ROUND(CAST(`t39093`.`num_single_child_families` AS FLOAT64), 0)) AS `new families single child`,
   FORMAT('%\'.2f', ROUND(CAST((((`t39093`.`num_single_child_families` - `t39093`.`last_year_num_single_child_families`) / `t39093`.`last_year_num_single_child_families`) * 100) AS FLOAT64), 2)) AS `percent change`
 FROM `t39093` AS `t39093`
 ORDER BY
@@ -155,16 +123,16 @@ ORDER BY
 LIMIT 5
 ```
 ```python
-df.show()
-
-
-
-
-
-
-
-
-
-| 1975 | 868,985 | 10.92 |
-+------+-------------------------------------+----------------+
+>>> df.show()
++------+---------------------------+----------------+
+| year | new families single child | percent change |
++------+---------------------------+----------------+
+| 1989 | 1,650,246 | 25.02 |
+| 1974 | 783,448 | 14.49 |
+| 1977 | 1,057,379 | 11.38 |
+| 1985 | 1,308,476 | 11.15 |
+| 1975 | 868,985 | 10.92 |
++------+---------------------------+----------------+
 ```
+
+
--- sqlframe-0.1.dev3/README.md
+++ sqlframe-1.1.0/README.md
@@ -1,21 +1,17 @@
 <div align="center">
-<img src="docs/images/sqlframe_logo.png" alt="SQLFrame Logo" width="400"/>
+<img src="https://sqlframe.readthedocs.io/en/latest/docs/images/sqlframe_logo.png" alt="SQLFrame Logo" width="400"/>
 </div>
 
-![PyPI Version](https://img.shields.io/pypi/v/sqlframe)
-
 SQLFrame implements the PySpark DataFrame API in order to enable running transformation pipelines directly on database engines - no Spark clusters or dependencies required.
 
-
-
-SQLFrame currently supports the following engines:
+SQLFrame currently supports the following engines (many more in development):
 
-* [BigQuery](docs/bigquery.md)
-* [DuckDB](docs/duckdb.md)
-* [Postgres](docs/postgres.md)
+* [BigQuery](https://sqlframe.readthedocs.io/en/latest/bigquery/)
+* [DuckDB](https://sqlframe.readthedocs.io/en/latest/duckdb)
+* [Postgres](https://sqlframe.readthedocs.io/en/latest/postgres)
 
 SQLFrame also has a "Standalone" session that be used to generate SQL without any connection to a database engine.
-* [Standalone](docs/standalone.md)
+* [Standalone](https://sqlframe.readthedocs.io/en/latest/standalone)
 
 SQLFrame is great for:
 
@@ -47,7 +43,7 @@ from sqlframe.bigquery import Window
 
 session = BigQuerySession()
 table_path = "bigquery-public-data.samples.natality"
-#
+# Top 5 years with the greatest year-over-year % change in new families with single child
 df = (
     session.table(table_path)
     .where(F.col("ever_born") == 1)
@@ -64,17 +60,15 @@ df = (
     )
     .orderBy(F.abs(F.col("percent_change")).desc())
     .select(
-        F.col("year").alias("
-        F.format_number("num_single_child_families", 0).alias("
+        F.col("year").alias("year"),
+        F.format_number("num_single_child_families", 0).alias("new families single child"),
         F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
     )
     .limit(5)
 )
 ```
 ```python
-df.sql()
-```
-```sql
+>>> df.sql()
 WITH `t94228` AS (
   SELECT
     `natality`.`year` AS `year`,
@@ -93,7 +87,7 @@ WITH `t94228` AS (
 )
 SELECT
   `t39093`.`year` AS `year`,
-  FORMAT('%\'.0f', ROUND(CAST(`t39093`.`num_single_child_families` AS FLOAT64), 0)) AS `
+  FORMAT('%\'.0f', ROUND(CAST(`t39093`.`num_single_child_families` AS FLOAT64), 0)) AS `new families single child`,
   FORMAT('%\'.2f', ROUND(CAST((((`t39093`.`num_single_child_families` - `t39093`.`last_year_num_single_child_families`) / `t39093`.`last_year_num_single_child_families`) * 100) AS FLOAT64), 2)) AS `percent change`
 FROM `t39093` AS `t39093`
 ORDER BY
@@ -101,16 +95,14 @@ ORDER BY
 LIMIT 5
 ```
 ```python
-df.show()
-
-
-
-
-
-
-
-
-
-| 1975 | 868,985 | 10.92 |
-+------+-------------------------------------+----------------+
+>>> df.show()
++------+---------------------------+----------------+
+| year | new families single child | percent change |
++------+---------------------------+----------------+
+| 1989 | 1,650,246 | 25.02 |
+| 1974 | 783,448 | 14.49 |
+| 1977 | 1,057,379 | 11.38 |
+| 1985 | 1,308,476 | 11.15 |
+| 1975 | 868,985 | 10.92 |
++------+---------------------------+----------------+
 ```
--- sqlframe-0.1.dev3/blogs/sqlframe_universal_dataframe_api.md
+++ sqlframe-1.1.0/blogs/sqlframe_universal_dataframe_api.md
@@ -9,7 +9,8 @@ SQL is the universal language that unites all data professionals, and it enables
 Despite its strengths, SQL often seems ill-suited for maintaining data pipelines.
 The language lacks support for abstracting common operations or unit testing specific segments of code, leading many to use Jinja as a makeshift solution.
 Jinja SQL is to SQL what Pig Latin is to English - can be fun in small doses but impossible to understand at scale.
-
+Furthermore, the repetitive nature of SQL, where columns must be specified repeatedly across operations, often leads to fatigue among data professionals.
+This results in data professionals responding to the siren song of `SELECT *`, only to be later found drowning in the sea of non-determinism.
 
 This has put data professionals in a tough spot: Do you write your pipelines in SQL to favor accessibility or Python to favor maintainability?
 Well, starting today, you no longer have to choose.
@@ -25,7 +26,7 @@ You can finally have your cake and eat it too.
 <img src="../docs/images/sqlframe_logo.png" alt="SQLFrame Logo" width="800"/>
 </div>
 
-SQLFrame revolutionizes how data professionals interact with SQL and PySpark DataFrames.
+[SQLFrame](https://github.com/eakmanrq/sqlframe) revolutionizes how data professionals interact with SQL and PySpark DataFrames.
 Unlike traditional PySpark, SQLFrame converts DataFrame operations directly into SQL, enabling real-time SQL script generation during development.
 Here's how it works:
 
@@ -133,7 +134,7 @@ Therefore not only does SQLFrame make your DataFrame pipeline more accessible, i
 <img src="images/you_get_pyspark_api.gif" alt="There is more" width="800"/>
 </div>
 
-SQLFrame currently supports BigQuery, DuckDB, and
-For those interested in experimenting with SQL generation for other engines, the
+SQLFrame currently supports [BigQuery](https://sqlframe.readthedocs.io/en/stable/bigquery/), [DuckDB](https://sqlframe.readthedocs.io/en/stable/duckdb/), and [Postgres](https://sqlframe.readthedocs.io/en/stable/postgres/), with Redshift, Snowflake, Spark, and Trino in development.
+For those interested in experimenting with SQL generation for other engines, the [StandaloneSession](https://sqlframe.readthedocs.io/en/stable/standalone/) provides a flexible testing ground.
 
-
+Checkout the [README](https://github.com/eakmanrq/sqlframe) for more information on how to get started with SQLFrame!
--- sqlframe-0.1.dev3/docs/docs/bigquery.md
+++ sqlframe-1.1.0/docs/bigquery.md
@@ -60,27 +60,34 @@ print(session.catalog.listColumns(table_path))
     .where(F.col("ever_born") == 1)
     .groupBy("year")
     .agg(F.count("*").alias("num_single_child_families"))
-    .withColumn(
-
+    .withColumn(
+        "last_year_num_single_child_families",
+        F.lag(F.col("num_single_child_families"), 1).over(Window.orderBy("year"))
+    )
+    .withColumn(
+        "percent_change",
+        (F.col("num_single_child_families") - F.col("last_year_num_single_child_families"))
+        / F.col("last_year_num_single_child_families")
+    )
     .orderBy(F.abs(F.col("percent_change")).desc())
     .select(
-        F.col("year").alias("
-        F.format_number("num_single_child_families", 0).alias("
+        F.col("year").alias("year"),
+        F.format_number("num_single_child_families", 0).alias("new families single child"),
         F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
     )
     .limit(5)
     .show()
 )
 """
-
-| year |
-
-| 1989 |
-| 1974 |
-| 1977 |
-| 1985 |
-
-
++------+---------------------------+----------------+
+| year | new families single child | percent change |
++------+---------------------------+----------------+
+| 1989 | 1,650,246 | 25.02 |
+| 1974 | 783,448 | 14.49 |
+| 1977 | 1,057,379 | 11.38 |
+| 1985 | 1,308,476 | 11.15 |
+| 1975 | 868,985 | 10.92 |
++------+---------------------------+----------------+
 """
 ```
 
--- /dev/null
+++ sqlframe-1.1.0/renovate.json
@@ -0,0 +1,17 @@
+{
+  "$schema": "https://docs.renovatebot.com/renovate-schema.json",
+  "rangeStrategy": "widen",
+  "extends": [
+    "config:recommended"
+  ],
+  "ignoreDeps": [
+    "mkdocs-include-markdown-plugin",
+    "mkdocs",
+    "mkdocs-material",
+    "mkdocs-material-extensions",
+    "mkdocs-include-markdown-plugin"
+  ],
+  "ignorePaths": [
+    "docs/**"
+  ]
+}
--- sqlframe-0.1.dev3/setup.py
+++ sqlframe-1.1.0/setup.py
@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
 
 setup(
     name="sqlframe",
-    description="PySpark
+    description="Taking the Spark out of PySpark by converting to SQL",
     long_description=open("README.md").read(),
     long_description_content_type="text/markdown",
     url="https://github.com/eakmanrq/sqlframe",
@@ -19,49 +19,57 @@ setup(
     setup_requires=["setuptools_scm"],
     python_requires=">=3.8",
     install_requires=[
-        "prettytable",
-        "sqlglot",
+        "prettytable<3.11.0",
+        "sqlglot>=24.0.0,<24.1",
     ],
     extras_require={
         "bigquery": [
-            "google-cloud-bigquery[pandas]",
-            "google-cloud-bigquery-storage",
+            "google-cloud-bigquery[pandas]>=3,<4",
+            "google-cloud-bigquery-storage>=2,<3",
+            "pandas>=2,<3",
         ],
         "dev": [
-            "duckdb",
+            "duckdb>=0.9,<0.11",
+            "mypy>=1.10.0,<1.11",
+            "pandas>=2,<3",
+            "pandas-stubs>=2,<3",
+            "psycopg>=3.1,<4",
+            "pyarrow>=10,<17",
+            "pyspark>=2,<3.6",
+            "pytest>=8.2.0,<8.3",
+            "pytest-postgresql>=6,<7",
+            "pytest-xdist>=3.6,<3.7",
+            "pre-commit>=3.5;python_version=='3.8'",
+            "pre-commit>=3.7,<3.8;python_version>='3.9'",
+            "ruff>=0.4.4,<0.5",
+            "typing_extensions>=4.11,<5",
+            "types-psycopg2>=2.9,<3",
+        ],
+        "docs": [
             "mkdocs==1.4.2",
             "mkdocs-include-markdown-plugin==4.0.3",
             "mkdocs-material==9.0.5",
             "mkdocs-material-extensions==1.1.1",
-            "mypy",
-            "pandas",
             "pymdown-extensions",
-            "psycopg",
-            "pyarrow",
-            "pyspark",
-            "pytest",
-            "pytest-postgresql",
-            "pytest-xdist",
-            "pre-commit",
-            "ruff",
-            "typing_extensions",
-            "types-psycopg2",
         ],
         "duckdb": [
-            "duckdb",
-            "pandas",
+            "duckdb>=0.9,<0.11",
+            "pandas>=2,<3",
         ],
         "postgres": [
-            "psycopg2",
+            "pandas>=2,<3",
+            "psycopg2>=2.8,<3",
        ],
         "redshift": [
-            "redshift_connector",
+            "pandas>=2,<3",
+            "redshift_connector>=2.1.1,<2.2.0",
         ],
         "snowflake": [
-            "snowflake-connector-python[pandas,secure-local-storage]",
+            "pandas>=2,<3",
+            "snowflake-connector-python[pandas,secure-local-storage]>=3.10.0,<3.11",
         ],
         "spark": [
-            "pyspark",
+            "pyspark>=2,<3.6",
         ],
     },
     classifiers=[
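Every dependency is now pinned to a bounded range, with Renovate's "widen" strategy (see renovate.json above) keeping those ranges current. With the pins above, the resolved dependency set can be inspected at runtime; a small sketch (assumes an installed sqlframe distribution and Python 3.8+):

```python
# Print the requirement strings recorded in the installed package metadata.
import importlib.metadata

for requirement in importlib.metadata.requires("sqlframe") or []:
    print(requirement)  # e.g. 'prettytable<3.11.0', 'sqlglot>=24.0.0,<24.1', extras markers, ...
```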
--- sqlframe-0.1.dev3/sqlframe/base/catalog.py
+++ sqlframe-1.1.0/sqlframe/base/catalog.py
@@ -8,7 +8,7 @@ from sqlglot import MappingSchema, exp
 
 from sqlframe.base.decorators import normalize
 from sqlframe.base.exceptions import TableSchemaError
-from sqlframe.base.util import to_schema
+from sqlframe.base.util import ensure_column_mapping, to_schema
 
 if t.TYPE_CHECKING:
     from sqlglot.schema import ColumnMapping
@@ -82,6 +82,7 @@ class _BaseCatalog(t.Generic[SESSION, DF]):
             raise TableSchemaError(
                 "This session does not have access to a catalog that can lookup column information. See docs for explicitly defining columns or using a session that can automatically determine this."
            )
+        column_mapping = ensure_column_mapping(column_mapping)  # type: ignore
         self._schema.add_table(table, column_mapping, dialect=self.session.input_dialect)
 
     @normalize(["dbName"])
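The new `ensure_column_mapping` call normalizes whatever schema shape the caller supplied before it reaches sqlglot's `MappingSchema.add_table`. A rough sketch of the idea; the real helper lives in `sqlframe.base.util` and its exact signature and supported input shapes may differ:

```python
# Illustrative only: normalize a user-supplied schema into the dict form that
# MappingSchema.add_table expects. The shipped helper may accept more shapes.
import typing as t

def ensure_column_mapping(mapping: t.Union[str, t.Dict[str, str]]) -> t.Dict[str, str]:
    if isinstance(mapping, str):
        # "id int, name string" -> {"id": "int", "name": "string"}
        return dict(tuple(part.strip().split(" ", 1)) for part in mapping.split(","))
    return mapping
```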
--- sqlframe-0.1.dev3/sqlframe/base/dataframe.py
+++ sqlframe-1.1.0/sqlframe/base/dataframe.py
@@ -417,7 +417,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         from sqlframe.base.session import _BaseSession
 
         value = expression.sql(dialect=_BaseSession().input_dialect).encode("utf-8")
-        hash = f"t{zlib.crc32(value)}"[:
+        hash = f"t{zlib.crc32(value)}"[:9]
         return self.session._normalize_string(hash)
 
     def _get_select_expressions(
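The slice change above caps the generated CTE name at nine characters: a "t" prefix plus up to eight digits of the CRC32 checksum of the generated SQL. A sketch of the scheme in isolation (dialect handling and session normalization omitted):

```python
# Deterministic CTE naming as in the hunk above: CRC32 of the SQL text,
# prefixed with "t" and truncated to 9 characters.
import zlib

sql = "SELECT `natality`.`year` AS `year` FROM `natality`"
name = f"t{zlib.crc32(sql.encode('utf-8'))}"[:9]
print(name)  # a stable identifier, like the `t94228` / `t39093` names in the README SQL
```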
@@ -606,8 +606,11 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         return df._convert_leaf_to_cte(sequence_id=new_sequence_id)
 
     @operation(Operation.WHERE)
-    def where(self, column: t.Union[Column, bool], **kwargs) -> Self:
-        col = self._ensure_and_normalize_col(column)
+    def where(self, column: t.Union[Column, str, bool], **kwargs) -> Self:
+        if isinstance(column, str):
+            col = sqlglot.parse_one(column, dialect=self.session.input_dialect)
+        else:
+            col = self._ensure_and_normalize_col(column)
         return self.copy(expression=self.expression.where(col.expression))
 
     filter = where
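This is the user-visible change: `where` (and its `filter` alias) now also accepts a raw SQL predicate string, parsed with sqlglot in the session's input dialect. A minimal usage sketch, assuming the duckdb extra is installed; the data and column names are illustrative:

```python
from sqlframe.duckdb import DuckDBSession
from sqlframe.duckdb import functions as F

session = DuckDBSession()  # sketch assumes an in-memory DuckDB connection by default
df = session.createDataFrame([(1, "a"), (2, "b")], ["id", "val"])

df.where("id = 1").show()          # new in this release: raw SQL string predicate
df.where(F.col("id") == 1).show()  # equivalent Column-based form
```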
@@ -662,7 +665,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         | 16| Bob| 85|
         +---+-----+------+
         """
-        return self.join.__wrapped__(self, other, how="cross")
+        return self.join.__wrapped__(self, other, how="cross")  # type: ignore
 
     @operation(Operation.FROM)
     def join(
@@ -769,7 +772,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         new_df = self.copy(expression=join_expression)
         new_df.pending_join_hints.extend(self.pending_join_hints)
         new_df.pending_hints.extend(other_df.pending_hints)
-        new_df = new_df.select.__wrapped__(new_df, *select_column_names)
+        new_df = new_df.select.__wrapped__(new_df, *select_column_names)  # type: ignore
         return new_df
 
     @operation(Operation.ORDER_BY)
@@ -1094,7 +1097,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         )
         if existing_col_index:
             expression = self.expression.copy()
-            expression.expressions[existing_col_index] = col.expression
+            expression.expressions[existing_col_index] = col.alias(colName).expression
             return self.copy(expression=expression)
         return self.copy().select(col.alias(colName), append=True)
 
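The fix above matters when `withColumn` replaces an existing column: the replacement expression is now re-aliased to `colName`, so the column keeps its original name instead of inheriting the expression's. A sketch of the behavior, with illustrative data and the same assumed DuckDB setup as the earlier example:

```python
from sqlframe.duckdb import DuckDBSession
from sqlframe.duckdb import functions as F

session = DuckDBSession()
df = session.createDataFrame([(1,), (2,)], ["id"])

# Overwrites the existing `id` column; with the fix, the result column is
# still named `id` rather than picking up the name of the `id + 1` expression.
df.withColumn("id", F.col("id") + 1).show()
```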