sqlframe 1.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe-1.1.3/.github/CODEOWNERS +1 -0
- sqlframe-1.1.3/.github/workflows/main.workflow.yaml +42 -0
- sqlframe-1.1.3/.github/workflows/publish.workflow.yaml +27 -0
- sqlframe-1.1.3/.gitignore +145 -0
- sqlframe-1.1.3/.pre-commit-config.yaml +33 -0
- sqlframe-1.1.3/.readthedocs.yaml +13 -0
- sqlframe-1.1.3/LICENSE +21 -0
- sqlframe-1.1.3/Makefile +37 -0
- sqlframe-1.1.3/PKG-INFO +138 -0
- sqlframe-1.1.3/README.md +108 -0
- sqlframe-1.1.3/blogs/images/but_wait_theres_more.gif +0 -0
- sqlframe-1.1.3/blogs/images/cake.gif +0 -0
- sqlframe-1.1.3/blogs/images/you_get_pyspark_api.gif +0 -0
- sqlframe-1.1.3/blogs/sqlframe_universal_dataframe_api.md +140 -0
- sqlframe-1.1.3/docs/bigquery.md +505 -0
- sqlframe-1.1.3/docs/docs/bigquery.md +479 -0
- sqlframe-1.1.3/docs/docs/duckdb.md +466 -0
- sqlframe-1.1.3/docs/docs/images/SF.png +0 -0
- sqlframe-1.1.3/docs/docs/images/favicon.png +0 -0
- sqlframe-1.1.3/docs/docs/images/favicon_old.png +0 -0
- sqlframe-1.1.3/docs/docs/images/sqlframe_diagram.png +0 -0
- sqlframe-1.1.3/docs/docs/images/sqlframe_logo.png +0 -0
- sqlframe-1.1.3/docs/docs/postgres.md +430 -0
- sqlframe-1.1.3/docs/duckdb.md +468 -0
- sqlframe-1.1.3/docs/images/SF.png +0 -0
- sqlframe-1.1.3/docs/images/favicon.png +0 -0
- sqlframe-1.1.3/docs/images/favicon_old.png +0 -0
- sqlframe-1.1.3/docs/images/sqlframe_diagram.png +0 -0
- sqlframe-1.1.3/docs/images/sqlframe_logo.png +0 -0
- sqlframe-1.1.3/docs/index.md +1 -0
- sqlframe-1.1.3/docs/postgres.md +448 -0
- sqlframe-1.1.3/docs/requirements.txt +6 -0
- sqlframe-1.1.3/docs/standalone.md +468 -0
- sqlframe-1.1.3/docs/stylesheets/extra.css +17 -0
- sqlframe-1.1.3/mkdocs.yml +52 -0
- sqlframe-1.1.3/pytest.ini +7 -0
- sqlframe-1.1.3/renovate.json +17 -0
- sqlframe-1.1.3/setup.cfg +7 -0
- sqlframe-1.1.3/setup.py +84 -0
- sqlframe-1.1.3/sqlframe/LICENSE +260 -0
- sqlframe-1.1.3/sqlframe/__init__.py +0 -0
- sqlframe-1.1.3/sqlframe/_version.py +16 -0
- sqlframe-1.1.3/sqlframe/base/__init__.py +0 -0
- sqlframe-1.1.3/sqlframe/base/_typing.py +39 -0
- sqlframe-1.1.3/sqlframe/base/catalog.py +1163 -0
- sqlframe-1.1.3/sqlframe/base/column.py +388 -0
- sqlframe-1.1.3/sqlframe/base/dataframe.py +1519 -0
- sqlframe-1.1.3/sqlframe/base/decorators.py +51 -0
- sqlframe-1.1.3/sqlframe/base/exceptions.py +14 -0
- sqlframe-1.1.3/sqlframe/base/function_alternatives.py +1055 -0
- sqlframe-1.1.3/sqlframe/base/functions.py +1678 -0
- sqlframe-1.1.3/sqlframe/base/group.py +102 -0
- sqlframe-1.1.3/sqlframe/base/mixins/__init__.py +0 -0
- sqlframe-1.1.3/sqlframe/base/mixins/catalog_mixins.py +419 -0
- sqlframe-1.1.3/sqlframe/base/mixins/readwriter_mixins.py +118 -0
- sqlframe-1.1.3/sqlframe/base/normalize.py +84 -0
- sqlframe-1.1.3/sqlframe/base/operations.py +87 -0
- sqlframe-1.1.3/sqlframe/base/readerwriter.py +679 -0
- sqlframe-1.1.3/sqlframe/base/session.py +585 -0
- sqlframe-1.1.3/sqlframe/base/transforms.py +13 -0
- sqlframe-1.1.3/sqlframe/base/types.py +418 -0
- sqlframe-1.1.3/sqlframe/base/util.py +242 -0
- sqlframe-1.1.3/sqlframe/base/window.py +139 -0
- sqlframe-1.1.3/sqlframe/bigquery/__init__.py +23 -0
- sqlframe-1.1.3/sqlframe/bigquery/catalog.py +255 -0
- sqlframe-1.1.3/sqlframe/bigquery/column.py +1 -0
- sqlframe-1.1.3/sqlframe/bigquery/dataframe.py +54 -0
- sqlframe-1.1.3/sqlframe/bigquery/functions.py +378 -0
- sqlframe-1.1.3/sqlframe/bigquery/functions.pyi +269 -0
- sqlframe-1.1.3/sqlframe/bigquery/group.py +14 -0
- sqlframe-1.1.3/sqlframe/bigquery/readwriter.py +29 -0
- sqlframe-1.1.3/sqlframe/bigquery/session.py +89 -0
- sqlframe-1.1.3/sqlframe/bigquery/types.py +1 -0
- sqlframe-1.1.3/sqlframe/bigquery/window.py +1 -0
- sqlframe-1.1.3/sqlframe/duckdb/__init__.py +20 -0
- sqlframe-1.1.3/sqlframe/duckdb/catalog.py +108 -0
- sqlframe-1.1.3/sqlframe/duckdb/column.py +1 -0
- sqlframe-1.1.3/sqlframe/duckdb/dataframe.py +55 -0
- sqlframe-1.1.3/sqlframe/duckdb/functions.py +47 -0
- sqlframe-1.1.3/sqlframe/duckdb/functions.pyi +183 -0
- sqlframe-1.1.3/sqlframe/duckdb/group.py +14 -0
- sqlframe-1.1.3/sqlframe/duckdb/readwriter.py +111 -0
- sqlframe-1.1.3/sqlframe/duckdb/session.py +65 -0
- sqlframe-1.1.3/sqlframe/duckdb/types.py +1 -0
- sqlframe-1.1.3/sqlframe/duckdb/window.py +1 -0
- sqlframe-1.1.3/sqlframe/postgres/__init__.py +23 -0
- sqlframe-1.1.3/sqlframe/postgres/catalog.py +106 -0
- sqlframe-1.1.3/sqlframe/postgres/column.py +1 -0
- sqlframe-1.1.3/sqlframe/postgres/dataframe.py +54 -0
- sqlframe-1.1.3/sqlframe/postgres/functions.py +61 -0
- sqlframe-1.1.3/sqlframe/postgres/functions.pyi +167 -0
- sqlframe-1.1.3/sqlframe/postgres/group.py +14 -0
- sqlframe-1.1.3/sqlframe/postgres/readwriter.py +29 -0
- sqlframe-1.1.3/sqlframe/postgres/session.py +68 -0
- sqlframe-1.1.3/sqlframe/postgres/types.py +1 -0
- sqlframe-1.1.3/sqlframe/postgres/window.py +1 -0
- sqlframe-1.1.3/sqlframe/redshift/__init__.py +23 -0
- sqlframe-1.1.3/sqlframe/redshift/catalog.py +127 -0
- sqlframe-1.1.3/sqlframe/redshift/column.py +1 -0
- sqlframe-1.1.3/sqlframe/redshift/dataframe.py +54 -0
- sqlframe-1.1.3/sqlframe/redshift/functions.py +18 -0
- sqlframe-1.1.3/sqlframe/redshift/group.py +14 -0
- sqlframe-1.1.3/sqlframe/redshift/readwriter.py +29 -0
- sqlframe-1.1.3/sqlframe/redshift/session.py +53 -0
- sqlframe-1.1.3/sqlframe/redshift/types.py +1 -0
- sqlframe-1.1.3/sqlframe/redshift/window.py +1 -0
- sqlframe-1.1.3/sqlframe/snowflake/__init__.py +26 -0
- sqlframe-1.1.3/sqlframe/snowflake/catalog.py +134 -0
- sqlframe-1.1.3/sqlframe/snowflake/column.py +1 -0
- sqlframe-1.1.3/sqlframe/snowflake/dataframe.py +54 -0
- sqlframe-1.1.3/sqlframe/snowflake/functions.py +18 -0
- sqlframe-1.1.3/sqlframe/snowflake/group.py +14 -0
- sqlframe-1.1.3/sqlframe/snowflake/readwriter.py +29 -0
- sqlframe-1.1.3/sqlframe/snowflake/session.py +53 -0
- sqlframe-1.1.3/sqlframe/snowflake/types.py +1 -0
- sqlframe-1.1.3/sqlframe/snowflake/window.py +1 -0
- sqlframe-1.1.3/sqlframe/spark/__init__.py +23 -0
- sqlframe-1.1.3/sqlframe/spark/catalog.py +1028 -0
- sqlframe-1.1.3/sqlframe/spark/column.py +1 -0
- sqlframe-1.1.3/sqlframe/spark/dataframe.py +54 -0
- sqlframe-1.1.3/sqlframe/spark/functions.py +22 -0
- sqlframe-1.1.3/sqlframe/spark/group.py +14 -0
- sqlframe-1.1.3/sqlframe/spark/readwriter.py +29 -0
- sqlframe-1.1.3/sqlframe/spark/session.py +90 -0
- sqlframe-1.1.3/sqlframe/spark/types.py +1 -0
- sqlframe-1.1.3/sqlframe/spark/window.py +1 -0
- sqlframe-1.1.3/sqlframe/standalone/__init__.py +26 -0
- sqlframe-1.1.3/sqlframe/standalone/catalog.py +13 -0
- sqlframe-1.1.3/sqlframe/standalone/column.py +1 -0
- sqlframe-1.1.3/sqlframe/standalone/dataframe.py +36 -0
- sqlframe-1.1.3/sqlframe/standalone/functions.py +1 -0
- sqlframe-1.1.3/sqlframe/standalone/group.py +14 -0
- sqlframe-1.1.3/sqlframe/standalone/readwriter.py +19 -0
- sqlframe-1.1.3/sqlframe/standalone/session.py +40 -0
- sqlframe-1.1.3/sqlframe/standalone/types.py +1 -0
- sqlframe-1.1.3/sqlframe/standalone/window.py +1 -0
- sqlframe-1.1.3/sqlframe.egg-info/PKG-INFO +138 -0
- sqlframe-1.1.3/sqlframe.egg-info/SOURCES.txt +189 -0
- sqlframe-1.1.3/sqlframe.egg-info/dependency_links.txt +1 -0
- sqlframe-1.1.3/sqlframe.egg-info/requires.txt +54 -0
- sqlframe-1.1.3/sqlframe.egg-info/top_level.txt +1 -0
- sqlframe-1.1.3/tests/__init__.py +0 -0
- sqlframe-1.1.3/tests/common_fixtures.py +208 -0
- sqlframe-1.1.3/tests/conftest.py +29 -0
- sqlframe-1.1.3/tests/fixtures/employee.csv +6 -0
- sqlframe-1.1.3/tests/fixtures/employee.json +5 -0
- sqlframe-1.1.3/tests/fixtures/employee.parquet +0 -0
- sqlframe-1.1.3/tests/fixtures/employee_extra_line.csv +7 -0
- sqlframe-1.1.3/tests/integration/__init__.py +0 -0
- sqlframe-1.1.3/tests/integration/engines/__init__.py +0 -0
- sqlframe-1.1.3/tests/integration/engines/bigquery/__init__.py +0 -0
- sqlframe-1.1.3/tests/integration/engines/bigquery/test_bigquery_catalog.py +343 -0
- sqlframe-1.1.3/tests/integration/engines/bigquery/test_bigquery_session.py +20 -0
- sqlframe-1.1.3/tests/integration/engines/duck/__init__.py +0 -0
- sqlframe-1.1.3/tests/integration/engines/duck/test_duckdb_catalog.py +356 -0
- sqlframe-1.1.3/tests/integration/engines/duck/test_duckdb_reader.py +100 -0
- sqlframe-1.1.3/tests/integration/engines/duck/test_duckdb_session.py +13 -0
- sqlframe-1.1.3/tests/integration/engines/postgres/__init__.py +0 -0
- sqlframe-1.1.3/tests/integration/engines/postgres/test_postgres_catalog.py +317 -0
- sqlframe-1.1.3/tests/integration/engines/postgres/test_postgres_session.py +19 -0
- sqlframe-1.1.3/tests/integration/engines/redshift/__init__.py +0 -0
- sqlframe-1.1.3/tests/integration/engines/redshift/test_redshift_catalog.py +306 -0
- sqlframe-1.1.3/tests/integration/engines/redshift/test_redshift_session.py +47 -0
- sqlframe-1.1.3/tests/integration/engines/snowflake/__init__.py +0 -0
- sqlframe-1.1.3/tests/integration/engines/snowflake/test_snowflake_catalog.py +333 -0
- sqlframe-1.1.3/tests/integration/engines/snowflake/test_snowflake_session.py +47 -0
- sqlframe-1.1.3/tests/integration/engines/spark/__init__.py +0 -0
- sqlframe-1.1.3/tests/integration/engines/spark/test_spark_catalog.py +244 -0
- sqlframe-1.1.3/tests/integration/engines/test_engine_dataframe.py +87 -0
- sqlframe-1.1.3/tests/integration/engines/test_engine_reader.py +131 -0
- sqlframe-1.1.3/tests/integration/engines/test_engine_session.py +47 -0
- sqlframe-1.1.3/tests/integration/engines/test_engine_writer.py +176 -0
- sqlframe-1.1.3/tests/integration/engines/test_int_functions.py +2688 -0
- sqlframe-1.1.3/tests/integration/fixtures.py +712 -0
- sqlframe-1.1.3/tests/integration/test_int_dataframe.py +1980 -0
- sqlframe-1.1.3/tests/integration/test_int_dataframe_stats.py +28 -0
- sqlframe-1.1.3/tests/integration/test_int_grouped_data.py +165 -0
- sqlframe-1.1.3/tests/integration/test_int_session.py +70 -0
- sqlframe-1.1.3/tests/types.py +6 -0
- sqlframe-1.1.3/tests/unit/__init__.py +0 -0
- sqlframe-1.1.3/tests/unit/standalone/__init__.py +0 -0
- sqlframe-1.1.3/tests/unit/standalone/fixtures.py +71 -0
- sqlframe-1.1.3/tests/unit/standalone/test_column.py +218 -0
- sqlframe-1.1.3/tests/unit/standalone/test_dataframe.py +85 -0
- sqlframe-1.1.3/tests/unit/standalone/test_dataframe_writer.py +107 -0
- sqlframe-1.1.3/tests/unit/standalone/test_functions.py +2792 -0
- sqlframe-1.1.3/tests/unit/standalone/test_session.py +138 -0
- sqlframe-1.1.3/tests/unit/standalone/test_session_case_sensitivity.py +110 -0
- sqlframe-1.1.3/tests/unit/standalone/test_types.py +38 -0
- sqlframe-1.1.3/tests/unit/standalone/test_window.py +45 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
* @eakmanrq
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
name: SQLFrame
|
|
2
|
+
on:
|
|
3
|
+
push:
|
|
4
|
+
branches:
|
|
5
|
+
- main
|
|
6
|
+
paths:
|
|
7
|
+
- 'sqlframe/**'
|
|
8
|
+
- 'tests/**'
|
|
9
|
+
- 'Makefile'
|
|
10
|
+
- 'setup.py'
|
|
11
|
+
pull_request:
|
|
12
|
+
types:
|
|
13
|
+
- synchronize
|
|
14
|
+
- opened
|
|
15
|
+
paths:
|
|
16
|
+
- 'sqlframe/**'
|
|
17
|
+
- 'tests/**'
|
|
18
|
+
- 'Makefile'
|
|
19
|
+
- 'setup.py'
|
|
20
|
+
jobs:
|
|
21
|
+
run-tests:
|
|
22
|
+
runs-on: ubuntu-latest
|
|
23
|
+
env:
|
|
24
|
+
PYTEST_XDIST_AUTO_NUM_WORKERS: 4
|
|
25
|
+
strategy:
|
|
26
|
+
matrix:
|
|
27
|
+
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
|
|
28
|
+
steps:
|
|
29
|
+
- name: Checkout
|
|
30
|
+
uses: actions/checkout@v4
|
|
31
|
+
- name: Install Python
|
|
32
|
+
uses: actions/setup-python@v5
|
|
33
|
+
with:
|
|
34
|
+
python-version: ${{ matrix.python-version }}
|
|
35
|
+
- name: Install dependencies
|
|
36
|
+
run: make install-dev
|
|
37
|
+
- name: Run Style
|
|
38
|
+
run: make style
|
|
39
|
+
- name: Setup Postgres
|
|
40
|
+
uses: ikalnytskyi/action-setup-postgres@v6
|
|
41
|
+
- name: Run tests
|
|
42
|
+
run: make local-test
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
name: Publish
|
|
2
|
+
on:
|
|
3
|
+
push:
|
|
4
|
+
tags:
|
|
5
|
+
- 'v[0-9]+.[0-9]+.[0-9]+'
|
|
6
|
+
permissions:
|
|
7
|
+
contents: write
|
|
8
|
+
jobs:
|
|
9
|
+
deploy:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
env:
|
|
12
|
+
TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
|
|
13
|
+
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
|
|
14
|
+
steps:
|
|
15
|
+
- name: Checkout
|
|
16
|
+
uses: actions/checkout@v4
|
|
17
|
+
- name: Publish
|
|
18
|
+
run: make publish
|
|
19
|
+
- name: Create release
|
|
20
|
+
env:
|
|
21
|
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
22
|
+
tag: ${{ github.ref_name }}
|
|
23
|
+
run: |
|
|
24
|
+
gh release create "$tag" \
|
|
25
|
+
--repo="$GITHUB_REPOSITORY" \
|
|
26
|
+
--title="SQLFrame ${tag#v}" \
|
|
27
|
+
--generate-notes
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
pip-wheel-metadata/
|
|
24
|
+
share/python-wheels/
|
|
25
|
+
*.egg-info/
|
|
26
|
+
.installed.cfg
|
|
27
|
+
*.egg
|
|
28
|
+
MANIFEST
|
|
29
|
+
|
|
30
|
+
# PyInstaller
|
|
31
|
+
# Usually these files are written by a python script from a template
|
|
32
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
33
|
+
*.manifest
|
|
34
|
+
*.spec
|
|
35
|
+
|
|
36
|
+
# Installer logs
|
|
37
|
+
pip-log.txt
|
|
38
|
+
pip-delete-this-directory.txt
|
|
39
|
+
|
|
40
|
+
# Unit test / coverage reports
|
|
41
|
+
htmlcov/
|
|
42
|
+
.tox/
|
|
43
|
+
.nox/
|
|
44
|
+
.coverage
|
|
45
|
+
.coverage.*
|
|
46
|
+
.cache
|
|
47
|
+
nosetests.xml
|
|
48
|
+
coverage.xml
|
|
49
|
+
*.cover
|
|
50
|
+
*.py,cover
|
|
51
|
+
.hypothesis/
|
|
52
|
+
.pytest_cache/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
target/
|
|
76
|
+
|
|
77
|
+
# Jupyter Notebook
|
|
78
|
+
.ipynb_checkpoints
|
|
79
|
+
|
|
80
|
+
# IPython
|
|
81
|
+
profile_default/
|
|
82
|
+
ipython_config.py
|
|
83
|
+
|
|
84
|
+
# pyenv
|
|
85
|
+
.python-version
|
|
86
|
+
|
|
87
|
+
# pipenv
|
|
88
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
89
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
90
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
91
|
+
# install all needed dependencies.
|
|
92
|
+
#Pipfile.lock
|
|
93
|
+
|
|
94
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
|
95
|
+
__pypackages__/
|
|
96
|
+
|
|
97
|
+
# Celery stuff
|
|
98
|
+
celerybeat-schedule
|
|
99
|
+
celerybeat.pid
|
|
100
|
+
|
|
101
|
+
# SageMath parsed files
|
|
102
|
+
*.sage.py
|
|
103
|
+
|
|
104
|
+
# Environments
|
|
105
|
+
.env
|
|
106
|
+
.venv
|
|
107
|
+
env/
|
|
108
|
+
venv/
|
|
109
|
+
ENV/
|
|
110
|
+
env.bak/
|
|
111
|
+
venv.bak/
|
|
112
|
+
|
|
113
|
+
# Spyder project settings
|
|
114
|
+
.spyderproject
|
|
115
|
+
.spyproject
|
|
116
|
+
|
|
117
|
+
# Rope project settings
|
|
118
|
+
.ropeproject
|
|
119
|
+
|
|
120
|
+
# mkdocs documentation
|
|
121
|
+
/site
|
|
122
|
+
|
|
123
|
+
# mypy
|
|
124
|
+
.mypy_cache/
|
|
125
|
+
.dmypy.json
|
|
126
|
+
dmypy.json
|
|
127
|
+
|
|
128
|
+
# Pyre type checker
|
|
129
|
+
.pyre/
|
|
130
|
+
|
|
131
|
+
# PyCharm
|
|
132
|
+
.idea/
|
|
133
|
+
|
|
134
|
+
# Visual Studio Code
|
|
135
|
+
.vscode
|
|
136
|
+
|
|
137
|
+
.DS_STORE
|
|
138
|
+
metastore_db
|
|
139
|
+
spark_warehouse
|
|
140
|
+
|
|
141
|
+
# Version file
|
|
142
|
+
sqlframe/_version.py
|
|
143
|
+
|
|
144
|
+
# Emacs files
|
|
145
|
+
*~
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: local
|
|
3
|
+
hooks:
|
|
4
|
+
- id: ruff
|
|
5
|
+
name: ruff
|
|
6
|
+
description: "Run 'ruff' for extremely fast Python linting"
|
|
7
|
+
entry: ruff check
|
|
8
|
+
--force-exclude
|
|
9
|
+
--fix
|
|
10
|
+
--select I
|
|
11
|
+
--ignore E721
|
|
12
|
+
--ignore E741
|
|
13
|
+
language: python
|
|
14
|
+
types_or: [python, pyi]
|
|
15
|
+
require_serial: true
|
|
16
|
+
additional_dependencies: []
|
|
17
|
+
files: ^(sqlframe/|tests/|setup.py)
|
|
18
|
+
- id: ruff-format
|
|
19
|
+
name: ruff-format
|
|
20
|
+
description: "Run 'ruff format' for extremely fast Python formatting"
|
|
21
|
+
entry: ruff format
|
|
22
|
+
--force-exclude
|
|
23
|
+
--line-length 100
|
|
24
|
+
language: python
|
|
25
|
+
types_or: [python, pyi]
|
|
26
|
+
require_serial: true
|
|
27
|
+
- id: mypy
|
|
28
|
+
name: mypy
|
|
29
|
+
entry: mypy sqlframe tests
|
|
30
|
+
language: system
|
|
31
|
+
types: [ python ]
|
|
32
|
+
files: ^(sqlframe/|tests/)
|
|
33
|
+
pass_filenames: false
|
sqlframe-1.1.3/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Ryan Eakman
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
sqlframe-1.1.3/Makefile
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
install-dev:
|
|
2
|
+
pip install -e ".[dev,docs,duckdb,postgres,redshift,bigquery,snowflake,spark]"
|
|
3
|
+
|
|
4
|
+
install-pre-commit:
|
|
5
|
+
pre-commit install
|
|
6
|
+
|
|
7
|
+
slow-test:
|
|
8
|
+
pytest -n auto tests
|
|
9
|
+
|
|
10
|
+
fast-test:
|
|
11
|
+
pytest -n auto -m "fast"
|
|
12
|
+
|
|
13
|
+
local-test:
|
|
14
|
+
pytest -n auto -m "fast or local"
|
|
15
|
+
|
|
16
|
+
bigquery-test:
|
|
17
|
+
pytest -n auto -m "bigquery"
|
|
18
|
+
|
|
19
|
+
duckdb-test:
|
|
20
|
+
pytest -n auto -m "duckdb"
|
|
21
|
+
|
|
22
|
+
style:
|
|
23
|
+
pre-commit run --all-files
|
|
24
|
+
|
|
25
|
+
docs-serve:
|
|
26
|
+
mkdocs serve
|
|
27
|
+
|
|
28
|
+
stubs:
|
|
29
|
+
stubgen sqlframe/bigquery/functions.py --output ./ --inspect-mode
|
|
30
|
+
stubgen sqlframe/duckdb/functions.py --output ./ --inspect-mode
|
|
31
|
+
stubgen sqlframe/postgres/functions.py --output ./ --inspect-mode
|
|
32
|
+
|
|
33
|
+
package:
|
|
34
|
+
pip3 install wheel && python3 setup.py sdist bdist_wheel
|
|
35
|
+
|
|
36
|
+
publish: package
|
|
37
|
+
pip3 install twine && python3 -m twine upload dist/*
|
sqlframe-1.1.3/PKG-INFO
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: sqlframe
|
|
3
|
+
Version: 1.1.3
|
|
4
|
+
Summary: Taking the Spark out of PySpark by converting to SQL
|
|
5
|
+
Home-page: https://github.com/eakmanrq/sqlframe
|
|
6
|
+
Author: Ryan Eakman
|
|
7
|
+
Author-email: eakmanrq@gmail.com
|
|
8
|
+
License: MIT
|
|
9
|
+
Platform: UNKNOWN
|
|
10
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: SQL
|
|
16
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
17
|
+
Requires-Python: >=3.8
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
Provides-Extra: bigquery
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Provides-Extra: docs
|
|
22
|
+
Provides-Extra: duckdb
|
|
23
|
+
Provides-Extra: postgres
|
|
24
|
+
Provides-Extra: redshift
|
|
25
|
+
Provides-Extra: snowflake
|
|
26
|
+
Provides-Extra: spark
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
|
|
29
|
+
<div align="center">
|
|
30
|
+
<img src="https://sqlframe.readthedocs.io/en/latest/docs/images/sqlframe_logo.png" alt="SQLFrame Logo" width="400"/>
|
|
31
|
+
</div>
|
|
32
|
+
|
|
33
|
+
SQLFrame implements the PySpark DataFrame API in order to enable running transformation pipelines directly on database engines - no Spark clusters or dependencies required.
|
|
34
|
+
|
|
35
|
+
SQLFrame currently supports the following engines (many more in development):
|
|
36
|
+
|
|
37
|
+
* [BigQuery](https://sqlframe.readthedocs.io/en/latest/bigquery/)
|
|
38
|
+
* [DuckDB](https://sqlframe.readthedocs.io/en/latest/duckdb)
|
|
39
|
+
* [Postgres](https://sqlframe.readthedocs.io/en/latest/postgres)
|
|
40
|
+
|
|
41
|
+
SQLFrame also has a "Standalone" session that can be used to generate SQL without any connection to a database engine.
|
|
42
|
+
* [Standalone](https://sqlframe.readthedocs.io/en/latest/standalone)
|
|
43
|
+
|
|
44
|
+
SQLFrame is great for:
|
|
45
|
+
|
|
46
|
+
* Users who want to run PySpark DataFrame code without having to use a Spark cluster
|
|
47
|
+
* Users who want a SQL representation of their DataFrame code for debugging or sharing with others
|
|
48
|
+
* Users who want a DataFrame API that leverages the full power of their engine to do the processing
|
|
49
|
+
|
|
50
|
+
## Installation
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# BigQuery
|
|
54
|
+
pip install "sqlframe[bigquery]"
|
|
55
|
+
# DuckDB
|
|
56
|
+
pip install "sqlframe[duckdb]"
|
|
57
|
+
# Postgres
|
|
58
|
+
pip install "sqlframe[postgres]"
|
|
59
|
+
# Standalone
|
|
60
|
+
pip install sqlframe
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
See specific engine documentation for additional setup instructions.
|
|
64
|
+
|
|
65
|
+
## Example Usage
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
from sqlframe.bigquery import BigQuerySession
|
|
69
|
+
from sqlframe.bigquery import functions as F
|
|
70
|
+
from sqlframe.bigquery import Window
|
|
71
|
+
|
|
72
|
+
session = BigQuerySession()
|
|
73
|
+
table_path = "bigquery-public-data.samples.natality"
|
|
74
|
+
# Top 5 years with the greatest year-over-year % change in new families with single child
|
|
75
|
+
df = (
|
|
76
|
+
session.table(table_path)
|
|
77
|
+
.where(F.col("ever_born") == 1)
|
|
78
|
+
.groupBy("year")
|
|
79
|
+
.agg(F.count("*").alias("num_single_child_families"))
|
|
80
|
+
.withColumn(
|
|
81
|
+
"last_year_num_single_child_families",
|
|
82
|
+
F.lag(F.col("num_single_child_families"), 1).over(Window.orderBy("year"))
|
|
83
|
+
)
|
|
84
|
+
.withColumn(
|
|
85
|
+
"percent_change",
|
|
86
|
+
(F.col("num_single_child_families") - F.col("last_year_num_single_child_families"))
|
|
87
|
+
/ F.col("last_year_num_single_child_families")
|
|
88
|
+
)
|
|
89
|
+
.orderBy(F.abs(F.col("percent_change")).desc())
|
|
90
|
+
.select(
|
|
91
|
+
F.col("year").alias("year"),
|
|
92
|
+
F.format_number("num_single_child_families", 0).alias("new families single child"),
|
|
93
|
+
F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
|
|
94
|
+
)
|
|
95
|
+
.limit(5)
|
|
96
|
+
)
|
|
97
|
+
```
|
|
98
|
+
```python
|
|
99
|
+
>>> df.sql()
|
|
100
|
+
WITH `t94228` AS (
|
|
101
|
+
SELECT
|
|
102
|
+
`natality`.`year` AS `year`,
|
|
103
|
+
COUNT(*) AS `num_single_child_families`
|
|
104
|
+
FROM `bigquery-public-data`.`samples`.`natality` AS `natality`
|
|
105
|
+
WHERE
|
|
106
|
+
`natality`.`ever_born` = 1
|
|
107
|
+
GROUP BY
|
|
108
|
+
`natality`.`year`
|
|
109
|
+
), `t39093` AS (
|
|
110
|
+
SELECT
|
|
111
|
+
`t94228`.`year` AS `year`,
|
|
112
|
+
`t94228`.`num_single_child_families` AS `num_single_child_families`,
|
|
113
|
+
LAG(`t94228`.`num_single_child_families`, 1) OVER (ORDER BY `t94228`.`year`) AS `last_year_num_single_child_families`
|
|
114
|
+
FROM `t94228` AS `t94228`
|
|
115
|
+
)
|
|
116
|
+
SELECT
|
|
117
|
+
`t39093`.`year` AS `year`,
|
|
118
|
+
FORMAT('%\'.0f', ROUND(CAST(`t39093`.`num_single_child_families` AS FLOAT64), 0)) AS `new families single child`,
|
|
119
|
+
FORMAT('%\'.2f', ROUND(CAST((((`t39093`.`num_single_child_families` - `t39093`.`last_year_num_single_child_families`) / `t39093`.`last_year_num_single_child_families`) * 100) AS FLOAT64), 2)) AS `percent change`
|
|
120
|
+
FROM `t39093` AS `t39093`
|
|
121
|
+
ORDER BY
|
|
122
|
+
ABS(`percent_change`) DESC
|
|
123
|
+
LIMIT 5
|
|
124
|
+
```
|
|
125
|
+
```python
|
|
126
|
+
>>> df.show()
|
|
127
|
+
+------+---------------------------+----------------+
|
|
128
|
+
| year | new families single child | percent change |
|
|
129
|
+
+------+---------------------------+----------------+
|
|
130
|
+
| 1989 | 1,650,246 | 25.02 |
|
|
131
|
+
| 1974 | 783,448 | 14.49 |
|
|
132
|
+
| 1977 | 1,057,379 | 11.38 |
|
|
133
|
+
| 1985 | 1,308,476 | 11.15 |
|
|
134
|
+
| 1975 | 868,985 | 10.92 |
|
|
135
|
+
+------+---------------------------+----------------+
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
|
sqlframe-1.1.3/README.md
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
<img src="https://sqlframe.readthedocs.io/en/latest/docs/images/sqlframe_logo.png" alt="SQLFrame Logo" width="400"/>
|
|
3
|
+
</div>
|
|
4
|
+
|
|
5
|
+
SQLFrame implements the PySpark DataFrame API in order to enable running transformation pipelines directly on database engines - no Spark clusters or dependencies required.
|
|
6
|
+
|
|
7
|
+
SQLFrame currently supports the following engines (many more in development):
|
|
8
|
+
|
|
9
|
+
* [BigQuery](https://sqlframe.readthedocs.io/en/latest/bigquery/)
|
|
10
|
+
* [DuckDB](https://sqlframe.readthedocs.io/en/latest/duckdb)
|
|
11
|
+
* [Postgres](https://sqlframe.readthedocs.io/en/latest/postgres)
|
|
12
|
+
|
|
13
|
+
SQLFrame also has a "Standalone" session that can be used to generate SQL without any connection to a database engine.
|
|
14
|
+
* [Standalone](https://sqlframe.readthedocs.io/en/latest/standalone)
|
|
15
|
+
|
|
16
|
+
SQLFrame is great for:
|
|
17
|
+
|
|
18
|
+
* Users who want to run PySpark DataFrame code without having to use a Spark cluster
|
|
19
|
+
* Users who want a SQL representation of their DataFrame code for debugging or sharing with others
|
|
20
|
+
* Users who want a DataFrame API that leverages the full power of their engine to do the processing
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
# BigQuery
|
|
26
|
+
pip install "sqlframe[bigquery]"
|
|
27
|
+
# DuckDB
|
|
28
|
+
pip install "sqlframe[duckdb]"
|
|
29
|
+
# Postgres
|
|
30
|
+
pip install "sqlframe[postgres]"
|
|
31
|
+
# Standalone
|
|
32
|
+
pip install sqlframe
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
See specific engine documentation for additional setup instructions.
|
|
36
|
+
|
|
37
|
+
## Example Usage
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
from sqlframe.bigquery import BigQuerySession
|
|
41
|
+
from sqlframe.bigquery import functions as F
|
|
42
|
+
from sqlframe.bigquery import Window
|
|
43
|
+
|
|
44
|
+
session = BigQuerySession()
|
|
45
|
+
table_path = "bigquery-public-data.samples.natality"
|
|
46
|
+
# Top 5 years with the greatest year-over-year % change in new families with single child
|
|
47
|
+
df = (
|
|
48
|
+
session.table(table_path)
|
|
49
|
+
.where(F.col("ever_born") == 1)
|
|
50
|
+
.groupBy("year")
|
|
51
|
+
.agg(F.count("*").alias("num_single_child_families"))
|
|
52
|
+
.withColumn(
|
|
53
|
+
"last_year_num_single_child_families",
|
|
54
|
+
F.lag(F.col("num_single_child_families"), 1).over(Window.orderBy("year"))
|
|
55
|
+
)
|
|
56
|
+
.withColumn(
|
|
57
|
+
"percent_change",
|
|
58
|
+
(F.col("num_single_child_families") - F.col("last_year_num_single_child_families"))
|
|
59
|
+
/ F.col("last_year_num_single_child_families")
|
|
60
|
+
)
|
|
61
|
+
.orderBy(F.abs(F.col("percent_change")).desc())
|
|
62
|
+
.select(
|
|
63
|
+
F.col("year").alias("year"),
|
|
64
|
+
F.format_number("num_single_child_families", 0).alias("new families single child"),
|
|
65
|
+
F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
|
|
66
|
+
)
|
|
67
|
+
.limit(5)
|
|
68
|
+
)
|
|
69
|
+
```
|
|
70
|
+
```python
|
|
71
|
+
>>> df.sql()
|
|
72
|
+
WITH `t94228` AS (
|
|
73
|
+
SELECT
|
|
74
|
+
`natality`.`year` AS `year`,
|
|
75
|
+
COUNT(*) AS `num_single_child_families`
|
|
76
|
+
FROM `bigquery-public-data`.`samples`.`natality` AS `natality`
|
|
77
|
+
WHERE
|
|
78
|
+
`natality`.`ever_born` = 1
|
|
79
|
+
GROUP BY
|
|
80
|
+
`natality`.`year`
|
|
81
|
+
), `t39093` AS (
|
|
82
|
+
SELECT
|
|
83
|
+
`t94228`.`year` AS `year`,
|
|
84
|
+
`t94228`.`num_single_child_families` AS `num_single_child_families`,
|
|
85
|
+
LAG(`t94228`.`num_single_child_families`, 1) OVER (ORDER BY `t94228`.`year`) AS `last_year_num_single_child_families`
|
|
86
|
+
FROM `t94228` AS `t94228`
|
|
87
|
+
)
|
|
88
|
+
SELECT
|
|
89
|
+
`t39093`.`year` AS `year`,
|
|
90
|
+
FORMAT('%\'.0f', ROUND(CAST(`t39093`.`num_single_child_families` AS FLOAT64), 0)) AS `new families single child`,
|
|
91
|
+
FORMAT('%\'.2f', ROUND(CAST((((`t39093`.`num_single_child_families` - `t39093`.`last_year_num_single_child_families`) / `t39093`.`last_year_num_single_child_families`) * 100) AS FLOAT64), 2)) AS `percent change`
|
|
92
|
+
FROM `t39093` AS `t39093`
|
|
93
|
+
ORDER BY
|
|
94
|
+
ABS(`percent_change`) DESC
|
|
95
|
+
LIMIT 5
|
|
96
|
+
```
|
|
97
|
+
```python
|
|
98
|
+
>>> df.show()
|
|
99
|
+
+------+---------------------------+----------------+
|
|
100
|
+
| year | new families single child | percent change |
|
|
101
|
+
+------+---------------------------+----------------+
|
|
102
|
+
| 1989 | 1,650,246 | 25.02 |
|
|
103
|
+
| 1974 | 783,448 | 14.49 |
|
|
104
|
+
| 1977 | 1,057,379 | 11.38 |
|
|
105
|
+
| 1985 | 1,308,476 | 11.15 |
|
|
106
|
+
| 1975 | 868,985 | 10.92 |
|
|
107
|
+
+------+---------------------------+----------------+
|
|
108
|
+
```
|
|
Binary file
|
|
Binary file
|
|
Binary file
|