sqlframe 1.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. sqlframe-1.1.3/.github/CODEOWNERS +1 -0
  2. sqlframe-1.1.3/.github/workflows/main.workflow.yaml +42 -0
  3. sqlframe-1.1.3/.github/workflows/publish.workflow.yaml +27 -0
  4. sqlframe-1.1.3/.gitignore +145 -0
  5. sqlframe-1.1.3/.pre-commit-config.yaml +33 -0
  6. sqlframe-1.1.3/.readthedocs.yaml +13 -0
  7. sqlframe-1.1.3/LICENSE +21 -0
  8. sqlframe-1.1.3/Makefile +37 -0
  9. sqlframe-1.1.3/PKG-INFO +138 -0
  10. sqlframe-1.1.3/README.md +108 -0
  11. sqlframe-1.1.3/blogs/images/but_wait_theres_more.gif +0 -0
  12. sqlframe-1.1.3/blogs/images/cake.gif +0 -0
  13. sqlframe-1.1.3/blogs/images/you_get_pyspark_api.gif +0 -0
  14. sqlframe-1.1.3/blogs/sqlframe_universal_dataframe_api.md +140 -0
  15. sqlframe-1.1.3/docs/bigquery.md +505 -0
  16. sqlframe-1.1.3/docs/docs/bigquery.md +479 -0
  17. sqlframe-1.1.3/docs/docs/duckdb.md +466 -0
  18. sqlframe-1.1.3/docs/docs/images/SF.png +0 -0
  19. sqlframe-1.1.3/docs/docs/images/favicon.png +0 -0
  20. sqlframe-1.1.3/docs/docs/images/favicon_old.png +0 -0
  21. sqlframe-1.1.3/docs/docs/images/sqlframe_diagram.png +0 -0
  22. sqlframe-1.1.3/docs/docs/images/sqlframe_logo.png +0 -0
  23. sqlframe-1.1.3/docs/docs/postgres.md +430 -0
  24. sqlframe-1.1.3/docs/duckdb.md +468 -0
  25. sqlframe-1.1.3/docs/images/SF.png +0 -0
  26. sqlframe-1.1.3/docs/images/favicon.png +0 -0
  27. sqlframe-1.1.3/docs/images/favicon_old.png +0 -0
  28. sqlframe-1.1.3/docs/images/sqlframe_diagram.png +0 -0
  29. sqlframe-1.1.3/docs/images/sqlframe_logo.png +0 -0
  30. sqlframe-1.1.3/docs/index.md +1 -0
  31. sqlframe-1.1.3/docs/postgres.md +448 -0
  32. sqlframe-1.1.3/docs/requirements.txt +6 -0
  33. sqlframe-1.1.3/docs/standalone.md +468 -0
  34. sqlframe-1.1.3/docs/stylesheets/extra.css +17 -0
  35. sqlframe-1.1.3/mkdocs.yml +52 -0
  36. sqlframe-1.1.3/pytest.ini +7 -0
  37. sqlframe-1.1.3/renovate.json +17 -0
  38. sqlframe-1.1.3/setup.cfg +7 -0
  39. sqlframe-1.1.3/setup.py +84 -0
  40. sqlframe-1.1.3/sqlframe/LICENSE +260 -0
  41. sqlframe-1.1.3/sqlframe/__init__.py +0 -0
  42. sqlframe-1.1.3/sqlframe/_version.py +16 -0
  43. sqlframe-1.1.3/sqlframe/base/__init__.py +0 -0
  44. sqlframe-1.1.3/sqlframe/base/_typing.py +39 -0
  45. sqlframe-1.1.3/sqlframe/base/catalog.py +1163 -0
  46. sqlframe-1.1.3/sqlframe/base/column.py +388 -0
  47. sqlframe-1.1.3/sqlframe/base/dataframe.py +1519 -0
  48. sqlframe-1.1.3/sqlframe/base/decorators.py +51 -0
  49. sqlframe-1.1.3/sqlframe/base/exceptions.py +14 -0
  50. sqlframe-1.1.3/sqlframe/base/function_alternatives.py +1055 -0
  51. sqlframe-1.1.3/sqlframe/base/functions.py +1678 -0
  52. sqlframe-1.1.3/sqlframe/base/group.py +102 -0
  53. sqlframe-1.1.3/sqlframe/base/mixins/__init__.py +0 -0
  54. sqlframe-1.1.3/sqlframe/base/mixins/catalog_mixins.py +419 -0
  55. sqlframe-1.1.3/sqlframe/base/mixins/readwriter_mixins.py +118 -0
  56. sqlframe-1.1.3/sqlframe/base/normalize.py +84 -0
  57. sqlframe-1.1.3/sqlframe/base/operations.py +87 -0
  58. sqlframe-1.1.3/sqlframe/base/readerwriter.py +679 -0
  59. sqlframe-1.1.3/sqlframe/base/session.py +585 -0
  60. sqlframe-1.1.3/sqlframe/base/transforms.py +13 -0
  61. sqlframe-1.1.3/sqlframe/base/types.py +418 -0
  62. sqlframe-1.1.3/sqlframe/base/util.py +242 -0
  63. sqlframe-1.1.3/sqlframe/base/window.py +139 -0
  64. sqlframe-1.1.3/sqlframe/bigquery/__init__.py +23 -0
  65. sqlframe-1.1.3/sqlframe/bigquery/catalog.py +255 -0
  66. sqlframe-1.1.3/sqlframe/bigquery/column.py +1 -0
  67. sqlframe-1.1.3/sqlframe/bigquery/dataframe.py +54 -0
  68. sqlframe-1.1.3/sqlframe/bigquery/functions.py +378 -0
  69. sqlframe-1.1.3/sqlframe/bigquery/functions.pyi +269 -0
  70. sqlframe-1.1.3/sqlframe/bigquery/group.py +14 -0
  71. sqlframe-1.1.3/sqlframe/bigquery/readwriter.py +29 -0
  72. sqlframe-1.1.3/sqlframe/bigquery/session.py +89 -0
  73. sqlframe-1.1.3/sqlframe/bigquery/types.py +1 -0
  74. sqlframe-1.1.3/sqlframe/bigquery/window.py +1 -0
  75. sqlframe-1.1.3/sqlframe/duckdb/__init__.py +20 -0
  76. sqlframe-1.1.3/sqlframe/duckdb/catalog.py +108 -0
  77. sqlframe-1.1.3/sqlframe/duckdb/column.py +1 -0
  78. sqlframe-1.1.3/sqlframe/duckdb/dataframe.py +55 -0
  79. sqlframe-1.1.3/sqlframe/duckdb/functions.py +47 -0
  80. sqlframe-1.1.3/sqlframe/duckdb/functions.pyi +183 -0
  81. sqlframe-1.1.3/sqlframe/duckdb/group.py +14 -0
  82. sqlframe-1.1.3/sqlframe/duckdb/readwriter.py +111 -0
  83. sqlframe-1.1.3/sqlframe/duckdb/session.py +65 -0
  84. sqlframe-1.1.3/sqlframe/duckdb/types.py +1 -0
  85. sqlframe-1.1.3/sqlframe/duckdb/window.py +1 -0
  86. sqlframe-1.1.3/sqlframe/postgres/__init__.py +23 -0
  87. sqlframe-1.1.3/sqlframe/postgres/catalog.py +106 -0
  88. sqlframe-1.1.3/sqlframe/postgres/column.py +1 -0
  89. sqlframe-1.1.3/sqlframe/postgres/dataframe.py +54 -0
  90. sqlframe-1.1.3/sqlframe/postgres/functions.py +61 -0
  91. sqlframe-1.1.3/sqlframe/postgres/functions.pyi +167 -0
  92. sqlframe-1.1.3/sqlframe/postgres/group.py +14 -0
  93. sqlframe-1.1.3/sqlframe/postgres/readwriter.py +29 -0
  94. sqlframe-1.1.3/sqlframe/postgres/session.py +68 -0
  95. sqlframe-1.1.3/sqlframe/postgres/types.py +1 -0
  96. sqlframe-1.1.3/sqlframe/postgres/window.py +1 -0
  97. sqlframe-1.1.3/sqlframe/redshift/__init__.py +23 -0
  98. sqlframe-1.1.3/sqlframe/redshift/catalog.py +127 -0
  99. sqlframe-1.1.3/sqlframe/redshift/column.py +1 -0
  100. sqlframe-1.1.3/sqlframe/redshift/dataframe.py +54 -0
  101. sqlframe-1.1.3/sqlframe/redshift/functions.py +18 -0
  102. sqlframe-1.1.3/sqlframe/redshift/group.py +14 -0
  103. sqlframe-1.1.3/sqlframe/redshift/readwriter.py +29 -0
  104. sqlframe-1.1.3/sqlframe/redshift/session.py +53 -0
  105. sqlframe-1.1.3/sqlframe/redshift/types.py +1 -0
  106. sqlframe-1.1.3/sqlframe/redshift/window.py +1 -0
  107. sqlframe-1.1.3/sqlframe/snowflake/__init__.py +26 -0
  108. sqlframe-1.1.3/sqlframe/snowflake/catalog.py +134 -0
  109. sqlframe-1.1.3/sqlframe/snowflake/column.py +1 -0
  110. sqlframe-1.1.3/sqlframe/snowflake/dataframe.py +54 -0
  111. sqlframe-1.1.3/sqlframe/snowflake/functions.py +18 -0
  112. sqlframe-1.1.3/sqlframe/snowflake/group.py +14 -0
  113. sqlframe-1.1.3/sqlframe/snowflake/readwriter.py +29 -0
  114. sqlframe-1.1.3/sqlframe/snowflake/session.py +53 -0
  115. sqlframe-1.1.3/sqlframe/snowflake/types.py +1 -0
  116. sqlframe-1.1.3/sqlframe/snowflake/window.py +1 -0
  117. sqlframe-1.1.3/sqlframe/spark/__init__.py +23 -0
  118. sqlframe-1.1.3/sqlframe/spark/catalog.py +1028 -0
  119. sqlframe-1.1.3/sqlframe/spark/column.py +1 -0
  120. sqlframe-1.1.3/sqlframe/spark/dataframe.py +54 -0
  121. sqlframe-1.1.3/sqlframe/spark/functions.py +22 -0
  122. sqlframe-1.1.3/sqlframe/spark/group.py +14 -0
  123. sqlframe-1.1.3/sqlframe/spark/readwriter.py +29 -0
  124. sqlframe-1.1.3/sqlframe/spark/session.py +90 -0
  125. sqlframe-1.1.3/sqlframe/spark/types.py +1 -0
  126. sqlframe-1.1.3/sqlframe/spark/window.py +1 -0
  127. sqlframe-1.1.3/sqlframe/standalone/__init__.py +26 -0
  128. sqlframe-1.1.3/sqlframe/standalone/catalog.py +13 -0
  129. sqlframe-1.1.3/sqlframe/standalone/column.py +1 -0
  130. sqlframe-1.1.3/sqlframe/standalone/dataframe.py +36 -0
  131. sqlframe-1.1.3/sqlframe/standalone/functions.py +1 -0
  132. sqlframe-1.1.3/sqlframe/standalone/group.py +14 -0
  133. sqlframe-1.1.3/sqlframe/standalone/readwriter.py +19 -0
  134. sqlframe-1.1.3/sqlframe/standalone/session.py +40 -0
  135. sqlframe-1.1.3/sqlframe/standalone/types.py +1 -0
  136. sqlframe-1.1.3/sqlframe/standalone/window.py +1 -0
  137. sqlframe-1.1.3/sqlframe.egg-info/PKG-INFO +138 -0
  138. sqlframe-1.1.3/sqlframe.egg-info/SOURCES.txt +189 -0
  139. sqlframe-1.1.3/sqlframe.egg-info/dependency_links.txt +1 -0
  140. sqlframe-1.1.3/sqlframe.egg-info/requires.txt +54 -0
  141. sqlframe-1.1.3/sqlframe.egg-info/top_level.txt +1 -0
  142. sqlframe-1.1.3/tests/__init__.py +0 -0
  143. sqlframe-1.1.3/tests/common_fixtures.py +208 -0
  144. sqlframe-1.1.3/tests/conftest.py +29 -0
  145. sqlframe-1.1.3/tests/fixtures/employee.csv +6 -0
  146. sqlframe-1.1.3/tests/fixtures/employee.json +5 -0
  147. sqlframe-1.1.3/tests/fixtures/employee.parquet +0 -0
  148. sqlframe-1.1.3/tests/fixtures/employee_extra_line.csv +7 -0
  149. sqlframe-1.1.3/tests/integration/__init__.py +0 -0
  150. sqlframe-1.1.3/tests/integration/engines/__init__.py +0 -0
  151. sqlframe-1.1.3/tests/integration/engines/bigquery/__init__.py +0 -0
  152. sqlframe-1.1.3/tests/integration/engines/bigquery/test_bigquery_catalog.py +343 -0
  153. sqlframe-1.1.3/tests/integration/engines/bigquery/test_bigquery_session.py +20 -0
  154. sqlframe-1.1.3/tests/integration/engines/duck/__init__.py +0 -0
  155. sqlframe-1.1.3/tests/integration/engines/duck/test_duckdb_catalog.py +356 -0
  156. sqlframe-1.1.3/tests/integration/engines/duck/test_duckdb_reader.py +100 -0
  157. sqlframe-1.1.3/tests/integration/engines/duck/test_duckdb_session.py +13 -0
  158. sqlframe-1.1.3/tests/integration/engines/postgres/__init__.py +0 -0
  159. sqlframe-1.1.3/tests/integration/engines/postgres/test_postgres_catalog.py +317 -0
  160. sqlframe-1.1.3/tests/integration/engines/postgres/test_postgres_session.py +19 -0
  161. sqlframe-1.1.3/tests/integration/engines/redshift/__init__.py +0 -0
  162. sqlframe-1.1.3/tests/integration/engines/redshift/test_redshift_catalog.py +306 -0
  163. sqlframe-1.1.3/tests/integration/engines/redshift/test_redshift_session.py +47 -0
  164. sqlframe-1.1.3/tests/integration/engines/snowflake/__init__.py +0 -0
  165. sqlframe-1.1.3/tests/integration/engines/snowflake/test_snowflake_catalog.py +333 -0
  166. sqlframe-1.1.3/tests/integration/engines/snowflake/test_snowflake_session.py +47 -0
  167. sqlframe-1.1.3/tests/integration/engines/spark/__init__.py +0 -0
  168. sqlframe-1.1.3/tests/integration/engines/spark/test_spark_catalog.py +244 -0
  169. sqlframe-1.1.3/tests/integration/engines/test_engine_dataframe.py +87 -0
  170. sqlframe-1.1.3/tests/integration/engines/test_engine_reader.py +131 -0
  171. sqlframe-1.1.3/tests/integration/engines/test_engine_session.py +47 -0
  172. sqlframe-1.1.3/tests/integration/engines/test_engine_writer.py +176 -0
  173. sqlframe-1.1.3/tests/integration/engines/test_int_functions.py +2688 -0
  174. sqlframe-1.1.3/tests/integration/fixtures.py +712 -0
  175. sqlframe-1.1.3/tests/integration/test_int_dataframe.py +1980 -0
  176. sqlframe-1.1.3/tests/integration/test_int_dataframe_stats.py +28 -0
  177. sqlframe-1.1.3/tests/integration/test_int_grouped_data.py +165 -0
  178. sqlframe-1.1.3/tests/integration/test_int_session.py +70 -0
  179. sqlframe-1.1.3/tests/types.py +6 -0
  180. sqlframe-1.1.3/tests/unit/__init__.py +0 -0
  181. sqlframe-1.1.3/tests/unit/standalone/__init__.py +0 -0
  182. sqlframe-1.1.3/tests/unit/standalone/fixtures.py +71 -0
  183. sqlframe-1.1.3/tests/unit/standalone/test_column.py +218 -0
  184. sqlframe-1.1.3/tests/unit/standalone/test_dataframe.py +85 -0
  185. sqlframe-1.1.3/tests/unit/standalone/test_dataframe_writer.py +107 -0
  186. sqlframe-1.1.3/tests/unit/standalone/test_functions.py +2792 -0
  187. sqlframe-1.1.3/tests/unit/standalone/test_session.py +138 -0
  188. sqlframe-1.1.3/tests/unit/standalone/test_session_case_sensitivity.py +110 -0
  189. sqlframe-1.1.3/tests/unit/standalone/test_types.py +38 -0
  190. sqlframe-1.1.3/tests/unit/standalone/test_window.py +45 -0
@@ -0,0 +1 @@
1
+ * @eakmanrq
@@ -0,0 +1,42 @@
1
+ name: SQLFrame
2
+ on:
3
+ push:
4
+ branches:
5
+ - main
6
+ paths:
7
+ - 'sqlframe/**'
8
+ - 'tests/**'
9
+ - 'Makefile'
10
+ - 'setup.py'
11
+ pull_request:
12
+ types:
13
+ - synchronize
14
+ - opened
15
+ paths:
16
+ - 'sqlframe/**'
17
+ - 'tests/**'
18
+ - 'Makefile'
19
+ - 'setup.py'
20
+ jobs:
21
+ run-tests:
22
+ runs-on: ubuntu-latest
23
+ env:
24
+ PYTEST_XDIST_AUTO_NUM_WORKERS: 4
25
+ strategy:
26
+ matrix:
27
+ python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
28
+ steps:
29
+ - name: Checkout
30
+ uses: actions/checkout@v4
31
+ - name: Install Python
32
+ uses: actions/setup-python@v5
33
+ with:
34
+ python-version: ${{ matrix.python-version }}
35
+ - name: Install dependencies
36
+ run: make install-dev
37
+ - name: Run Style
38
+ run: make style
39
+ - name: Setup Postgres
40
+ uses: ikalnytskyi/action-setup-postgres@v6
41
+ - name: Run tests
42
+ run: make local-test
@@ -0,0 +1,27 @@
1
+ name: Publish
2
+ on:
3
+ push:
4
+ tags:
5
+ - 'v[0-9]+.[0-9]+.[0-9]+'
6
+ permissions:
7
+ contents: write
8
+ jobs:
9
+ deploy:
10
+ runs-on: ubuntu-latest
11
+ env:
12
+ TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
13
+ TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
14
+ steps:
15
+ - name: Checkout
16
+ uses: actions/checkout@v4
17
+ - name: Publish
18
+ run: make publish
19
+ - name: Create release
20
+ env:
21
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
22
+ tag: ${{ github.ref_name }}
23
+ run: |
24
+ gh release create "$tag" \
25
+ --repo="$GITHUB_REPOSITORY" \
26
+ --title="SQLFrame ${tag#v}" \
27
+ --generate-notes
@@ -0,0 +1,145 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .env
106
+ .venv
107
+ env/
108
+ venv/
109
+ ENV/
110
+ env.bak/
111
+ venv.bak/
112
+
113
+ # Spyder project settings
114
+ .spyderproject
115
+ .spyproject
116
+
117
+ # Rope project settings
118
+ .ropeproject
119
+
120
+ # mkdocs documentation
121
+ /site
122
+
123
+ # mypy
124
+ .mypy_cache/
125
+ .dmypy.json
126
+ dmypy.json
127
+
128
+ # Pyre type checker
129
+ .pyre/
130
+
131
+ # PyCharm
132
+ .idea/
133
+
134
+ # Visual Studio Code
135
+ .vscode
136
+
137
+ .DS_STORE
138
+ metastore_db
139
+ spark_warehouse
140
+
141
+ # Version file
142
+ sqlframe/_version.py
143
+
144
+ # Emacs files
145
+ *~
@@ -0,0 +1,33 @@
1
+ repos:
2
+ - repo: local
3
+ hooks:
4
+ - id: ruff
5
+ name: ruff
6
+ description: "Run 'ruff' for extremely fast Python linting"
7
+ entry: ruff check
8
+ --force-exclude
9
+ --fix
10
+ --select I
11
+ --ignore E721
12
+ --ignore E741
13
+ language: python
14
+ types_or: [python, pyi]
15
+ require_serial: true
16
+ additional_dependencies: []
17
+ files: ^(sqlframe/|tests/|setup.py)
18
+ - id: ruff-format
19
+ name: ruff-format
20
+ description: "Run 'ruff format' for extremely fast Python formatting"
21
+ entry: ruff format
22
+ --force-exclude
23
+ --line-length 100
24
+ language: python
25
+ types_or: [python, pyi]
26
+ require_serial: true
27
+ - id: mypy
28
+ name: mypy
29
+ entry: mypy sqlframe tests
30
+ language: system
31
+ types: [ python ]
32
+ files: ^(sqlframe/|tests/)
33
+ pass_filenames: false
@@ -0,0 +1,13 @@
1
+ version: 2
2
+
3
+ build:
4
+ os: ubuntu-22.04
5
+ tools:
6
+ python: "3.8"
7
+
8
+ mkdocs:
9
+ configuration: mkdocs.yml
10
+
11
+ python:
12
+ install:
13
+ - requirements: docs/requirements.txt
sqlframe-1.1.3/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Ryan Eakman
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,37 @@
1
+ install-dev:
2
+ pip install -e ".[dev,docs,duckdb,postgres,redshift,bigquery,snowflake,spark]"
3
+
4
+ install-pre-commit:
5
+ pre-commit install
6
+
7
+ slow-test:
8
+ pytest -n auto tests
9
+
10
+ fast-test:
11
+ pytest -n auto -m "fast"
12
+
13
+ local-test:
14
+ pytest -n auto -m "fast or local"
15
+
16
+ bigquery-test:
17
+ pytest -n auto -m "bigquery"
18
+
19
+ duckdb-test:
20
+ pytest -n auto -m "duckdb"
21
+
22
+ style:
23
+ pre-commit run --all-files
24
+
25
+ docs-serve:
26
+ mkdocs serve
27
+
28
+ stubs:
29
+ stubgen sqlframe/bigquery/functions.py --output ./ --inspect-mode
30
+ stubgen sqlframe/duckdb/functions.py --output ./ --inspect-mode
31
+ stubgen sqlframe/postgres/functions.py --output ./ --inspect-mode
32
+
33
+ package:
34
+ pip3 install wheel && python3 setup.py sdist bdist_wheel
35
+
36
+ publish: package
37
+ pip3 install twine && python3 -m twine upload dist/*
@@ -0,0 +1,138 @@
1
+ Metadata-Version: 2.1
2
+ Name: sqlframe
3
+ Version: 1.1.3
4
+ Summary: Taking the Spark out of PySpark by converting to SQL
5
+ Home-page: https://github.com/eakmanrq/sqlframe
6
+ Author: Ryan Eakman
7
+ Author-email: eakmanrq@gmail.com
8
+ License: MIT
9
+ Platform: UNKNOWN
10
+ Classifier: Development Status :: 5 - Production/Stable
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: SQL
16
+ Classifier: Programming Language :: Python :: 3 :: Only
17
+ Requires-Python: >=3.8
18
+ Description-Content-Type: text/markdown
19
+ Provides-Extra: bigquery
20
+ Provides-Extra: dev
21
+ Provides-Extra: docs
22
+ Provides-Extra: duckdb
23
+ Provides-Extra: postgres
24
+ Provides-Extra: redshift
25
+ Provides-Extra: snowflake
26
+ Provides-Extra: spark
27
+ License-File: LICENSE
28
+
29
+ <div align="center">
30
+ <img src="https://sqlframe.readthedocs.io/en/latest/docs/images/sqlframe_logo.png" alt="SQLFrame Logo" width="400"/>
31
+ </div>
32
+
33
+ SQLFrame implements the PySpark DataFrame API in order to enable running transformation pipelines directly on database engines - no Spark clusters or dependencies required.
34
+
35
+ SQLFrame currently supports the following engines (many more in development):
36
+
37
+ * [BigQuery](https://sqlframe.readthedocs.io/en/latest/bigquery/)
38
+ * [DuckDB](https://sqlframe.readthedocs.io/en/latest/duckdb)
39
+ * [Postgres](https://sqlframe.readthedocs.io/en/latest/postgres)
40
+
41
+ SQLFrame also has a "Standalone" session that can be used to generate SQL without any connection to a database engine.
42
+ * [Standalone](https://sqlframe.readthedocs.io/en/latest/standalone)
43
+
44
+ SQLFrame is great for:
45
+
46
+ * Users who want to run PySpark DataFrame code without having to use a Spark cluster
47
+ * Users who want a SQL representation of their DataFrame code for debugging or sharing with others
48
+ * Users who want a DataFrame API that leverages the full power of their engine to do the processing
49
+
50
+ ## Installation
51
+
52
+ ```bash
53
+ # BigQuery
54
+ pip install "sqlframe[bigquery]"
55
+ # DuckDB
56
+ pip install "sqlframe[duckdb]"
57
+ # Postgres
58
+ pip install "sqlframe[postgres]"
59
+ # Standalone
60
+ pip install sqlframe
61
+ ```
62
+
63
+ See specific engine documentation for additional setup instructions.
64
+
65
+ ## Example Usage
66
+
67
+ ```python
68
+ from sqlframe.bigquery import BigQuerySession
69
+ from sqlframe.bigquery import functions as F
70
+ from sqlframe.bigquery import Window
71
+
72
+ session = BigQuerySession()
73
+ table_path = "bigquery-public-data.samples.natality"
74
+ # Top 5 years with the greatest year-over-year % change in new families with single child
75
+ df = (
76
+ session.table(table_path)
77
+ .where(F.col("ever_born") == 1)
78
+ .groupBy("year")
79
+ .agg(F.count("*").alias("num_single_child_families"))
80
+ .withColumn(
81
+ "last_year_num_single_child_families",
82
+ F.lag(F.col("num_single_child_families"), 1).over(Window.orderBy("year"))
83
+ )
84
+ .withColumn(
85
+ "percent_change",
86
+ (F.col("num_single_child_families") - F.col("last_year_num_single_child_families"))
87
+ / F.col("last_year_num_single_child_families")
88
+ )
89
+ .orderBy(F.abs(F.col("percent_change")).desc())
90
+ .select(
91
+ F.col("year").alias("year"),
92
+ F.format_number("num_single_child_families", 0).alias("new families single child"),
93
+ F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
94
+ )
95
+ .limit(5)
96
+ )
97
+ ```
98
+ ```python
99
+ >>> df.sql()
100
+ WITH `t94228` AS (
101
+ SELECT
102
+ `natality`.`year` AS `year`,
103
+ COUNT(*) AS `num_single_child_families`
104
+ FROM `bigquery-public-data`.`samples`.`natality` AS `natality`
105
+ WHERE
106
+ `natality`.`ever_born` = 1
107
+ GROUP BY
108
+ `natality`.`year`
109
+ ), `t39093` AS (
110
+ SELECT
111
+ `t94228`.`year` AS `year`,
112
+ `t94228`.`num_single_child_families` AS `num_single_child_families`,
113
+ LAG(`t94228`.`num_single_child_families`, 1) OVER (ORDER BY `t94228`.`year`) AS `last_year_num_single_child_families`
114
+ FROM `t94228` AS `t94228`
115
+ )
116
+ SELECT
117
+ `t39093`.`year` AS `year`,
118
+ FORMAT('%\'.0f', ROUND(CAST(`t39093`.`num_single_child_families` AS FLOAT64), 0)) AS `new families single child`,
119
+ FORMAT('%\'.2f', ROUND(CAST((((`t39093`.`num_single_child_families` - `t39093`.`last_year_num_single_child_families`) / `t39093`.`last_year_num_single_child_families`) * 100) AS FLOAT64), 2)) AS `percent change`
120
+ FROM `t39093` AS `t39093`
121
+ ORDER BY
122
+ ABS(`percent_change`) DESC
123
+ LIMIT 5
124
+ ```
125
+ ```python
126
+ >>> df.show()
127
+ +------+---------------------------+----------------+
128
+ | year | new families single child | percent change |
129
+ +------+---------------------------+----------------+
130
+ | 1989 | 1,650,246 | 25.02 |
131
+ | 1974 | 783,448 | 14.49 |
132
+ | 1977 | 1,057,379 | 11.38 |
133
+ | 1985 | 1,308,476 | 11.15 |
134
+ | 1975 | 868,985 | 10.92 |
135
+ +------+---------------------------+----------------+
136
+ ```
137
+
138
+
@@ -0,0 +1,108 @@
1
+ <div align="center">
2
+ <img src="https://sqlframe.readthedocs.io/en/latest/docs/images/sqlframe_logo.png" alt="SQLFrame Logo" width="400"/>
3
+ </div>
4
+
5
+ SQLFrame implements the PySpark DataFrame API in order to enable running transformation pipelines directly on database engines - no Spark clusters or dependencies required.
6
+
7
+ SQLFrame currently supports the following engines (many more in development):
8
+
9
+ * [BigQuery](https://sqlframe.readthedocs.io/en/latest/bigquery/)
10
+ * [DuckDB](https://sqlframe.readthedocs.io/en/latest/duckdb)
11
+ * [Postgres](https://sqlframe.readthedocs.io/en/latest/postgres)
12
+
13
+ SQLFrame also has a "Standalone" session that can be used to generate SQL without any connection to a database engine.
14
+ * [Standalone](https://sqlframe.readthedocs.io/en/latest/standalone)
15
+
16
+ SQLFrame is great for:
17
+
18
+ * Users who want to run PySpark DataFrame code without having to use a Spark cluster
19
+ * Users who want a SQL representation of their DataFrame code for debugging or sharing with others
20
+ * Users who want a DataFrame API that leverages the full power of their engine to do the processing
21
+
22
+ ## Installation
23
+
24
+ ```bash
25
+ # BigQuery
26
+ pip install "sqlframe[bigquery]"
27
+ # DuckDB
28
+ pip install "sqlframe[duckdb]"
29
+ # Postgres
30
+ pip install "sqlframe[postgres]"
31
+ # Standalone
32
+ pip install sqlframe
33
+ ```
34
+
35
+ See specific engine documentation for additional setup instructions.
36
+
37
+ ## Example Usage
38
+
39
+ ```python
40
+ from sqlframe.bigquery import BigQuerySession
41
+ from sqlframe.bigquery import functions as F
42
+ from sqlframe.bigquery import Window
43
+
44
+ session = BigQuerySession()
45
+ table_path = "bigquery-public-data.samples.natality"
46
+ # Top 5 years with the greatest year-over-year % change in new families with single child
47
+ df = (
48
+ session.table(table_path)
49
+ .where(F.col("ever_born") == 1)
50
+ .groupBy("year")
51
+ .agg(F.count("*").alias("num_single_child_families"))
52
+ .withColumn(
53
+ "last_year_num_single_child_families",
54
+ F.lag(F.col("num_single_child_families"), 1).over(Window.orderBy("year"))
55
+ )
56
+ .withColumn(
57
+ "percent_change",
58
+ (F.col("num_single_child_families") - F.col("last_year_num_single_child_families"))
59
+ / F.col("last_year_num_single_child_families")
60
+ )
61
+ .orderBy(F.abs(F.col("percent_change")).desc())
62
+ .select(
63
+ F.col("year").alias("year"),
64
+ F.format_number("num_single_child_families", 0).alias("new families single child"),
65
+ F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
66
+ )
67
+ .limit(5)
68
+ )
69
+ ```
70
+ ```python
71
+ >>> df.sql()
72
+ WITH `t94228` AS (
73
+ SELECT
74
+ `natality`.`year` AS `year`,
75
+ COUNT(*) AS `num_single_child_families`
76
+ FROM `bigquery-public-data`.`samples`.`natality` AS `natality`
77
+ WHERE
78
+ `natality`.`ever_born` = 1
79
+ GROUP BY
80
+ `natality`.`year`
81
+ ), `t39093` AS (
82
+ SELECT
83
+ `t94228`.`year` AS `year`,
84
+ `t94228`.`num_single_child_families` AS `num_single_child_families`,
85
+ LAG(`t94228`.`num_single_child_families`, 1) OVER (ORDER BY `t94228`.`year`) AS `last_year_num_single_child_families`
86
+ FROM `t94228` AS `t94228`
87
+ )
88
+ SELECT
89
+ `t39093`.`year` AS `year`,
90
+ FORMAT('%\'.0f', ROUND(CAST(`t39093`.`num_single_child_families` AS FLOAT64), 0)) AS `new families single child`,
91
+ FORMAT('%\'.2f', ROUND(CAST((((`t39093`.`num_single_child_families` - `t39093`.`last_year_num_single_child_families`) / `t39093`.`last_year_num_single_child_families`) * 100) AS FLOAT64), 2)) AS `percent change`
92
+ FROM `t39093` AS `t39093`
93
+ ORDER BY
94
+ ABS(`percent_change`) DESC
95
+ LIMIT 5
96
+ ```
97
+ ```python
98
+ >>> df.show()
99
+ +------+---------------------------+----------------+
100
+ | year | new families single child | percent change |
101
+ +------+---------------------------+----------------+
102
+ | 1989 | 1,650,246 | 25.02 |
103
+ | 1974 | 783,448 | 14.49 |
104
+ | 1977 | 1,057,379 | 11.38 |
105
+ | 1985 | 1,308,476 | 11.15 |
106
+ | 1975 | 868,985 | 10.92 |
107
+ +------+---------------------------+----------------+
108
+ ```
Binary file