sqlframe 2.4.0__tar.gz → 3.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. {sqlframe-2.4.0 → sqlframe-3.0.0}/PKG-INFO +57 -29
  2. {sqlframe-2.4.0 → sqlframe-3.0.0}/README.md +56 -28
  3. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/bigquery.md +72 -16
  4. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/configuration.md +47 -0
  5. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/duckdb.md +65 -16
  6. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/postgres.md +68 -17
  7. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/snowflake.md +78 -21
  8. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/spark.md +58 -12
  9. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/standalone.md +42 -12
  10. {sqlframe-2.4.0 → sqlframe-3.0.0}/setup.py +1 -0
  11. sqlframe-3.0.0/sqlframe/__init__.py +83 -0
  12. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/_version.py +2 -2
  13. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/dataframe.py +11 -1
  14. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/session.py +4 -0
  15. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/__init__.py +11 -2
  16. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/session.py +1 -2
  17. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/__init__.py +12 -3
  18. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/dataframe.py +11 -5
  19. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/session.py +1 -2
  20. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/__init__.py +11 -2
  21. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/session.py +1 -2
  22. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/__init__.py +11 -2
  23. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/session.py +1 -2
  24. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/__init__.py +7 -1
  25. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/session.py +1 -2
  26. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/__init__.py +11 -2
  27. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/session.py +1 -2
  28. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/__init__.py +7 -1
  29. sqlframe-3.0.0/sqlframe/standalone/column.py +1 -0
  30. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/session.py +1 -2
  31. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe.egg-info/PKG-INFO +57 -29
  32. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe.egg-info/SOURCES.txt +17 -0
  33. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe.egg-info/requires.txt +1 -0
  34. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/conftest.py +1 -0
  35. sqlframe-3.0.0/tests/integration/engines/duck/test_duckdb_activate.py +37 -0
  36. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/duck/test_duckdb_dataframe.py +53 -0
  37. sqlframe-3.0.0/tests/integration/engines/postgres/test_postgres_activate.py +37 -0
  38. sqlframe-3.0.0/tests/unit/bigquery/test_activate.py +51 -0
  39. sqlframe-3.0.0/tests/unit/conftest.py +124 -0
  40. sqlframe-3.0.0/tests/unit/duck/test_activate.py +41 -0
  41. sqlframe-3.0.0/tests/unit/postgres/__init__.py +0 -0
  42. sqlframe-3.0.0/tests/unit/postgres/test_activate.py +41 -0
  43. sqlframe-3.0.0/tests/unit/redshift/__init__.py +0 -0
  44. sqlframe-3.0.0/tests/unit/redshift/test_activate.py +41 -0
  45. sqlframe-3.0.0/tests/unit/snowflake/__init__.py +0 -0
  46. sqlframe-3.0.0/tests/unit/snowflake/test_activate.py +41 -0
  47. sqlframe-3.0.0/tests/unit/spark/__init__.py +0 -0
  48. sqlframe-3.0.0/tests/unit/spark/test_activate.py +41 -0
  49. sqlframe-3.0.0/tests/unit/standalone/__init__.py +0 -0
  50. sqlframe-3.0.0/tests/unit/standalone/test_activate.py +41 -0
  51. sqlframe-3.0.0/tests/unit/test_activate.py +37 -0
  52. sqlframe-2.4.0/sqlframe/duckdb/column.py +0 -1
  53. {sqlframe-2.4.0 → sqlframe-3.0.0}/.github/CODEOWNERS +0 -0
  54. {sqlframe-2.4.0 → sqlframe-3.0.0}/.github/workflows/main.workflow.yaml +0 -0
  55. {sqlframe-2.4.0 → sqlframe-3.0.0}/.github/workflows/publish.workflow.yaml +0 -0
  56. {sqlframe-2.4.0 → sqlframe-3.0.0}/.gitignore +0 -0
  57. {sqlframe-2.4.0 → sqlframe-3.0.0}/.pre-commit-config.yaml +0 -0
  58. {sqlframe-2.4.0 → sqlframe-3.0.0}/.readthedocs.yaml +0 -0
  59. {sqlframe-2.4.0 → sqlframe-3.0.0}/LICENSE +0 -0
  60. {sqlframe-2.4.0 → sqlframe-3.0.0}/Makefile +0 -0
  61. {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/add_chatgpt_support.md +0 -0
  62. {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/add_chatgpt_support/adding_ai_to_meal.jpeg +0 -0
  63. {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/add_chatgpt_support/hype_train.gif +0 -0
  64. {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/add_chatgpt_support/marvin_paranoid_robot.gif +0 -0
  65. {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/add_chatgpt_support/nonsense_sql.png +0 -0
  66. {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/add_chatgpt_support/openai_full_rewrite.png +0 -0
  67. {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/add_chatgpt_support/openai_replacing_cte_names.png +0 -0
  68. {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/add_chatgpt_support/sqlglot_optimized_code.png +0 -0
  69. {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/add_chatgpt_support/sunny_shake_head_no.gif +0 -0
  70. {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/but_wait_theres_more.gif +0 -0
  71. {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/cake.gif +0 -0
  72. {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/images/you_get_pyspark_api.gif +0 -0
  73. {sqlframe-2.4.0 → sqlframe-3.0.0}/blogs/sqlframe_universal_dataframe_api.md +0 -0
  74. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/docs/bigquery.md +0 -0
  75. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/docs/duckdb.md +0 -0
  76. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/docs/images/SF.png +0 -0
  77. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/docs/images/favicon.png +0 -0
  78. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/docs/images/favicon_old.png +0 -0
  79. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/docs/images/sqlframe_diagram.png +0 -0
  80. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/docs/images/sqlframe_logo.png +0 -0
  81. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/docs/postgres.md +0 -0
  82. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/images/SF.png +0 -0
  83. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/images/favicon.png +0 -0
  84. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/images/favicon_old.png +0 -0
  85. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/images/sqlframe_diagram.png +0 -0
  86. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/images/sqlframe_logo.png +0 -0
  87. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/index.md +0 -0
  88. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/requirements.txt +0 -0
  89. {sqlframe-2.4.0 → sqlframe-3.0.0}/docs/stylesheets/extra.css +0 -0
  90. {sqlframe-2.4.0 → sqlframe-3.0.0}/mkdocs.yml +0 -0
  91. {sqlframe-2.4.0 → sqlframe-3.0.0}/pytest.ini +0 -0
  92. {sqlframe-2.4.0 → sqlframe-3.0.0}/renovate.json +0 -0
  93. {sqlframe-2.4.0 → sqlframe-3.0.0}/setup.cfg +0 -0
  94. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/LICENSE +0 -0
  95. {sqlframe-2.4.0/sqlframe → sqlframe-3.0.0/sqlframe/base}/__init__.py +0 -0
  96. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/_typing.py +0 -0
  97. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/catalog.py +0 -0
  98. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/column.py +0 -0
  99. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/decorators.py +0 -0
  100. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/exceptions.py +0 -0
  101. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/function_alternatives.py +0 -0
  102. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/functions.py +0 -0
  103. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/group.py +0 -0
  104. {sqlframe-2.4.0/sqlframe/base → sqlframe-3.0.0/sqlframe/base/mixins}/__init__.py +0 -0
  105. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/mixins/catalog_mixins.py +0 -0
  106. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/mixins/dataframe_mixins.py +0 -0
  107. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/mixins/readwriter_mixins.py +0 -0
  108. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/normalize.py +0 -0
  109. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/operations.py +0 -0
  110. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/readerwriter.py +0 -0
  111. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/transforms.py +0 -0
  112. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/types.py +0 -0
  113. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/udf.py +0 -0
  114. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/util.py +0 -0
  115. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/base/window.py +0 -0
  116. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/catalog.py +0 -0
  117. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/column.py +0 -0
  118. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/dataframe.py +0 -0
  119. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/functions.py +0 -0
  120. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/functions.pyi +0 -0
  121. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/group.py +0 -0
  122. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/readwriter.py +0 -0
  123. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/types.py +0 -0
  124. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/udf.py +0 -0
  125. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/bigquery/window.py +0 -0
  126. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/catalog.py +0 -0
  127. {sqlframe-2.4.0/sqlframe/postgres → sqlframe-3.0.0/sqlframe/duckdb}/column.py +0 -0
  128. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/functions.py +0 -0
  129. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/functions.pyi +0 -0
  130. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/group.py +0 -0
  131. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/readwriter.py +0 -0
  132. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/types.py +0 -0
  133. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/udf.py +0 -0
  134. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/duckdb/window.py +0 -0
  135. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/catalog.py +0 -0
  136. {sqlframe-2.4.0/sqlframe/redshift → sqlframe-3.0.0/sqlframe/postgres}/column.py +0 -0
  137. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/dataframe.py +0 -0
  138. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/functions.py +0 -0
  139. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/functions.pyi +0 -0
  140. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/group.py +0 -0
  141. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/readwriter.py +0 -0
  142. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/types.py +0 -0
  143. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/udf.py +0 -0
  144. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/postgres/window.py +0 -0
  145. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/catalog.py +0 -0
  146. {sqlframe-2.4.0/sqlframe/snowflake → sqlframe-3.0.0/sqlframe/redshift}/column.py +0 -0
  147. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/dataframe.py +0 -0
  148. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/functions.py +0 -0
  149. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/group.py +0 -0
  150. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/readwriter.py +0 -0
  151. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/types.py +0 -0
  152. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/udf.py +0 -0
  153. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/redshift/window.py +0 -0
  154. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/catalog.py +0 -0
  155. {sqlframe-2.4.0/sqlframe/spark → sqlframe-3.0.0/sqlframe/snowflake}/column.py +0 -0
  156. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/dataframe.py +0 -0
  157. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/functions.py +0 -0
  158. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/functions.pyi +0 -0
  159. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/group.py +0 -0
  160. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/readwriter.py +0 -0
  161. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/types.py +0 -0
  162. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/udf.py +0 -0
  163. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/snowflake/window.py +0 -0
  164. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/catalog.py +0 -0
  165. {sqlframe-2.4.0/sqlframe/standalone → sqlframe-3.0.0/sqlframe/spark}/column.py +0 -0
  166. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/dataframe.py +0 -0
  167. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/functions.py +0 -0
  168. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/functions.pyi +0 -0
  169. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/group.py +0 -0
  170. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/readwriter.py +0 -0
  171. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/types.py +0 -0
  172. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/udf.py +0 -0
  173. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/spark/window.py +0 -0
  174. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/catalog.py +0 -0
  175. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/dataframe.py +0 -0
  176. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/functions.py +0 -0
  177. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/group.py +0 -0
  178. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/readwriter.py +0 -0
  179. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/types.py +0 -0
  180. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/udf.py +0 -0
  181. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/standalone/window.py +0 -0
  182. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/testing/__init__.py +0 -0
  183. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe/testing/utils.py +0 -0
  184. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe.egg-info/dependency_links.txt +0 -0
  185. {sqlframe-2.4.0 → sqlframe-3.0.0}/sqlframe.egg-info/top_level.txt +0 -0
  186. {sqlframe-2.4.0/sqlframe/base/mixins → sqlframe-3.0.0/tests}/__init__.py +0 -0
  187. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/common_fixtures.py +0 -0
  188. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/fixtures/employee.csv +0 -0
  189. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/fixtures/employee.json +0 -0
  190. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/fixtures/employee.parquet +0 -0
  191. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/fixtures/employee_extra_line.csv +0 -0
  192. {sqlframe-2.4.0/tests → sqlframe-3.0.0/tests/integration}/__init__.py +0 -0
  193. {sqlframe-2.4.0/tests/integration → sqlframe-3.0.0/tests/integration/engines}/__init__.py +0 -0
  194. {sqlframe-2.4.0/tests/integration/engines → sqlframe-3.0.0/tests/integration/engines/bigquery}/__init__.py +0 -0
  195. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/bigquery/test_bigquery_catalog.py +0 -0
  196. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/bigquery/test_bigquery_dataframe.py +0 -0
  197. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/bigquery/test_bigquery_session.py +0 -0
  198. {sqlframe-2.4.0/tests/integration/engines/bigquery → sqlframe-3.0.0/tests/integration/engines/duck}/__init__.py +0 -0
  199. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/duck/test_duckdb_catalog.py +0 -0
  200. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/duck/test_duckdb_reader.py +0 -0
  201. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/duck/test_duckdb_session.py +0 -0
  202. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/duck/test_duckdb_udf.py +0 -0
  203. {sqlframe-2.4.0/tests/integration/engines/duck → sqlframe-3.0.0/tests/integration/engines/postgres}/__init__.py +0 -0
  204. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/postgres/test_postgres_catalog.py +0 -0
  205. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/postgres/test_postgres_dataframe.py +0 -0
  206. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/postgres/test_postgres_session.py +0 -0
  207. {sqlframe-2.4.0/tests/integration/engines/postgres → sqlframe-3.0.0/tests/integration/engines/redshift}/__init__.py +0 -0
  208. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/redshift/test_redshift_catalog.py +0 -0
  209. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/redshift/test_redshift_session.py +0 -0
  210. {sqlframe-2.4.0/tests/integration/engines/redshift → sqlframe-3.0.0/tests/integration/engines/snowflake}/__init__.py +0 -0
  211. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/snowflake/test_snowflake_catalog.py +0 -0
  212. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/snowflake/test_snowflake_dataframe.py +0 -0
  213. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/snowflake/test_snowflake_session.py +0 -0
  214. {sqlframe-2.4.0/tests/integration/engines/snowflake → sqlframe-3.0.0/tests/integration/engines/spark}/__init__.py +0 -0
  215. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/spark/test_spark_catalog.py +0 -0
  216. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/spark/test_spark_dataframe.py +0 -0
  217. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/test_engine_column.py +0 -0
  218. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/test_engine_dataframe.py +0 -0
  219. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/test_engine_reader.py +0 -0
  220. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/test_engine_session.py +0 -0
  221. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/test_engine_writer.py +0 -0
  222. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/test_int_functions.py +0 -0
  223. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/engines/test_int_testing.py +0 -0
  224. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/fixtures.py +0 -0
  225. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/test_int_dataframe.py +0 -0
  226. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/test_int_dataframe_stats.py +0 -0
  227. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/test_int_grouped_data.py +0 -0
  228. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/integration/test_int_session.py +0 -0
  229. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/types.py +0 -0
  230. {sqlframe-2.4.0/tests/integration/engines/spark → sqlframe-3.0.0/tests/unit}/__init__.py +0 -0
  231. {sqlframe-2.4.0/tests/unit → sqlframe-3.0.0/tests/unit/bigquery}/__init__.py +0 -0
  232. {sqlframe-2.4.0/tests/unit/standalone → sqlframe-3.0.0/tests/unit/duck}/__init__.py +0 -0
  233. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/standalone/fixtures.py +0 -0
  234. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/standalone/test_column.py +0 -0
  235. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/standalone/test_dataframe.py +0 -0
  236. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/standalone/test_dataframe_writer.py +0 -0
  237. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/standalone/test_functions.py +0 -0
  238. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/standalone/test_session.py +0 -0
  239. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/standalone/test_session_case_sensitivity.py +0 -0
  240. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/standalone/test_types.py +0 -0
  241. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/standalone/test_window.py +0 -0
  242. {sqlframe-2.4.0 → sqlframe-3.0.0}/tests/unit/test_util.py +0 -0
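The headline change in 3.0.0 is the new `activate()` entry point added in `sqlframe/__init__.py` (item 11) and exercised by the new `test_activate` suites. As orientation before the documentation hunks below, here is a minimal usage sketch assembled from those hunks; the DuckDB engine is only an illustrative choice, and the trailing query assumes the PySpark-style `sql()`/`collect()` methods that SQLFrame mirrors:

```python
# Minimal sketch of the new activate() entry point, assembled from the doc
# hunks below. DuckDB with its default in-memory connection is illustrative.
from sqlframe import activate

# Called before importing pyspark.sql, as in every example in this diff.
activate(engine="duckdb")

from pyspark.sql import SparkSession

# SparkSession now resolves to a SQLFrame DuckDBSession.
session = SparkSession.builder.getOrCreate()

# Assumes the PySpark-style sql()/collect() API that SQLFrame mirrors.
print(session.sql("SELECT 1 AS x").collect())
```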
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sqlframe
- Version: 2.4.0
+ Version: 3.0.0
  Summary: Turning PySpark Into a Universal DataFrame API
  Home-page: https://github.com/eakmanrq/sqlframe
  Author: Ryan Eakman
@@ -48,10 +48,10 @@ SQLFrame also has a "Standalone" session that be used to generate SQL without an

  SQLFrame is great for:

- * Users who want to run PySpark DataFrame code without having to use a Spark cluster
+ * Users who want a DataFrame API that leverages the full power of their engine to do the processing
+ * Users who want to run PySpark code quickly locally without the overhead of starting a Spark session
  * Users who want a SQL representation of their DataFrame code for debugging or sharing with others
- * See [Spark Engine](https://sqlframe.readthedocs.io/en/stable/spark/) for more details
- * Users who want a DataFrame API that leverages the full power of their engine to do the processing
+ * Users who want to run PySpark DataFrame code without the complexity of using Spark for processing

  ## Installation

@@ -75,44 +75,72 @@ See specific engine documentation for additional setup instructions.
  ## Configuration

  SQLFrame generates consistently accurate yet complex SQL for engine execution.
- However, when using df.sql(), it produces more human-readable SQL.
+ However, when using df.sql(optimize=True), it produces more human-readable SQL.
  For details on how to configure this output and leverage OpenAI to enhance the SQL, see [Generated SQL Configuration](https://sqlframe.readthedocs.io/en/stable/configuration/#generated-sql).

  SQLFrame by default uses the Spark dialect for input and output.
  This can be changed to make SQLFrame feel more like a native DataFrame API for the engine you are using.
  See [Input and Output Dialect Configuration](https://sqlframe.readthedocs.io/en/stable/configuration/#input-and-output-dialect).

+ ## Activating SQLFrame
+
+ SQLFrame can either replace pyspark imports or be used alongside them.
+ To replace pyspark imports, use the [activate function](https://sqlframe.readthedocs.io/en/stable/configuration/#activating-sqlframe) to set the engine to use.
+
+ ```python
+ from sqlframe import activate
+
+ # Activate SQLFrame to run directly on DuckDB
+ activate(engine="duckdb")
+
+ from pyspark.sql import SparkSession
+ session = SparkSession.builder.getOrCreate()
+ ```
+
+ SQLFrame can also be directly imported which both maintains pyspark imports but also allows for a more engine-native DataFrame API:
+
+ ```python
+ from sqlframe.duckdb import DuckDBSession
+
+ session = DuckDBSession.builder.getOrCreate()
+ ```
+
  ## Example Usage

  ```python
- from sqlframe.bigquery import BigQuerySession
- from sqlframe.bigquery import functions as F
- from sqlframe.bigquery import Window
+ from sqlframe import activate
+
+ # Activate SQLFrame to run directly on BigQuery
+ activate(engine="bigquery")
+
+ from pyspark.sql import SparkSession
+ from pyspark.sql import functions as F
+ from pyspark.sql import Window

- session = BigQuerySession()
+ session = SparkSession.builder.getOrCreate()
  table_path = '"bigquery-public-data".samples.natality'
  # Top 5 years with the greatest year-over-year % change in new families with single child
  df = (
- session.table(table_path)
- .where(F.col("ever_born") == 1)
- .groupBy("year")
- .agg(F.count("*").alias("num_single_child_families"))
- .withColumn(
- "last_year_num_single_child_families",
- F.lag(F.col("num_single_child_families"), 1).over(Window.orderBy("year"))
- )
- .withColumn(
- "percent_change",
- (F.col("num_single_child_families") - F.col("last_year_num_single_child_families"))
- / F.col("last_year_num_single_child_families")
- )
- .orderBy(F.abs(F.col("percent_change")).desc())
- .select(
- F.col("year").alias("year"),
- F.format_number("num_single_child_families", 0).alias("new families single child"),
- F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
- )
- .limit(5)
+ session.table(table_path)
+ .where(F.col("ever_born") == 1)
+ .groupBy("year")
+ .agg(F.count("*").alias("num_single_child_families"))
+ .withColumn(
+ "last_year_num_single_child_families",
+ F.lag(F.col("num_single_child_families"), 1).over(Window.orderBy("year"))
+ )
+ .withColumn(
+ "percent_change",
+ (F.col("num_single_child_families") - F.col("last_year_num_single_child_families"))
+ / F.col("last_year_num_single_child_families")
+ )
+ .orderBy(F.abs(F.col("percent_change")).desc())
+ .select(
+ F.col("year").alias("year"),
+ F.format_number("num_single_child_families", 0).alias("new families single child"),
+ F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
+ )
+ .limit(5)
  )
  ```
  ```python
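The wording change above switches `df.sql()` to `df.sql(optimize=True)` without showing the call; a minimal sketch follows, assuming the Standalone session (SQL generation only, no engine connection) and a throwaway DataFrame, with the exact SQL text depending on the SQLFrame version:

```python
# Hedged sketch of the df.sql(optimize=True) call referenced in the hunk above.
# StandaloneSession generates SQL without connecting to an engine; the sample
# DataFrame and the exact SQL emitted are illustrative only.
from sqlframe.standalone import StandaloneSession

session = StandaloneSession()
df = session.createDataFrame([(1, "Jack"), (2, "Jill")], ["id", "name"])

print(df.sql(optimize=True))  # optimized, more human-readable SQL
print(df.sql())               # default call, as in the pre-3.0 wording
```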
@@ -18,10 +18,10 @@ SQLFrame also has a "Standalone" session that be used to generate SQL without an

  SQLFrame is great for:

- * Users who want to run PySpark DataFrame code without having to use a Spark cluster
+ * Users who want a DataFrame API that leverages the full power of their engine to do the processing
+ * Users who want to run PySpark code quickly locally without the overhead of starting a Spark session
  * Users who want a SQL representation of their DataFrame code for debugging or sharing with others
- * See [Spark Engine](https://sqlframe.readthedocs.io/en/stable/spark/) for more details
- * Users who want a DataFrame API that leverages the full power of their engine to do the processing
+ * Users who want to run PySpark DataFrame code without the complexity of using Spark for processing

  ## Installation

@@ -45,44 +45,72 @@ See specific engine documentation for additional setup instructions.
  ## Configuration

  SQLFrame generates consistently accurate yet complex SQL for engine execution.
- However, when using df.sql(), it produces more human-readable SQL.
+ However, when using df.sql(optimize=True), it produces more human-readable SQL.
  For details on how to configure this output and leverage OpenAI to enhance the SQL, see [Generated SQL Configuration](https://sqlframe.readthedocs.io/en/stable/configuration/#generated-sql).

  SQLFrame by default uses the Spark dialect for input and output.
  This can be changed to make SQLFrame feel more like a native DataFrame API for the engine you are using.
  See [Input and Output Dialect Configuration](https://sqlframe.readthedocs.io/en/stable/configuration/#input-and-output-dialect).

+ ## Activating SQLFrame
+
+ SQLFrame can either replace pyspark imports or be used alongside them.
+ To replace pyspark imports, use the [activate function](https://sqlframe.readthedocs.io/en/stable/configuration/#activating-sqlframe) to set the engine to use.
+
+ ```python
+ from sqlframe import activate
+
+ # Activate SQLFrame to run directly on DuckDB
+ activate(engine="duckdb")
+
+ from pyspark.sql import SparkSession
+ session = SparkSession.builder.getOrCreate()
+ ```
+
+ SQLFrame can also be directly imported which both maintains pyspark imports but also allows for a more engine-native DataFrame API:
+
+ ```python
+ from sqlframe.duckdb import DuckDBSession
+
+ session = DuckDBSession.builder.getOrCreate()
+ ```
+
  ## Example Usage

  ```python
- from sqlframe.bigquery import BigQuerySession
- from sqlframe.bigquery import functions as F
- from sqlframe.bigquery import Window
+ from sqlframe import activate
+
+ # Activate SQLFrame to run directly on BigQuery
+ activate(engine="bigquery")
+
+ from pyspark.sql import SparkSession
+ from pyspark.sql import functions as F
+ from pyspark.sql import Window

- session = BigQuerySession()
+ session = SparkSession.builder.getOrCreate()
  table_path = '"bigquery-public-data".samples.natality'
  # Top 5 years with the greatest year-over-year % change in new families with single child
  df = (
- session.table(table_path)
- .where(F.col("ever_born") == 1)
- .groupBy("year")
- .agg(F.count("*").alias("num_single_child_families"))
- .withColumn(
- "last_year_num_single_child_families",
- F.lag(F.col("num_single_child_families"), 1).over(Window.orderBy("year"))
- )
- .withColumn(
- "percent_change",
- (F.col("num_single_child_families") - F.col("last_year_num_single_child_families"))
- / F.col("last_year_num_single_child_families")
- )
- .orderBy(F.abs(F.col("percent_change")).desc())
- .select(
- F.col("year").alias("year"),
- F.format_number("num_single_child_families", 0).alias("new families single child"),
- F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
- )
- .limit(5)
+ session.table(table_path)
+ .where(F.col("ever_born") == 1)
+ .groupBy("year")
+ .agg(F.count("*").alias("num_single_child_families"))
+ .withColumn(
+ "last_year_num_single_child_families",
+ F.lag(F.col("num_single_child_families"), 1).over(Window.orderBy("year"))
+ )
+ .withColumn(
+ "percent_change",
+ (F.col("num_single_child_families") - F.col("last_year_num_single_child_families"))
+ / F.col("last_year_num_single_child_families")
+ )
+ .orderBy(F.abs(F.col("percent_change")).desc())
+ .select(
+ F.col("year").alias("year"),
+ F.format_number("num_single_child_families", 0).alias("new families single child"),
+ F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
+ )
+ .limit(5)
  )
  ```
  ```python
@@ -6,6 +6,46 @@
  pip install "sqlframe[bigquery]"
  ```

+ ## Enabling SQLFrame
+
+ SQLFrame can be used in two ways:
+
+ * Directly importing the `sqlframe.bigquery` package
+ * Using the [activate](./configuration.md#activating-sqlframe) function to allow for continuing to use `pyspark.sql` but have it use SQLFrame behind the scenes.
+
+ ### Import
+
+ If converting a PySpark pipeline, all `pyspark.sql` should be replaced with `sqlframe.bigquery`.
+ In addition, many classes will have a `BigQuery` prefix.
+ For example, `BigQueryDataFrame` instead of `DataFrame`.
+
+
+ ```python
+ # PySpark import
+ # from pyspark.sql import SparkSession
+ # from pyspark.sql import functions as F
+ # from pyspark.sql.dataframe import DataFrame
+ # SQLFrame import
+ from sqlframe.bigquery import BigQuerySession
+ from sqlframe.bigquery import functions as F
+ from sqlframe.bigquery import BigQueryDataFrame
+ ```
+
+ ### Activate
+
+ If you would like to continue using `pyspark.sql` but have it use SQLFrame behind the scenes, you can use the [activate](./configuration.md#activating-sqlframe) function.
+
+ ```python
+ from sqlframe import activate
+ activate("bigquery", config={"default_dataset": "sqlframe.db1"})
+
+ from pyspark.sql import SparkSession
+ ```
+
+ `SparkSession` will now be a SQLFrame `BigQuerySession` object and everything will be run on BigQuery directly.
+
+ See [activate configuration](./configuration.md#activating-sqlframe) for information on how to pass in a connection and config options.
+
  ## Creating a Session

  SQLFrame uses the [BigQuery DBAPI Connection](https://cloud.google.com/python/docs/reference/bigquery/latest/dbapi#class-googlecloudbigquerydbapiconnectionclientnone-bqstorageclientnone) to connect to BigQuery.
@@ -13,7 +53,7 @@ A BigQuerySession, which implements the PySpark Session API, can be created by p
  By default, SQLFrame will create a connection by inferring it from the environment (for example using gcloud auth).
  Regardless of approach, it is recommended to configure `default_dataset` in the `BigQuerySession` constructor in order to make it easier to use the catalog methods (see example below).

- === "Without Providing Connection"
+ === "Import + Without Providing Connection"

  ```python
  from sqlframe.bigquery import BigQuerySession
@@ -21,7 +61,7 @@ Regardless of approach, it is recommended to configure `default_dataset` in the
  session = BigQuerySession(default_dataset="sqlframe.db1")
  ```

- === "With Providing Connection"
+ === "Import + With Providing Connection"

  ```python
  import google.auth
@@ -43,23 +83,39 @@ Regardless of approach, it is recommended to configure `default_dataset` in the
  session = BigQuerySession(conn=conn, default_dataset="sqlframe.db1")
  ```

- ## Imports
+ === "Activate + Without Providing Connection"

- If converting a PySpark pipeline, all `pyspark.sql` should be replaced with `sqlframe.bigquery`.
- In addition, many classes will have a `BigQuery` prefix.
- For example, `BigQueryDataFrame` instead of `DataFrame`.
+ ```python
+ from sqlframe import activate
+ activate("bigquery", config={"default_dataset": "sqlframe.db1"})
+
+ from pyspark.sql import SparkSession
+ session = SparkSession.builder.getOrCreate()
+ ```

+ === "Activate + With Providing Connection"

- ```python
- # PySpark import
- # from pyspark.sql import SparkSession
- # from pyspark.sql import functions as F
- # from pyspark.sql.dataframe import DataFrame
- # SQLFrame import
- from sqlframe.bigquery import BigQuerySession
- from sqlframe.bigquery import functions as F
- from sqlframe.bigquery import BigQueryDataFrame
- ```
+ ```python
+ import google.auth
+ from google.api_core import client_info
+ from google.oauth2 import service_account
+ from google.cloud.bigquery.dbapi import connect
+ from sqlframe import activate
+ creds = service_account.Credentials.from_service_account_file("path/to/credentials.json")
+
+ client = google.cloud.bigquery.Client(
+ project="my-project",
+ credentials=creds,
+ location="us-central1",
+ client_info=client_info.ClientInfo(user_agent="sqlframe"),
+ )
+
+ conn = connect(client=client)
+ activate("bigquery", conn=conn, config={"default_dataset": "sqlframe.db1"})
+
+ from pyspark.sql import SparkSession
+ session = SparkSession.builder.getOrCreate()
+ ```

  ## Using BigQuery Unique Functions

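The session-creation tabs above all set `default_dataset` so that catalog methods are easier to use; as a hedged illustration of what that buys (the catalog method names come from the PySpark API that SQLFrame mirrors, not from this diff):

```python
# Hedged illustration of why default_dataset helps. currentDatabase/listTables
# are PySpark-style catalog methods that SQLFrame mirrors; they are assumed
# here rather than shown in this diff.
from sqlframe.bigquery import BigQuerySession

session = BigQuerySession(default_dataset="sqlframe.db1")

# With a default dataset configured, catalog lookups can omit the full
# project.dataset qualification.
print(session.catalog.currentDatabase())
print(session.catalog.listTables())
```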
@@ -24,6 +24,53 @@ In this configuration, you can use BigQuery syntax for elements such as date for

  SQLFrame supports multiple dialects, all of which can be specific as the `input_dialect` and `output_dialect`.

+ ## Activating SQLFrame
+
+ SQLFrame can be activated in order to replace `pyspark` imports with `sqlframe` imports for the given engine.
+ This allows you to use SQLFrame as a drop-in replacement for PySpark by just adding two lines of code.
+
+ ### Activate with Engine
+
+ If you just provide an engine to `activate` then it will create a connection for that engine with default settings (if the engine supports it).
+
+ ```python
+
+ from sqlframe import activate
+ activate("duckdb")
+
+ from pyspark.sql import SparkSession
+ spark = SparkSession.builder.getOrCreate()
+ # "spark" is not a SQLFrame DuckDBSession and will run directly on DuckDB
+ ```
+
+ ### Activate with Connection
+
+ If you provide a connection to `activate` then it will use that connection for the engine.
+
+ ```python
+ import duckdb
+ from sqlframe import activate
+ connection = duckdb.connect("file.duckdb")
+ activate("duckdb", conn=connection)
+
+ from pyspark.sql import SparkSession
+ spark = SparkSession.builder.getOrCreate()
+ # "spark" is a SQLFrame DuckDBSession and will run directly on DuckDB using `file.duckdb` for persistence
+ ```
+
+ ### Activate with Configuration
+
+ If you provide a configuration to `activate` then it will use that configuration to create a connection for the engine.
+
+ ```python
+ from sqlframe import activate
+ activate("duckdb", config={"sqlframe.input.dialect": "duckdb"})
+
+ from pyspark.sql import SparkSession
+ spark = SparkSession.builder.getOrCreate()
+ # "spark" is a SQLFrame DuckDBSession and will run directly on DuckDB with input dialect set to DuckDB
+ ```
+
  ## Generated SQL

  ### Pretty
@@ -6,6 +6,46 @@
  pip install "sqlframe[duckdb]"
  ```

+ ## Enabling SQLFrame
+
+ SQLFrame can be used in two ways:
+
+ * Directly importing the `sqlframe.duckdb` package
+ * Using the [activate](./configuration.md#activating-sqlframe) function to allow for continuing to use `pyspark.sql` but have it use SQLFrame behind the scenes.
+
+ ### Import
+
+ If converting a PySpark pipeline, all `pyspark.sql` should be replaced with `sqlframe.duckdb`.
+ In addition, many classes will have a `DuckDB` prefix.
+ For example, `DuckDBDataFrame` instead of `DataFrame`.
+
+
+ ```python
+ # PySpark import
+ # from pyspark.sql import SparkSession
+ # from pyspark.sql import functions as F
+ # from pyspark.sql.dataframe import DataFrame
+ # SQLFrame import
+ from sqlframe.duckdb import DuckDBSession
+ from sqlframe.duckdb import functions as F
+ from sqlframe.duckdb import DuckDBDataFrame
+ ```
+
+ ### Activate
+
+ If you would like to continue using `pyspark.sql` but have it use SQLFrame behind the scenes, you can use the [activate](./configuration.md#activating-sqlframe) function.
+
+ ```python
+ from sqlframe import activate
+ activate("duckdb")
+
+ from pyspark.sql import SparkSession
+ ```
+
+ `SparkSession` will now be a SQLFrame `DuckDBSession` object and everything will be run on DuckDB directly.
+
+ See [activate configuration](./configuration.md#activating-sqlframe) for information on how to pass in a connection and config options.
+
  ## Creating a Session

  SQLFrame uses the `duckdb` package to connect to DuckDB.
@@ -13,7 +53,7 @@ A DuckDBSession, which implements the PySpark Session API, can be created by pas
  By default, SQLFrame will create a connection to an in-memory database.


- === "Without Providing Connection"
+ === "Import + Without Providing Connection"

  ```python
  from sqlframe.duckdb import DuckDBSession
@@ -21,7 +61,7 @@ By default, SQLFrame will create a connection to an in-memory database.
  session = DuckDBSession()
  ```

- === "With Providing Connection"
+ === "Import + With Providing Connection"

  ```python
  import duckdb
@@ -30,23 +70,30 @@ By default, SQLFrame will create a connection to an in-memory database.
  conn = duckdb.connect(database=":memory:")
  session = DuckDBSession(conn=conn)
  ```
- ## Imports

- If converting a PySpark pipeline, all `pyspark.sql` should be replaced with `sqlframe.duckdb`.
- In addition, many classes will have a `DuckDB` prefix.
- For example, `DuckDBDataFrame` instead of `DataFrame`.
+ === "Activate + Without Providing Connection"

+ ```python
+ from sqlframe import activate
+ activate("duckdb")

- ```python
- # PySpark import
- # from pyspark.sql import SparkSession
- # from pyspark.sql import functions as F
- # from pyspark.sql.dataframe import DataFrame
- # SQLFrame import
- from sqlframe.duckdb import DuckDBSession
- from sqlframe.duckdb import functions as F
- from sqlframe.duckdb import DuckDBDataFrame
- ```
+ from pyspark.sql import SparkSession
+
+ session = SparkSession.builder.getOrCreate()
+ ```
+
+ === "Activate + With Providing Connection"
+
+ ```python
+ import duckdb
+ from sqlframe import activate
+ conn = duckdb.connect(database=":memory:")
+ activate("duckdb", conn=conn)
+
+ from pyspark.sql import SparkSession
+
+ session = SparkSession.builder.getOrCreate()
+ ```

  ## Using DuckDB Unique Functions

@@ -202,6 +249,8 @@ See something that you would like to see supported? [Open an issue](https://gith
  * sql
  * SQLFrame Specific: Get the SQL representation of the WindowSpec
  * [stat](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.stat.html)
+ * [toArrow](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.toArrow.html)
+ * SQLFrame Specific Argument: `batch_size` sets the number of rows to read per-batch and returns a `RecrodBatchReader`
  * [toDF](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.toDF.html)
  * [toPandas](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.toPandas.html)
  * [union](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.union.html)
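The newly listed `toArrow` support above includes a SQLFrame-specific `batch_size` argument; here is a hedged sketch of both call shapes, assuming a DuckDB session, a throwaway DataFrame, and the `pyarrow` RecordBatchReader return described in that note:

```python
# Hedged sketch of the toArrow support listed above. The DataFrame is
# illustrative; batch_size is the SQLFrame-specific argument from the diff
# and is assumed to return a pyarrow RecordBatchReader.
from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()
df = session.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["id", "letter"])

arrow_table = df.toArrow()         # full pyarrow.Table
reader = df.toArrow(batch_size=2)  # RecordBatchReader, two rows per batch
for batch in reader:
    print(batch.num_rows)
```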
@@ -6,26 +6,14 @@
  pip install "sqlframe[postgres]"
  ```

- ## Creating a Session
+ ## Enabling SQLFrame

- SQLFrame uses the `psycopg2` package to connect to Postgres.
- A PostgresSession, which implements the PySpark Session API, is created by passing in a `psycopg2.Connection` object.
-
- ```python
- from psycopg2 import connect
- from sqlframe.postgres import PostgresSession
+ SQLFrame can be used in two ways:

- conn = connect(
- dbname="postgres",
- user="postgres",
- password="password",
- host="localhost",
- port="5432",
- )
- session = PostgresSession(conn=conn)
- ```
+ * Directly importing the `sqlframe.postgres` package
+ * Using the [activate](./configuration.md#activating-sqlframe) function to allow for continuing to use `pyspark.sql` but have it use SQLFrame behind the scenes.

- ## Imports
+ ### Import

  If converting a PySpark pipeline, all `pyspark.sql` should be replaced with `sqlframe.postgres`.
  In addition, many classes will have a `Postgres` prefix.
@@ -43,6 +31,69 @@ from sqlframe.postgres import functions as F
  from sqlframe.postgres import functions as F
  from sqlframe.postgres import PostgresDataFrame
  ```
+ ### Activate
+
+ If you would like to continue using `pyspark.sql` but have it use SQLFrame behind the scenes, you can use the [activate](./configuration.md#activating-sqlframe) function.
+
+ ```python
+ from psycopg2 import connect
+ from sqlframe import activate
+ conn = connect(
+ dbname="postgres",
+ user="postgres",
+ password="password",
+ host="localhost",
+ port="5432",
+ )
+ activate("postgres", conn=conn)
+
+ from pyspark.sql import SparkSession
+ ```
+
+ `SparkSession` will now be a SQLFrame `PostgresSession` object and everything will be run on Postgres directly.
+
+ See [activate configuration](./configuration.md#activating-sqlframe) for information on how to pass in a connection and config options.
+
+ ## Creating a Session
+
+ SQLFrame uses the `psycopg2` package to connect to Postgres.
+ A PostgresSession, which implements the PySpark Session API, is created by passing in a `psycopg2.Connection` object.
+
+ === "Import"
+
+ ```python
+ from psycopg2 import connect
+ from sqlframe.postgres import PostgresSession
+
+ conn = connect(
+ dbname="postgres",
+ user="postgres",
+ password="password",
+ host="localhost",
+ port="5432",
+ )
+ session = PostgresSession(conn=conn)
+ ```
+
+ === "Activate"
+
+ ```python
+ from sqlframe import activate
+
+ conn = connect(
+ dbname="postgres",
+ user="postgres",
+ password="password",
+ host="localhost",
+ port="5432",
+ )
+ activate("postgres", conn=conn)
+
+ from pyspark.sql import SparkSession
+ session = SparkSession.builder.getOrCreate()
+ ```
+
+
  ## Using Postgres Unique Functions

  Postgres may have a function that isn't represented within the PySpark API.