sqlframe 1.2.0__tar.gz → 1.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. {sqlframe-1.2.0 → sqlframe-1.4.0}/Makefile +2 -2
  2. {sqlframe-1.2.0 → sqlframe-1.4.0}/PKG-INFO +14 -6
  3. {sqlframe-1.2.0 → sqlframe-1.4.0}/README.md +11 -5
  4. sqlframe-1.4.0/docs/configuration.md +242 -0
  5. {sqlframe-1.2.0 → sqlframe-1.4.0}/mkdocs.yml +1 -0
  6. {sqlframe-1.2.0 → sqlframe-1.4.0}/setup.py +8 -5
  7. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/_version.py +2 -2
  8. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/column.py +7 -3
  9. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/dataframe.py +94 -7
  10. sqlframe-1.4.0/sqlframe/base/decorators.py +53 -0
  11. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/mixins/catalog_mixins.py +1 -1
  12. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/mixins/readwriter_mixins.py +4 -3
  13. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/readerwriter.py +3 -0
  14. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/session.py +6 -9
  15. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/util.py +38 -1
  16. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/snowflake/catalog.py +3 -1
  17. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/snowflake/session.py +31 -0
  18. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/spark/session.py +3 -1
  19. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe.egg-info/PKG-INFO +14 -6
  20. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe.egg-info/SOURCES.txt +2 -0
  21. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe.egg-info/requires.txt +8 -5
  22. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/snowflake/test_snowflake_session.py +2 -2
  23. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/test_int_functions.py +4 -0
  24. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/unit/standalone/test_session.py +1 -1
  25. sqlframe-1.4.0/tests/unit/test_util.py +26 -0
  26. sqlframe-1.2.0/sqlframe/base/decorators.py +0 -51
  27. {sqlframe-1.2.0 → sqlframe-1.4.0}/.github/CODEOWNERS +0 -0
  28. {sqlframe-1.2.0 → sqlframe-1.4.0}/.github/workflows/main.workflow.yaml +0 -0
  29. {sqlframe-1.2.0 → sqlframe-1.4.0}/.github/workflows/publish.workflow.yaml +0 -0
  30. {sqlframe-1.2.0 → sqlframe-1.4.0}/.gitignore +0 -0
  31. {sqlframe-1.2.0 → sqlframe-1.4.0}/.pre-commit-config.yaml +0 -0
  32. {sqlframe-1.2.0 → sqlframe-1.4.0}/.readthedocs.yaml +0 -0
  33. {sqlframe-1.2.0 → sqlframe-1.4.0}/LICENSE +0 -0
  34. {sqlframe-1.2.0 → sqlframe-1.4.0}/blogs/images/but_wait_theres_more.gif +0 -0
  35. {sqlframe-1.2.0 → sqlframe-1.4.0}/blogs/images/cake.gif +0 -0
  36. {sqlframe-1.2.0 → sqlframe-1.4.0}/blogs/images/you_get_pyspark_api.gif +0 -0
  37. {sqlframe-1.2.0 → sqlframe-1.4.0}/blogs/sqlframe_universal_dataframe_api.md +0 -0
  38. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/bigquery.md +0 -0
  39. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/docs/bigquery.md +0 -0
  40. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/docs/duckdb.md +0 -0
  41. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/docs/images/SF.png +0 -0
  42. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/docs/images/favicon.png +0 -0
  43. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/docs/images/favicon_old.png +0 -0
  44. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/docs/images/sqlframe_diagram.png +0 -0
  45. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/docs/images/sqlframe_logo.png +0 -0
  46. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/docs/postgres.md +0 -0
  47. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/duckdb.md +0 -0
  48. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/images/SF.png +0 -0
  49. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/images/favicon.png +0 -0
  50. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/images/favicon_old.png +0 -0
  51. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/images/sqlframe_diagram.png +0 -0
  52. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/images/sqlframe_logo.png +0 -0
  53. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/index.md +0 -0
  54. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/postgres.md +0 -0
  55. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/requirements.txt +0 -0
  56. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/standalone.md +0 -0
  57. {sqlframe-1.2.0 → sqlframe-1.4.0}/docs/stylesheets/extra.css +0 -0
  58. {sqlframe-1.2.0 → sqlframe-1.4.0}/pytest.ini +0 -0
  59. {sqlframe-1.2.0 → sqlframe-1.4.0}/renovate.json +0 -0
  60. {sqlframe-1.2.0 → sqlframe-1.4.0}/setup.cfg +0 -0
  61. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/LICENSE +0 -0
  62. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/__init__.py +0 -0
  63. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/__init__.py +0 -0
  64. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/_typing.py +0 -0
  65. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/catalog.py +0 -0
  66. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/exceptions.py +0 -0
  67. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/function_alternatives.py +0 -0
  68. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/functions.py +0 -0
  69. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/group.py +0 -0
  70. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/mixins/__init__.py +0 -0
  71. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/mixins/dataframe_mixins.py +0 -0
  72. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/normalize.py +0 -0
  73. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/operations.py +0 -0
  74. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/transforms.py +0 -0
  75. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/types.py +0 -0
  76. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/base/window.py +0 -0
  77. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/bigquery/__init__.py +0 -0
  78. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/bigquery/catalog.py +0 -0
  79. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/bigquery/column.py +0 -0
  80. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/bigquery/dataframe.py +0 -0
  81. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/bigquery/functions.py +0 -0
  82. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/bigquery/functions.pyi +0 -0
  83. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/bigquery/group.py +0 -0
  84. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/bigquery/readwriter.py +0 -0
  85. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/bigquery/session.py +0 -0
  86. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/bigquery/types.py +0 -0
  87. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/bigquery/window.py +0 -0
  88. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/duckdb/__init__.py +0 -0
  89. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/duckdb/catalog.py +0 -0
  90. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/duckdb/column.py +0 -0
  91. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/duckdb/dataframe.py +0 -0
  92. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/duckdb/functions.py +0 -0
  93. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/duckdb/functions.pyi +0 -0
  94. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/duckdb/group.py +0 -0
  95. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/duckdb/readwriter.py +0 -0
  96. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/duckdb/session.py +0 -0
  97. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/duckdb/types.py +0 -0
  98. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/duckdb/window.py +0 -0
  99. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/postgres/__init__.py +0 -0
  100. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/postgres/catalog.py +0 -0
  101. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/postgres/column.py +0 -0
  102. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/postgres/dataframe.py +0 -0
  103. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/postgres/functions.py +0 -0
  104. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/postgres/functions.pyi +0 -0
  105. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/postgres/group.py +0 -0
  106. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/postgres/readwriter.py +0 -0
  107. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/postgres/session.py +0 -0
  108. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/postgres/types.py +0 -0
  109. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/postgres/window.py +0 -0
  110. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/redshift/__init__.py +0 -0
  111. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/redshift/catalog.py +0 -0
  112. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/redshift/column.py +0 -0
  113. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/redshift/dataframe.py +0 -0
  114. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/redshift/functions.py +0 -0
  115. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/redshift/group.py +0 -0
  116. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/redshift/readwriter.py +0 -0
  117. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/redshift/session.py +0 -0
  118. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/redshift/types.py +0 -0
  119. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/redshift/window.py +0 -0
  120. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/snowflake/__init__.py +0 -0
  121. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/snowflake/column.py +0 -0
  122. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/snowflake/dataframe.py +0 -0
  123. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/snowflake/functions.py +0 -0
  124. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/snowflake/group.py +0 -0
  125. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/snowflake/readwriter.py +0 -0
  126. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/snowflake/types.py +0 -0
  127. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/snowflake/window.py +0 -0
  128. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/spark/__init__.py +0 -0
  129. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/spark/catalog.py +0 -0
  130. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/spark/column.py +0 -0
  131. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/spark/dataframe.py +0 -0
  132. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/spark/functions.py +0 -0
  133. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/spark/group.py +0 -0
  134. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/spark/readwriter.py +0 -0
  135. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/spark/types.py +0 -0
  136. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/spark/window.py +0 -0
  137. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/standalone/__init__.py +0 -0
  138. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/standalone/catalog.py +0 -0
  139. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/standalone/column.py +0 -0
  140. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/standalone/dataframe.py +0 -0
  141. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/standalone/functions.py +0 -0
  142. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/standalone/group.py +0 -0
  143. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/standalone/readwriter.py +0 -0
  144. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/standalone/session.py +0 -0
  145. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/standalone/types.py +0 -0
  146. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe/standalone/window.py +0 -0
  147. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe.egg-info/dependency_links.txt +0 -0
  148. {sqlframe-1.2.0 → sqlframe-1.4.0}/sqlframe.egg-info/top_level.txt +0 -0
  149. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/__init__.py +0 -0
  150. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/common_fixtures.py +0 -0
  151. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/conftest.py +0 -0
  152. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/fixtures/employee.csv +0 -0
  153. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/fixtures/employee.json +0 -0
  154. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/fixtures/employee.parquet +0 -0
  155. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/fixtures/employee_extra_line.csv +0 -0
  156. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/__init__.py +0 -0
  157. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/__init__.py +0 -0
  158. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/bigquery/__init__.py +0 -0
  159. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/bigquery/test_bigquery_catalog.py +0 -0
  160. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/bigquery/test_bigquery_session.py +0 -0
  161. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/duck/__init__.py +0 -0
  162. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/duck/test_duckdb_catalog.py +0 -0
  163. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/duck/test_duckdb_dataframe.py +0 -0
  164. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/duck/test_duckdb_reader.py +0 -0
  165. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/duck/test_duckdb_session.py +0 -0
  166. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/postgres/__init__.py +0 -0
  167. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/postgres/test_postgres_catalog.py +0 -0
  168. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/postgres/test_postgres_dataframe.py +0 -0
  169. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/postgres/test_postgres_session.py +0 -0
  170. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/redshift/__init__.py +0 -0
  171. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/redshift/test_redshift_catalog.py +0 -0
  172. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/redshift/test_redshift_session.py +0 -0
  173. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/snowflake/__init__.py +0 -0
  174. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/snowflake/test_snowflake_catalog.py +0 -0
  175. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/spark/__init__.py +0 -0
  176. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/spark/test_spark_catalog.py +0 -0
  177. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/test_engine_dataframe.py +0 -0
  178. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/test_engine_reader.py +0 -0
  179. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/test_engine_session.py +0 -0
  180. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/engines/test_engine_writer.py +0 -0
  181. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/fixtures.py +0 -0
  182. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/test_int_dataframe.py +0 -0
  183. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/test_int_dataframe_stats.py +0 -0
  184. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/test_int_grouped_data.py +0 -0
  185. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/integration/test_int_session.py +0 -0
  186. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/types.py +0 -0
  187. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/unit/__init__.py +0 -0
  188. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/unit/standalone/__init__.py +0 -0
  189. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/unit/standalone/fixtures.py +0 -0
  190. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/unit/standalone/test_column.py +0 -0
  191. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/unit/standalone/test_dataframe.py +0 -0
  192. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/unit/standalone/test_dataframe_writer.py +0 -0
  193. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/unit/standalone/test_functions.py +0 -0
  194. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/unit/standalone/test_session_case_sensitivity.py +0 -0
  195. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/unit/standalone/test_types.py +0 -0
  196. {sqlframe-1.2.0 → sqlframe-1.4.0}/tests/unit/standalone/test_window.py +0 -0
@@ -1,5 +1,5 @@
1
1
  install-dev:
2
- pip install -e ".[dev,docs,duckdb,postgres,redshift,bigquery,snowflake,spark]"
2
+ pip install -e ".[bigquery,dev,docs,duckdb,pandas,postgres,redshift,snowflake,spark]"
3
3
 
4
4
  install-pre-commit:
5
5
  pre-commit install
@@ -8,7 +8,7 @@ slow-test:
8
8
  pytest -n auto tests
9
9
 
10
10
  fast-test:
11
- pytest -n auto -m "fast"
11
+ pytest -n auto tests/unit
12
12
 
13
13
  local-test:
14
14
  pytest -n auto -m "fast or local"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 1.2.0
3
+ Version: 1.4.0
4
4
  Summary: Taking the Spark out of PySpark by converting to SQL
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -20,6 +20,8 @@ Provides-Extra: bigquery
20
20
  Provides-Extra: dev
21
21
  Provides-Extra: docs
22
22
  Provides-Extra: duckdb
23
+ Provides-Extra: openai
24
+ Provides-Extra: pandas
23
25
  Provides-Extra: postgres
24
26
  Provides-Extra: redshift
25
27
  Provides-Extra: snowflake
@@ -27,19 +29,19 @@ Provides-Extra: spark
27
29
  License-File: LICENSE
28
30
 
29
31
  <div align="center">
30
- <img src="https://sqlframe.readthedocs.io/en/latest/docs/images/sqlframe_logo.png" alt="SQLFrame Logo" width="400"/>
32
+ <img src="https://sqlframe.readthedocs.io/en/stable/docs/images/sqlframe_logo.png" alt="SQLFrame Logo" width="400"/>
31
33
  </div>
32
34
 
33
35
  SQLFrame implements the PySpark DataFrame API in order to enable running transformation pipelines directly on database engines - no Spark clusters or dependencies required.
34
36
 
35
37
  SQLFrame currently supports the following engines (many more in development):
36
38
 
37
- * [BigQuery](https://sqlframe.readthedocs.io/en/latest/bigquery/)
38
- * [DuckDB](https://sqlframe.readthedocs.io/en/latest/duckdb)
39
- * [Postgres](https://sqlframe.readthedocs.io/en/latest/postgres)
39
+ * [BigQuery](https://sqlframe.readthedocs.io/en/stable/bigquery/)
40
+ * [DuckDB](https://sqlframe.readthedocs.io/en/stable/duckdb)
41
+ * [Postgres](https://sqlframe.readthedocs.io/en/stable/postgres)
40
42
 
41
43
  SQLFrame also has a "Standalone" session that can be used to generate SQL without any connection to a database engine.
42
- * [Standalone](https://sqlframe.readthedocs.io/en/latest/standalone)
44
+ * [Standalone](https://sqlframe.readthedocs.io/en/stable/standalone)
43
45
 
44
46
  SQLFrame is great for:
45
47
 
@@ -62,6 +64,12 @@ pip install sqlframe
62
64
 
63
65
  See specific engine documentation for additional setup instructions.
64
66
 
67
+ ## Configuration
68
+
69
+ SQLFrame generates consistently accurate yet complex SQL for engine execution.
70
+ However, when using df.sql(), it produces more human-readable SQL.
71
+ For details on how to configure this output and leverage OpenAI to enhance the SQL, see [Generated SQL Configuration](https://sqlframe.readthedocs.io/en/stable/configuration/#generated-sql).
72
+
65
73
  ## Example Usage
66
74
 
67
75
  ```python
@@ -1,17 +1,17 @@
1
1
  <div align="center">
2
- <img src="https://sqlframe.readthedocs.io/en/latest/docs/images/sqlframe_logo.png" alt="SQLFrame Logo" width="400"/>
2
+ <img src="https://sqlframe.readthedocs.io/en/stable/docs/images/sqlframe_logo.png" alt="SQLFrame Logo" width="400"/>
3
3
  </div>
4
4
 
5
5
  SQLFrame implements the PySpark DataFrame API in order to enable running transformation pipelines directly on database engines - no Spark clusters or dependencies required.
6
6
 
7
7
  SQLFrame currently supports the following engines (many more in development):
8
8
 
9
- * [BigQuery](https://sqlframe.readthedocs.io/en/latest/bigquery/)
10
- * [DuckDB](https://sqlframe.readthedocs.io/en/latest/duckdb)
11
- * [Postgres](https://sqlframe.readthedocs.io/en/latest/postgres)
9
+ * [BigQuery](https://sqlframe.readthedocs.io/en/stable/bigquery/)
10
+ * [DuckDB](https://sqlframe.readthedocs.io/en/stable/duckdb)
11
+ * [Postgres](https://sqlframe.readthedocs.io/en/stable/postgres)
12
12
 
13
13
  SQLFrame also has a "Standalone" session that can be used to generate SQL without any connection to a database engine.
14
- * [Standalone](https://sqlframe.readthedocs.io/en/latest/standalone)
14
+ * [Standalone](https://sqlframe.readthedocs.io/en/stable/standalone)
15
15
 
16
16
  SQLFrame is great for:
17
17
 
@@ -34,6 +34,12 @@ pip install sqlframe
34
34
 
35
35
  See specific engine documentation for additional setup instructions.
36
36
 
37
+ ## Configuration
38
+
39
+ SQLFrame generates consistently accurate yet complex SQL for engine execution.
40
+ However, when using df.sql(), it produces more human-readable SQL.
41
+ For details on how to configure this output and leverage OpenAI to enhance the SQL, see [Generated SQL Configuration](https://sqlframe.readthedocs.io/en/stable/configuration/#generated-sql).
42
+
37
43
  ## Example Usage
38
44
 
39
45
  ```python
@@ -0,0 +1,242 @@
1
+ # General Configuration
2
+
3
+ ## Generated SQL
4
+
5
+ ### Pretty
6
+
7
+ Whether the SQL should be returned in a "pretty" format, meaning it has newlines and indentation. Defaults to `True`.
8
+
9
+ ```python
10
+ from sqlframe.standalone import StandaloneSession
11
+
12
+ session = StandaloneSession()
13
+
14
+ df = session.createDataFrame([{'a': 1, 'b': 2}])
15
+ ```
16
+ ```python
17
+ >>> print(df.sql())
18
+ SELECT
19
+ CAST(`a1`.`a` AS BIGINT) AS `a`,
20
+ CAST(`a1`.`b` AS BIGINT) AS `b`
21
+ FROM VALUES
22
+ (1, 2) AS `a1`(`a`, `b`)
23
+ ```
24
+ ```python
25
+ >>> print(df.sql(pretty=False))
26
+ SELECT CAST(`a3`.`a` AS BIGINT) AS `a`, CAST(`a3`.`b` AS BIGINT) AS `b` FROM VALUES (1, 2) AS `a3`(`a`, `b`)
27
+ ```
28
+
29
+ ### Optimized
30
+
31
+ Optimized SQL is SQL that has been processed by SQLGlot's optimizer. For complex queries this will significantly reduce the number of CTEs produced and remove extra unused columns. Defaults to `True`.
32
+
33
+ ```python
34
+ from sqlframe.bigquery import BigQuerySession
35
+ from sqlframe.bigquery import functions as F
36
+ from sqlframe.bigquery import Window
37
+
38
+ session = BigQuerySession()
39
+ table_path = "bigquery-public-data.samples.natality"
40
+ # Top 5 years with the greatest year-over-year % change in new families with single child
41
+ df = (
42
+ session.table(table_path)
43
+ .where(F.col("ever_born") == 1)
44
+ .groupBy("year")
45
+ .agg(F.count("*").alias("num_single_child_families"))
46
+ .withColumn(
47
+ "last_year_num_single_child_families",
48
+ F.lag(F.col("num_single_child_families"), 1).over(Window.orderBy("year"))
49
+ )
50
+ .withColumn(
51
+ "percent_change",
52
+ (F.col("num_single_child_families") - F.col("last_year_num_single_child_families"))
53
+ / F.col("last_year_num_single_child_families")
54
+ )
55
+ .orderBy(F.abs(F.col("percent_change")).desc())
56
+ .select(
57
+ F.col("year").alias("year"),
58
+ F.format_number("num_single_child_families", 0).alias("new families single child"),
59
+ F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
60
+ )
61
+ .limit(5)
62
+ )
63
+ ```
64
+ ```python
65
+ >>> print(df.sql(optimize=True))
66
+ WITH `t94228042` AS (
67
+ SELECT
68
+ `natality`.`year` AS `year`,
69
+ COUNT(*) AS `num_single_child_families`
70
+ FROM `bigquery-public-data`.`samples`.`natality` AS `natality`
71
+ WHERE
72
+ `natality`.`ever_born` = 1
73
+ GROUP BY
74
+ `natality`.`year`
75
+ ), `t30206548` AS (
76
+ SELECT
77
+ `t94228042`.`year` AS `year`,
78
+ `t94228042`.`num_single_child_families` AS `num_single_child_families`,
79
+ LAG(`t94228042`.`num_single_child_families`, 1) OVER (ORDER BY `t94228042`.`year`) AS `last_year_num_single_child_families`
80
+ FROM `t94228042` AS `t94228042`
81
+ )
82
+ SELECT
83
+ `t30206548`.`year` AS `year`,
84
+ FORMAT('%\'.0f', ROUND(CAST(`t30206548`.`num_single_child_families` AS FLOAT64), 0)) AS `new families single child`,
85
+ FORMAT(
86
+ '%\'.2f',
87
+ ROUND(
88
+ CAST((
89
+ (
90
+ (
91
+ `t30206548`.`num_single_child_families` - `t30206548`.`last_year_num_single_child_families`
92
+ ) / `t30206548`.`last_year_num_single_child_families`
93
+ ) * 100
94
+ ) AS FLOAT64),
95
+ 2
96
+ )
97
+ ) AS `percent change`
98
+ FROM `t30206548` AS `t30206548`
99
+ ORDER BY
100
+ ABS(`percent_change`) DESC
101
+ LIMIT 5
102
+ ```
103
+ ```python
104
+ >>> print(df.sql(optimize=False))
105
+ WITH t14183493 AS (
106
+ SELECT
107
+ `source_year`,
108
+ `year`,
109
+ `month`,
110
+ `day`,
111
+ `wday`,
112
+ `state`,
113
+ `is_male`,
114
+ `child_race`,
115
+ `weight_pounds`,
116
+ `plurality`,
117
+ `apgar_1min`,
118
+ `apgar_5min`,
119
+ `mother_residence_state`,
120
+ `mother_race`,
121
+ `mother_age`,
122
+ `gestation_weeks`,
123
+ `lmp`,
124
+ `mother_married`,
125
+ `mother_birth_state`,
126
+ `cigarette_use`,
127
+ `cigarettes_per_day`,
128
+ `alcohol_use`,
129
+ `drinks_per_week`,
130
+ `weight_gain_pounds`,
131
+ `born_alive_alive`,
132
+ `born_alive_dead`,
133
+ `born_dead`,
134
+ `ever_born`,
135
+ `father_race`,
136
+ `father_age`,
137
+ `record_weight`
138
+ FROM bigquery-public-data.samples.natality
139
+ ), t17633417 AS (
140
+ SELECT
141
+ year,
142
+ COUNT(*) AS num_single_child_families
143
+ FROM t14183493
144
+ WHERE
145
+ ever_born = 1
146
+ GROUP BY
147
+ year
148
+ ), t32066970 AS (
149
+ SELECT
150
+ year,
151
+ num_single_child_families,
152
+ LAG(num_single_child_families, 1) OVER (ORDER BY year) AS last_year_num_single_child_families
153
+ FROM t17633417
154
+ ), t21362690 AS (
155
+ SELECT
156
+ year,
157
+ num_single_child_families,
158
+ last_year_num_single_child_families,
159
+ (
160
+ (
161
+ num_single_child_families - last_year_num_single_child_families
162
+ ) / last_year_num_single_child_families
163
+ ) AS percent_change
164
+ FROM t32066970
165
+ ORDER BY
166
+ ABS(percent_change) DESC
167
+ )
168
+ SELECT
169
+ year AS year,
170
+ FORMAT('%\'.0f', ROUND(CAST(num_single_child_families AS FLOAT64), 0)) AS `new families single child`,
171
+ FORMAT('%\'.2f', ROUND(CAST((
172
+ percent_change * 100
173
+ ) AS FLOAT64), 2)) AS `percent change`
174
+ FROM t21362690
175
+ LIMIT 5
176
+ ```
177
+
178
+ ### Override Dialect
179
+
180
+ The dialect of the generated SQL will be based on the session's dialect. However, you can override the dialect by passing a string to the `dialect` parameter. This is useful when you want to generate SQL for a different database.
181
+
182
+ ```python
183
+ # create session and `df` like normal
184
+ df.sql(dialect="bigquery")
185
+ ```
186
+
187
+ ### OpenAI Enrichment
188
+
189
+ OpenAI's models can be used to enrich the generated SQL to make it more human-like.
190
+ You can have it just provide more readable CTE names or you can have it try to make the whole SQL statement more readable.
191
+
192
+ #### Example
193
+
194
+ ```python
195
+ # create session and `df` like normal
196
+ # The model to use defaults to `gpt-4o` but can be changed by passing a `model` key in the `openai_config` dictionary.
197
+ >>> df.sql(openai_config={"mode": "cte_only", "model": "gpt-3.5-turbo"})
198
+ WITH `single_child_families_by_year` AS (
199
+ SELECT
200
+ `natality`.`year` AS `year`,
201
+ COUNT(*) AS `num_single_child_families`
202
+ FROM `bigquery-public-data`.`samples`.`natality` AS `natality`
203
+ WHERE
204
+ `natality`.`ever_born` = 1
205
+ GROUP BY
206
+ `natality`.`year`
207
+ ), `families_with_percent_change` AS (
208
+ SELECT
209
+ `single_child_families_by_year`.`year` AS `year`,
210
+ `single_child_families_by_year`.`num_single_child_families` AS `num_single_child_families`,
211
+ LAG(`single_child_families_by_year`.`num_single_child_families`, 1) OVER (ORDER BY `single_child_families_by_year`.`year`) AS `last_year_num_single_child_families`
212
+ FROM `single_child_families_by_year` AS `single_child_families_by_year`
213
+ )
214
+ SELECT
215
+ `families_with_percent_change`.`year` AS `year`,
216
+ FORMAT('%\'.0f', ROUND(CAST(`families_with_percent_change`.`num_single_child_families` AS FLOAT64), 0)) AS `new families single child`,
217
+ FORMAT(
218
+ '%\'.2f',
219
+ ROUND(
220
+ CAST((
221
+ (
222
+ (
223
+ `families_with_percent_change`.`num_single_child_families` - `families_with_percent_change`.`last_year_num_single_child_families`
224
+ ) / `families_with_percent_change`.`last_year_num_single_child_families`
225
+ ) * 100
226
+ ) AS FLOAT64),
227
+ 2
228
+ )
229
+ ) AS `percent change`
230
+ FROM `families_with_percent_change` AS `families_with_percent_change`
231
+ ORDER BY
232
+ ABS(`percent_change`) DESC
233
+ LIMIT 5
234
+ ```
235
+
236
+ #### Parameters
237
+
238
+ | Parameter | Description | Default |
239
+ |-------------------|-----------------------------------------------------------------------|------------|
240
+ | `mode` | The mode to use. Can be `cte_only` or `full`. | `cte_only` |
241
+ | `model` | The OpenAI model to use. Note: The default may change in new releases | `gpt-4o` |
242
+ | `prompt_override` | A string to use to override the default prompt. | None |
@@ -7,6 +7,7 @@ nav:
7
7
  - "DuckDB": duckdb.md
8
8
  - "Postgres": postgres.md
9
9
  - "Standalone": standalone.md
10
+ - "Configuration": configuration.md
10
11
  theme:
11
12
  name: material
12
13
  logo: images/SF.png
@@ -26,11 +26,11 @@ setup(
26
26
  "bigquery": [
27
27
  "google-cloud-bigquery[pandas]>=3,<4",
28
28
  "google-cloud-bigquery-storage>=2,<3",
29
- "pandas>=2,<3",
30
29
  ],
31
30
  "dev": [
32
31
  "duckdb>=0.9,<0.11",
33
32
  "mypy>=1.10.0,<1.11",
33
+ "openai>=1.30,<1.31",
34
34
  "pandas>=2,<3",
35
35
  "pandas-stubs>=2,<3",
36
36
  "psycopg>=3.1,<4",
@@ -56,17 +56,20 @@ setup(
56
56
  "duckdb>=0.9,<0.11",
57
57
  "pandas>=2,<3",
58
58
  ],
59
- "postgres": [
59
+ "openai": [
60
+ "openai>=1.30,<1.31",
61
+ ],
62
+ "pandas": [
60
63
  "pandas>=2,<3",
64
+ ],
65
+ "postgres": [
61
66
  "psycopg2>=2.8,<3",
62
67
  ],
63
68
  "redshift": [
64
- "pandas>=2,<3",
65
69
  "redshift_connector>=2.1.1,<2.2.0",
66
70
  ],
67
71
  "snowflake": [
68
- "pandas>=2,<3",
69
- "snowflake-connector-python[pandas,secure-local-storage]>=3.10.0,<3.11",
72
+ "snowflake-connector-python[secure-local-storage]>=3.10.0,<3.11",
70
73
  ],
71
74
  "spark": [
72
75
  "pyspark>=2,<3.6",
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '1.2.0'
16
- __version_tuple__ = version_tuple = (1, 2, 0)
15
+ __version__ = version = '1.4.0'
16
+ __version_tuple__ = version_tuple = (1, 4, 0)
@@ -9,9 +9,11 @@ import typing as t
9
9
  import sqlglot
10
10
  from sqlglot import expressions as exp
11
11
  from sqlglot.helper import flatten, is_iterable
12
+ from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
12
13
 
14
+ from sqlframe.base.decorators import normalize
13
15
  from sqlframe.base.types import DataType
14
- from sqlframe.base.util import get_func_from_session
16
+ from sqlframe.base.util import get_func_from_session, quote_preserving_alias_or_name
15
17
 
16
18
  if t.TYPE_CHECKING:
17
19
  from sqlframe.base._typing import ColumnOrLiteral, ColumnOrName
@@ -237,7 +239,7 @@ class Column:
237
239
 
238
240
  @property
239
241
  def alias_or_name(self) -> str:
240
- return self.expression.alias_or_name
242
+ return quote_preserving_alias_or_name(self.expression) # type: ignore
241
243
 
242
244
  @classmethod
243
245
  def ensure_literal(cls, value) -> Column:
@@ -266,7 +268,9 @@ class Column:
266
268
  from sqlframe.base.session import _BaseSession
267
269
 
268
270
  dialect = _BaseSession().input_dialect
269
- alias: exp.Expression = exp.parse_identifier(name, dialect=dialect)
271
+ alias: exp.Expression = normalize_identifiers(
272
+ exp.parse_identifier(name, dialect=dialect), dialect=dialect
273
+ )
270
274
  new_expression = exp.Alias(
271
275
  this=self.column_expression,
272
276
  alias=alias.this if isinstance(alias, exp.Column) else alias,
@@ -2,26 +2,34 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import enum
5
6
  import functools
6
7
  import itertools
8
+ import json
7
9
  import logging
8
10
  import sys
9
11
  import typing as t
10
12
  import zlib
11
13
  from copy import copy
14
+ from dataclasses import dataclass
12
15
 
13
16
  import sqlglot
14
17
  from prettytable import PrettyTable
15
18
  from sqlglot import Dialect
16
19
  from sqlglot import expressions as exp
17
20
  from sqlglot.helper import ensure_list, object_to_dict, seq_get
21
+ from sqlglot.optimizer.pushdown_projections import pushdown_projections
22
+ from sqlglot.optimizer.qualify import qualify
18
23
  from sqlglot.optimizer.qualify_columns import quote_identifiers
19
24
 
25
+ from sqlframe.base.decorators import normalize
20
26
  from sqlframe.base.operations import Operation, operation
21
27
  from sqlframe.base.transforms import replace_id_value
22
28
  from sqlframe.base.util import (
23
29
  get_func_from_session,
24
30
  get_tables_from_expression_with_join,
31
+ quote_preserving_alias_or_name,
32
+ verify_openai_installed,
25
33
  )
26
34
 
27
35
  if sys.version_info >= (3, 11):
@@ -70,6 +78,46 @@ JOIN_HINTS = {
70
78
  DF = t.TypeVar("DF", bound="_BaseDataFrame")
71
79
 
72
80
 
81
class OpenAIMode(enum.Enum):
    """
    Strategy used when generated SQL is sent to OpenAI for post-processing.

    CTE_ONLY: only the CTE alias names are replaced.
    FULL: the whole SQL statement is replaced by the model's response.
    """

    CTE_ONLY = "cte_only"
    FULL = "full"

    @property
    def is_cte_only(self) -> bool:
        """True when this member is `CTE_ONLY`."""
        # Enum members are singletons, so identity is equivalent to equality here.
        return self is OpenAIMode.CTE_ONLY

    @property
    def is_full(self) -> bool:
        """True when this member is `FULL`."""
        return self is OpenAIMode.FULL
92
+
93
+
94
@dataclass
class OpenAIConfig:
    """
    Settings controlling how generated SQL is post-processed through OpenAI.
    """

    # Rewrite strategy; defaults to only renaming CTE aliases.
    mode: OpenAIMode = OpenAIMode.CTE_ONLY
    # Model name handed to the chat completions call.
    model: str = "gpt-4o"
    # When set to a non-empty string, `get_prompt` returns it verbatim.
    prompt_override: t.Optional[str] = None

    @classmethod
    def from_dict(cls, config: t.Dict[str, t.Any]) -> OpenAIConfig:
        """
        Build a config from a plain dictionary of field values.

        The ``mode`` entry may be a case-insensitive string (e.g. ``"FULL"``) or
        an existing `OpenAIMode` member. The input dictionary is never modified,
        so the same dict can safely be reused across calls.

        Raises:
            ValueError: if ``mode`` is a string that matches no `OpenAIMode` value.
            AttributeError: if ``mode`` is neither a string nor an `OpenAIMode`.
            TypeError: if the dictionary contains keys that are not config fields.
        """
        # Shallow copy: writing the converted enum back into the caller's dict
        # would make a second call with that dict fail on `.lower()`.
        options = dict(config)
        if "mode" in options and not isinstance(options["mode"], OpenAIMode):
            options["mode"] = OpenAIMode(options["mode"].lower())
        return cls(**options)

    def get_prompt(self, dialect: Dialect) -> str:
        """
        Return the system prompt to send along with the SQL.

        A non-empty ``prompt_override`` wins; otherwise the prompt is chosen by
        ``mode``. Only the full-rewrite prompt interpolates ``dialect``.
        """
        if self.prompt_override:
            return self.prompt_override
        if self.mode.is_cte_only:
            return "You are a backend tool that creates unique CTE alias names match what a human would write and in snake case. You respond without code blocks and only a json payload with the key being the CTE name that is being replaced and the value being the new CTE human readable name."
        # NOTE(review): the continuation lines of this literal are kept flush-left
        # exactly as they appear in the reviewed text; confirm the leading
        # whitespace against the original file.
        return f"""
You are a backend tool that converts correct {dialect} SQL to simplified and more human readable version.
You respond without code block with rewritten {dialect} SQL.
You don't change any column names in the final select because the user expects those to remain the same.
You make unique CTE alias names match what a human would write and in snake case.
You improve formatting with spacing and line-breaks.
You remove redundant parenthesis and aliases.
When remove extra quotes, make sure to keep quotes around words that could be reserved words"""
119
+
120
+
73
121
  class _BaseDataFrameNaFunctions(t.Generic[DF]):
74
122
  def __init__(self, df: DF):
75
123
  self.df = df
@@ -410,7 +458,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
410
458
 
411
459
  outer_select = item.find(exp.Select)
412
460
  if outer_select:
413
- return [col(x.alias_or_name) for x in outer_select.expressions]
461
+ return [col(quote_preserving_alias_or_name(x)) for x in outer_select.expressions]
414
462
  return []
415
463
 
416
464
  def _create_hash_from_expression(self, expression: exp.Expression) -> str:
@@ -471,6 +519,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
471
519
  dialect: DialectType = None,
472
520
  optimize: bool = True,
473
521
  pretty: bool = True,
522
+ openai_config: t.Optional[t.Union[t.Dict[str, t.Any], OpenAIConfig]] = None,
474
523
  as_list: bool = False,
475
524
  **kwargs,
476
525
  ) -> t.Union[str, t.List[str]]:
@@ -480,6 +529,11 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
480
529
  select_expressions = df._get_select_expressions()
481
530
  output_expressions: t.List[t.Union[exp.Select, exp.Cache, exp.Drop]] = []
482
531
  replacement_mapping: t.Dict[exp.Identifier, exp.Identifier] = {}
532
+ openai_config = (
533
+ OpenAIConfig.from_dict(openai_config)
534
+ if openai_config is not None and isinstance(openai_config, dict)
535
+ else openai_config
536
+ )
483
537
 
484
538
  for expression_type, select_expression in select_expressions:
485
539
  select_expression = select_expression.transform(
@@ -490,6 +544,9 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
490
544
  select_expression = t.cast(
491
545
  exp.Select, self.session._optimize(select_expression, dialect=dialect)
492
546
  )
547
+ elif openai_config:
548
+ qualify(select_expression, dialect=dialect, schema=self.session.catalog._schema)
549
+ pushdown_projections(select_expression, schema=self.session.catalog._schema)
493
550
 
494
551
  select_expression = df._replace_cte_names_with_hashes(select_expression)
495
552
 
@@ -505,7 +562,9 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
505
562
  self.session.catalog.add_table(
506
563
  cache_table_name,
507
564
  {
508
- expression.alias_or_name: expression.type.sql(dialect=dialect)
565
+ quote_preserving_alias_or_name(expression): expression.type.sql(
566
+ dialect=dialect
567
+ )
509
568
  if expression.type
510
569
  else "UNKNOWN"
511
570
  for expression in select_expression.expressions
@@ -541,10 +600,37 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
541
600
 
542
601
  output_expressions.append(expression)
543
602
 
544
- results = [
545
- expression.sql(dialect=dialect, pretty=pretty, **kwargs)
546
- for expression in output_expressions
547
- ]
603
+ results = []
604
+ for expression in output_expressions:
605
+ sql = expression.sql(dialect=dialect, pretty=pretty, **kwargs)
606
+ if openai_config:
607
+ assert isinstance(openai_config, OpenAIConfig)
608
+ verify_openai_installed()
609
+ from openai import OpenAI
610
+
611
+ client = OpenAI()
612
+ chat_completed = client.chat.completions.create(
613
+ messages=[
614
+ { # type: ignore
615
+ "role": "system",
616
+ "content": openai_config.get_prompt(dialect),
617
+ },
618
+ {
619
+ "role": "user",
620
+ "content": sql,
621
+ },
622
+ ],
623
+ model=openai_config.model,
624
+ )
625
+ assert chat_completed.choices[0].message.content is not None
626
+ if openai_config.mode.is_cte_only:
627
+ cte_replacement_mapping = json.loads(chat_completed.choices[0].message.content)
628
+ for old_name, new_name in cte_replacement_mapping.items():
629
+ sql = sql.replace(old_name, new_name)
630
+ else:
631
+ sql = chat_completed.choices[0].message.content
632
+ results.append(sql)
633
+
548
634
  if as_list:
549
635
  return results
550
636
  return ";\n".join(results)
@@ -688,7 +774,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
688
774
  join_expression = self._add_ctes_to_expression(join_expression, other_df.expression.ctes)
689
775
  self_columns = self._get_outer_select_columns(join_expression)
690
776
  other_columns = self._get_outer_select_columns(other_df.expression)
691
- join_columns = self._ensure_list_of_columns(on)
777
+ join_columns = self._ensure_and_normalize_cols(on)
692
778
  # Determines the join clause and select columns to be used passed on what type of columns were provided for
693
779
  # the join. The columns returned changes based on how the on expression is provided.
694
780
  if how != "cross":
@@ -1324,6 +1410,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1324
1410
  assert sqls[-1] is not None
1325
1411
  return self.session._fetchdf(sqls[-1])
1326
1412
 
1413
+ @normalize("name")
1327
1414
  def createOrReplaceTempView(self, name: str) -> None:
1328
1415
  self.session.temp_views[name] = self.copy()._convert_leaf_to_cte()
1329
1416
 
@@ -0,0 +1,53 @@
1
+ from __future__ import annotations
2
+
3
+ import functools
4
+ import typing as t
5
+
6
+ from sqlglot import parse_one
7
+ from sqlglot.helper import ensure_list
8
+ from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
9
+
10
+ if t.TYPE_CHECKING:
11
+ from sqlframe.base.catalog import _BaseCatalog
12
+
13
+ CALLING_CLASS = t.TypeVar("CALLING_CLASS")
14
+
15
+
16
def normalize(normalize_kwargs: t.Union[str, t.List[str]]) -> t.Callable[[t.Callable], t.Callable]:
    """
    Build a method decorator that normalizes identifier arguments.

    Each argument named in ``normalize_kwargs`` that is present and truthy is
    parsed (when it is a string) with the session's input dialect, run through
    ``normalize_identifiers`` and replaced by the resulting SQL text before the
    wrapped method is invoked. All arguments are forwarded as keywords.
    """

    def decorator(func: t.Callable) -> t.Callable:
        # `functools.wraps` also records `func` as `wrapper.__wrapped__`.
        @functools.wraps(func)
        def wrapper(self: CALLING_CLASS, *args, **kwargs) -> CALLING_CLASS:
            from sqlframe.base.session import _BaseSession

            input_dialect = _BaseSession().input_dialect

            # Re-key positional arguments by parameter name, skipping `self`.
            # Relies on `co_varnames` listing the parameters first, in order.
            parameter_names = func.__code__.co_varnames[1:]
            for parameter_name, argument in zip(parameter_names, args):
                kwargs[parameter_name] = argument

            for target in ensure_list(normalize_kwargs):
                raw = kwargs.get(target)
                if not raw:
                    # Absent, None or otherwise falsy values are passed through.
                    continue
                if isinstance(raw, str):
                    parsed = parse_one(raw, dialect=input_dialect)
                else:
                    parsed = raw
                normalized = normalize_identifiers(parsed, input_dialect)
                kwargs[target] = normalized.sql(dialect=input_dialect)

            return func(self, **kwargs)

        return wrapper

    return decorator
46
+
47
+
48
def func_metadata(unsupported_engines: t.Optional[t.Union[str, t.List[str]]] = None) -> t.Callable:
    """
    Build a decorator that tags a function with an ``unsupported_engines`` list.

    A falsy argument (``None``, empty string, empty list) yields an empty list;
    anything else is coerced to a list with ``ensure_list``. The decorated
    function itself is returned, not a wrapper.
    """

    def _metadata(func: t.Callable) -> t.Callable:
        engines = []
        if unsupported_engines:
            engines = ensure_list(unsupported_engines)
        setattr(func, "unsupported_engines", engines)
        return func

    return _metadata