sqlframe 1.2.0__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. {sqlframe-1.2.0 → sqlframe-1.3.0}/Makefile +2 -2
  2. {sqlframe-1.2.0 → sqlframe-1.3.0}/PKG-INFO +3 -1
  3. sqlframe-1.3.0/docs/configuration.md +229 -0
  4. {sqlframe-1.2.0 → sqlframe-1.3.0}/mkdocs.yml +1 -0
  5. {sqlframe-1.2.0 → sqlframe-1.3.0}/setup.py +8 -5
  6. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/_version.py +2 -2
  7. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/column.py +7 -3
  8. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/dataframe.py +50 -7
  9. sqlframe-1.3.0/sqlframe/base/decorators.py +53 -0
  10. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/mixins/catalog_mixins.py +1 -1
  11. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/mixins/readwriter_mixins.py +4 -3
  12. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/readerwriter.py +3 -0
  13. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/session.py +6 -9
  14. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/util.py +38 -1
  15. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/catalog.py +3 -1
  16. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/session.py +31 -0
  17. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/session.py +3 -1
  18. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe.egg-info/PKG-INFO +3 -1
  19. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe.egg-info/SOURCES.txt +2 -0
  20. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe.egg-info/requires.txt +8 -5
  21. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/snowflake/test_snowflake_session.py +2 -2
  22. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/test_int_functions.py +4 -0
  23. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/test_session.py +1 -1
  24. sqlframe-1.3.0/tests/unit/test_util.py +26 -0
  25. sqlframe-1.2.0/sqlframe/base/decorators.py +0 -51
  26. {sqlframe-1.2.0 → sqlframe-1.3.0}/.github/CODEOWNERS +0 -0
  27. {sqlframe-1.2.0 → sqlframe-1.3.0}/.github/workflows/main.workflow.yaml +0 -0
  28. {sqlframe-1.2.0 → sqlframe-1.3.0}/.github/workflows/publish.workflow.yaml +0 -0
  29. {sqlframe-1.2.0 → sqlframe-1.3.0}/.gitignore +0 -0
  30. {sqlframe-1.2.0 → sqlframe-1.3.0}/.pre-commit-config.yaml +0 -0
  31. {sqlframe-1.2.0 → sqlframe-1.3.0}/.readthedocs.yaml +0 -0
  32. {sqlframe-1.2.0 → sqlframe-1.3.0}/LICENSE +0 -0
  33. {sqlframe-1.2.0 → sqlframe-1.3.0}/README.md +0 -0
  34. {sqlframe-1.2.0 → sqlframe-1.3.0}/blogs/images/but_wait_theres_more.gif +0 -0
  35. {sqlframe-1.2.0 → sqlframe-1.3.0}/blogs/images/cake.gif +0 -0
  36. {sqlframe-1.2.0 → sqlframe-1.3.0}/blogs/images/you_get_pyspark_api.gif +0 -0
  37. {sqlframe-1.2.0 → sqlframe-1.3.0}/blogs/sqlframe_universal_dataframe_api.md +0 -0
  38. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/bigquery.md +0 -0
  39. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/docs/bigquery.md +0 -0
  40. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/docs/duckdb.md +0 -0
  41. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/docs/images/SF.png +0 -0
  42. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/docs/images/favicon.png +0 -0
  43. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/docs/images/favicon_old.png +0 -0
  44. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/docs/images/sqlframe_diagram.png +0 -0
  45. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/docs/images/sqlframe_logo.png +0 -0
  46. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/docs/postgres.md +0 -0
  47. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/duckdb.md +0 -0
  48. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/images/SF.png +0 -0
  49. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/images/favicon.png +0 -0
  50. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/images/favicon_old.png +0 -0
  51. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/images/sqlframe_diagram.png +0 -0
  52. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/images/sqlframe_logo.png +0 -0
  53. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/index.md +0 -0
  54. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/postgres.md +0 -0
  55. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/requirements.txt +0 -0
  56. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/standalone.md +0 -0
  57. {sqlframe-1.2.0 → sqlframe-1.3.0}/docs/stylesheets/extra.css +0 -0
  58. {sqlframe-1.2.0 → sqlframe-1.3.0}/pytest.ini +0 -0
  59. {sqlframe-1.2.0 → sqlframe-1.3.0}/renovate.json +0 -0
  60. {sqlframe-1.2.0 → sqlframe-1.3.0}/setup.cfg +0 -0
  61. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/LICENSE +0 -0
  62. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/__init__.py +0 -0
  63. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/__init__.py +0 -0
  64. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/_typing.py +0 -0
  65. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/catalog.py +0 -0
  66. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/exceptions.py +0 -0
  67. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/function_alternatives.py +0 -0
  68. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/functions.py +0 -0
  69. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/group.py +0 -0
  70. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/mixins/__init__.py +0 -0
  71. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/mixins/dataframe_mixins.py +0 -0
  72. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/normalize.py +0 -0
  73. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/operations.py +0 -0
  74. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/transforms.py +0 -0
  75. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/types.py +0 -0
  76. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/base/window.py +0 -0
  77. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/__init__.py +0 -0
  78. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/catalog.py +0 -0
  79. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/column.py +0 -0
  80. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/dataframe.py +0 -0
  81. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/functions.py +0 -0
  82. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/functions.pyi +0 -0
  83. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/group.py +0 -0
  84. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/readwriter.py +0 -0
  85. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/session.py +0 -0
  86. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/types.py +0 -0
  87. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/bigquery/window.py +0 -0
  88. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/__init__.py +0 -0
  89. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/catalog.py +0 -0
  90. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/column.py +0 -0
  91. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/dataframe.py +0 -0
  92. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/functions.py +0 -0
  93. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/functions.pyi +0 -0
  94. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/group.py +0 -0
  95. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/readwriter.py +0 -0
  96. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/session.py +0 -0
  97. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/types.py +0 -0
  98. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/duckdb/window.py +0 -0
  99. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/__init__.py +0 -0
  100. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/catalog.py +0 -0
  101. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/column.py +0 -0
  102. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/dataframe.py +0 -0
  103. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/functions.py +0 -0
  104. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/functions.pyi +0 -0
  105. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/group.py +0 -0
  106. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/readwriter.py +0 -0
  107. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/session.py +0 -0
  108. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/types.py +0 -0
  109. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/postgres/window.py +0 -0
  110. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/__init__.py +0 -0
  111. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/catalog.py +0 -0
  112. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/column.py +0 -0
  113. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/dataframe.py +0 -0
  114. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/functions.py +0 -0
  115. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/group.py +0 -0
  116. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/readwriter.py +0 -0
  117. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/session.py +0 -0
  118. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/types.py +0 -0
  119. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/redshift/window.py +0 -0
  120. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/__init__.py +0 -0
  121. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/column.py +0 -0
  122. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/dataframe.py +0 -0
  123. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/functions.py +0 -0
  124. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/group.py +0 -0
  125. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/readwriter.py +0 -0
  126. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/types.py +0 -0
  127. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/snowflake/window.py +0 -0
  128. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/__init__.py +0 -0
  129. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/catalog.py +0 -0
  130. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/column.py +0 -0
  131. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/dataframe.py +0 -0
  132. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/functions.py +0 -0
  133. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/group.py +0 -0
  134. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/readwriter.py +0 -0
  135. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/types.py +0 -0
  136. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/spark/window.py +0 -0
  137. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/__init__.py +0 -0
  138. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/catalog.py +0 -0
  139. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/column.py +0 -0
  140. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/dataframe.py +0 -0
  141. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/functions.py +0 -0
  142. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/group.py +0 -0
  143. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/readwriter.py +0 -0
  144. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/session.py +0 -0
  145. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/types.py +0 -0
  146. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe/standalone/window.py +0 -0
  147. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe.egg-info/dependency_links.txt +0 -0
  148. {sqlframe-1.2.0 → sqlframe-1.3.0}/sqlframe.egg-info/top_level.txt +0 -0
  149. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/__init__.py +0 -0
  150. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/common_fixtures.py +0 -0
  151. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/conftest.py +0 -0
  152. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/fixtures/employee.csv +0 -0
  153. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/fixtures/employee.json +0 -0
  154. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/fixtures/employee.parquet +0 -0
  155. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/fixtures/employee_extra_line.csv +0 -0
  156. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/__init__.py +0 -0
  157. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/__init__.py +0 -0
  158. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/bigquery/__init__.py +0 -0
  159. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/bigquery/test_bigquery_catalog.py +0 -0
  160. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/bigquery/test_bigquery_session.py +0 -0
  161. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/duck/__init__.py +0 -0
  162. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/duck/test_duckdb_catalog.py +0 -0
  163. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/duck/test_duckdb_dataframe.py +0 -0
  164. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/duck/test_duckdb_reader.py +0 -0
  165. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/duck/test_duckdb_session.py +0 -0
  166. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/postgres/__init__.py +0 -0
  167. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/postgres/test_postgres_catalog.py +0 -0
  168. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/postgres/test_postgres_dataframe.py +0 -0
  169. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/postgres/test_postgres_session.py +0 -0
  170. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/redshift/__init__.py +0 -0
  171. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/redshift/test_redshift_catalog.py +0 -0
  172. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/redshift/test_redshift_session.py +0 -0
  173. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/snowflake/__init__.py +0 -0
  174. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/snowflake/test_snowflake_catalog.py +0 -0
  175. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/spark/__init__.py +0 -0
  176. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/spark/test_spark_catalog.py +0 -0
  177. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/test_engine_dataframe.py +0 -0
  178. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/test_engine_reader.py +0 -0
  179. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/test_engine_session.py +0 -0
  180. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/engines/test_engine_writer.py +0 -0
  181. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/fixtures.py +0 -0
  182. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/test_int_dataframe.py +0 -0
  183. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/test_int_dataframe_stats.py +0 -0
  184. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/test_int_grouped_data.py +0 -0
  185. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/integration/test_int_session.py +0 -0
  186. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/types.py +0 -0
  187. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/__init__.py +0 -0
  188. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/__init__.py +0 -0
  189. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/fixtures.py +0 -0
  190. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/test_column.py +0 -0
  191. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/test_dataframe.py +0 -0
  192. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/test_dataframe_writer.py +0 -0
  193. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/test_functions.py +0 -0
  194. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/test_session_case_sensitivity.py +0 -0
  195. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/test_types.py +0 -0
  196. {sqlframe-1.2.0 → sqlframe-1.3.0}/tests/unit/standalone/test_window.py +0 -0
@@ -1,5 +1,5 @@
1
1
  install-dev:
2
- pip install -e ".[dev,docs,duckdb,postgres,redshift,bigquery,snowflake,spark]"
2
+ pip install -e ".[bigquery,dev,docs,duckdb,pandas,postgres,redshift,snowflake,spark]"
3
3
 
4
4
  install-pre-commit:
5
5
  pre-commit install
@@ -8,7 +8,7 @@ slow-test:
8
8
  pytest -n auto tests
9
9
 
10
10
  fast-test:
11
- pytest -n auto -m "fast"
11
+ pytest -n auto tests/unit
12
12
 
13
13
  local-test:
14
14
  pytest -n auto -m "fast or local"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 1.2.0
3
+ Version: 1.3.0
4
4
  Summary: Taking the Spark out of PySpark by converting to SQL
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -20,6 +20,8 @@ Provides-Extra: bigquery
20
20
  Provides-Extra: dev
21
21
  Provides-Extra: docs
22
22
  Provides-Extra: duckdb
23
+ Provides-Extra: openai
24
+ Provides-Extra: pandas
23
25
  Provides-Extra: postgres
24
26
  Provides-Extra: redshift
25
27
  Provides-Extra: snowflake
@@ -0,0 +1,229 @@
1
+ # General Configuration
2
+
3
+ ## Generated SQL
4
+
5
+ ### Pretty
6
+
7
+ Whether the SQL should be returned in a "pretty" format, meaning it has newlines and indentation. Defaults to `True`.
8
+
9
+ ```python
10
+ from sqlframe.standalone import StandaloneSession
11
+
12
+ session = StandaloneSession()
13
+
14
+ df = session.createDataFrame([{'a': 1, 'b': 2}])
15
+ ```
16
+ ```python
17
+ >>> print(df.sql())
18
+ SELECT
19
+ CAST(`a1`.`a` AS BIGINT) AS `a`,
20
+ CAST(`a1`.`b` AS BIGINT) AS `b`
21
+ FROM VALUES
22
+ (1, 2) AS `a1`(`a`, `b`)
23
+ ```
24
+ ```python
25
+ >>> print(df.sql(pretty=False))
26
+ SELECT CAST(`a3`.`a` AS BIGINT) AS `a`, CAST(`a3`.`b` AS BIGINT) AS `b` FROM VALUES (1, 2) AS `a3`(`a`, `b`)
27
+ ```
28
+
29
+ ### Optimized
30
+
31
+ Optimized SQL is SQL that has been processed by SQLGlot's optimizer. For complex queries this will significantly reduce the number of CTEs produced and remove unused columns. Defaults to `True`.
32
+
33
+ ```python
34
+ from sqlframe.bigquery import BigQuerySession
35
+ from sqlframe.bigquery import functions as F
36
+ from sqlframe.bigquery import Window
37
+
38
+ session = BigQuerySession()
39
+ table_path = "bigquery-public-data.samples.natality"
40
+ # Top 5 years with the greatest year-over-year % change in new families with single child
41
+ df = (
42
+ session.table(table_path)
43
+ .where(F.col("ever_born") == 1)
44
+ .groupBy("year")
45
+ .agg(F.count("*").alias("num_single_child_families"))
46
+ .withColumn(
47
+ "last_year_num_single_child_families",
48
+ F.lag(F.col("num_single_child_families"), 1).over(Window.orderBy("year"))
49
+ )
50
+ .withColumn(
51
+ "percent_change",
52
+ (F.col("num_single_child_families") - F.col("last_year_num_single_child_families"))
53
+ / F.col("last_year_num_single_child_families")
54
+ )
55
+ .orderBy(F.abs(F.col("percent_change")).desc())
56
+ .select(
57
+ F.col("year").alias("year"),
58
+ F.format_number("num_single_child_families", 0).alias("new families single child"),
59
+ F.format_number(F.col("percent_change") * 100, 2).alias("percent change"),
60
+ )
61
+ .limit(5)
62
+ )
63
+ ```
64
+ ```python
65
+ >>> print(df.sql(optimize=True))
66
+ WITH `t94228042` AS (
67
+ SELECT
68
+ `natality`.`year` AS `year`,
69
+ COUNT(*) AS `num_single_child_families`
70
+ FROM `bigquery-public-data`.`samples`.`natality` AS `natality`
71
+ WHERE
72
+ `natality`.`ever_born` = 1
73
+ GROUP BY
74
+ `natality`.`year`
75
+ ), `t30206548` AS (
76
+ SELECT
77
+ `t94228042`.`year` AS `year`,
78
+ `t94228042`.`num_single_child_families` AS `num_single_child_families`,
79
+ LAG(`t94228042`.`num_single_child_families`, 1) OVER (ORDER BY `t94228042`.`year`) AS `last_year_num_single_child_families`
80
+ FROM `t94228042` AS `t94228042`
81
+ )
82
+ SELECT
83
+ `t30206548`.`year` AS `year`,
84
+ FORMAT('%\'.0f', ROUND(CAST(`t30206548`.`num_single_child_families` AS FLOAT64), 0)) AS `new families single child`,
85
+ FORMAT(
86
+ '%\'.2f',
87
+ ROUND(
88
+ CAST((
89
+ (
90
+ (
91
+ `t30206548`.`num_single_child_families` - `t30206548`.`last_year_num_single_child_families`
92
+ ) / `t30206548`.`last_year_num_single_child_families`
93
+ ) * 100
94
+ ) AS FLOAT64),
95
+ 2
96
+ )
97
+ ) AS `percent change`
98
+ FROM `t30206548` AS `t30206548`
99
+ ORDER BY
100
+ ABS(`percent_change`) DESC
101
+ LIMIT 5
102
+ ```
103
+ ```python
104
+ >>> print(df.sql(optimize=False))
105
+ WITH t14183493 AS (
106
+ SELECT
107
+ `source_year`,
108
+ `year`,
109
+ `month`,
110
+ `day`,
111
+ `wday`,
112
+ `state`,
113
+ `is_male`,
114
+ `child_race`,
115
+ `weight_pounds`,
116
+ `plurality`,
117
+ `apgar_1min`,
118
+ `apgar_5min`,
119
+ `mother_residence_state`,
120
+ `mother_race`,
121
+ `mother_age`,
122
+ `gestation_weeks`,
123
+ `lmp`,
124
+ `mother_married`,
125
+ `mother_birth_state`,
126
+ `cigarette_use`,
127
+ `cigarettes_per_day`,
128
+ `alcohol_use`,
129
+ `drinks_per_week`,
130
+ `weight_gain_pounds`,
131
+ `born_alive_alive`,
132
+ `born_alive_dead`,
133
+ `born_dead`,
134
+ `ever_born`,
135
+ `father_race`,
136
+ `father_age`,
137
+ `record_weight`
138
+ FROM bigquery-public-data.samples.natality
139
+ ), t17633417 AS (
140
+ SELECT
141
+ year,
142
+ COUNT(*) AS num_single_child_families
143
+ FROM t14183493
144
+ WHERE
145
+ ever_born = 1
146
+ GROUP BY
147
+ year
148
+ ), t32066970 AS (
149
+ SELECT
150
+ year,
151
+ num_single_child_families,
152
+ LAG(num_single_child_families, 1) OVER (ORDER BY year) AS last_year_num_single_child_families
153
+ FROM t17633417
154
+ ), t21362690 AS (
155
+ SELECT
156
+ year,
157
+ num_single_child_families,
158
+ last_year_num_single_child_families,
159
+ (
160
+ (
161
+ num_single_child_families - last_year_num_single_child_families
162
+ ) / last_year_num_single_child_families
163
+ ) AS percent_change
164
+ FROM t32066970
165
+ ORDER BY
166
+ ABS(percent_change) DESC
167
+ )
168
+ SELECT
169
+ year AS year,
170
+ FORMAT('%\'.0f', ROUND(CAST(num_single_child_families AS FLOAT64), 0)) AS `new families single child`,
171
+ FORMAT('%\'.2f', ROUND(CAST((
172
+ percent_change * 100
173
+ ) AS FLOAT64), 2)) AS `percent change`
174
+ FROM t21362690
175
+ LIMIT 5
176
+ ```
177
+
178
+ ### Override Dialect
179
+
180
+ The dialect of the generated SQL will be based on the session's dialect. However, you can override the dialect by passing a string to the `dialect` parameter. This is useful when you want to generate SQL for a different database.
181
+
182
+ ```python
183
+ # create session and `df` like normal
184
+ df.sql(dialect="bigquery")
185
+ ```
186
+
187
+ ### OpenAI Enriched
188
+
189
+ OpenAI's models can be used to enrich the generated SQL to make it more human-like.
190
+ This is useful when you want to generate SQL that is more readable for humans.
191
+ You must have `OPENAI_API_KEY` set in your environment variables to use this feature.
192
+
193
+ ```python
194
+ # create session and `df` like normal
195
+ # The model to use defaults to `gpt-4o` but can be changed by passing a string to the `openai_model` parameter.
196
+ >>> df.sql(optimize=False, use_openai=True)
197
+ WITH natality_data AS (
198
+ SELECT
199
+ year,
200
+ ever_born
201
+ FROM `bigquery-public-data`.`samples`.`natality`
202
+ ), single_child_families AS (
203
+ SELECT
204
+ year,
205
+ COUNT(*) AS num_single_child_families
206
+ FROM natality_data
207
+ WHERE ever_born = 1
208
+ GROUP BY year
209
+ ), lagged_families AS (
210
+ SELECT
211
+ year,
212
+ num_single_child_families,
213
+ LAG(num_single_child_families, 1) OVER (ORDER BY year) AS last_year_num_single_child_families
214
+ FROM single_child_families
215
+ ), percent_change_families AS (
216
+ SELECT
217
+ year,
218
+ num_single_child_families,
219
+ ((num_single_child_families - last_year_num_single_child_families) / last_year_num_single_child_families) AS percent_change
220
+ FROM lagged_families
221
+ ORDER BY ABS(percent_change) DESC
222
+ )
223
+ SELECT
224
+ year,
225
+ FORMAT('%\'.0f', ROUND(CAST(num_single_child_families AS FLOAT64), 0)) AS `new families single child`,
226
+ FORMAT('%\'.2f', ROUND(CAST((percent_change * 100) AS FLOAT64), 2)) AS `percent change`
227
+ FROM percent_change_families
228
+ LIMIT 5
229
+ ```
@@ -7,6 +7,7 @@ nav:
7
7
  - "DuckDB": duckdb.md
8
8
  - "Postgres": postgres.md
9
9
  - "Standalone": standalone.md
10
+ - "Configuration": configuration.md
10
11
  theme:
11
12
  name: material
12
13
  logo: images/SF.png
@@ -26,11 +26,11 @@ setup(
26
26
  "bigquery": [
27
27
  "google-cloud-bigquery[pandas]>=3,<4",
28
28
  "google-cloud-bigquery-storage>=2,<3",
29
- "pandas>=2,<3",
30
29
  ],
31
30
  "dev": [
32
31
  "duckdb>=0.9,<0.11",
33
32
  "mypy>=1.10.0,<1.11",
33
+ "openai>=1.30,<1.31",
34
34
  "pandas>=2,<3",
35
35
  "pandas-stubs>=2,<3",
36
36
  "psycopg>=3.1,<4",
@@ -56,17 +56,20 @@ setup(
56
56
  "duckdb>=0.9,<0.11",
57
57
  "pandas>=2,<3",
58
58
  ],
59
- "postgres": [
59
+ "openai": [
60
+ "openai>=1.30,<1.31",
61
+ ],
62
+ "pandas": [
60
63
  "pandas>=2,<3",
64
+ ],
65
+ "postgres": [
61
66
  "psycopg2>=2.8,<3",
62
67
  ],
63
68
  "redshift": [
64
- "pandas>=2,<3",
65
69
  "redshift_connector>=2.1.1,<2.2.0",
66
70
  ],
67
71
  "snowflake": [
68
- "pandas>=2,<3",
69
- "snowflake-connector-python[pandas,secure-local-storage]>=3.10.0,<3.11",
72
+ "snowflake-connector-python[secure-local-storage]>=3.10.0,<3.11",
70
73
  ],
71
74
  "spark": [
72
75
  "pyspark>=2,<3.6",
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '1.2.0'
16
- __version_tuple__ = version_tuple = (1, 2, 0)
15
+ __version__ = version = '1.3.0'
16
+ __version_tuple__ = version_tuple = (1, 3, 0)
@@ -9,9 +9,11 @@ import typing as t
9
9
  import sqlglot
10
10
  from sqlglot import expressions as exp
11
11
  from sqlglot.helper import flatten, is_iterable
12
+ from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
12
13
 
14
+ from sqlframe.base.decorators import normalize
13
15
  from sqlframe.base.types import DataType
14
- from sqlframe.base.util import get_func_from_session
16
+ from sqlframe.base.util import get_func_from_session, quote_preserving_alias_or_name
15
17
 
16
18
  if t.TYPE_CHECKING:
17
19
  from sqlframe.base._typing import ColumnOrLiteral, ColumnOrName
@@ -237,7 +239,7 @@ class Column:
237
239
 
238
240
  @property
239
241
  def alias_or_name(self) -> str:
240
- return self.expression.alias_or_name
242
+ return quote_preserving_alias_or_name(self.expression) # type: ignore
241
243
 
242
244
  @classmethod
243
245
  def ensure_literal(cls, value) -> Column:
@@ -266,7 +268,9 @@ class Column:
266
268
  from sqlframe.base.session import _BaseSession
267
269
 
268
270
  dialect = _BaseSession().input_dialect
269
- alias: exp.Expression = exp.parse_identifier(name, dialect=dialect)
271
+ alias: exp.Expression = normalize_identifiers(
272
+ exp.parse_identifier(name, dialect=dialect), dialect=dialect
273
+ )
270
274
  new_expression = exp.Alias(
271
275
  this=self.column_expression,
272
276
  alias=alias.this if isinstance(alias, exp.Column) else alias,
@@ -15,13 +15,18 @@ from prettytable import PrettyTable
15
15
  from sqlglot import Dialect
16
16
  from sqlglot import expressions as exp
17
17
  from sqlglot.helper import ensure_list, object_to_dict, seq_get
18
+ from sqlglot.optimizer.pushdown_projections import pushdown_projections
19
+ from sqlglot.optimizer.qualify import qualify
18
20
  from sqlglot.optimizer.qualify_columns import quote_identifiers
19
21
 
22
+ from sqlframe.base.decorators import normalize
20
23
  from sqlframe.base.operations import Operation, operation
21
24
  from sqlframe.base.transforms import replace_id_value
22
25
  from sqlframe.base.util import (
23
26
  get_func_from_session,
24
27
  get_tables_from_expression_with_join,
28
+ quote_preserving_alias_or_name,
29
+ verify_openai_installed,
25
30
  )
26
31
 
27
32
  if sys.version_info >= (3, 11):
@@ -410,7 +415,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
410
415
 
411
416
  outer_select = item.find(exp.Select)
412
417
  if outer_select:
413
- return [col(x.alias_or_name) for x in outer_select.expressions]
418
+ return [col(quote_preserving_alias_or_name(x)) for x in outer_select.expressions]
414
419
  return []
415
420
 
416
421
  def _create_hash_from_expression(self, expression: exp.Expression) -> str:
@@ -471,6 +476,8 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
471
476
  dialect: DialectType = None,
472
477
  optimize: bool = True,
473
478
  pretty: bool = True,
479
+ use_openai: bool = False,
480
+ openai_model: str = "gpt-4o",
474
481
  as_list: bool = False,
475
482
  **kwargs,
476
483
  ) -> t.Union[str, t.List[str]]:
@@ -490,6 +497,9 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
490
497
  select_expression = t.cast(
491
498
  exp.Select, self.session._optimize(select_expression, dialect=dialect)
492
499
  )
500
+ elif use_openai:
501
+ qualify(select_expression, dialect=dialect, schema=self.session.catalog._schema)
502
+ pushdown_projections(select_expression, schema=self.session.catalog._schema)
493
503
 
494
504
  select_expression = df._replace_cte_names_with_hashes(select_expression)
495
505
 
@@ -505,7 +515,9 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
505
515
  self.session.catalog.add_table(
506
516
  cache_table_name,
507
517
  {
508
- expression.alias_or_name: expression.type.sql(dialect=dialect)
518
+ quote_preserving_alias_or_name(expression): expression.type.sql(
519
+ dialect=dialect
520
+ )
509
521
  if expression.type
510
522
  else "UNKNOWN"
511
523
  for expression in select_expression.expressions
@@ -541,10 +553,40 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
541
553
 
542
554
  output_expressions.append(expression)
543
555
 
544
- results = [
545
- expression.sql(dialect=dialect, pretty=pretty, **kwargs)
546
- for expression in output_expressions
547
- ]
556
+ results = []
557
+ for expression in output_expressions:
558
+ sql = expression.sql(dialect=dialect, pretty=pretty, **kwargs)
559
+ if use_openai:
560
+ verify_openai_installed()
561
+ from openai import OpenAI
562
+
563
+ client = OpenAI()
564
+ prompt = f"""
565
+ You are a backend tool that converts correct {dialect} SQL to simplified and more human readable version.
566
+ You respond without code block with rewritten {dialect} SQL.
567
+ You don't change any column names in the final select because the user expects those to remain the same.
568
+ You make unique CTE alias names match what a human would write and in snake case.
569
+ You improve formatting with spacing and line-breaks.
570
+ You remove redundant parenthesis and aliases.
571
+ When remove extra quotes, make sure to keep quotes around words that could be reserved words
572
+ """
573
+ chat_completed = client.chat.completions.create(
574
+ messages=[
575
+ {
576
+ "role": "system",
577
+ "content": prompt,
578
+ },
579
+ {
580
+ "role": "user",
581
+ "content": sql,
582
+ },
583
+ ],
584
+ model=openai_model,
585
+ )
586
+ assert chat_completed.choices[0].message.content is not None
587
+ sql = chat_completed.choices[0].message.content
588
+ results.append(sql)
589
+
548
590
  if as_list:
549
591
  return results
550
592
  return ";\n".join(results)
@@ -688,7 +730,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
688
730
  join_expression = self._add_ctes_to_expression(join_expression, other_df.expression.ctes)
689
731
  self_columns = self._get_outer_select_columns(join_expression)
690
732
  other_columns = self._get_outer_select_columns(other_df.expression)
691
- join_columns = self._ensure_list_of_columns(on)
733
+ join_columns = self._ensure_and_normalize_cols(on)
692
734
  # Determines the join clause and select columns to be used passed on what type of columns were provided for
693
735
  # the join. The columns returned changes based on how the on expression is provided.
694
736
  if how != "cross":
@@ -1324,6 +1366,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1324
1366
  assert sqls[-1] is not None
1325
1367
  return self.session._fetchdf(sqls[-1])
1326
1368
 
1369
+ @normalize("name")
1327
1370
  def createOrReplaceTempView(self, name: str) -> None:
1328
1371
  self.session.temp_views[name] = self.copy()._convert_leaf_to_cte()
1329
1372
 
@@ -0,0 +1,53 @@
1
+ from __future__ import annotations
2
+
3
+ import functools
4
+ import typing as t
5
+
6
+ from sqlglot import parse_one
7
+ from sqlglot.helper import ensure_list
8
+ from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
9
+
10
+ if t.TYPE_CHECKING:
11
+ from sqlframe.base.catalog import _BaseCatalog
12
+
13
+ CALLING_CLASS = t.TypeVar("CALLING_CLASS")
14
+
15
+
16
def normalize(normalize_kwargs: t.Union[str, t.List[str]]) -> t.Callable[[t.Callable], t.Callable]:
    """
    Decorator used to normalize identifiers in the kwargs of a method.

    ``normalize_kwargs`` names the parameters whose values should have their
    identifiers normalized to the session's input dialect before the wrapped
    method runs. String values are parsed as SQL first; expression values are
    normalized directly. Either way the normalized value is rendered back to a
    SQL string in the input dialect. Falsy/missing values are left untouched.
    """

    def decorator(func: t.Callable) -> t.Callable:
        @functools.wraps(func)
        def wrapper(self: CALLING_CLASS, *args, **kwargs) -> t.Any:
            # Imported lazily to avoid a circular import with the session module.
            from sqlframe.base.session import _BaseSession

            input_dialect = _BaseSession().input_dialect
            # Fold positional args into kwargs by parameter name (skipping
            # `self`) so every argument can be normalized uniformly by name.
            kwargs.update(dict(zip(func.__code__.co_varnames[1:], args)))
            for kwarg in ensure_list(normalize_kwargs):
                value = kwargs.get(kwarg)
                if value:
                    expression = (
                        parse_one(value, dialect=input_dialect)
                        if isinstance(value, str)
                        else value
                    )
                    kwargs[kwarg] = normalize_identifiers(expression, input_dialect).sql(
                        dialect=input_dialect
                    )
            return func(self, **kwargs)

        # functools.wraps already sets ``wrapper.__wrapped__ = func``; no
        # manual assignment is needed.
        return wrapper

    return decorator
46
+
47
+
48
def func_metadata(unsupported_engines: t.Optional[t.Union[str, t.List[str]]] = None) -> t.Callable:
    """Decorator factory that tags a function with engine-support metadata.

    The decorated function gains an ``unsupported_engines`` attribute: the
    given engine name(s) coerced to a list, or an empty list when none are
    provided. The function itself is returned unchanged.
    """

    def _metadata(func: t.Callable) -> t.Callable:
        # Annotate the function object in place; callers inspect the attribute.
        if unsupported_engines:
            func.unsupported_engines = ensure_list(unsupported_engines)  # type: ignore
        else:
            func.unsupported_engines = []  # type: ignore
        return func

    return _metadata
@@ -13,7 +13,7 @@ from sqlframe.base.catalog import (
13
13
  _BaseCatalog,
14
14
  )
15
15
  from sqlframe.base.decorators import normalize
16
- from sqlframe.base.util import decoded_str, schema_, to_schema
16
+ from sqlframe.base.util import schema_, to_schema
17
17
 
18
18
 
19
19
  class _BaseInfoSchemaMixin(_BaseCatalog, t.Generic[SESSION, DF]):
@@ -3,8 +3,6 @@ from __future__ import annotations
3
3
  import pathlib
4
4
  import typing as t
5
5
 
6
- import pandas as pd
7
-
8
6
  from sqlframe.base.exceptions import UnsupportedOperationError
9
7
  from sqlframe.base.readerwriter import (
10
8
  DF,
@@ -13,7 +11,7 @@ from sqlframe.base.readerwriter import (
13
11
  _BaseDataFrameWriter,
14
12
  _infer_format,
15
13
  )
16
- from sqlframe.base.util import pandas_to_spark_schema
14
+ from sqlframe.base.util import pandas_to_spark_schema, verify_pandas_installed
17
15
 
18
16
  if t.TYPE_CHECKING:
19
17
  from sqlframe.base._typing import OptionalPrimitiveType, PathOrPaths
@@ -72,6 +70,9 @@ class PandasLoaderMixin(_BaseDataFrameReader, t.Generic[SESSION, DF]):
72
70
  |100|NULL|
73
71
  +---+----+
74
72
  """
73
+ verify_pandas_installed()
74
+ import pandas as pd
75
+
75
76
  assert path is not None, "path is required"
76
77
  assert isinstance(path, str), "path must be a string"
77
78
  format = format or _infer_format(path)
@@ -11,6 +11,8 @@ from functools import reduce
11
11
  from sqlglot import exp
12
12
  from sqlglot.helper import object_to_dict
13
13
 
14
+ from sqlframe.base.decorators import normalize
15
+
14
16
  if sys.version_info >= (3, 11):
15
17
  from typing import Self
16
18
  else:
@@ -39,6 +41,7 @@ class _BaseDataFrameReader(t.Generic[SESSION, DF]):
39
41
  def session(self) -> SESSION:
40
42
  return self._session
41
43
 
44
+ @normalize("tableName")
42
45
  def table(self, tableName: str) -> DF:
43
46
  if df := self.session.temp_views.get(tableName):
44
47
  return df
@@ -24,7 +24,10 @@ from sqlglot.schema import MappingSchema
24
24
  from sqlframe.base.catalog import _BaseCatalog
25
25
  from sqlframe.base.dataframe import _BaseDataFrame
26
26
  from sqlframe.base.readerwriter import _BaseDataFrameReader, _BaseDataFrameWriter
27
- from sqlframe.base.util import get_column_mapping_from_schema_input
27
+ from sqlframe.base.util import (
28
+ get_column_mapping_from_schema_input,
29
+ verify_pandas_installed,
30
+ )
28
31
 
29
32
  if sys.version_info >= (3, 11):
30
33
  from typing import Self
@@ -412,6 +415,7 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
412
415
  self, expression: exp.Expression, dialect: t.Optional[Dialect] = None
413
416
  ) -> exp.Expression:
414
417
  dialect = dialect or self.output_dialect
418
+ normalize_identifiers(expression, dialect=self.input_dialect)
415
419
  quote_identifiers_func(expression, dialect=dialect)
416
420
  return optimize(expression, dialect=dialect, schema=self.catalog._schema)
417
421
 
@@ -446,14 +450,6 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
446
450
  def _fetch_rows(
447
451
  self, sql: t.Union[str, exp.Expression], *, quote_identifiers: bool = True
448
452
  ) -> t.List[Row]:
449
- from sqlframe.base.types import Row
450
-
451
- def _dict_to_row(row: t.Dict[str, t.Any]) -> Row:
452
- for key, value in row.items():
453
- if isinstance(value, dict):
454
- row[key] = _dict_to_row(value)
455
- return Row(**row)
456
-
457
453
  self._execute(sql, quote_identifiers=quote_identifiers)
458
454
  result = self._cur.fetchall()
459
455
  if not self._cur.description:
@@ -464,6 +460,7 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
464
460
  def _fetchdf(
465
461
  self, sql: t.Union[str, exp.Expression], *, quote_identifiers: bool = True
466
462
  ) -> pd.DataFrame:
463
+ verify_pandas_installed()
467
464
  from pandas.io.sql import read_sql_query
468
465
 
469
466
  return read_sql_query(self._to_sql(sql, quote_identifiers=quote_identifiers), self._conn)
@@ -154,7 +154,12 @@ def pandas_to_spark_schema(pandas_df: PandasDataFrame) -> types.StructType:
154
154
  """
155
155
  from sqlframe.base import types
156
156
 
157
- columns = list([x.replace("?column?", "unknown_column") for x in pandas_df.columns])
157
+ columns = list(
158
+ [
159
+ x.replace("?column?", f"unknown_column_{i}").replace("NULL", f"unknown_column_{i}")
160
+ for i, x in enumerate(pandas_df.columns)
161
+ ]
162
+ )
158
163
  d_types = list(pandas_df.dtypes)
159
164
  p_schema = types.StructType(
160
165
  [
@@ -240,3 +245,35 @@ def soundex(s):
240
245
 
241
246
  result += "0" * (4 - count)
242
247
  return "".join(result)
248
+
249
+
250
def verify_pandas_installed() -> None:
    """Raise an actionable ImportError if the optional pandas dependency is absent.

    Called at the top of pandas-dependent entry points so users get an install
    hint instead of a bare ``ModuleNotFoundError``. Returns ``None`` when
    pandas imports successfully.
    """
    try:
        import pandas  # noqa
    except ImportError as e:
        # Chain the original failure so the underlying import error stays visible.
        raise ImportError(
            """Pandas is required for this functionality. `pip install "sqlframe[pandas]"` (also include your engine if needed) to install pandas."""
        ) from e
257
+
258
+
259
def verify_openai_installed() -> None:
    """Raise an actionable ImportError if the optional openai dependency is absent.

    Called before OpenAI-backed functionality so users get an install hint
    instead of a bare ``ModuleNotFoundError``. Returns ``None`` when openai
    imports successfully.
    """
    try:
        import openai  # noqa
    except ImportError as e:
        # Chain the original failure so the underlying import error stays visible.
        raise ImportError(
            """OpenAI is required for this functionality. `pip install "sqlframe[openai]"` (also include your engine if needed) to install openai."""
        ) from e
266
+
267
+
268
def quote_preserving_alias_or_name(col: t.Union[exp.Column, exp.Alias]) -> str:
    """Return the output name of ``col`` rendered in the session's input
    dialect so that identifier quoting is preserved.

    Aliases resolve to their alias identifier, columns are stripped of any
    table qualifier before rendering, and anything else (e.g. ``Null()`` or a
    literal) falls back to ``alias_or_name``.
    """
    from sqlframe.base.session import _BaseSession

    target = col.args["alias"] if isinstance(col, exp.Alias) else col
    if isinstance(target, exp.Column):
        # Render only the column identifier itself, without its table qualifier.
        target = target.copy()
        target.set("table", None)
    if not isinstance(target, (exp.Identifier, exp.Column)):
        # Expressions with no identifier to render (e.g. Null(), literals).
        return target.alias_or_name
    return target.sql(dialect=_BaseSession().input_dialect)
@@ -127,7 +127,9 @@ class SnowflakeCatalog(
127
127
  sql = f"SHOW COLUMNS IN TABLE {table.sql(dialect=self.session.input_dialect)}"
128
128
  results = self.session._fetch_rows(sql)
129
129
  return {
130
- row["column_name"]: exp.DataType.build(
130
+ exp.column(row["column_name"], quoted=True).sql(
131
+ dialect=self.session.input_dialect
132
+ ): exp.DataType.build(
131
133
  json.loads(row["data_type"])["type"], dialect=self.session.input_dialect, udt=True
132
134
  )
133
135
  for row in results