sqlframe 1.0.0__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191)
  1. {sqlframe-1.0.0 → sqlframe-1.1.0}/PKG-INFO +1 -1
  2. {sqlframe-1.0.0 → sqlframe-1.1.0}/setup.py +1 -1
  3. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/_version.py +2 -2
  4. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/catalog.py +2 -1
  5. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/dataframe.py +7 -4
  6. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/session.py +4 -12
  7. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/util.py +2 -0
  8. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/duckdb/readwriter.py +17 -5
  9. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe.egg-info/PKG-INFO +1 -1
  10. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe.egg-info/SOURCES.txt +2 -0
  11. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe.egg-info/requires.txt +1 -1
  12. sqlframe-1.1.0/tests/fixtures/employee_extra_line.csv +7 -0
  13. sqlframe-1.1.0/tests/integration/engines/duck/test_duckdb_reader.py +57 -0
  14. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/test_int_dataframe.py +11 -0
  15. sqlframe-1.1.0/tests/unit/standalone/test_dataframe.py +57 -0
  16. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/unit/standalone/test_dataframe_writer.py +6 -6
  17. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/unit/standalone/test_session.py +1 -1
  18. sqlframe-1.0.0/tests/unit/standalone/test_dataframe.py +0 -46
  19. {sqlframe-1.0.0 → sqlframe-1.1.0}/.github/CODEOWNERS +0 -0
  20. {sqlframe-1.0.0 → sqlframe-1.1.0}/.github/workflows/main.workflow.yaml +0 -0
  21. {sqlframe-1.0.0 → sqlframe-1.1.0}/.github/workflows/publish.workflow.yaml +0 -0
  22. {sqlframe-1.0.0 → sqlframe-1.1.0}/.gitignore +0 -0
  23. {sqlframe-1.0.0 → sqlframe-1.1.0}/.pre-commit-config.yaml +0 -0
  24. {sqlframe-1.0.0 → sqlframe-1.1.0}/.readthedocs.yaml +0 -0
  25. {sqlframe-1.0.0 → sqlframe-1.1.0}/LICENSE +0 -0
  26. {sqlframe-1.0.0 → sqlframe-1.1.0}/Makefile +0 -0
  27. {sqlframe-1.0.0 → sqlframe-1.1.0}/README.md +0 -0
  28. {sqlframe-1.0.0 → sqlframe-1.1.0}/blogs/images/but_wait_theres_more.gif +0 -0
  29. {sqlframe-1.0.0 → sqlframe-1.1.0}/blogs/images/cake.gif +0 -0
  30. {sqlframe-1.0.0 → sqlframe-1.1.0}/blogs/images/you_get_pyspark_api.gif +0 -0
  31. {sqlframe-1.0.0 → sqlframe-1.1.0}/blogs/sqlframe_universal_dataframe_api.md +0 -0
  32. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/bigquery.md +0 -0
  33. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/docs/bigquery.md +0 -0
  34. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/docs/duckdb.md +0 -0
  35. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/docs/images/SF.png +0 -0
  36. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/docs/images/favicon.png +0 -0
  37. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/docs/images/favicon_old.png +0 -0
  38. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/docs/images/sqlframe_diagram.png +0 -0
  39. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/docs/images/sqlframe_logo.png +0 -0
  40. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/docs/postgres.md +0 -0
  41. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/duckdb.md +0 -0
  42. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/images/SF.png +0 -0
  43. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/images/favicon.png +0 -0
  44. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/images/favicon_old.png +0 -0
  45. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/images/sqlframe_diagram.png +0 -0
  46. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/images/sqlframe_logo.png +0 -0
  47. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/index.md +0 -0
  48. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/postgres.md +0 -0
  49. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/requirements.txt +0 -0
  50. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/standalone.md +0 -0
  51. {sqlframe-1.0.0 → sqlframe-1.1.0}/docs/stylesheets/extra.css +0 -0
  52. {sqlframe-1.0.0 → sqlframe-1.1.0}/mkdocs.yml +0 -0
  53. {sqlframe-1.0.0 → sqlframe-1.1.0}/pytest.ini +0 -0
  54. {sqlframe-1.0.0 → sqlframe-1.1.0}/renovate.json +0 -0
  55. {sqlframe-1.0.0 → sqlframe-1.1.0}/setup.cfg +0 -0
  56. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/LICENSE +0 -0
  57. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/__init__.py +0 -0
  58. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/__init__.py +0 -0
  59. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/_typing.py +0 -0
  60. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/column.py +0 -0
  61. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/decorators.py +0 -0
  62. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/exceptions.py +0 -0
  63. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/function_alternatives.py +0 -0
  64. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/functions.py +0 -0
  65. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/group.py +0 -0
  66. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/mixins/__init__.py +0 -0
  67. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/mixins/catalog_mixins.py +0 -0
  68. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/mixins/readwriter_mixins.py +0 -0
  69. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/normalize.py +0 -0
  70. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/operations.py +0 -0
  71. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/readerwriter.py +0 -0
  72. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/transforms.py +0 -0
  73. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/types.py +0 -0
  74. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/base/window.py +0 -0
  75. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/bigquery/__init__.py +0 -0
  76. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/bigquery/catalog.py +0 -0
  77. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/bigquery/column.py +0 -0
  78. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/bigquery/dataframe.py +0 -0
  79. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/bigquery/functions.py +0 -0
  80. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/bigquery/functions.pyi +0 -0
  81. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/bigquery/group.py +0 -0
  82. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/bigquery/readwriter.py +0 -0
  83. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/bigquery/session.py +0 -0
  84. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/bigquery/types.py +0 -0
  85. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/bigquery/window.py +0 -0
  86. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/duckdb/__init__.py +0 -0
  87. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/duckdb/catalog.py +0 -0
  88. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/duckdb/column.py +0 -0
  89. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/duckdb/dataframe.py +0 -0
  90. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/duckdb/functions.py +0 -0
  91. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/duckdb/functions.pyi +0 -0
  92. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/duckdb/group.py +0 -0
  93. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/duckdb/session.py +0 -0
  94. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/duckdb/types.py +0 -0
  95. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/duckdb/window.py +0 -0
  96. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/postgres/__init__.py +0 -0
  97. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/postgres/catalog.py +0 -0
  98. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/postgres/column.py +0 -0
  99. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/postgres/dataframe.py +0 -0
  100. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/postgres/functions.py +0 -0
  101. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/postgres/functions.pyi +0 -0
  102. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/postgres/group.py +0 -0
  103. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/postgres/readwriter.py +0 -0
  104. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/postgres/session.py +0 -0
  105. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/postgres/types.py +0 -0
  106. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/postgres/window.py +0 -0
  107. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/redshift/__init__.py +0 -0
  108. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/redshift/catalog.py +0 -0
  109. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/redshift/column.py +0 -0
  110. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/redshift/dataframe.py +0 -0
  111. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/redshift/functions.py +0 -0
  112. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/redshift/group.py +0 -0
  113. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/redshift/readwriter.py +0 -0
  114. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/redshift/session.py +0 -0
  115. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/redshift/types.py +0 -0
  116. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/redshift/window.py +0 -0
  117. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/snowflake/__init__.py +0 -0
  118. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/snowflake/catalog.py +0 -0
  119. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/snowflake/column.py +0 -0
  120. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/snowflake/dataframe.py +0 -0
  121. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/snowflake/functions.py +0 -0
  122. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/snowflake/group.py +0 -0
  123. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/snowflake/readwriter.py +0 -0
  124. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/snowflake/session.py +0 -0
  125. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/snowflake/types.py +0 -0
  126. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/snowflake/window.py +0 -0
  127. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/spark/__init__.py +0 -0
  128. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/spark/catalog.py +0 -0
  129. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/spark/column.py +0 -0
  130. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/spark/dataframe.py +0 -0
  131. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/spark/functions.py +0 -0
  132. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/spark/group.py +0 -0
  133. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/spark/readwriter.py +0 -0
  134. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/spark/session.py +0 -0
  135. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/spark/types.py +0 -0
  136. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/spark/window.py +0 -0
  137. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/standalone/__init__.py +0 -0
  138. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/standalone/catalog.py +0 -0
  139. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/standalone/column.py +0 -0
  140. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/standalone/dataframe.py +0 -0
  141. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/standalone/functions.py +0 -0
  142. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/standalone/group.py +0 -0
  143. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/standalone/readwriter.py +0 -0
  144. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/standalone/session.py +0 -0
  145. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/standalone/types.py +0 -0
  146. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe/standalone/window.py +0 -0
  147. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe.egg-info/dependency_links.txt +0 -0
  148. {sqlframe-1.0.0 → sqlframe-1.1.0}/sqlframe.egg-info/top_level.txt +0 -0
  149. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/__init__.py +0 -0
  150. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/common_fixtures.py +0 -0
  151. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/conftest.py +0 -0
  152. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/fixtures/employee.csv +0 -0
  153. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/fixtures/employee.json +0 -0
  154. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/fixtures/employee.parquet +0 -0
  155. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/__init__.py +0 -0
  156. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/__init__.py +0 -0
  157. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/bigquery/__init__.py +0 -0
  158. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/bigquery/test_bigquery_catalog.py +0 -0
  159. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/bigquery/test_bigquery_session.py +0 -0
  160. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/duck/__init__.py +0 -0
  161. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/duck/test_duckdb_catalog.py +0 -0
  162. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/duck/test_duckdb_session.py +0 -0
  163. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/postgres/__init__.py +0 -0
  164. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/postgres/test_postgres_catalog.py +0 -0
  165. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/postgres/test_postgres_session.py +0 -0
  166. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/redshift/__init__.py +0 -0
  167. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/redshift/test_redshift_catalog.py +0 -0
  168. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/redshift/test_redshift_session.py +0 -0
  169. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/snowflake/__init__.py +0 -0
  170. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/snowflake/test_snowflake_catalog.py +0 -0
  171. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/snowflake/test_snowflake_session.py +0 -0
  172. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/spark/__init__.py +0 -0
  173. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/spark/test_spark_catalog.py +0 -0
  174. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/test_engine_dataframe.py +0 -0
  175. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/test_engine_reader.py +0 -0
  176. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/test_engine_session.py +0 -0
  177. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/test_engine_writer.py +0 -0
  178. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/engines/test_int_functions.py +0 -0
  179. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/fixtures.py +0 -0
  180. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/test_int_dataframe_stats.py +0 -0
  181. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/test_int_grouped_data.py +0 -0
  182. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/integration/test_int_session.py +0 -0
  183. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/types.py +0 -0
  184. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/unit/__init__.py +0 -0
  185. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/unit/standalone/__init__.py +0 -0
  186. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/unit/standalone/fixtures.py +0 -0
  187. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/unit/standalone/test_column.py +0 -0
  188. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/unit/standalone/test_functions.py +0 -0
  189. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/unit/standalone/test_session_case_sensitivity.py +0 -0
  190. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/unit/standalone/test_types.py +0 -0
  191. {sqlframe-1.0.0 → sqlframe-1.1.0}/tests/unit/standalone/test_window.py +0 -0
--- sqlframe-1.0.0/PKG-INFO
+++ sqlframe-1.1.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sqlframe
-Version: 1.0.0
+Version: 1.1.0
 Summary: Taking the Spark out of PySpark by converting to SQL
 Home-page: https://github.com/eakmanrq/sqlframe
 Author: Ryan Eakman
--- sqlframe-1.0.0/setup.py
+++ sqlframe-1.1.0/setup.py
@@ -20,7 +20,7 @@ setup(
     python_requires=">=3.8",
     install_requires=[
         "prettytable<3.11.0",
-        "sqlglot>=23.14.0,<23.18",
+        "sqlglot>=24.0.0,<24.1",
     ],
     extras_require={
         "bigquery": [
--- sqlframe-1.0.0/sqlframe/_version.py
+++ sqlframe-1.1.0/sqlframe/_version.py
@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '1.0.0'
-__version_tuple__ = version_tuple = (1, 0, 0)
+__version__ = version = '1.1.0'
+__version_tuple__ = version_tuple = (1, 1, 0)
--- sqlframe-1.0.0/sqlframe/base/catalog.py
+++ sqlframe-1.1.0/sqlframe/base/catalog.py
@@ -8,7 +8,7 @@ from sqlglot import MappingSchema, exp
 
 from sqlframe.base.decorators import normalize
 from sqlframe.base.exceptions import TableSchemaError
-from sqlframe.base.util import to_schema
+from sqlframe.base.util import ensure_column_mapping, to_schema
 
 if t.TYPE_CHECKING:
     from sqlglot.schema import ColumnMapping
@@ -82,6 +82,7 @@ class _BaseCatalog(t.Generic[SESSION, DF]):
             raise TableSchemaError(
                 "This session does not have access to a catalog that can lookup column information. See docs for explicitly defining columns or using a session that can automatically determine this."
             )
+        column_mapping = ensure_column_mapping(column_mapping)  # type: ignore
         self._schema.add_table(table, column_mapping, dialect=self.session.input_dialect)
 
     @normalize(["dbName"])
--- sqlframe-1.0.0/sqlframe/base/dataframe.py
+++ sqlframe-1.1.0/sqlframe/base/dataframe.py
@@ -417,7 +417,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         from sqlframe.base.session import _BaseSession
 
         value = expression.sql(dialect=_BaseSession().input_dialect).encode("utf-8")
-        hash = f"t{zlib.crc32(value)}"[:6]
+        hash = f"t{zlib.crc32(value)}"[:9]
         return self.session._normalize_string(hash)
 
     def _get_select_expressions(
@@ -606,8 +606,11 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         return df._convert_leaf_to_cte(sequence_id=new_sequence_id)
 
     @operation(Operation.WHERE)
-    def where(self, column: t.Union[Column, bool], **kwargs) -> Self:
-        col = self._ensure_and_normalize_col(column)
+    def where(self, column: t.Union[Column, str, bool], **kwargs) -> Self:
+        if isinstance(column, str):
+            col = sqlglot.parse_one(column, dialect=self.session.input_dialect)
+        else:
+            col = self._ensure_and_normalize_col(column)
         return self.copy(expression=self.expression.where(col.expression))
 
     filter = where
@@ -1094,7 +1097,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         )
         if existing_col_index:
             expression = self.expression.copy()
-            expression.expressions[existing_col_index] = col.expression
+            expression.expressions[existing_col_index] = col.alias(colName).expression
             return self.copy(expression=expression)
         return self.copy().select(col.alias(colName), append=True)
 
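Taken together, the dataframe.py changes give where()/filter() a string overload (parsed with sqlglot in the session's input dialect) and make withColumn() re-alias a replaced column so the original name survives. A minimal sketch against the standalone session; the session construction and sample data are illustrative, not part of this diff:

    from sqlframe.standalone import StandaloneSession
    from sqlframe.standalone import functions as F

    session = StandaloneSession()
    df = session.createDataFrame(
        [(1, "Jack", "Shephard", 37, 1)],
        ["employee_id", "fname", "lname", "age", "store_id"],
    )

    # New in 1.1.0: where()/filter() accept a raw SQL predicate string
    jacks = df.where("age = 37 AND fname = 'Jack'")

    # Replacing an existing column keeps its name: the generated SQL now emits
    # CAST(age AS STRING) AS fname rather than an unaliased expression
    renamed = df.withColumn("fname", F.col("age").cast("string"))
    assert renamed.columns == ["employee_id", "fname", "lname", "age", "store_id"]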
--- sqlframe-1.0.0/sqlframe/base/session.py
+++ sqlframe-1.1.0/sqlframe/base/session.py
@@ -313,24 +313,16 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
         sel_expression = exp.Select(**select_kwargs)
         if empty_df:
             sel_expression = sel_expression.where(exp.false())
-        # if empty_df:
-        #     if not column_mapping:
-        #         # If we don't have rows or columns then we just return a null with a false expression
-        #         sel_expression = (
-        #             exp.Select().select("null").from_("VALUES (NULL)").where(exp.false())
-        #         )
-        #     else:
-        #         # Ensure no results are returned if the dataframe is expected to be empty instead of
-        #         # a row of null values
-        #         sel_expression = sel_expression.where(exp.false())
         return self._create_df(sel_expression)
 
-    def sql(self, sqlQuery: t.Union[str, exp.Expression]) -> DF:
-        expression = self._optimize(
+    def sql(self, sqlQuery: t.Union[str, exp.Expression], optimize: bool = True) -> DF:
+        expression = (
             sqlglot.parse_one(sqlQuery, read=self.input_dialect)
             if isinstance(sqlQuery, str)
             else sqlQuery
         )
+        if optimize:
+            expression = self._optimize(expression)
         if self.temp_views:
             replacement_mapping = {}
             for table in expression.find_all(exp.Table):
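sql() now exposes an optimize flag that defaults to True, so existing callers keep the parse-then-optimize behavior, while optimize=False passes the parsed (or pre-built) expression through untouched; the DuckDB reader change below relies on this. A hedged sketch, with `session` standing in for any concrete session:

    # Default path is unchanged: parse the string, then run it through _optimize()
    df = session.sql("SELECT cola, colb FROM table")

    # Skip the optimizer, e.g. when the FROM clause is a table function the
    # optimizer has no schema information for (such as DuckDB's read_csv)
    df_raw = session.sql("SELECT * FROM read_csv(['employees.csv'])", optimize=False)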
--- sqlframe-1.0.0/sqlframe/base/util.py
+++ sqlframe-1.1.0/sqlframe/base/util.py
@@ -113,6 +113,8 @@ def ensure_column_mapping(schema: t.Union[str, StructType]) -> t.Dict:
         }
     # TODO: Make a protocol with a `simpleString` attribute as what it looks for instead of the actual
     # `StructType` object.
+    elif hasattr(schema, "simpleString"):
+        return {struct_field.name: struct_field.dataType.simpleString() for struct_field in schema}
     return sqlglot_ensure_column_mapping(schema)  # type: ignore
 
 
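The new branch duck-types on a simpleString attribute instead of importing StructType, flattening each field into a {name: type} dict. A sketch assuming sqlframe's types mirror PySpark's; the expected output in the comment is an assumption, not taken from the diff:

    from sqlframe.base.types import IntegerType, StringType, StructField, StructType
    from sqlframe.base.util import ensure_column_mapping

    schema = StructType(
        [
            StructField("employee_id", IntegerType()),
            StructField("fname", StringType()),
        ]
    )

    # Iterating a StructType yields StructFields; each field's dataType.simpleString()
    # becomes the mapped type, giving roughly {"employee_id": "int", "fname": "string"}
    mapping = ensure_column_mapping(schema)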
--- sqlframe-1.0.0/sqlframe/duckdb/readwriter.py
+++ sqlframe-1.1.0/sqlframe/duckdb/readwriter.py
@@ -5,6 +5,9 @@ from __future__ import annotations
 import logging
 import typing as t
 
+from sqlglot import exp
+from sqlglot.helper import ensure_list
+
 from sqlframe.base.readerwriter import _BaseDataFrameReader, _BaseDataFrameWriter
 from sqlframe.base.util import ensure_column_mapping, to_csv
 
@@ -69,13 +72,22 @@ class DuckDBDataFrameReader(_BaseDataFrameReader["DuckDBSession", "DuckDBDataFra
         |100|NULL|
         +---+----+
         """
+        if schema:
+            column_mapping = ensure_column_mapping(schema)
+            select_columns = [x.expression for x in self._to_casted_columns(column_mapping)]
+            if format == "csv":
+                duckdb_columns = ", ".join(
+                    [f"'{column}': '{dtype}'" for column, dtype in column_mapping.items()]
+                )
+                options["columns"] = "{" + duckdb_columns + "}"
+        else:
+            select_columns = [exp.Star()]
         if format:
-            sql = f"SELECT * FROM read_{format}('{path}', {to_csv(options)})"
+            paths = ",".join([f"'{path}'" for path in ensure_list(path)])
+            from_clause = f"read_{format}([{paths}], {to_csv(options)})"
         else:
-            sql = f"select * from '{path}'"
-        df = self.session.sql(sql)
-        if schema:
-            df = df.select(*self._to_casted_columns(ensure_column_mapping(schema)))
+            from_clause = f"'{path}'"
+        df = self.session.sql(exp.select(*select_columns).from_(from_clause), optimize=False)
         self.session._last_loaded_file = path  # type: ignore
         return df
 
--- sqlframe-1.0.0/sqlframe.egg-info/PKG-INFO
+++ sqlframe-1.1.0/sqlframe.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sqlframe
-Version: 1.0.0
+Version: 1.1.0
 Summary: Taking the Spark out of PySpark by converting to SQL
 Home-page: https://github.com/eakmanrq/sqlframe
 Author: Ryan Eakman
--- sqlframe-1.0.0/sqlframe.egg-info/SOURCES.txt
+++ sqlframe-1.1.0/sqlframe.egg-info/SOURCES.txt
@@ -145,6 +145,7 @@ tests/types.py
 tests/fixtures/employee.csv
 tests/fixtures/employee.json
 tests/fixtures/employee.parquet
+tests/fixtures/employee_extra_line.csv
 tests/integration/__init__.py
 tests/integration/fixtures.py
 tests/integration/test_int_dataframe.py
@@ -162,6 +163,7 @@ tests/integration/engines/bigquery/test_bigquery_catalog.py
 tests/integration/engines/bigquery/test_bigquery_session.py
 tests/integration/engines/duck/__init__.py
 tests/integration/engines/duck/test_duckdb_catalog.py
+tests/integration/engines/duck/test_duckdb_reader.py
 tests/integration/engines/duck/test_duckdb_session.py
 tests/integration/engines/postgres/__init__.py
 tests/integration/engines/postgres/test_postgres_catalog.py
--- sqlframe-1.0.0/sqlframe.egg-info/requires.txt
+++ sqlframe-1.1.0/sqlframe.egg-info/requires.txt
@@ -1,5 +1,5 @@
 prettytable<3.11.0
-sqlglot<23.18,>=23.14.0
+sqlglot<24.1,>=24.0.0
 
 [bigquery]
 google-cloud-bigquery-storage<3,>=2
--- /dev/null
+++ sqlframe-1.1.0/tests/fixtures/employee_extra_line.csv
@@ -0,0 +1,7 @@
+some,stats,that,dont,relate,to,data
+employee_id,fname,lname,age,store_id
+1,Jack,Shephard,37,1
+2,John,Locke,65,1
+3,Kate,Austen,37,2
+4,Claire,Littleton,27,2
+5,Hugo,Reyes,29,100
--- /dev/null
+++ sqlframe-1.1.0/tests/integration/engines/duck/test_duckdb_reader.py
@@ -0,0 +1,57 @@
+from sqlframe.base.types import Row
+from sqlframe.duckdb import DuckDBSession
+
+pytest_plugins = ["tests.common_fixtures"]
+
+
+def test_employee_extra_line_csv(duckdb_session: DuckDBSession):
+    df = duckdb_session.read.load(
+        "tests/fixtures/employee_extra_line.csv",
+        format="csv",
+        schema="employee_id INT, fname STRING, lname STRING, age INT, store_id INT",
+        skip=1,
+        header=1,
+        filename=1,
+        null_padding=True,
+        ignore_errors=1,
+        auto_detect=False,
+    )
+    assert df.collect() == [
+        Row(**{"employee_id": 1, "fname": "Jack", "lname": "Shephard", "age": 37, "store_id": 1}),
+        Row(**{"employee_id": 2, "fname": "John", "lname": "Locke", "age": 65, "store_id": 1}),
+        Row(**{"employee_id": 3, "fname": "Kate", "lname": "Austen", "age": 37, "store_id": 2}),
+        Row(
+            **{"employee_id": 4, "fname": "Claire", "lname": "Littleton", "age": 27, "store_id": 2}
+        ),
+        Row(**{"employee_id": 5, "fname": "Hugo", "lname": "Reyes", "age": 29, "store_id": 100}),
+    ]
+
+
+def test_employee_extra_line_csv_multiple(duckdb_session: DuckDBSession):
+    df = duckdb_session.read.load(
+        ["tests/fixtures/employee_extra_line.csv", "tests/fixtures/employee_extra_line.csv"],
+        format="csv",
+        schema="employee_id INT, fname STRING, lname STRING, age INT, store_id INT",
+        skip=1,
+        header=1,
+        filename=1,
+        null_padding=True,
+        ignore_errors=1,
+        auto_detect=False,
+    )
+    assert df.collect() == [
+        Row(**{"employee_id": 1, "fname": "Jack", "lname": "Shephard", "age": 37, "store_id": 1}),
+        Row(**{"employee_id": 2, "fname": "John", "lname": "Locke", "age": 65, "store_id": 1}),
+        Row(**{"employee_id": 3, "fname": "Kate", "lname": "Austen", "age": 37, "store_id": 2}),
+        Row(
+            **{"employee_id": 4, "fname": "Claire", "lname": "Littleton", "age": 27, "store_id": 2}
+        ),
+        Row(**{"employee_id": 5, "fname": "Hugo", "lname": "Reyes", "age": 29, "store_id": 100}),
+        Row(**{"employee_id": 1, "fname": "Jack", "lname": "Shephard", "age": 37, "store_id": 1}),
+        Row(**{"employee_id": 2, "fname": "John", "lname": "Locke", "age": 65, "store_id": 1}),
+        Row(**{"employee_id": 3, "fname": "Kate", "lname": "Austen", "age": 37, "store_id": 2}),
+        Row(
+            **{"employee_id": 4, "fname": "Claire", "lname": "Littleton", "age": 27, "store_id": 2}
+        ),
+        Row(**{"employee_id": 5, "fname": "Hugo", "lname": "Reyes", "age": 29, "store_id": 100}),
+    ]
--- sqlframe-1.0.0/tests/integration/test_int_dataframe.py
+++ sqlframe-1.1.0/tests/integration/test_int_dataframe.py
@@ -302,6 +302,17 @@ def test_where_multiple_chained(
     compare_frames(df_employee, dfs_employee)
 
 
+def test_where_sql_expr(
+    pyspark_employee: PySparkDataFrame,
+    get_df: t.Callable[[str], _BaseDataFrame],
+    compare_frames: t.Callable,
+):
+    employee = get_df("employee")
+    df_employee = pyspark_employee.where("age = 37 AND fname = 'Jack'")
+    dfs_employee = employee.where("age = 37 AND fname = 'Jack'")
+    compare_frames(df_employee, dfs_employee)
+
+
 def test_operators(
     pyspark_employee: PySparkDataFrame,
     get_df: t.Callable[[str], _BaseDataFrame],
--- /dev/null
+++ sqlframe-1.1.0/tests/unit/standalone/test_dataframe.py
@@ -0,0 +1,57 @@
+import typing as t
+
+from sqlglot import expressions as exp
+
+from sqlframe.standalone import functions as F
+from sqlframe.standalone.dataframe import StandaloneDataFrame
+
+pytest_plugins = ["tests.common_fixtures", "tests.unit.standalone.fixtures"]
+
+
+def test_hash_select_expression(standalone_employee: StandaloneDataFrame):
+    expression = exp.select("cola").from_("table")
+    assert standalone_employee._create_hash_from_expression(expression) == "t17051938"
+
+
+def test_columns(standalone_employee: StandaloneDataFrame):
+    assert standalone_employee.columns == ["employee_id", "fname", "lname", "age", "store_id"]
+
+
+def test_cache(standalone_employee: StandaloneDataFrame, compare_sql: t.Callable):
+    df = standalone_employee.select("fname").cache()
+    expected_statements = [
+        "DROP VIEW IF EXISTS t31563989",
+        "CACHE LAZY TABLE t31563989 OPTIONS('storageLevel' = 'MEMORY_AND_DISK') AS SELECT CAST(`a1`.`fname` AS STRING) AS `fname` FROM VALUES (1, 'Jack', 'Shephard', 37, 1), (2, 'John', 'Locke', 65, 1), (3, 'Kate', 'Austen', 37, 2), (4, 'Claire', 'Littleton', 27, 2), (5, 'Hugo', 'Reyes', 29, 100) AS `a1`(`employee_id`, `fname`, `lname`, `age`, `store_id`)",
+        "SELECT `t31563989`.`fname` AS `fname` FROM `t31563989` AS `t31563989`",
+    ]
+    compare_sql(df, expected_statements)
+
+
+def test_persist_default(standalone_employee: StandaloneDataFrame, compare_sql: t.Callable):
+    df = standalone_employee.select("fname").persist()
+    expected_statements = [
+        "DROP VIEW IF EXISTS t31563989",
+        "CACHE LAZY TABLE t31563989 OPTIONS('storageLevel' = 'MEMORY_AND_DISK_SER') AS SELECT CAST(`a1`.`fname` AS STRING) AS `fname` FROM VALUES (1, 'Jack', 'Shephard', 37, 1), (2, 'John', 'Locke', 65, 1), (3, 'Kate', 'Austen', 37, 2), (4, 'Claire', 'Littleton', 27, 2), (5, 'Hugo', 'Reyes', 29, 100) AS `a1`(`employee_id`, `fname`, `lname`, `age`, `store_id`)",
+        "SELECT `t31563989`.`fname` AS `fname` FROM `t31563989` AS `t31563989`",
+    ]
+    compare_sql(df, expected_statements)
+
+
+def test_persist_storagelevel(standalone_employee: StandaloneDataFrame, compare_sql: t.Callable):
+    df = standalone_employee.select("fname").persist("DISK_ONLY_2")
+    expected_statements = [
+        "DROP VIEW IF EXISTS t31563989",
+        "CACHE LAZY TABLE t31563989 OPTIONS('storageLevel' = 'DISK_ONLY_2') AS SELECT CAST(`a1`.`fname` AS STRING) AS `fname` FROM VALUES (1, 'Jack', 'Shephard', 37, 1), (2, 'John', 'Locke', 65, 1), (3, 'Kate', 'Austen', 37, 2), (4, 'Claire', 'Littleton', 27, 2), (5, 'Hugo', 'Reyes', 29, 100) AS `a1`(`employee_id`, `fname`, `lname`, `age`, `store_id`)",
+        "SELECT `t31563989`.`fname` AS `fname` FROM `t31563989` AS `t31563989`",
+    ]
+    compare_sql(df, expected_statements)
+
+
+def test_with_column_duplicate_alias(standalone_employee: StandaloneDataFrame):
+    df = standalone_employee.withColumn("fname", F.col("age").cast("string"))
+    assert df.columns == ["employee_id", "fname", "lname", "age", "store_id"]
+    # Make sure that the new columns is added with an alias to `fname`
+    assert (
+        df.sql(pretty=False)
+        == "SELECT `a1`.`employee_id` AS `employee_id`, CAST(`a1`.`age` AS STRING) AS `fname`, CAST(`a1`.`lname` AS STRING) AS `lname`, `a1`.`age` AS `age`, `a1`.`store_id` AS `store_id` FROM VALUES (1, 'Jack', 'Shephard', 37, 1), (2, 'John', 'Locke', 65, 1), (3, 'Kate', 'Austen', 37, 2), (4, 'Claire', 'Littleton', 27, 2), (5, 'Hugo', 'Reyes', 29, 100) AS `a1`(`employee_id`, `fname`, `lname`, `age`, `store_id`)"
+    )
--- sqlframe-1.0.0/tests/unit/standalone/test_dataframe_writer.py
+++ sqlframe-1.1.0/tests/unit/standalone/test_dataframe_writer.py
@@ -43,9 +43,9 @@ def test_insertInto_byName(standalone_employee: StandaloneDataFrame, compare_sql
 def test_insertInto_cache(standalone_employee: StandaloneDataFrame, compare_sql: t.Callable):
     df = standalone_employee.cache().write.insertInto("table_name")
     expected_statements = [
-        "DROP VIEW IF EXISTS t12441",
-        "CACHE LAZY TABLE t12441 OPTIONS('storageLevel' = 'MEMORY_AND_DISK') AS SELECT `a1`.`employee_id` AS `employee_id`, CAST(`a1`.`fname` AS STRING) AS `fname`, CAST(`a1`.`lname` AS STRING) AS `lname`, `a1`.`age` AS `age`, `a1`.`store_id` AS `store_id` FROM VALUES (1, 'Jack', 'Shephard', 37, 1), (2, 'John', 'Locke', 65, 1), (3, 'Kate', 'Austen', 37, 2), (4, 'Claire', 'Littleton', 27, 2), (5, 'Hugo', 'Reyes', 29, 100) AS `a1`(`employee_id`, `fname`, `lname`, `age`, `store_id`)",
-        "INSERT INTO table_name SELECT `t12441`.`employee_id` AS `employee_id`, `t12441`.`fname` AS `fname`, `t12441`.`lname` AS `lname`, `t12441`.`age` AS `age`, `t12441`.`store_id` AS `store_id` FROM `t12441` AS `t12441`",
+        "DROP VIEW IF EXISTS t12441709",
+        "CACHE LAZY TABLE t12441709 OPTIONS('storageLevel' = 'MEMORY_AND_DISK') AS SELECT `a1`.`employee_id` AS `employee_id`, CAST(`a1`.`fname` AS STRING) AS `fname`, CAST(`a1`.`lname` AS STRING) AS `lname`, `a1`.`age` AS `age`, `a1`.`store_id` AS `store_id` FROM VALUES (1, 'Jack', 'Shephard', 37, 1), (2, 'John', 'Locke', 65, 1), (3, 'Kate', 'Austen', 37, 2), (4, 'Claire', 'Littleton', 27, 2), (5, 'Hugo', 'Reyes', 29, 100) AS `a1`(`employee_id`, `fname`, `lname`, `age`, `store_id`)",
+        "INSERT INTO table_name SELECT `t12441709`.`employee_id` AS `employee_id`, `t12441709`.`fname` AS `fname`, `t12441709`.`lname` AS `lname`, `t12441709`.`age` AS `age`, `t12441709`.`store_id` AS `store_id` FROM `t12441709` AS `t12441709`",
     ]
     compare_sql(df, expected_statements)
 
@@ -94,9 +94,9 @@ def test_mode_override(standalone_employee: StandaloneDataFrame, compare_sql: t.
 def test_saveAsTable_cache(standalone_employee: StandaloneDataFrame, compare_sql: t.Callable):
     df = standalone_employee.cache().write.saveAsTable("table_name")
     expected_statements = [
-        "DROP VIEW IF EXISTS t12441",
-        "CACHE LAZY TABLE t12441 OPTIONS('storageLevel' = 'MEMORY_AND_DISK') AS SELECT `a1`.`employee_id` AS `employee_id`, CAST(`a1`.`fname` AS STRING) AS `fname`, CAST(`a1`.`lname` AS STRING) AS `lname`, `a1`.`age` AS `age`, `a1`.`store_id` AS `store_id` FROM VALUES (1, 'Jack', 'Shephard', 37, 1), (2, 'John', 'Locke', 65, 1), (3, 'Kate', 'Austen', 37, 2), (4, 'Claire', 'Littleton', 27, 2), (5, 'Hugo', 'Reyes', 29, 100) AS `a1`(`employee_id`, `fname`, `lname`, `age`, `store_id`)",
-        "CREATE TABLE table_name AS SELECT `t12441`.`employee_id` AS `employee_id`, `t12441`.`fname` AS `fname`, `t12441`.`lname` AS `lname`, `t12441`.`age` AS `age`, `t12441`.`store_id` AS `store_id` FROM `t12441` AS `t12441`",
+        "DROP VIEW IF EXISTS t12441709",
+        "CACHE LAZY TABLE t12441709 OPTIONS('storageLevel' = 'MEMORY_AND_DISK') AS SELECT `a1`.`employee_id` AS `employee_id`, CAST(`a1`.`fname` AS STRING) AS `fname`, CAST(`a1`.`lname` AS STRING) AS `lname`, `a1`.`age` AS `age`, `a1`.`store_id` AS `store_id` FROM VALUES (1, 'Jack', 'Shephard', 37, 1), (2, 'John', 'Locke', 65, 1), (3, 'Kate', 'Austen', 37, 2), (4, 'Claire', 'Littleton', 27, 2), (5, 'Hugo', 'Reyes', 29, 100) AS `a1`(`employee_id`, `fname`, `lname`, `age`, `store_id`)",
+        "CREATE TABLE table_name AS SELECT `t12441709`.`employee_id` AS `employee_id`, `t12441709`.`fname` AS `fname`, `t12441709`.`lname` AS `lname`, `t12441709`.`age` AS `age`, `t12441709`.`store_id` AS `store_id` FROM `t12441709` AS `t12441709`",
     ]
     compare_sql(df, expected_statements)
 
--- sqlframe-1.0.0/tests/unit/standalone/test_session.py
+++ sqlframe-1.1.0/tests/unit/standalone/test_session.py
@@ -110,7 +110,7 @@ def test_sql_with_aggs(standalone_session: StandaloneSession, compare_sql: t.Cal
     df = standalone_session.sql(query).groupBy(F.col("cola")).agg(F.sum("colb"))
     compare_sql(
         df,
-        "WITH t26614 AS (SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`), t23454 AS (SELECT cola, colb FROM t26614) SELECT cola, SUM(colb) FROM t23454 GROUP BY cola",
+        "WITH t26614157 AS (SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`), t38889420 AS (SELECT cola, colb FROM t26614157) SELECT cola, SUM(colb) FROM t38889420 GROUP BY cola",
         pretty=False,
         optimize=False,
     )
--- sqlframe-1.0.0/tests/unit/standalone/test_dataframe.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import typing as t
-
-from sqlglot import expressions as exp
-
-from sqlframe.standalone.dataframe import StandaloneDataFrame
-
-pytest_plugins = ["tests.common_fixtures", "tests.unit.standalone.fixtures"]
-
-
-def test_hash_select_expression(standalone_employee: StandaloneDataFrame):
-    expression = exp.select("cola").from_("table")
-    assert standalone_employee._create_hash_from_expression(expression) == "t17051"
-
-
-def test_columns(standalone_employee: StandaloneDataFrame):
-    assert standalone_employee.columns == ["employee_id", "fname", "lname", "age", "store_id"]
-
-
-def test_cache(standalone_employee: StandaloneDataFrame, compare_sql: t.Callable):
-    df = standalone_employee.select("fname").cache()
-    expected_statements = [
-        "DROP VIEW IF EXISTS t31563",
-        "CACHE LAZY TABLE t31563 OPTIONS('storageLevel' = 'MEMORY_AND_DISK') AS SELECT CAST(`a1`.`fname` AS STRING) AS `fname` FROM VALUES (1, 'Jack', 'Shephard', 37, 1), (2, 'John', 'Locke', 65, 1), (3, 'Kate', 'Austen', 37, 2), (4, 'Claire', 'Littleton', 27, 2), (5, 'Hugo', 'Reyes', 29, 100) AS `a1`(`employee_id`, `fname`, `lname`, `age`, `store_id`)",
-        "SELECT `t31563`.`fname` AS `fname` FROM `t31563` AS `t31563`",
-    ]
-    compare_sql(df, expected_statements)
-
-
-def test_persist_default(standalone_employee: StandaloneDataFrame, compare_sql: t.Callable):
-    df = standalone_employee.select("fname").persist()
-    expected_statements = [
-        "DROP VIEW IF EXISTS t31563",
-        "CACHE LAZY TABLE t31563 OPTIONS('storageLevel' = 'MEMORY_AND_DISK_SER') AS SELECT CAST(`a1`.`fname` AS STRING) AS `fname` FROM VALUES (1, 'Jack', 'Shephard', 37, 1), (2, 'John', 'Locke', 65, 1), (3, 'Kate', 'Austen', 37, 2), (4, 'Claire', 'Littleton', 27, 2), (5, 'Hugo', 'Reyes', 29, 100) AS `a1`(`employee_id`, `fname`, `lname`, `age`, `store_id`)",
-        "SELECT `t31563`.`fname` AS `fname` FROM `t31563` AS `t31563`",
-    ]
-    compare_sql(df, expected_statements)
-
-
-def test_persist_storagelevel(standalone_employee: StandaloneDataFrame, compare_sql: t.Callable):
-    df = standalone_employee.select("fname").persist("DISK_ONLY_2")
-    expected_statements = [
-        "DROP VIEW IF EXISTS t31563",
-        "CACHE LAZY TABLE t31563 OPTIONS('storageLevel' = 'DISK_ONLY_2') AS SELECT CAST(`a1`.`fname` AS STRING) AS `fname` FROM VALUES (1, 'Jack', 'Shephard', 37, 1), (2, 'John', 'Locke', 65, 1), (3, 'Kate', 'Austen', 37, 2), (4, 'Claire', 'Littleton', 27, 2), (5, 'Hugo', 'Reyes', 29, 100) AS `a1`(`employee_id`, `fname`, `lname`, `age`, `store_id`)",
-        "SELECT `t31563`.`fname` AS `fname` FROM `t31563` AS `t31563`",
-    ]
-    compare_sql(df, expected_statements)