sqlframe 1.0.0__tar.gz → 1.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191)
  1. {sqlframe-1.0.0 → sqlframe-1.1.1}/Makefile +1 -1
  2. {sqlframe-1.0.0 → sqlframe-1.1.1}/PKG-INFO +1 -1
  3. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/bigquery.md +19 -0
  4. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/duckdb.md +16 -0
  5. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/postgres.md +18 -0
  6. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/standalone.md +18 -0
  7. {sqlframe-1.0.0 → sqlframe-1.1.1}/setup.py +2 -2
  8. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/_version.py +2 -2
  9. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/catalog.py +3 -2
  10. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/column.py +1 -1
  11. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/dataframe.py +10 -5
  12. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/functions.py +1 -1
  13. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/group.py +1 -1
  14. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/normalize.py +1 -1
  15. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/operations.py +1 -1
  16. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/readerwriter.py +1 -1
  17. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/session.py +5 -13
  18. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/transforms.py +1 -1
  19. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/types.py +1 -1
  20. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/util.py +2 -0
  21. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/window.py +1 -1
  22. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/group.py +1 -1
  23. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/readwriter.py +1 -1
  24. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/catalog.py +1 -1
  25. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/group.py +1 -1
  26. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/readwriter.py +18 -6
  27. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/catalog.py +1 -1
  28. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/group.py +1 -1
  29. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/readwriter.py +1 -1
  30. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/catalog.py +1 -1
  31. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/group.py +1 -1
  32. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/readwriter.py +1 -1
  33. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/catalog.py +1 -1
  34. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/group.py +1 -1
  35. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/readwriter.py +1 -1
  36. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/catalog.py +1 -1
  37. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/group.py +1 -1
  38. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/readwriter.py +1 -1
  39. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/group.py +1 -1
  40. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/readwriter.py +1 -1
  41. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/session.py +1 -1
  42. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe.egg-info/PKG-INFO +1 -1
  43. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe.egg-info/SOURCES.txt +2 -0
  44. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe.egg-info/requires.txt +2 -2
  45. sqlframe-1.1.1/tests/fixtures/employee_extra_line.csv +7 -0
  46. sqlframe-1.1.1/tests/integration/engines/duck/test_duckdb_reader.py +57 -0
  47. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/test_int_dataframe.py +11 -0
  48. sqlframe-1.1.1/tests/unit/standalone/test_dataframe.py +66 -0
  49. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/standalone/test_dataframe_writer.py +6 -6
  50. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/standalone/test_session.py +1 -1
  51. sqlframe-1.0.0/tests/unit/standalone/test_dataframe.py +0 -46
  52. {sqlframe-1.0.0 → sqlframe-1.1.1}/.github/CODEOWNERS +0 -0
  53. {sqlframe-1.0.0 → sqlframe-1.1.1}/.github/workflows/main.workflow.yaml +0 -0
  54. {sqlframe-1.0.0 → sqlframe-1.1.1}/.github/workflows/publish.workflow.yaml +0 -0
  55. {sqlframe-1.0.0 → sqlframe-1.1.1}/.gitignore +0 -0
  56. {sqlframe-1.0.0 → sqlframe-1.1.1}/.pre-commit-config.yaml +0 -0
  57. {sqlframe-1.0.0 → sqlframe-1.1.1}/.readthedocs.yaml +0 -0
  58. {sqlframe-1.0.0 → sqlframe-1.1.1}/LICENSE +0 -0
  59. {sqlframe-1.0.0 → sqlframe-1.1.1}/README.md +0 -0
  60. {sqlframe-1.0.0 → sqlframe-1.1.1}/blogs/images/but_wait_theres_more.gif +0 -0
  61. {sqlframe-1.0.0 → sqlframe-1.1.1}/blogs/images/cake.gif +0 -0
  62. {sqlframe-1.0.0 → sqlframe-1.1.1}/blogs/images/you_get_pyspark_api.gif +0 -0
  63. {sqlframe-1.0.0 → sqlframe-1.1.1}/blogs/sqlframe_universal_dataframe_api.md +0 -0
  64. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/docs/bigquery.md +0 -0
  65. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/docs/duckdb.md +0 -0
  66. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/docs/images/SF.png +0 -0
  67. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/docs/images/favicon.png +0 -0
  68. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/docs/images/favicon_old.png +0 -0
  69. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/docs/images/sqlframe_diagram.png +0 -0
  70. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/docs/images/sqlframe_logo.png +0 -0
  71. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/docs/postgres.md +0 -0
  72. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/images/SF.png +0 -0
  73. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/images/favicon.png +0 -0
  74. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/images/favicon_old.png +0 -0
  75. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/images/sqlframe_diagram.png +0 -0
  76. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/images/sqlframe_logo.png +0 -0
  77. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/index.md +0 -0
  78. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/requirements.txt +0 -0
  79. {sqlframe-1.0.0 → sqlframe-1.1.1}/docs/stylesheets/extra.css +0 -0
  80. {sqlframe-1.0.0 → sqlframe-1.1.1}/mkdocs.yml +0 -0
  81. {sqlframe-1.0.0 → sqlframe-1.1.1}/pytest.ini +0 -0
  82. {sqlframe-1.0.0 → sqlframe-1.1.1}/renovate.json +0 -0
  83. {sqlframe-1.0.0 → sqlframe-1.1.1}/setup.cfg +0 -0
  84. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/LICENSE +0 -0
  85. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/__init__.py +0 -0
  86. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/__init__.py +0 -0
  87. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/_typing.py +0 -0
  88. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/decorators.py +0 -0
  89. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/exceptions.py +0 -0
  90. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/function_alternatives.py +0 -0
  91. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/mixins/__init__.py +0 -0
  92. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/mixins/catalog_mixins.py +0 -0
  93. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/base/mixins/readwriter_mixins.py +0 -0
  94. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/__init__.py +0 -0
  95. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/catalog.py +0 -0
  96. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/column.py +0 -0
  97. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/dataframe.py +0 -0
  98. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/functions.py +0 -0
  99. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/functions.pyi +0 -0
  100. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/session.py +0 -0
  101. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/types.py +0 -0
  102. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/bigquery/window.py +0 -0
  103. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/__init__.py +0 -0
  104. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/column.py +0 -0
  105. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/dataframe.py +0 -0
  106. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/functions.py +0 -0
  107. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/functions.pyi +0 -0
  108. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/session.py +0 -0
  109. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/types.py +0 -0
  110. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/duckdb/window.py +0 -0
  111. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/__init__.py +0 -0
  112. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/column.py +0 -0
  113. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/dataframe.py +0 -0
  114. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/functions.py +0 -0
  115. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/functions.pyi +0 -0
  116. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/session.py +0 -0
  117. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/types.py +0 -0
  118. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/postgres/window.py +0 -0
  119. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/__init__.py +0 -0
  120. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/column.py +0 -0
  121. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/dataframe.py +0 -0
  122. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/functions.py +0 -0
  123. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/session.py +0 -0
  124. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/types.py +0 -0
  125. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/redshift/window.py +0 -0
  126. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/__init__.py +0 -0
  127. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/column.py +0 -0
  128. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/dataframe.py +0 -0
  129. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/functions.py +0 -0
  130. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/session.py +0 -0
  131. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/types.py +0 -0
  132. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/snowflake/window.py +0 -0
  133. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/__init__.py +0 -0
  134. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/column.py +0 -0
  135. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/dataframe.py +0 -0
  136. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/functions.py +0 -0
  137. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/session.py +0 -0
  138. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/types.py +0 -0
  139. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/spark/window.py +0 -0
  140. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/__init__.py +0 -0
  141. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/catalog.py +0 -0
  142. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/column.py +0 -0
  143. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/dataframe.py +0 -0
  144. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/functions.py +0 -0
  145. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/types.py +0 -0
  146. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe/standalone/window.py +0 -0
  147. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe.egg-info/dependency_links.txt +0 -0
  148. {sqlframe-1.0.0 → sqlframe-1.1.1}/sqlframe.egg-info/top_level.txt +0 -0
  149. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/__init__.py +0 -0
  150. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/common_fixtures.py +0 -0
  151. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/conftest.py +0 -0
  152. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/fixtures/employee.csv +0 -0
  153. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/fixtures/employee.json +0 -0
  154. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/fixtures/employee.parquet +0 -0
  155. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/__init__.py +0 -0
  156. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/__init__.py +0 -0
  157. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/bigquery/__init__.py +0 -0
  158. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/bigquery/test_bigquery_catalog.py +0 -0
  159. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/bigquery/test_bigquery_session.py +0 -0
  160. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/duck/__init__.py +0 -0
  161. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/duck/test_duckdb_catalog.py +0 -0
  162. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/duck/test_duckdb_session.py +0 -0
  163. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/postgres/__init__.py +0 -0
  164. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/postgres/test_postgres_catalog.py +0 -0
  165. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/postgres/test_postgres_session.py +0 -0
  166. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/redshift/__init__.py +0 -0
  167. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/redshift/test_redshift_catalog.py +0 -0
  168. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/redshift/test_redshift_session.py +0 -0
  169. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/snowflake/__init__.py +0 -0
  170. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/snowflake/test_snowflake_catalog.py +0 -0
  171. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/snowflake/test_snowflake_session.py +0 -0
  172. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/spark/__init__.py +0 -0
  173. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/spark/test_spark_catalog.py +0 -0
  174. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/test_engine_dataframe.py +0 -0
  175. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/test_engine_reader.py +0 -0
  176. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/test_engine_session.py +0 -0
  177. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/test_engine_writer.py +0 -0
  178. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/engines/test_int_functions.py +0 -0
  179. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/fixtures.py +0 -0
  180. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/test_int_dataframe_stats.py +0 -0
  181. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/test_int_grouped_data.py +0 -0
  182. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/integration/test_int_session.py +0 -0
  183. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/types.py +0 -0
  184. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/__init__.py +0 -0
  185. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/standalone/__init__.py +0 -0
  186. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/standalone/fixtures.py +0 -0
  187. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/standalone/test_column.py +0 -0
  188. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/standalone/test_functions.py +0 -0
  189. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/standalone/test_session_case_sensitivity.py +0 -0
  190. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/standalone/test_types.py +0 -0
  191. {sqlframe-1.0.0 → sqlframe-1.1.1}/tests/unit/standalone/test_window.py +0 -0
@@ -1,5 +1,5 @@
  install-dev:
- pip install -e ".[dev,duckdb,postgres,redshift,bigquery,snowflake,spark]"
+ pip install -e ".[dev,docs,duckdb,postgres,redshift,bigquery,snowflake,spark]"

  install-pre-commit:
  pre-commit install
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sqlframe
- Version: 1.0.0
+ Version: 1.1.1
  Summary: Taking the Spark out of PySpark by converting to SQL
  Home-page: https://github.com/eakmanrq/sqlframe
  Author: Ryan Eakman
@@ -43,6 +43,25 @@ Regardless of approach, it is recommended to configure `default_dataset` in the
  session = BigQuerySession(conn=conn, default_dataset="sqlframe.db1")
  ```

+ ## Imports
+
+ If converting a PySpark pipeline, all `pyspark.sql` should be replaced with `sqlframe.bigquery`.
+ In addition, many classes will have a `BigQuery` prefix.
+ For example, `BigQueryDataFrame` instead of `DataFrame`.
+
+
+ ```python
+ # PySpark import
+ # from pyspark.sql import SparkSession
+ # from pyspark.sql import functions as F
+ # from pyspark.sql.dataframe import DataFrame
+ # SQLFrame import
+ from sqlframe.bigquery import BigQuerySession
+ from sqlframe.bigquery import functions as F
+ from sqlframe.bigquery import BigQueryDataFrame
+ ```
+
+
  ## Example Usage

  ```python
@@ -30,7 +30,23 @@ By default, SQLFrame will create a connection to an in-memory database.
  conn = duckdb.connect(database=":memory:")
  session = DuckDBSession(conn=conn)
  ```
+ ## Imports

+ If converting a PySpark pipeline, all `pyspark.sql` should be replaced with `sqlframe.duckdb`.
+ In addition, many classes will have a `DuckDB` prefix.
+ For example, `DuckDBDataFrame` instead of `DataFrame`.
+
+
+ ```python
+ # PySpark import
+ # from pyspark.sql import SparkSession
+ # from pyspark.sql import functions as F
+ # from pyspark.sql.dataframe import DataFrame
+ # SQLFrame import
+ from sqlframe.duckdb import DuckDBSession
+ from sqlframe.duckdb import functions as F
+ from sqlframe.duckdb import DuckDBDataFrame
+ ```

  ## Example Usage

@@ -25,6 +25,24 @@ conn = connect(
  session = PostgresSession(conn=conn)
  ```

+ ## Imports
+
+ If converting a PySpark pipeline, all `pyspark.sql` should be replaced with `sqlframe.postgres`.
+ In addition, many classes will have a `Postgres` prefix.
+ For example, `PostgresDataFrame` instead of `DataFrame`.
+
+
+ ```python
+ # PySpark import
+ # from pyspark.sql import SparkSession
+ # from pyspark.sql import functions as F
+ # from pyspark.sql.dataframe import DataFrame
+ # SQLFrame import
+ from sqlframe.postgres import PostgresSession
+ from sqlframe.postgres import functions as F
+ from sqlframe.postgres import PostgresDataFrame
+ ```
+
  ## Example Usage

  ```python
@@ -24,6 +24,24 @@ from sqlframe.standalone import StandaloneSession
  session = StandaloneSession.builder.config(map={"sqlframe.input.dialect": 'duckdb', "sqlframe.output.dialect": 'bigquery'}).getOrCreate()
  ```

+ ## Imports
+
+ If converting a PySpark pipeline, all `pyspark.sql` should be replaced with `sqlframe.standalone`.
+ In addition, many classes will have a `Standalone` prefix.
+ For example, `StandaloneDataFrame` instead of `DataFrame`.
+
+
+ ```python
+ # PySpark import
+ # from pyspark.sql import SparkSession
+ # from pyspark.sql import functions as F
+ # from pyspark.sql.dataframe import DataFrame
+ # SQLFrame import
+ from sqlframe.standalone import StandaloneSession
+ from sqlframe.standalone import functions as F
+ from sqlframe.standalone import StandaloneDataFrame
+ ```
+
  ## Accessing Tables

  PySpark DataFrame API, and currently SQLFrame, requires that a table can be access to get it's schema information.
@@ -20,7 +20,7 @@ setup(
  python_requires=">=3.8",
  install_requires=[
  "prettytable<3.11.0",
- "sqlglot>=23.14.0,<23.18",
+ "sqlglot>=24.0.0,<24.1",
  ],
  extras_require={
  "bigquery": [
@@ -47,7 +47,7 @@ setup(
  ],
  "docs": [
  "mkdocs==1.4.2",
- "mkdocs-include-markdown-plugin==4.0.3",
+ "mkdocs-include-markdown-plugin==6.0.6",
  "mkdocs-material==9.0.5",
  "mkdocs-material-extensions==1.1.1",
  "pymdown-extensions",
@@ -12,5 +12,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '1.0.0'
- __version_tuple__ = version_tuple = (1, 0, 0)
+ __version__ = version = '1.1.1'
+ __version_tuple__ = version_tuple = (1, 1, 1)
@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -8,7 +8,7 @@ from sqlglot import MappingSchema, exp

  from sqlframe.base.decorators import normalize
  from sqlframe.base.exceptions import TableSchemaError
- from sqlframe.base.util import to_schema
+ from sqlframe.base.util import ensure_column_mapping, to_schema

  if t.TYPE_CHECKING:
  from sqlglot.schema import ColumnMapping
@@ -82,6 +82,7 @@ class _BaseCatalog(t.Generic[SESSION, DF]):
  raise TableSchemaError(
  "This session does not have access to a catalog that can lookup column information. See docs for explicitly defining columns or using a session that can automatically determine this."
  )
+ column_mapping = ensure_column_mapping(column_mapping) # type: ignore
  self._schema.add_table(table, column_mapping, dialect=self.session.input_dialect)

  @normalize(["dbName"])
@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -417,7 +417,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
  from sqlframe.base.session import _BaseSession

  value = expression.sql(dialect=_BaseSession().input_dialect).encode("utf-8")
- hash = f"t{zlib.crc32(value)}"[:6]
+ hash = f"t{zlib.crc32(value)}"[:9]
  return self.session._normalize_string(hash)

  def _get_select_expressions(
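For the hunk above: the generated CTE name is a CRC32 of the SQL text, prefixed with `t` and truncated, so widening the slice from 6 to 9 characters keeps more of the checksum and makes name collisions less likely. A standalone illustration (the sample SQL string here is made up):

```python
import zlib

value = "SELECT fname FROM employee".encode("utf-8")
crc = zlib.crc32(value)   # unsigned 32-bit checksum, up to 10 decimal digits
print(f"t{crc}"[:6])      # old behavior: "t" plus only 5 digits of the checksum
print(f"t{crc}"[:9])      # new behavior: "t" plus up to 8 digits
```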
@@ -606,8 +606,13 @@
  return df._convert_leaf_to_cte(sequence_id=new_sequence_id)

  @operation(Operation.WHERE)
- def where(self, column: t.Union[Column, bool], **kwargs) -> Self:
- col = self._ensure_and_normalize_col(column)
+ def where(self, column: t.Union[Column, str, bool], **kwargs) -> Self:
+ if isinstance(column, str):
+ col = self._ensure_and_normalize_col(
+ sqlglot.parse_one(column, dialect=self.session.input_dialect)
+ )
+ else:
+ col = self._ensure_and_normalize_col(column)
  return self.copy(expression=self.expression.where(col.expression))

  filter = where
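With the hunk above, `where`/`filter` also accepts a raw SQL predicate string, parsed with the session's input dialect. A minimal sketch against DuckDB (the sample column names are illustrative, and `createDataFrame`/`show` are assumed to behave as in PySpark):

```python
from sqlframe.duckdb import DuckDBSession
from sqlframe.duckdb import functions as F

session = DuckDBSession()  # assumed: in-memory DuckDB connection by default
df = session.createDataFrame([(1, "Jack", 37)], ["employee_id", "fname", "age"])

# Both forms are equivalent after this change: a Column expression or a SQL string.
df.where(F.col("age") == 37).show()
df.where("age = 37 AND fname = 'Jack'").show()
```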
@@ -1094,7 +1099,7 @@
  )
  if existing_col_index:
  expression = self.expression.copy()
- expression.expressions[existing_col_index] = col.expression
+ expression.expressions[existing_col_index] = col.alias(colName).expression
  return self.copy(expression=expression)
  return self.copy().select(col.alias(colName), append=True)

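Behavior note for the hunk above: when `withColumn` replaces an existing column, the replacement expression is now explicitly re-aliased to the requested name instead of keeping whatever name the new expression happened to carry. A small sketch (DataFrame construction assumed PySpark-compatible):

```python
from sqlframe.duckdb import DuckDBSession
from sqlframe.duckdb import functions as F

session = DuckDBSession()
df = session.createDataFrame([(1, 37)], ["employee_id", "age"])

# "age" already exists, so the computed expression replaces it in place and,
# with this fix, is aliased back to "age" in the generated SELECT.
df.withColumn("age", F.col("age") + 1).show()
```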
@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -313,24 +313,16 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN]):
  sel_expression = exp.Select(**select_kwargs)
  if empty_df:
  sel_expression = sel_expression.where(exp.false())
- # if empty_df:
- # if not column_mapping:
- # # If we don't have rows or columns then we just return a null with a false expression
- # sel_expression = (
- # exp.Select().select("null").from_("VALUES (NULL)").where(exp.false())
- # )
- # else:
- # # Ensure no results are returned if the dataframe is expected to be empty instead of
- # # a row of null values
- # sel_expression = sel_expression.where(exp.false())
  return self._create_df(sel_expression)

- def sql(self, sqlQuery: t.Union[str, exp.Expression]) -> DF:
- expression = self._optimize(
+ def sql(self, sqlQuery: t.Union[str, exp.Expression], optimize: bool = True) -> DF:
+ expression = (
  sqlglot.parse_one(sqlQuery, read=self.input_dialect)
  if isinstance(sqlQuery, str)
  else sqlQuery
  )
+ if optimize:
+ expression = self._optimize(expression)
  if self.temp_views:
  replacement_mapping = {}
  for table in expression.find_all(exp.Table):
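The hunk above adds an `optimize` flag to `sql()`: the query is still parsed with the session's input dialect, but sqlglot's optimizer now only runs when `optimize=True` (the default). A hedged sketch of the call:

```python
from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()  # assumed: in-memory DuckDB by default, per the docs above

# Default behavior is unchanged: parse, then run sqlglot's optimizer.
optimized_df = session.sql("SELECT 1 AS x")

# New in 1.1.x: skip the optimizer and keep the query shape as written.
raw_df = session.sql("SELECT 1 AS x", optimize=False)
```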
@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  import typing as t

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -113,6 +113,8 @@ def ensure_column_mapping(schema: t.Union[str, StructType]) -> t.Dict:
  }
  # TODO: Make a protocol with a `simpleString` attribute as what it looks for instead of the actual
  # `StructType` object.
+ elif hasattr(schema, "simpleString"):
+ return {struct_field.name: struct_field.dataType.simpleString() for struct_field in schema}
  return sqlglot_ensure_column_mapping(schema) # type: ignore

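The new `elif` branch lets `ensure_column_mapping` accept a `StructType`-style schema object (anything exposing `simpleString`) and flatten it to a `{column: type}` dict. A rough sketch, assuming `sqlframe.base.types` mirrors PySpark's schema classes:

```python
from sqlframe.base.types import StructType, StructField, IntegerType, StringType
from sqlframe.base.util import ensure_column_mapping

# Assumed PySpark-compatible schema objects; each field's dataType.simpleString()
# supplies the type name used in the resulting mapping.
schema = StructType(
    [
        StructField("employee_id", IntegerType()),
        StructField("fname", StringType()),
    ]
)
print(ensure_column_mapping(schema))  # roughly {'employee_id': 'int', 'fname': 'string'}
```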
@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,10 +1,13 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

  import logging
  import typing as t

+ from sqlglot import exp
+ from sqlglot.helper import ensure_list
+
  from sqlframe.base.readerwriter import _BaseDataFrameReader, _BaseDataFrameWriter
  from sqlframe.base.util import ensure_column_mapping, to_csv

@@ -69,13 +72,22 @@ class DuckDBDataFrameReader(_BaseDataFrameReader["DuckDBSession", "DuckDBDataFra
  |100|NULL|
  +---+----+
  """
+ if schema:
+ column_mapping = ensure_column_mapping(schema)
+ select_columns = [x.expression for x in self._to_casted_columns(column_mapping)]
+ if format == "csv":
+ duckdb_columns = ", ".join(
+ [f"'{column}': '{dtype}'" for column, dtype in column_mapping.items()]
+ )
+ options["columns"] = "{" + duckdb_columns + "}"
+ else:
+ select_columns = [exp.Star()]
  if format:
- sql = f"SELECT * FROM read_{format}('{path}', {to_csv(options)})"
+ paths = ",".join([f"'{path}'" for path in ensure_list(path)])
+ from_clause = f"read_{format}([{paths}], {to_csv(options)})"
  else:
- sql = f"select * from '{path}'"
- df = self.session.sql(sql)
- if schema:
- df = df.select(*self._to_casted_columns(ensure_column_mapping(schema)))
+ from_clause = f"'{path}'"
+ df = self.session.sql(exp.select(*select_columns).from_(from_clause), optimize=False)
  self.session._last_loaded_file = path # type: ignore
  return df

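Net effect of the reader hunk above: a user-supplied schema is now pushed into the generated `read_*` call (via DuckDB's `columns` option for CSV), `load` accepts a list of paths, and the generated query bypasses the optimizer. A sketch mirroring the new integration test added later in this diff (option names copied from that test):

```python
from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()

# Extra keyword arguments are forwarded as DuckDB read_csv options.
df = session.read.load(
    ["tests/fixtures/employee_extra_line.csv", "tests/fixtures/employee_extra_line.csv"],
    format="csv",
    schema="employee_id INT, fname STRING, lname STRING, age INT, store_id INT",
    skip=1,
    header=1,
    null_padding=True,
    ignore_errors=1,
    auto_detect=False,
)
print(df.collect())
```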
@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,4 +1,4 @@
- # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'dataframe' folder.
+ # This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.

  from __future__ import annotations

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sqlframe
- Version: 1.0.0
+ Version: 1.1.1
  Summary: Taking the Spark out of PySpark by converting to SQL
  Home-page: https://github.com/eakmanrq/sqlframe
  Author: Ryan Eakman
@@ -145,6 +145,7 @@ tests/types.py
  tests/fixtures/employee.csv
  tests/fixtures/employee.json
  tests/fixtures/employee.parquet
+ tests/fixtures/employee_extra_line.csv
  tests/integration/__init__.py
  tests/integration/fixtures.py
  tests/integration/test_int_dataframe.py
@@ -162,6 +163,7 @@ tests/integration/engines/bigquery/test_bigquery_catalog.py
  tests/integration/engines/bigquery/test_bigquery_session.py
  tests/integration/engines/duck/__init__.py
  tests/integration/engines/duck/test_duckdb_catalog.py
+ tests/integration/engines/duck/test_duckdb_reader.py
  tests/integration/engines/duck/test_duckdb_session.py
  tests/integration/engines/postgres/__init__.py
  tests/integration/engines/postgres/test_postgres_catalog.py
@@ -1,5 +1,5 @@
  prettytable<3.11.0
- sqlglot<23.18,>=23.14.0
+ sqlglot<24.1,>=24.0.0

  [bigquery]
  google-cloud-bigquery-storage<3,>=2
@@ -28,7 +28,7 @@ pre-commit>=3.5
  pre-commit<3.8,>=3.7

  [docs]
- mkdocs-include-markdown-plugin==4.0.3
+ mkdocs-include-markdown-plugin==6.0.6
  mkdocs-material-extensions==1.1.1
  mkdocs-material==9.0.5
  mkdocs==1.4.2
@@ -0,0 +1,7 @@
+ some,stats,that,dont,relate,to,data
+ employee_id,fname,lname,age,store_id
+ 1,Jack,Shephard,37,1
+ 2,John,Locke,65,1
+ 3,Kate,Austen,37,2
+ 4,Claire,Littleton,27,2
+ 5,Hugo,Reyes,29,100
@@ -0,0 +1,57 @@
+ from sqlframe.base.types import Row
+ from sqlframe.duckdb import DuckDBSession
+
+ pytest_plugins = ["tests.common_fixtures"]
+
+
+ def test_employee_extra_line_csv(duckdb_session: DuckDBSession):
+ df = duckdb_session.read.load(
+ "tests/fixtures/employee_extra_line.csv",
+ format="csv",
+ schema="employee_id INT, fname STRING, lname STRING, age INT, store_id INT",
+ skip=1,
+ header=1,
+ filename=1,
+ null_padding=True,
+ ignore_errors=1,
+ auto_detect=False,
+ )
+ assert df.collect() == [
+ Row(**{"employee_id": 1, "fname": "Jack", "lname": "Shephard", "age": 37, "store_id": 1}),
+ Row(**{"employee_id": 2, "fname": "John", "lname": "Locke", "age": 65, "store_id": 1}),
+ Row(**{"employee_id": 3, "fname": "Kate", "lname": "Austen", "age": 37, "store_id": 2}),
+ Row(
+ **{"employee_id": 4, "fname": "Claire", "lname": "Littleton", "age": 27, "store_id": 2}
+ ),
+ Row(**{"employee_id": 5, "fname": "Hugo", "lname": "Reyes", "age": 29, "store_id": 100}),
+ ]
+
+
+ def test_employee_extra_line_csv_multiple(duckdb_session: DuckDBSession):
+ df = duckdb_session.read.load(
+ ["tests/fixtures/employee_extra_line.csv", "tests/fixtures/employee_extra_line.csv"],
+ format="csv",
+ schema="employee_id INT, fname STRING, lname STRING, age INT, store_id INT",
+ skip=1,
+ header=1,
+ filename=1,
+ null_padding=True,
+ ignore_errors=1,
+ auto_detect=False,
+ )
+ assert df.collect() == [
+ Row(**{"employee_id": 1, "fname": "Jack", "lname": "Shephard", "age": 37, "store_id": 1}),
+ Row(**{"employee_id": 2, "fname": "John", "lname": "Locke", "age": 65, "store_id": 1}),
+ Row(**{"employee_id": 3, "fname": "Kate", "lname": "Austen", "age": 37, "store_id": 2}),
+ Row(
+ **{"employee_id": 4, "fname": "Claire", "lname": "Littleton", "age": 27, "store_id": 2}
+ ),
+ Row(**{"employee_id": 5, "fname": "Hugo", "lname": "Reyes", "age": 29, "store_id": 100}),
+ Row(**{"employee_id": 1, "fname": "Jack", "lname": "Shephard", "age": 37, "store_id": 1}),
+ Row(**{"employee_id": 2, "fname": "John", "lname": "Locke", "age": 65, "store_id": 1}),
+ Row(**{"employee_id": 3, "fname": "Kate", "lname": "Austen", "age": 37, "store_id": 2}),
+ Row(
+ **{"employee_id": 4, "fname": "Claire", "lname": "Littleton", "age": 27, "store_id": 2}
+ ),
+ Row(**{"employee_id": 5, "fname": "Hugo", "lname": "Reyes", "age": 29, "store_id": 100}),
+ ]
@@ -302,6 +302,17 @@ def test_where_multiple_chained(
  compare_frames(df_employee, dfs_employee)


+ def test_where_sql_expr(
+ pyspark_employee: PySparkDataFrame,
+ get_df: t.Callable[[str], _BaseDataFrame],
+ compare_frames: t.Callable,
+ ):
+ employee = get_df("employee")
+ df_employee = pyspark_employee.where("age = 37 AND fname = 'Jack'")
+ dfs_employee = employee.where("age = 37 AND fname = 'Jack'")
+ compare_frames(df_employee, dfs_employee)
+
+
  def test_operators(
  pyspark_employee: PySparkDataFrame,
  get_df: t.Callable[[str], _BaseDataFrame],