sqlframe 3.29.1__py3-none-any.whl → 3.30.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/databricks/dataframe.py +29 -0
- {sqlframe-3.29.1.dist-info → sqlframe-3.30.0.dist-info}/METADATA +3 -3
- {sqlframe-3.29.1.dist-info → sqlframe-3.30.0.dist-info}/RECORD +7 -7
- {sqlframe-3.29.1.dist-info → sqlframe-3.30.0.dist-info}/LICENSE +0 -0
- {sqlframe-3.29.1.dist-info → sqlframe-3.30.0.dist-info}/WHEEL +0 -0
- {sqlframe-3.29.1.dist-info → sqlframe-3.30.0.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/databricks/dataframe.py
CHANGED
@@ -14,6 +14,9 @@ from sqlframe.base.util import normalize_string
|
|
14
14
|
from sqlframe.databricks.group import DatabricksGroupedData
|
15
15
|
|
16
16
|
if t.TYPE_CHECKING:
|
17
|
+
from databricks.sql.client import Cursor
|
18
|
+
from pyarrow import Table as ArrowTable
|
19
|
+
|
17
20
|
from sqlframe.databricks.readwriter import DatabricksDataFrameWriter
|
18
21
|
from sqlframe.databricks.session import DatabricksSession
|
19
22
|
|
@@ -21,6 +24,18 @@ if t.TYPE_CHECKING:
|
|
21
24
|
logger = logging.getLogger(__name__)
|
22
25
|
|
23
26
|
|
27
|
+
class RecordBatchReaderFacade:
    """Minimal stand-in for pyarrow's ``RecordBatchReader`` backed by a DB-API cursor.

    Wraps a databricks-sql ``Cursor`` whose query has already been executed and
    exposes ``read_next_batch`` so callers can pull Arrow tables of at most
    ``batch_size`` rows at a time instead of materializing the full result.
    """

    def __init__(self, cur: Cursor, batch_size: int):
        # Batches are fetched lazily from the cursor; nothing is read here.
        self.cur = cur
        self.batch_size = batch_size

    def read_next_batch(self) -> ArrowTable:
        """Fetch the next batch of rows as an Arrow table.

        Raises:
            StopIteration: once the cursor is drained (empty batch), mirroring
                the protocol of pyarrow's ``RecordBatchReader.read_next_batch``.
        """
        batch = self.cur.fetchmany_arrow(self.batch_size)
        if batch.num_rows == 0:
            raise StopIteration
        return batch
|
37
|
+
|
38
|
+
|
24
39
|
class DatabricksDataFrameNaFunctions(_BaseDataFrameNaFunctions["DatabricksDataFrame"]):
|
25
40
|
pass
|
26
41
|
|
@@ -68,3 +83,17 @@ class DatabricksDataFrame(
|
|
68
83
|
)
|
69
84
|
)
|
70
85
|
return columns
|
86
|
+
|
87
|
+
@t.overload  # type: ignore
def toArrow(self) -> ArrowTable: ...

@t.overload
def toArrow(self, batch_size: int) -> RecordBatchReaderFacade: ...

def toArrow(
    self, batch_size: t.Optional[int] = None
) -> t.Union[ArrowTable, RecordBatchReaderFacade]:
    """Return this DataFrame's result as Arrow data.

    Args:
        batch_size: when falsy (default), the entire result is fetched as a
            single pyarrow ``Table``; when a positive int, a lazy
            ``RecordBatchReaderFacade`` is returned that yields batches of at
            most ``batch_size`` rows.

    Returns:
        A pyarrow ``Table``, or a ``RecordBatchReaderFacade`` for streaming.
    """
    # Execute the query but skip pulling rows into Python; the Arrow data is
    # fetched straight off the session's cursor below.
    self._collect(skip_rows=True)
    if batch_size:
        # Streaming requested: hand back a lazy batch reader over the cursor.
        return RecordBatchReaderFacade(self.session._cur, batch_size)  # type: ignore
    return self.session._cur.fetchall_arrow()  # type: ignore
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sqlframe
|
3
|
-
Version: 3.
|
3
|
+
Version: 3.30.0
|
4
4
|
Summary: Turning PySpark Into a Universal DataFrame API
|
5
5
|
Home-page: https://github.com/eakmanrq/sqlframe
|
6
6
|
Author: Ryan Eakman
|
@@ -17,13 +17,13 @@ Requires-Python: >=3.9
|
|
17
17
|
Description-Content-Type: text/markdown
|
18
18
|
License-File: LICENSE
|
19
19
|
Requires-Dist: prettytable <4
|
20
|
-
Requires-Dist: sqlglot <26.
|
20
|
+
Requires-Dist: sqlglot <26.16,>=24.0.0
|
21
21
|
Requires-Dist: typing-extensions
|
22
22
|
Provides-Extra: bigquery
|
23
23
|
Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
|
24
24
|
Requires-Dist: google-cloud-bigquery[pandas] <4,>=3 ; extra == 'bigquery'
|
25
25
|
Provides-Extra: databricks
|
26
|
-
Requires-Dist: databricks-sql-connector <5,>=3.6 ; extra == 'databricks'
|
26
|
+
Requires-Dist: databricks-sql-connector[pyarrow] <5,>=3.6 ; extra == 'databricks'
|
27
27
|
Provides-Extra: dev
|
28
28
|
Requires-Dist: duckdb <1.3,>=1.2 ; extra == 'dev'
|
29
29
|
Requires-Dist: findspark <3,>=2 ; extra == 'dev'
|
@@ -1,5 +1,5 @@
|
|
1
1
|
sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
|
2
|
-
sqlframe/_version.py,sha256=
|
2
|
+
sqlframe/_version.py,sha256=7VpfRH8_mJOR_L4lpWaUQ0WOsohnPcEU5DwM0XACUsI,513
|
3
3
|
sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
|
4
4
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
@@ -42,7 +42,7 @@ sqlframe/bigquery/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,3
|
|
42
42
|
sqlframe/databricks/__init__.py,sha256=BkB_eO1UYwcf8j6x7bi4BWmDCMkfn0CUMwossWgwaG4,993
|
43
43
|
sqlframe/databricks/catalog.py,sha256=T_4NlQ7TD57_UTYFeRezYUu0t9NEx7cgJS60O4DKcBQ,18609
|
44
44
|
sqlframe/databricks/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
|
45
|
-
sqlframe/databricks/dataframe.py,sha256=
|
45
|
+
sqlframe/databricks/dataframe.py,sha256=8kwT1kWU2TwGjR9zDrGdmkvabiBCivA_Mcg06r2XVX4,3111
|
46
46
|
sqlframe/databricks/functions.py,sha256=La8rjAwO0hD4FBO0QxW5CtZtFAPvOrVc6lG4OtPGgbc,336
|
47
47
|
sqlframe/databricks/functions.pyi,sha256=FzVBpzXCJzxIp73sIAo_R8Wx8uOJrix-W12HsgyeTcQ,23799
|
48
48
|
sqlframe/databricks/group.py,sha256=dU3g0DVLRlfOSCamKchQFXRd1WTFbdxoXkpEX8tPD6Y,399
|
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
130
130
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
131
131
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
132
132
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
133
|
-
sqlframe-3.
|
134
|
-
sqlframe-3.
|
135
|
-
sqlframe-3.
|
136
|
-
sqlframe-3.
|
137
|
-
sqlframe-3.
|
133
|
+
sqlframe-3.30.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
134
|
+
sqlframe-3.30.0.dist-info/METADATA,sha256=6o9cQH-Uln7tjo2SsKP9NwsyxJMIIbHp7LLcB3WomO0,8987
|
135
|
+
sqlframe-3.30.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
136
|
+
sqlframe-3.30.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
137
|
+
sqlframe-3.30.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|