sqlframe 3.29.1__py3-none-any.whl → 3.31.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/functions.py +22 -2
- sqlframe/databricks/dataframe.py +29 -0
- {sqlframe-3.29.1.dist-info → sqlframe-3.31.0.dist-info}/METADATA +3 -3
- {sqlframe-3.29.1.dist-info → sqlframe-3.31.0.dist-info}/RECORD +8 -8
- {sqlframe-3.29.1.dist-info → sqlframe-3.31.0.dist-info}/LICENSE +0 -0
- {sqlframe-3.29.1.dist-info → sqlframe-3.31.0.dist-info}/WHEEL +0 -0
- {sqlframe-3.29.1.dist-info → sqlframe-3.31.0.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/functions.py
CHANGED
@@ -678,7 +678,22 @@ def isnull(col: ColumnOrName) -> Column:
 def last(col: ColumnOrName, ignorenulls: t.Optional[bool] = None) -> Column:
     this = Column.invoke_expression_over_column(col, expression.Last)
     if ignorenulls:
-
+        if _get_session()._is_duckdb:
+            return Column(
+                expression.Filter(
+                    this=this.expression,
+                    expression=expression.Where(
+                        this=expression.Not(
+                            this=expression.Is(
+                                this=Column.ensure_col(col).expression,
+                                expression=expression.Null(),
+                            )
+                        )
+                    ),
+                )
+            )
+        else:
+            return Column.invoke_expression_over_column(this, expression.IgnoreNulls)
     return this
@@ -3872,7 +3887,7 @@ def json_object_keys(col: ColumnOrName) -> Column:
     return Column.invoke_anonymous_function(col, "json_object_keys")


-@meta(unsupported_engines="
+@meta(unsupported_engines=["bigquery", "postgres", "snowflake"])
 def last_value(col: ColumnOrName, ignoreNulls: t.Optional[t.Union[bool, Column]] = None) -> Column:
     """Returns the last value of `col` for a group of rows. It will return the last non-null
     value it sees when `ignoreNulls` is set to true. If all values are null, then null is returned.

@@ -3913,6 +3928,11 @@ def last_value(col: ColumnOrName, ignoreNulls: t.Optional[t.Union[bool, Column]]
     | b| 2|
     +-------------+-------------+
     """
+    session = _get_session()
+
+    if session._is_duckdb:
+        return last(col, ignoreNulls)  # type: ignore
+
     column = Column.invoke_expression_over_column(col, expression.LastValue)

     if ignoreNulls:
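The DuckDB branch of last() builds a sqlglot Filter/Where/Not/Is tree rather than wrapping the aggregate in IgnoreNulls, and the last_value() hunk simply delegates to last() on DuckDB, so both functions share the FILTER-based path that skips NULL rows without an IGNORE NULLS modifier. A minimal sketch of roughly the SQL shape this corresponds to, run against DuckDB directly (the in-memory connection and sample table are illustrative, not part of the diff):

import duckdb

con = duckdb.connect()  # in-memory database for the example
con.execute(
    "CREATE TABLE t AS SELECT * FROM (VALUES (1, 'a'), (2, NULL), (3, 'b')) AS v(id, value)"
)

# Roughly the SQL the new expression tree renders for last(col, ignorenulls=True)
# on DuckDB: the aggregate is filtered so NULL rows are skipped, instead of
# relying on IGNORE NULLS syntax.
print(
    con.execute(
        "SELECT last(value) FILTER (WHERE NOT value IS NULL) AS last_value FROM t"
    ).fetchall()
)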
sqlframe/databricks/dataframe.py
CHANGED
@@ -14,6 +14,9 @@ from sqlframe.base.util import normalize_string
 from sqlframe.databricks.group import DatabricksGroupedData

 if t.TYPE_CHECKING:
+    from databricks.sql.client import Cursor
+    from pyarrow import Table as ArrowTable
+
     from sqlframe.databricks.readwriter import DatabricksDataFrameWriter
     from sqlframe.databricks.session import DatabricksSession


@@ -21,6 +24,18 @@ if t.TYPE_CHECKING:
 logger = logging.getLogger(__name__)


+class RecordBatchReaderFacade:
+    def __init__(self, cur: Cursor, batch_size: int):
+        self.cur = cur
+        self.batch_size = batch_size
+
+    def read_next_batch(self) -> ArrowTable:
+        result = self.cur.fetchmany_arrow(self.batch_size)
+        if result.num_rows == 0:
+            raise StopIteration
+        return result
+
+
 class DatabricksDataFrameNaFunctions(_BaseDataFrameNaFunctions["DatabricksDataFrame"]):
     pass


@@ -68,3 +83,17 @@ class DatabricksDataFrame(
             )
         )
         return columns
+
+    @t.overload # type: ignore
+    def toArrow(self) -> ArrowTable: ...
+
+    @t.overload
+    def toArrow(self, batch_size: int) -> RecordBatchReaderFacade: ...
+
+    def toArrow(
+        self, batch_size: t.Optional[int] = None
+    ) -> t.Union[ArrowTable, RecordBatchReaderFacade]:
+        self._collect(skip_rows=True)
+        if not batch_size:
+            return self.session._cur.fetchall_arrow() # type: ignore
+        return RecordBatchReaderFacade(self.session._cur, batch_size) # type: ignore
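toArrow() with no argument drains the cursor into a single pyarrow Table, while toArrow(batch_size) hands back the RecordBatchReaderFacade, whose read_next_batch() mimics pyarrow's RecordBatchReader and raises StopIteration once the cursor is exhausted. A hedged usage sketch, assuming an already-configured Databricks connection and a placeholder table name:

from sqlframe.databricks import DatabricksSession

# Connection details (server hostname, HTTP path, access token) are assumed to
# be supplied through the session builder in a real setup; omitted here.
session = DatabricksSession.builder.getOrCreate()

df = session.table("samples.nyctaxi.trips")  # placeholder table name

full_table = df.toArrow()  # entire result as a pyarrow.Table

reader = df.toArrow(batch_size=10_000)  # RecordBatchReaderFacade
while True:
    try:
        batch = reader.read_next_batch()  # pyarrow table of up to 10_000 rows
    except StopIteration:
        break
    print(batch.num_rows)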
{sqlframe-3.29.1.dist-info → sqlframe-3.31.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sqlframe
-Version: 3.29.1
+Version: 3.31.0
 Summary: Turning PySpark Into a Universal DataFrame API
 Home-page: https://github.com/eakmanrq/sqlframe
 Author: Ryan Eakman

@@ -17,13 +17,13 @@ Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: prettytable <4
-Requires-Dist: sqlglot <26.
+Requires-Dist: sqlglot <26.17,>=24.0.0
 Requires-Dist: typing-extensions
 Provides-Extra: bigquery
 Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
 Requires-Dist: google-cloud-bigquery[pandas] <4,>=3 ; extra == 'bigquery'
 Provides-Extra: databricks
-Requires-Dist: databricks-sql-connector <5,>=3.6 ; extra == 'databricks'
+Requires-Dist: databricks-sql-connector[pyarrow] <5,>=3.6 ; extra == 'databricks'
 Provides-Extra: dev
 Requires-Dist: duckdb <1.3,>=1.2 ; extra == 'dev'
 Requires-Dist: findspark <3,>=2 ; extra == 'dev'
{sqlframe-3.29.1.dist-info → sqlframe-3.31.0.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
 sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
-sqlframe/_version.py,sha256=
+sqlframe/_version.py,sha256=AgRbmN0zJrj6Fie24WbIl_HozqASkUgbWo9IOWaS7vU,513
 sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
 sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298

@@ -9,7 +9,7 @@ sqlframe/base/dataframe.py,sha256=D2N2Kvh_tiF60fYODUikq0xRCJYY4WB2aHbEcq5NIUo,84
 sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
 sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
 sqlframe/base/function_alternatives.py,sha256=Bs1bwl25fN3Yy9rb4GnUWBGunQ1C_yelkb2yV9DSZIY,53918
-sqlframe/base/functions.py,sha256=
+sqlframe/base/functions.py,sha256=iReQ8NW4cwTvgGOXQq6WphC3DQbtKjIHDPJZgWjE614,224862
 sqlframe/base/group.py,sha256=OY4w1WRsCqLgW-Pi7DjF63zbbxSLISCF3qjAbzI2CQ4,4283
 sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
 sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438

@@ -42,7 +42,7 @@ sqlframe/bigquery/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,3
 sqlframe/databricks/__init__.py,sha256=BkB_eO1UYwcf8j6x7bi4BWmDCMkfn0CUMwossWgwaG4,993
 sqlframe/databricks/catalog.py,sha256=T_4NlQ7TD57_UTYFeRezYUu0t9NEx7cgJS60O4DKcBQ,18609
 sqlframe/databricks/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
-sqlframe/databricks/dataframe.py,sha256=
+sqlframe/databricks/dataframe.py,sha256=8kwT1kWU2TwGjR9zDrGdmkvabiBCivA_Mcg06r2XVX4,3111
 sqlframe/databricks/functions.py,sha256=La8rjAwO0hD4FBO0QxW5CtZtFAPvOrVc6lG4OtPGgbc,336
 sqlframe/databricks/functions.pyi,sha256=FzVBpzXCJzxIp73sIAo_R8Wx8uOJrix-W12HsgyeTcQ,23799
 sqlframe/databricks/group.py,sha256=dU3g0DVLRlfOSCamKchQFXRd1WTFbdxoXkpEX8tPD6Y,399

@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
 sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
 sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
 sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
-sqlframe-3.
-sqlframe-3.
-sqlframe-3.
-sqlframe-3.
-sqlframe-3.
+sqlframe-3.31.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
+sqlframe-3.31.0.dist-info/METADATA,sha256=9D8GGMLw5XslYPUw8U_cEa_Ab4NgB1FRTXW0Wa6lBb0,8987
+sqlframe-3.31.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+sqlframe-3.31.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
+sqlframe-3.31.0.dist-info/RECORD,,
{sqlframe-3.29.1.dist-info → sqlframe-3.31.0.dist-info}/LICENSE
File without changes
{sqlframe-3.29.1.dist-info → sqlframe-3.31.0.dist-info}/WHEEL
File without changes
{sqlframe-3.29.1.dist-info → sqlframe-3.31.0.dist-info}/top_level.txt
File without changes