sqlframe 3.31.1__py3-none-any.whl → 3.31.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/functions.py +61 -5
- sqlframe/duckdb/session.py +2 -1
- {sqlframe-3.31.1.dist-info → sqlframe-3.31.3.dist-info}/METADATA +3 -3
- {sqlframe-3.31.1.dist-info → sqlframe-3.31.3.dist-info}/RECORD +8 -8
- {sqlframe-3.31.1.dist-info → sqlframe-3.31.3.dist-info}/LICENSE +0 -0
- {sqlframe-3.31.1.dist-info → sqlframe-3.31.3.dist-info}/WHEEL +0 -0
- {sqlframe-3.31.1.dist-info → sqlframe-3.31.3.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/functions.py
CHANGED
@@ -625,11 +625,22 @@ def covar_samp(col1: ColumnOrName, col2: ColumnOrName) -> Column:
|
|
625
625
|
def first(col: ColumnOrName, ignorenulls: t.Optional[bool] = None) -> Column:
|
626
626
|
session = _get_session()
|
627
627
|
|
628
|
-
if session._is_duckdb:
|
629
|
-
ignorenulls = None
|
630
|
-
|
631
628
|
this = Column.invoke_expression_over_column(col, expression.First)
|
632
629
|
if ignorenulls:
|
630
|
+
if session._is_duckdb:
|
631
|
+
return Column(
|
632
|
+
expression.Filter(
|
633
|
+
this=this.expression,
|
634
|
+
expression=expression.Where(
|
635
|
+
this=expression.Not(
|
636
|
+
this=expression.Is(
|
637
|
+
this=Column.ensure_col(col).expression,
|
638
|
+
expression=expression.Null(),
|
639
|
+
)
|
640
|
+
)
|
641
|
+
),
|
642
|
+
)
|
643
|
+
)
|
633
644
|
return Column.invoke_expression_over_column(this, expression.IgnoreNulls)
|
634
645
|
return this
|
635
646
|
|
@@ -3266,12 +3277,57 @@ def find_in_set(str: ColumnOrName, str_array: ColumnOrName) -> Column:
|
|
3266
3277
|
return Column.invoke_anonymous_function(str, "find_in_set", str_array)
|
3267
3278
|
|
3268
3279
|
|
3269
|
-
@meta(unsupported_engines="
|
3280
|
+
@meta(unsupported_engines=["bigquery", "postgres", "snowflake"])
|
3270
3281
|
def first_value(col: ColumnOrName, ignoreNulls: t.Optional[t.Union[bool, Column]] = None) -> Column:
|
3282
|
+
"""Returns the first value of `col` for a group of rows. It will return the first non-null
|
3283
|
+
value it sees when `ignoreNulls` is set to true. If all values are null, then null is returned.
|
3284
|
+
|
3285
|
+
Parameters
|
3286
|
+
----------
|
3287
|
+
col : :class:`~pyspark.sql.Column` or str
|
3288
|
+
target column to work on.
|
3289
|
+
ignorenulls : :class:`~pyspark.sql.Column` or bool
|
3290
|
+
if first value is null then look for first non-null value.
|
3291
|
+
|
3292
|
+
Returns
|
3293
|
+
-------
|
3294
|
+
:class:`~pyspark.sql.Column`
|
3295
|
+
some value of `col` for a group of rows.
|
3296
|
+
|
3297
|
+
Examples
|
3298
|
+
--------
|
3299
|
+
>>> import pyspark.sql.functions as sf
|
3300
|
+
>>> spark.createDataFrame(
|
3301
|
+
... [(None, 1), ("a", 2), ("a", 3), ("b", 8), (None, 2)], ["a", "b"]
|
3302
|
+
... ).select(sf.first_value('a'), sf.first_value('b')).show()
|
3303
|
+
+--------------+--------------+
|
3304
|
+
|first_value(a)|first_value(b)|
|
3305
|
+
+--------------+--------------+
|
3306
|
+
| NULL| 1|
|
3307
|
+
+--------------+--------------+
|
3308
|
+
|
3309
|
+
>>> import pyspark.sql.functions as sf
|
3310
|
+
>>> spark.createDataFrame(
|
3311
|
+
... [(None, 1), ("a", 2), ("a", 3), ("b", 8), (None, 2)], ["a", "b"]
|
3312
|
+
... ).select(sf.first_value('a', True), sf.first_value('b', True)).show()
|
3313
|
+
+--------------+--------------+
|
3314
|
+
|first_value(a)|first_value(b)|
|
3315
|
+
+--------------+--------------+
|
3316
|
+
| a| 1|
|
3317
|
+
+--------------+--------------+
|
3318
|
+
"""
|
3319
|
+
session = _get_session()
|
3320
|
+
|
3271
3321
|
column = Column.invoke_expression_over_column(col, expression.FirstValue)
|
3272
3322
|
|
3273
3323
|
if ignoreNulls:
|
3274
|
-
|
3324
|
+
column = Column(expression.IgnoreNulls(this=column.column_expression))
|
3325
|
+
|
3326
|
+
if session._is_duckdb:
|
3327
|
+
agg_func = first(col, ignoreNulls) # type: ignore
|
3328
|
+
agg_func.expression._meta = agg_func.expression._meta or {}
|
3329
|
+
agg_func.expression._meta["window_func"] = column.expression
|
3330
|
+
return agg_func
|
3275
3331
|
return column
|
3276
3332
|
|
3277
3333
|
|
sqlframe/duckdb/session.py
CHANGED
@@ -41,13 +41,14 @@ class DuckDBSession(
|
|
41
41
|
|
42
42
|
def __init__(self, conn: t.Optional[DuckDBPyConnection] = None, *args, **kwargs):
|
43
43
|
import duckdb
|
44
|
+
from duckdb import InvalidInputException
|
44
45
|
from duckdb.typing import VARCHAR
|
45
46
|
|
46
47
|
if not hasattr(self, "_conn"):
|
47
48
|
conn = conn or duckdb.connect()
|
48
49
|
try:
|
49
50
|
conn.create_function("SOUNDEX", lambda x: soundex(x), return_type=VARCHAR)
|
50
|
-
except ImportError:
|
51
|
+
except (ImportError, InvalidInputException):
|
51
52
|
pass
|
52
53
|
|
53
54
|
super().__init__(conn, *args, **kwargs)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sqlframe
|
3
|
-
Version: 3.31.1
|
3
|
+
Version: 3.31.3
|
4
4
|
Summary: Turning PySpark Into a Universal DataFrame API
|
5
5
|
Home-page: https://github.com/eakmanrq/sqlframe
|
6
6
|
Author: Ryan Eakman
|
@@ -33,7 +33,7 @@ Requires-Dist: pandas-stubs <3,>=2 ; extra == 'dev'
|
|
33
33
|
Requires-Dist: pandas <3,>=2 ; extra == 'dev'
|
34
34
|
Requires-Dist: pre-commit <5,>=3.7 ; extra == 'dev'
|
35
35
|
Requires-Dist: psycopg <4,>=3.1 ; extra == 'dev'
|
36
|
-
Requires-Dist: pyarrow <
|
36
|
+
Requires-Dist: pyarrow <21,>=10 ; extra == 'dev'
|
37
37
|
Requires-Dist: pyspark <3.6,>=2 ; extra == 'dev'
|
38
38
|
Requires-Dist: pytest-forked ; extra == 'dev'
|
39
39
|
Requires-Dist: pytest-postgresql <8,>=6 ; extra == 'dev'
|
@@ -59,7 +59,7 @@ Requires-Dist: psycopg2 <3,>=2.8 ; extra == 'postgres'
|
|
59
59
|
Provides-Extra: redshift
|
60
60
|
Requires-Dist: redshift-connector <2.2.0,>=2.1.1 ; extra == 'redshift'
|
61
61
|
Provides-Extra: snowflake
|
62
|
-
Requires-Dist: snowflake-connector-python[secure-local-storage] <3.
|
62
|
+
Requires-Dist: snowflake-connector-python[secure-local-storage] <3.16,>=3.10.0 ; extra == 'snowflake'
|
63
63
|
Provides-Extra: spark
|
64
64
|
Requires-Dist: pyspark <3.6,>=2 ; extra == 'spark'
|
65
65
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
|
2
|
-
sqlframe/_version.py,sha256=
|
2
|
+
sqlframe/_version.py,sha256=UFO-lHcdw6c4FMaEKD7EQRsAybP7q9m-0eJwNH5EZtA,513
|
3
3
|
sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
|
4
4
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
@@ -9,7 +9,7 @@ sqlframe/base/dataframe.py,sha256=D2N2Kvh_tiF60fYODUikq0xRCJYY4WB2aHbEcq5NIUo,84
|
|
9
9
|
sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
|
10
10
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
11
11
|
sqlframe/base/function_alternatives.py,sha256=Bs1bwl25fN3Yy9rb4GnUWBGunQ1C_yelkb2yV9DSZIY,53918
|
12
|
-
sqlframe/base/functions.py,sha256=
|
12
|
+
sqlframe/base/functions.py,sha256=pL56_rBUwGSYCnNJ9NaD0AT65lOQqNlw1axJxtXrSsA,227187
|
13
13
|
sqlframe/base/group.py,sha256=OY4w1WRsCqLgW-Pi7DjF63zbbxSLISCF3qjAbzI2CQ4,4283
|
14
14
|
sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
|
15
15
|
sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
|
@@ -60,7 +60,7 @@ sqlframe/duckdb/functions.py,sha256=ix2efGGD4HLaY1rtCtEd3IrsicGEVGiBAeKOo5OD8rA,
|
|
60
60
|
sqlframe/duckdb/functions.pyi,sha256=hDjpT-tGDO8LyElcno5YYRUnJg1dXXbGcRjJ69Zqk_U,12542
|
61
61
|
sqlframe/duckdb/group.py,sha256=IkhbW42Ng1U5YT3FkIdiB4zBqRkW4QyTb-1detY1e_4,383
|
62
62
|
sqlframe/duckdb/readwriter.py,sha256=WEfUSKI68BFwAt4xwQX-GO8ZSGuUQYgYKkmWE55DmJo,5171
|
63
|
-
sqlframe/duckdb/session.py,sha256=
|
63
|
+
sqlframe/duckdb/session.py,sha256=FBU78oA9Lnj5A8ikVswQEDIlJcA3wc0Thn6KVso5iqM,2793
|
64
64
|
sqlframe/duckdb/table.py,sha256=AmEKoH2TZo98loS5NbNaTuqv0eg76SY_OckVBMmQ6Co,410
|
65
65
|
sqlframe/duckdb/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
|
66
66
|
sqlframe/duckdb/udf.py,sha256=Du9LnOtT1lJvB90D4HSR2tB7MXy179jZngDR-EjVjQk,656
|
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
130
130
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
131
131
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
132
132
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
133
|
-
sqlframe-3.31.
|
134
|
-
sqlframe-3.31.
|
135
|
-
sqlframe-3.31.
|
136
|
-
sqlframe-3.31.
|
137
|
-
sqlframe-3.31.
|
133
|
+
sqlframe-3.31.3.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
134
|
+
sqlframe-3.31.3.dist-info/METADATA,sha256=RptYyIn66gSabMqE67sugz8unZERau0Ip1EbMIdStb4,8987
|
135
|
+
sqlframe-3.31.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
136
|
+
sqlframe-3.31.3.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
137
|
+
sqlframe-3.31.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|