sqlframe 3.31.1__py3-none-any.whl → 3.31.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '3.31.1'
21
- __version_tuple__ = version_tuple = (3, 31, 1)
20
+ __version__ = version = '3.31.3'
21
+ __version_tuple__ = version_tuple = (3, 31, 3)
@@ -625,11 +625,22 @@ def covar_samp(col1: ColumnOrName, col2: ColumnOrName) -> Column:
625
625
  def first(col: ColumnOrName, ignorenulls: t.Optional[bool] = None) -> Column:
626
626
  session = _get_session()
627
627
 
628
- if session._is_duckdb:
629
- ignorenulls = None
630
-
631
628
  this = Column.invoke_expression_over_column(col, expression.First)
632
629
  if ignorenulls:
630
+ if session._is_duckdb:
631
+ return Column(
632
+ expression.Filter(
633
+ this=this.expression,
634
+ expression=expression.Where(
635
+ this=expression.Not(
636
+ this=expression.Is(
637
+ this=Column.ensure_col(col).expression,
638
+ expression=expression.Null(),
639
+ )
640
+ )
641
+ ),
642
+ )
643
+ )
633
644
  return Column.invoke_expression_over_column(this, expression.IgnoreNulls)
634
645
  return this
635
646
 
@@ -3266,12 +3277,57 @@ def find_in_set(str: ColumnOrName, str_array: ColumnOrName) -> Column:
3266
3277
  return Column.invoke_anonymous_function(str, "find_in_set", str_array)
3267
3278
 
3268
3279
 
3269
- @meta(unsupported_engines="*")
3280
+ @meta(unsupported_engines=["bigquery", "postgres", "snowflake"])
3270
3281
  def first_value(col: ColumnOrName, ignoreNulls: t.Optional[t.Union[bool, Column]] = None) -> Column:
3282
+ """Returns the first value of `col` for a group of rows. It will return the first non-null
3283
+ value it sees when `ignoreNulls` is set to true. If all values are null, then null is returned.
3284
+
3285
+ Parameters
3286
+ ----------
3287
+ col : :class:`~pyspark.sql.Column` or str
3288
+ target column to work on.
3289
+ ignorenulls : :class:`~pyspark.sql.Column` or bool
3290
+ if first value is null then look for first non-null value.
3291
+
3292
+ Returns
3293
+ -------
3294
+ :class:`~pyspark.sql.Column`
3295
+ some value of `col` for a group of rows.
3296
+
3297
+ Examples
3298
+ --------
3299
+ >>> import pyspark.sql.functions as sf
3300
+ >>> spark.createDataFrame(
3301
+ ... [(None, 1), ("a", 2), ("a", 3), ("b", 8), (None, 2)], ["a", "b"]
3302
+ ... ).select(sf.first_value('a'), sf.first_value('b')).show()
3303
+ +--------------+--------------+
3304
+ |first_value(a)|first_value(b)|
3305
+ +--------------+--------------+
3306
+ | NULL| 1|
3307
+ +--------------+--------------+
3308
+
3309
+ >>> import pyspark.sql.functions as sf
3310
+ >>> spark.createDataFrame(
3311
+ ... [(None, 1), ("a", 2), ("a", 3), ("b", 8), (None, 2)], ["a", "b"]
3312
+ ... ).select(sf.first_value('a', True), sf.first_value('b', True)).show()
3313
+ +--------------+--------------+
3314
+ |first_value(a)|first_value(b)|
3315
+ +--------------+--------------+
3316
+ | a| 1|
3317
+ +--------------+--------------+
3318
+ """
3319
+ session = _get_session()
3320
+
3271
3321
  column = Column.invoke_expression_over_column(col, expression.FirstValue)
3272
3322
 
3273
3323
  if ignoreNulls:
3274
- return Column(expression.IgnoreNulls(this=column.column_expression))
3324
+ column = Column(expression.IgnoreNulls(this=column.column_expression))
3325
+
3326
+ if session._is_duckdb:
3327
+ agg_func = first(col, ignoreNulls) # type: ignore
3328
+ agg_func.expression._meta = agg_func.expression._meta or {}
3329
+ agg_func.expression._meta["window_func"] = column.expression
3330
+ return agg_func
3275
3331
  return column
3276
3332
 
3277
3333
 
@@ -41,13 +41,14 @@ class DuckDBSession(
41
41
 
42
42
  def __init__(self, conn: t.Optional[DuckDBPyConnection] = None, *args, **kwargs):
43
43
  import duckdb
44
+ from duckdb import InvalidInputException
44
45
  from duckdb.typing import VARCHAR
45
46
 
46
47
  if not hasattr(self, "_conn"):
47
48
  conn = conn or duckdb.connect()
48
49
  try:
49
50
  conn.create_function("SOUNDEX", lambda x: soundex(x), return_type=VARCHAR)
50
- except ImportError:
51
+ except (ImportError, InvalidInputException):
51
52
  pass
52
53
 
53
54
  super().__init__(conn, *args, **kwargs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.31.1
3
+ Version: 3.31.3
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -33,7 +33,7 @@ Requires-Dist: pandas-stubs <3,>=2 ; extra == 'dev'
33
33
  Requires-Dist: pandas <3,>=2 ; extra == 'dev'
34
34
  Requires-Dist: pre-commit <5,>=3.7 ; extra == 'dev'
35
35
  Requires-Dist: psycopg <4,>=3.1 ; extra == 'dev'
36
- Requires-Dist: pyarrow <20,>=10 ; extra == 'dev'
36
+ Requires-Dist: pyarrow <21,>=10 ; extra == 'dev'
37
37
  Requires-Dist: pyspark <3.6,>=2 ; extra == 'dev'
38
38
  Requires-Dist: pytest-forked ; extra == 'dev'
39
39
  Requires-Dist: pytest-postgresql <8,>=6 ; extra == 'dev'
@@ -59,7 +59,7 @@ Requires-Dist: psycopg2 <3,>=2.8 ; extra == 'postgres'
59
59
  Provides-Extra: redshift
60
60
  Requires-Dist: redshift-connector <2.2.0,>=2.1.1 ; extra == 'redshift'
61
61
  Provides-Extra: snowflake
62
- Requires-Dist: snowflake-connector-python[secure-local-storage] <3.15,>=3.10.0 ; extra == 'snowflake'
62
+ Requires-Dist: snowflake-connector-python[secure-local-storage] <3.16,>=3.10.0 ; extra == 'snowflake'
63
63
  Provides-Extra: spark
64
64
  Requires-Dist: pyspark <3.6,>=2 ; extra == 'spark'
65
65
 
@@ -1,5 +1,5 @@
1
1
  sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
2
- sqlframe/_version.py,sha256=7cXV0iDbN83CKh1pJdAw2JSG9yI-8kW4wmTY23El-aE,513
2
+ sqlframe/_version.py,sha256=UFO-lHcdw6c4FMaEKD7EQRsAybP7q9m-0eJwNH5EZtA,513
3
3
  sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
4
4
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
@@ -9,7 +9,7 @@ sqlframe/base/dataframe.py,sha256=D2N2Kvh_tiF60fYODUikq0xRCJYY4WB2aHbEcq5NIUo,84
9
9
  sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
10
10
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
11
11
  sqlframe/base/function_alternatives.py,sha256=Bs1bwl25fN3Yy9rb4GnUWBGunQ1C_yelkb2yV9DSZIY,53918
12
- sqlframe/base/functions.py,sha256=vi1ME9DYOeJHT26BM_3LjOzwq_x6Q-a8bv9wa6JIhYM,225029
12
+ sqlframe/base/functions.py,sha256=pL56_rBUwGSYCnNJ9NaD0AT65lOQqNlw1axJxtXrSsA,227187
13
13
  sqlframe/base/group.py,sha256=OY4w1WRsCqLgW-Pi7DjF63zbbxSLISCF3qjAbzI2CQ4,4283
14
14
  sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
15
15
  sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
@@ -60,7 +60,7 @@ sqlframe/duckdb/functions.py,sha256=ix2efGGD4HLaY1rtCtEd3IrsicGEVGiBAeKOo5OD8rA,
60
60
  sqlframe/duckdb/functions.pyi,sha256=hDjpT-tGDO8LyElcno5YYRUnJg1dXXbGcRjJ69Zqk_U,12542
61
61
  sqlframe/duckdb/group.py,sha256=IkhbW42Ng1U5YT3FkIdiB4zBqRkW4QyTb-1detY1e_4,383
62
62
  sqlframe/duckdb/readwriter.py,sha256=WEfUSKI68BFwAt4xwQX-GO8ZSGuUQYgYKkmWE55DmJo,5171
63
- sqlframe/duckdb/session.py,sha256=H1qjMYmhpwUHmf6jOPA6IhPIEIeX8rlvOl3MTIEijG0,2719
63
+ sqlframe/duckdb/session.py,sha256=FBU78oA9Lnj5A8ikVswQEDIlJcA3wc0Thn6KVso5iqM,2793
64
64
  sqlframe/duckdb/table.py,sha256=AmEKoH2TZo98loS5NbNaTuqv0eg76SY_OckVBMmQ6Co,410
65
65
  sqlframe/duckdb/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
66
66
  sqlframe/duckdb/udf.py,sha256=Du9LnOtT1lJvB90D4HSR2tB7MXy179jZngDR-EjVjQk,656
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
130
130
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
131
131
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
132
132
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
133
- sqlframe-3.31.1.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
134
- sqlframe-3.31.1.dist-info/METADATA,sha256=qyaYIcsC9vQHCx8KdaKUUN6hIYYKBTArtP8aENRG6UE,8987
135
- sqlframe-3.31.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
136
- sqlframe-3.31.1.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
137
- sqlframe-3.31.1.dist-info/RECORD,,
133
+ sqlframe-3.31.3.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
134
+ sqlframe-3.31.3.dist-info/METADATA,sha256=RptYyIn66gSabMqE67sugz8unZERau0Ip1EbMIdStb4,8987
135
+ sqlframe-3.31.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
136
+ sqlframe-3.31.3.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
137
+ sqlframe-3.31.3.dist-info/RECORD,,