sqlframe 3.31.0__py3-none-any.whl → 3.31.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '3.31.0'
21
- __version_tuple__ = version_tuple = (3, 31, 0)
20
+ __version__ = version = '3.31.2'
21
+ __version_tuple__ = version_tuple = (3, 31, 2)
sqlframe/base/column.py CHANGED
@@ -430,8 +430,9 @@ class Column:
430
430
  )
431
431
 
432
432
  def over(self, window: WindowSpec) -> Column:
433
+ column_expression = self.column_expression.meta.get("window_func", self.column_expression)
433
434
  window_expression = window.expression.copy()
434
- window_expression.set("this", self.column_expression)
435
+ window_expression.set("this", column_expression)
435
436
  return Column(window_expression)
436
437
 
437
438
  def getItem(self, key: t.Any) -> Column:
@@ -625,11 +625,22 @@ def covar_samp(col1: ColumnOrName, col2: ColumnOrName) -> Column:
625
625
  def first(col: ColumnOrName, ignorenulls: t.Optional[bool] = None) -> Column:
626
626
  session = _get_session()
627
627
 
628
- if session._is_duckdb:
629
- ignorenulls = None
630
-
631
628
  this = Column.invoke_expression_over_column(col, expression.First)
632
629
  if ignorenulls:
630
+ if session._is_duckdb:
631
+ return Column(
632
+ expression.Filter(
633
+ this=this.expression,
634
+ expression=expression.Where(
635
+ this=expression.Not(
636
+ this=expression.Is(
637
+ this=Column.ensure_col(col).expression,
638
+ expression=expression.Null(),
639
+ )
640
+ )
641
+ ),
642
+ )
643
+ )
633
644
  return Column.invoke_expression_over_column(this, expression.IgnoreNulls)
634
645
  return this
635
646
 
@@ -3266,12 +3277,57 @@ def find_in_set(str: ColumnOrName, str_array: ColumnOrName) -> Column:
3266
3277
  return Column.invoke_anonymous_function(str, "find_in_set", str_array)
3267
3278
 
3268
3279
 
3269
- @meta(unsupported_engines="*")
3280
+ @meta(unsupported_engines=["bigquery", "postgres", "snowflake"])
3270
3281
  def first_value(col: ColumnOrName, ignoreNulls: t.Optional[t.Union[bool, Column]] = None) -> Column:
3282
+ """Returns the first value of `col` for a group of rows. It will return the first non-null
3283
+ value it sees when `ignoreNulls` is set to true. If all values are null, then null is returned.
3284
+
3285
+ Parameters
3286
+ ----------
3287
+ col : :class:`~pyspark.sql.Column` or str
3288
+ target column to work on.
3289
+ ignoreNulls : :class:`~pyspark.sql.Column` or bool
3290
+ if first value is null then look for first non-null value.
3291
+
3292
+ Returns
3293
+ -------
3294
+ :class:`~pyspark.sql.Column`
3295
+ some value of `col` for a group of rows.
3296
+
3297
+ Examples
3298
+ --------
3299
+ >>> import pyspark.sql.functions as sf
3300
+ >>> spark.createDataFrame(
3301
+ ... [(None, 1), ("a", 2), ("a", 3), ("b", 8), (None, 2)], ["a", "b"]
3302
+ ... ).select(sf.first_value('a'), sf.first_value('b')).show()
3303
+ +--------------+--------------+
3304
+ |first_value(a)|first_value(b)|
3305
+ +--------------+--------------+
3306
+ | NULL| 1|
3307
+ +--------------+--------------+
3308
+
3309
+ >>> import pyspark.sql.functions as sf
3310
+ >>> spark.createDataFrame(
3311
+ ... [(None, 1), ("a", 2), ("a", 3), ("b", 8), (None, 2)], ["a", "b"]
3312
+ ... ).select(sf.first_value('a', True), sf.first_value('b', True)).show()
3313
+ +--------------+--------------+
3314
+ |first_value(a)|first_value(b)|
3315
+ +--------------+--------------+
3316
+ | a| 1|
3317
+ +--------------+--------------+
3318
+ """
3319
+ session = _get_session()
3320
+
3271
3321
  column = Column.invoke_expression_over_column(col, expression.FirstValue)
3272
3322
 
3273
3323
  if ignoreNulls:
3274
- return Column(expression.IgnoreNulls(this=column.column_expression))
3324
+ column = Column(expression.IgnoreNulls(this=column.column_expression))
3325
+
3326
+ if session._is_duckdb:
3327
+ agg_func = first(col, ignoreNulls) # type: ignore
3328
+ agg_func.expression._meta = agg_func.expression._meta or {}
3329
+ agg_func.expression._meta["window_func"] = column.expression
3330
+ return agg_func
3275
3331
  return column
3276
3332
 
3277
3333
 
@@ -3930,13 +3986,16 @@ def last_value(col: ColumnOrName, ignoreNulls: t.Optional[t.Union[bool, Column]]
3930
3986
  """
3931
3987
  session = _get_session()
3932
3988
 
3933
- if session._is_duckdb:
3934
- return last(col, ignoreNulls) # type: ignore
3935
-
3936
3989
  column = Column.invoke_expression_over_column(col, expression.LastValue)
3937
3990
 
3938
3991
  if ignoreNulls:
3939
- return Column(expression.IgnoreNulls(this=column.column_expression))
3992
+ column = Column(expression.IgnoreNulls(this=column.column_expression))
3993
+
3994
+ if session._is_duckdb:
3995
+ agg_func = last(col, ignoreNulls) # type: ignore
3996
+ agg_func.expression._meta = agg_func.expression._meta or {}
3997
+ agg_func.expression._meta["window_func"] = column.expression
3998
+ return agg_func
3940
3999
  return column
3941
4000
 
3942
4001
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.31.0
3
+ Version: 3.31.2
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -1,15 +1,15 @@
1
1
  sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
2
- sqlframe/_version.py,sha256=AgRbmN0zJrj6Fie24WbIl_HozqASkUgbWo9IOWaS7vU,513
2
+ sqlframe/_version.py,sha256=NVEtY8IR79NwYZzL_LNH-0DstDZDCujwAeO2Mcu8K0E,513
3
3
  sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
4
4
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
6
6
  sqlframe/base/catalog.py,sha256=ZuU_qmt4yjSoTYgecSGnOhitOdh3rJbGCUjnUBp5mlc,38564
7
- sqlframe/base/column.py,sha256=AG9Z_6RNhVxLhLU29kRCgzMgDNSm-_GFg96xLqk1-bs,19838
7
+ sqlframe/base/column.py,sha256=sp3fJstA49FslE2CcgvVFHyi7Jxsxk8qHTd-Z0cAEWc,19932
8
8
  sqlframe/base/dataframe.py,sha256=D2N2Kvh_tiF60fYODUikq0xRCJYY4WB2aHbEcq5NIUo,84310
9
9
  sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
10
10
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
11
11
  sqlframe/base/function_alternatives.py,sha256=Bs1bwl25fN3Yy9rb4GnUWBGunQ1C_yelkb2yV9DSZIY,53918
12
- sqlframe/base/functions.py,sha256=iReQ8NW4cwTvgGOXQq6WphC3DQbtKjIHDPJZgWjE614,224862
12
+ sqlframe/base/functions.py,sha256=pL56_rBUwGSYCnNJ9NaD0AT65lOQqNlw1axJxtXrSsA,227187
13
13
  sqlframe/base/group.py,sha256=OY4w1WRsCqLgW-Pi7DjF63zbbxSLISCF3qjAbzI2CQ4,4283
14
14
  sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
15
15
  sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
130
130
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
131
131
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
132
132
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
133
- sqlframe-3.31.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
134
- sqlframe-3.31.0.dist-info/METADATA,sha256=9D8GGMLw5XslYPUw8U_cEa_Ab4NgB1FRTXW0Wa6lBb0,8987
135
- sqlframe-3.31.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
136
- sqlframe-3.31.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
137
- sqlframe-3.31.0.dist-info/RECORD,,
133
+ sqlframe-3.31.2.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
134
+ sqlframe-3.31.2.dist-info/METADATA,sha256=H7g5gxzMh6Za__8fohB0XWz4xqlgJhWh8fgjCap-kz0,8987
135
+ sqlframe-3.31.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
136
+ sqlframe-3.31.2.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
137
+ sqlframe-3.31.2.dist-info/RECORD,,