sqlframe 3.41.0__py3-none-any.whl → 3.43.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '3.41.0'
32
- __version_tuple__ = version_tuple = (3, 41, 0)
31
+ __version__ = version = '3.43.0'
32
+ __version_tuple__ = version_tuple = (3, 43, 0)
33
33
 
34
- __commit_id__ = commit_id = 'g961d3fda2'
34
+ __commit_id__ = commit_id = 'g38d15d998'
@@ -1665,10 +1665,34 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1665
1665
  def should_drop_expression(expr: exp.Expression) -> bool:
1666
1666
  # Check against fully qualified Column objects and
1667
1667
  # Check against unqualified string column names (drop ALL columns with this name)
1668
- if expr.sql() in drop_sql or (
1669
- isinstance(expr, exp.Column) and expr.alias_or_name in column_names
1670
- ):
1668
+ if expr.sql() in drop_sql:
1671
1669
  return True
1670
+
1671
+ if isinstance(expr, exp.Column) and (alias_or_name := expr.alias_or_name):
1672
+ # Check direct match first
1673
+ if alias_or_name in column_names:
1674
+ return True
1675
+
1676
+ # Handle string column references that contain aliases
1677
+ for col_name in column_names:
1678
+ if ("." in col_name) and alias_or_name == (col_name.split(".", maxsplit=1)[-1]):
1679
+ # Compare against everything after the first dot (i.e. with the leading qualifier stripped)
1680
+ return True
1681
+
1682
+ # Handle case where normalized columns have table qualifiers but actual expressions
1683
+ # are unqualified. This happens when using aliased column references like
1684
+ # f.col('df.foo')
1685
+
1686
+ # Check if any drop column matches by column name AND table qualifier
1687
+ for drop_col in drop_cols:
1688
+ if ((drop_expression := drop_col.expression).alias_or_name) == alias_or_name:
1689
+ if expr_table := expr.table:
1690
+ drop_table = drop_expression.args.get("table")
1691
+ if (not drop_table) or (expr_table == drop_table):
1692
+ return True
1693
+ else:
1694
+ return True
1695
+
1672
1696
  return False
1673
1697
 
1674
1698
  new_expressions = [expr for expr in current_expressions if not should_drop_expression(expr)]
@@ -1974,8 +1974,30 @@ def regexp_replace(
1974
1974
  )
1975
1975
 
1976
1976
 
1977
- @meta(unsupported_engines="duckdb")
1977
+ @meta()
1978
1978
  def initcap(col: ColumnOrName) -> Column:
1979
+ session = _get_session()
1980
+
1981
+ if session._is_duckdb:
1982
+ split_func = get_func_from_session("split")
1983
+ transform_func = get_func_from_session("transform")
1984
+ reduce_func = get_func_from_session("reduce")
1985
+ upper_func = get_func_from_session("upper")
1986
+ lower_func = get_func_from_session("lower")
1987
+ length_func = get_func_from_session("length")
1988
+ concat_func = get_func_from_session("concat")
1989
+ concat_ws_func = get_func_from_session("concat_ws")
1990
+ return reduce_func(
1991
+ transform_func(
1992
+ split_func(col, r"\s+"),
1993
+ lambda w: concat_func(
1994
+ upper_func(w.substr(1, 1)), lower_func(w.substr(2, length_func(w) - 1))
1995
+ ),
1996
+ ),
1997
+ None,
1998
+ merge=lambda x, y: concat_ws_func(" ", x, y),
1999
+ )
2000
+
1979
2001
  return Column.invoke_expression_over_column(col, expression.Initcap)
1980
2002
 
1981
2003
 
@@ -2686,7 +2708,7 @@ def from_csv(
2686
2708
  return Column.invoke_anonymous_function(col, "FROM_CSV", schema)
2687
2709
 
2688
2710
 
2689
- @meta(unsupported_engines=["bigquery", "duckdb", "postgres", "snowflake"])
2711
+ @meta(unsupported_engines=["bigquery", "postgres", "snowflake"])
2690
2712
  def aggregate(
2691
2713
  col: ColumnOrName,
2692
2714
  initialValue: ColumnOrName,
@@ -2694,21 +2716,20 @@ def aggregate(
2694
2716
  finish: t.Optional[t.Callable[[Column], Column]] = None,
2695
2717
  ) -> Column:
2696
2718
  merge_exp = _get_lambda_from_func(merge)
2719
+ kwargs = dict(
2720
+ initial=initialValue,
2721
+ merge=merge_exp,
2722
+ )
2723
+ session = _get_session()
2697
2724
  if finish is not None:
2698
2725
  finish_exp = _get_lambda_from_func(finish)
2699
- return Column.invoke_expression_over_column(
2700
- col,
2701
- expression.Reduce,
2702
- initial=initialValue,
2703
- merge=Column(merge_exp),
2704
- finish=Column(finish_exp),
2705
- )
2706
- return Column.invoke_expression_over_column(
2707
- col, expression.Reduce, initial=initialValue, merge=Column(merge_exp)
2708
- )
2726
+ kwargs["finish"] = Column(finish_exp)
2727
+ if session._is_duckdb:
2728
+ kwargs.pop("initial", None)
2729
+ return Column.invoke_expression_over_column(col, expression.Reduce, **kwargs)
2709
2730
 
2710
2731
 
2711
- @meta(unsupported_engines=["bigquery", "duckdb", "postgres", "snowflake"])
2732
+ @meta(unsupported_engines="postgres")
2712
2733
  def transform(
2713
2734
  col: ColumnOrName,
2714
2735
  f: t.Union[t.Callable[[Column], Column], t.Callable[[Column, Column], Column]],
sqlframe/base/session.py CHANGED
@@ -437,7 +437,7 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
437
437
  for cte in df.expression.ctes:
438
438
  if cte.alias_or_name not in expression_ctes:
439
439
  ctes_to_add.append(cte)
440
- expression.set("with", exp.With(expressions=expression.ctes + ctes_to_add)) # type: ignore
440
+ expression.set("with", exp.With(expressions=ctes_to_add + expression.ctes)) # type: ignore
441
441
 
442
442
  def replace_temp_view_name_with_cte(node: exp.Expression) -> exp.Expression:
443
443
  if isinstance(node, exp.Table):
@@ -61,5 +61,5 @@ class DuckDBDataFrame(
61
61
  def toArrow(self, batch_size: t.Optional[int] = None) -> t.Union[ArrowTable, RecordBatchReader]:
62
62
  self._collect(skip_rows=True)
63
63
  if not batch_size:
64
- return self.session._last_result.arrow()
64
+ return self.session._last_result.fetch_arrow_table()
65
65
  return self.session._last_result.fetch_record_batch(batch_size)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.41.0
3
+ Version: 3.43.0
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -18,7 +18,7 @@ Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
19
  Requires-Dist: more-itertools
20
20
  Requires-Dist: prettytable <4
21
- Requires-Dist: sqlglot <27.15,>=24.0.0
21
+ Requires-Dist: sqlglot <27.16,>=24.0.0
22
22
  Requires-Dist: typing-extensions
23
23
  Provides-Extra: bigquery
24
24
  Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
@@ -26,7 +26,7 @@ Requires-Dist: google-cloud-bigquery[pandas] <4,>=3 ; extra == 'bigquery'
26
26
  Provides-Extra: databricks
27
27
  Requires-Dist: databricks-sql-connector[pyarrow] <5,>=3.6 ; extra == 'databricks'
28
28
  Provides-Extra: dev
29
- Requires-Dist: duckdb <1.4,>=1.2 ; extra == 'dev'
29
+ Requires-Dist: duckdb <1.5,>=1.2 ; extra == 'dev'
30
30
  Requires-Dist: findspark <3,>=2 ; extra == 'dev'
31
31
  Requires-Dist: mypy <1.19,>=1.10.0 ; extra == 'dev'
32
32
  Requires-Dist: openai <2,>=1.30 ; extra == 'dev'
@@ -50,7 +50,7 @@ Requires-Dist: mkdocs-material ==9.0.5 ; extra == 'docs'
50
50
  Requires-Dist: mkdocs ==1.4.2 ; extra == 'docs'
51
51
  Requires-Dist: pymdown-extensions ; extra == 'docs'
52
52
  Provides-Extra: duckdb
53
- Requires-Dist: duckdb <1.4,>=1.2 ; extra == 'duckdb'
53
+ Requires-Dist: duckdb <1.5,>=1.2 ; extra == 'duckdb'
54
54
  Requires-Dist: pandas <3,>=2 ; extra == 'duckdb'
55
55
  Provides-Extra: openai
56
56
  Requires-Dist: openai <2,>=1.30 ; extra == 'openai'
@@ -1,20 +1,20 @@
1
1
  sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
2
- sqlframe/_version.py,sha256=dgLbChf8wsQ5H9o1FXadKfn_qB0pcaOHICUJO8Rhj6U,714
2
+ sqlframe/_version.py,sha256=GkrwqhnbavsDQW5LQsruCWe67_xixzoHIDhkzjlbf38,714
3
3
  sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
4
4
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
6
6
  sqlframe/base/catalog.py,sha256=-YulM2BMK8MoWbXi05AsJIPxd4AuiZDBCZuk4HoeMlE,38900
7
7
  sqlframe/base/column.py,sha256=f6rK6-hTiNx9WwJP7t6tqL3xEC2gwERPDlhWCS5iCBw,21417
8
- sqlframe/base/dataframe.py,sha256=Kl3WycARIWBBIze0enmZDGkfOt65mZDQ2hx_6pxRsxI,87329
8
+ sqlframe/base/dataframe.py,sha256=wT3R2Qmq4edOefxXCRepiMDNJfVOIvSf6eHd6GOn4i0,88543
9
9
  sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
10
10
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
11
11
  sqlframe/base/function_alternatives.py,sha256=aTu3nQhIAkZoxrI1IpjpaHEAMxBNms0AnhS0EMR-TwY,51727
12
- sqlframe/base/functions.py,sha256=vlPGxKlgU1oFmXD8WDClpZlDvvC4L4q5qJs4qrG9xjw,228101
12
+ sqlframe/base/functions.py,sha256=QaCeMMBLz69LE-73x4ksXN6NbZlsshYADg-F8yRXTPA,228816
13
13
  sqlframe/base/group.py,sha256=fBm8EUve7W7xz11nybTXr09ih-yZxL_vvEiZVE1eb_0,12025
14
14
  sqlframe/base/normalize.py,sha256=YPeopWr8ZRjevArYfrM-DZBkQp4t4UfAEwynoj4VvcU,11773
15
15
  sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
16
16
  sqlframe/base/readerwriter.py,sha256=b1CZgOZv-8h0sC3PWqPVAwAwlDMjpmRys6FGhugKspU,31391
17
- sqlframe/base/session.py,sha256=99X-ShK9ohHCX6WdIJs0HhjfK23snaE3Gv6RYc5wqUI,27687
17
+ sqlframe/base/session.py,sha256=jMm18v5MYW7Z61pXda-yd-WNYYwnYU9i2jIaT8gKSvA,27687
18
18
  sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
19
19
  sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
20
20
  sqlframe/base/types.py,sha256=OktuJ5f7tEogOW0oupI0RBlHfzZMmKh7zGLke9cwllo,12305
@@ -55,7 +55,7 @@ sqlframe/databricks/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0
55
55
  sqlframe/duckdb/__init__.py,sha256=KAw_uZEhFMwi3D9Wj6AgHAKqLNk-EAx2uDIYu56oL44,872
56
56
  sqlframe/duckdb/catalog.py,sha256=89FCSJglMbOxonk3IXmlkMcdXCfMdePpGfqlbkkB_d0,5107
57
57
  sqlframe/duckdb/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
58
- sqlframe/duckdb/dataframe.py,sha256=Z8_K69UQGZVeBfVGXVwIJP8OMuIvNBB3DPKTP3Lfu4w,1908
58
+ sqlframe/duckdb/dataframe.py,sha256=G99muPe0kUV6pBO4nx5zzcjb4H1s9JA0WzUUX8_vjE0,1920
59
59
  sqlframe/duckdb/functions.py,sha256=ix2efGGD4HLaY1rtCtEd3IrsicGEVGiBAeKOo5OD8rA,424
60
60
  sqlframe/duckdb/functions.pyi,sha256=hDjpT-tGDO8LyElcno5YYRUnJg1dXXbGcRjJ69Zqk_U,12542
61
61
  sqlframe/duckdb/group.py,sha256=IkhbW42Ng1U5YT3FkIdiB4zBqRkW4QyTb-1detY1e_4,383
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
130
130
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
131
131
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
132
132
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
133
- sqlframe-3.41.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
134
- sqlframe-3.41.0.dist-info/METADATA,sha256=O8Y62mZw3zncgCc6RWb8i4_zHyeNySrjl6AeQV_tflc,9070
135
- sqlframe-3.41.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
136
- sqlframe-3.41.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
137
- sqlframe-3.41.0.dist-info/RECORD,,
133
+ sqlframe-3.43.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
134
+ sqlframe-3.43.0.dist-info/METADATA,sha256=-M-YLCPxdylEzhy7aPCDjVQc4lkJN47zxzJJdWmvVUo,9070
135
+ sqlframe-3.43.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
136
+ sqlframe-3.43.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
137
+ sqlframe-3.43.0.dist-info/RECORD,,