sqlframe 3.41.0__py3-none-any.whl → 3.43.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +3 -3
- sqlframe/base/dataframe.py +27 -3
- sqlframe/base/functions.py +34 -13
- sqlframe/base/session.py +1 -1
- sqlframe/duckdb/dataframe.py +1 -1
- {sqlframe-3.41.0.dist-info → sqlframe-3.43.0.dist-info}/METADATA +4 -4
- {sqlframe-3.41.0.dist-info → sqlframe-3.43.0.dist-info}/RECORD +10 -10
- {sqlframe-3.41.0.dist-info → sqlframe-3.43.0.dist-info}/LICENSE +0 -0
- {sqlframe-3.41.0.dist-info → sqlframe-3.43.0.dist-info}/WHEEL +0 -0
- {sqlframe-3.41.0.dist-info → sqlframe-3.43.0.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
28
28
|
commit_id: COMMIT_ID
|
29
29
|
__commit_id__: COMMIT_ID
|
30
30
|
|
31
|
-
__version__ = version = '3.41.0'
|
32
|
-
__version_tuple__ = version_tuple = (3, 41, 0)
|
31
|
+
__version__ = version = '3.43.0'
|
32
|
+
__version_tuple__ = version_tuple = (3, 43, 0)
|
33
33
|
|
34
|
-
__commit_id__ = commit_id = '
|
34
|
+
__commit_id__ = commit_id = 'g38d15d998'
|
sqlframe/base/dataframe.py
CHANGED
@@ -1665,10 +1665,34 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1665
1665
|
def should_drop_expression(expr: exp.Expression) -> bool:
|
1666
1666
|
# Check against fully qualified Column objects and
|
1667
1667
|
# Check against unqualified string column names (drop ALL columns with this name)
|
1668
|
-
if expr.sql() in drop_sql or (
|
1669
|
-
isinstance(expr, exp.Column) and expr.alias_or_name in column_names
|
1670
|
-
):
|
1668
|
+
if expr.sql() in drop_sql:
|
1671
1669
|
return True
|
1670
|
+
|
1671
|
+
if isinstance(expr, exp.Column) and (alias_or_name := expr.alias_or_name):
|
1672
|
+
# Check direct match first
|
1673
|
+
if alias_or_name in column_names:
|
1674
|
+
return True
|
1675
|
+
|
1676
|
+
# Handle string column references that contain aliases
|
1677
|
+
for col_name in column_names:
|
1678
|
+
if ("." in col_name) and alias_or_name == (col_name.split(".", maxsplit=1)[-1]):
|
1679
|
+
# Extract the column name part after the last dot
|
1680
|
+
return True
|
1681
|
+
|
1682
|
+
# Handle case where normalized columns have table qualifiers but actual expressions
|
1683
|
+
# are unqualified. This happens when using aliased column references like
|
1684
|
+
# f.col('df.foo')
|
1685
|
+
|
1686
|
+
# Check if any drop column matches by column name AND table qualifier
|
1687
|
+
for drop_col in drop_cols:
|
1688
|
+
if ((drop_expression := drop_col.expression).alias_or_name) == alias_or_name:
|
1689
|
+
if expr_table := expr.table:
|
1690
|
+
drop_table = drop_expression.args.get("table")
|
1691
|
+
if (not drop_table) or (expr_table == drop_table):
|
1692
|
+
return True
|
1693
|
+
else:
|
1694
|
+
return True
|
1695
|
+
|
1672
1696
|
return False
|
1673
1697
|
|
1674
1698
|
new_expressions = [expr for expr in current_expressions if not should_drop_expression(expr)]
|
sqlframe/base/functions.py
CHANGED
@@ -1974,8 +1974,30 @@ def regexp_replace(
|
|
1974
1974
|
)
|
1975
1975
|
|
1976
1976
|
|
1977
|
-
@meta(
|
1977
|
+
@meta()
|
1978
1978
|
def initcap(col: ColumnOrName) -> Column:
|
1979
|
+
session = _get_session()
|
1980
|
+
|
1981
|
+
if session._is_duckdb:
|
1982
|
+
split_func = get_func_from_session("split")
|
1983
|
+
transform_func = get_func_from_session("transform")
|
1984
|
+
reduce_func = get_func_from_session("reduce")
|
1985
|
+
upper_func = get_func_from_session("upper")
|
1986
|
+
lower_func = get_func_from_session("lower")
|
1987
|
+
length_func = get_func_from_session("length")
|
1988
|
+
concat_func = get_func_from_session("concat")
|
1989
|
+
concat_ws_func = get_func_from_session("concat_ws")
|
1990
|
+
return reduce_func(
|
1991
|
+
transform_func(
|
1992
|
+
split_func(col, r"\s+"),
|
1993
|
+
lambda w: concat_func(
|
1994
|
+
upper_func(w.substr(1, 1)), lower_func(w.substr(2, length_func(w) - 1))
|
1995
|
+
),
|
1996
|
+
),
|
1997
|
+
None,
|
1998
|
+
merge=lambda x, y: concat_ws_func(" ", x, y),
|
1999
|
+
)
|
2000
|
+
|
1979
2001
|
return Column.invoke_expression_over_column(col, expression.Initcap)
|
1980
2002
|
|
1981
2003
|
|
@@ -2686,7 +2708,7 @@ def from_csv(
|
|
2686
2708
|
return Column.invoke_anonymous_function(col, "FROM_CSV", schema)
|
2687
2709
|
|
2688
2710
|
|
2689
|
-
@meta(unsupported_engines=["bigquery", "
|
2711
|
+
@meta(unsupported_engines=["bigquery", "postgres", "snowflake"])
|
2690
2712
|
def aggregate(
|
2691
2713
|
col: ColumnOrName,
|
2692
2714
|
initialValue: ColumnOrName,
|
@@ -2694,21 +2716,20 @@ def aggregate(
|
|
2694
2716
|
finish: t.Optional[t.Callable[[Column], Column]] = None,
|
2695
2717
|
) -> Column:
|
2696
2718
|
merge_exp = _get_lambda_from_func(merge)
|
2719
|
+
kwargs = dict(
|
2720
|
+
initial=initialValue,
|
2721
|
+
merge=merge_exp,
|
2722
|
+
)
|
2723
|
+
session = _get_session()
|
2697
2724
|
if finish is not None:
|
2698
2725
|
finish_exp = _get_lambda_from_func(finish)
|
2699
|
-
|
2700
|
-
|
2701
|
-
|
2702
|
-
|
2703
|
-
merge=Column(merge_exp),
|
2704
|
-
finish=Column(finish_exp),
|
2705
|
-
)
|
2706
|
-
return Column.invoke_expression_over_column(
|
2707
|
-
col, expression.Reduce, initial=initialValue, merge=Column(merge_exp)
|
2708
|
-
)
|
2726
|
+
kwargs["finish"] = Column(finish_exp)
|
2727
|
+
if session._is_duckdb:
|
2728
|
+
kwargs.pop("initial", None)
|
2729
|
+
return Column.invoke_expression_over_column(col, expression.Reduce, **kwargs)
|
2709
2730
|
|
2710
2731
|
|
2711
|
-
@meta(unsupported_engines=
|
2732
|
+
@meta(unsupported_engines="postgres")
|
2712
2733
|
def transform(
|
2713
2734
|
col: ColumnOrName,
|
2714
2735
|
f: t.Union[t.Callable[[Column], Column], t.Callable[[Column, Column], Column]],
|
sqlframe/base/session.py
CHANGED
@@ -437,7 +437,7 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
|
|
437
437
|
for cte in df.expression.ctes:
|
438
438
|
if cte.alias_or_name not in expression_ctes:
|
439
439
|
ctes_to_add.append(cte)
|
440
|
-
expression.set("with", exp.With(expressions=expression.ctes
|
440
|
+
expression.set("with", exp.With(expressions=ctes_to_add + expression.ctes)) # type: ignore
|
441
441
|
|
442
442
|
def replace_temp_view_name_with_cte(node: exp.Expression) -> exp.Expression:
|
443
443
|
if isinstance(node, exp.Table):
|
sqlframe/duckdb/dataframe.py
CHANGED
@@ -61,5 +61,5 @@ class DuckDBDataFrame(
|
|
61
61
|
def toArrow(self, batch_size: t.Optional[int] = None) -> t.Union[ArrowTable, RecordBatchReader]:
|
62
62
|
self._collect(skip_rows=True)
|
63
63
|
if not batch_size:
|
64
|
-
return self.session._last_result.
|
64
|
+
return self.session._last_result.fetch_arrow_table()
|
65
65
|
return self.session._last_result.fetch_record_batch(batch_size)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sqlframe
|
3
|
-
Version: 3.41.0
|
3
|
+
Version: 3.43.0
|
4
4
|
Summary: Turning PySpark Into a Universal DataFrame API
|
5
5
|
Home-page: https://github.com/eakmanrq/sqlframe
|
6
6
|
Author: Ryan Eakman
|
@@ -18,7 +18,7 @@ Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
19
19
|
Requires-Dist: more-itertools
|
20
20
|
Requires-Dist: prettytable <4
|
21
|
-
Requires-Dist: sqlglot <27.
|
21
|
+
Requires-Dist: sqlglot <27.16,>=24.0.0
|
22
22
|
Requires-Dist: typing-extensions
|
23
23
|
Provides-Extra: bigquery
|
24
24
|
Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
|
@@ -26,7 +26,7 @@ Requires-Dist: google-cloud-bigquery[pandas] <4,>=3 ; extra == 'bigquery'
|
|
26
26
|
Provides-Extra: databricks
|
27
27
|
Requires-Dist: databricks-sql-connector[pyarrow] <5,>=3.6 ; extra == 'databricks'
|
28
28
|
Provides-Extra: dev
|
29
|
-
Requires-Dist: duckdb <1.
|
29
|
+
Requires-Dist: duckdb <1.5,>=1.2 ; extra == 'dev'
|
30
30
|
Requires-Dist: findspark <3,>=2 ; extra == 'dev'
|
31
31
|
Requires-Dist: mypy <1.19,>=1.10.0 ; extra == 'dev'
|
32
32
|
Requires-Dist: openai <2,>=1.30 ; extra == 'dev'
|
@@ -50,7 +50,7 @@ Requires-Dist: mkdocs-material ==9.0.5 ; extra == 'docs'
|
|
50
50
|
Requires-Dist: mkdocs ==1.4.2 ; extra == 'docs'
|
51
51
|
Requires-Dist: pymdown-extensions ; extra == 'docs'
|
52
52
|
Provides-Extra: duckdb
|
53
|
-
Requires-Dist: duckdb <1.
|
53
|
+
Requires-Dist: duckdb <1.5,>=1.2 ; extra == 'duckdb'
|
54
54
|
Requires-Dist: pandas <3,>=2 ; extra == 'duckdb'
|
55
55
|
Provides-Extra: openai
|
56
56
|
Requires-Dist: openai <2,>=1.30 ; extra == 'openai'
|
@@ -1,20 +1,20 @@
|
|
1
1
|
sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
|
2
|
-
sqlframe/_version.py,sha256=
|
2
|
+
sqlframe/_version.py,sha256=GkrwqhnbavsDQW5LQsruCWe67_xixzoHIDhkzjlbf38,714
|
3
3
|
sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
|
4
4
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
6
6
|
sqlframe/base/catalog.py,sha256=-YulM2BMK8MoWbXi05AsJIPxd4AuiZDBCZuk4HoeMlE,38900
|
7
7
|
sqlframe/base/column.py,sha256=f6rK6-hTiNx9WwJP7t6tqL3xEC2gwERPDlhWCS5iCBw,21417
|
8
|
-
sqlframe/base/dataframe.py,sha256=
|
8
|
+
sqlframe/base/dataframe.py,sha256=wT3R2Qmq4edOefxXCRepiMDNJfVOIvSf6eHd6GOn4i0,88543
|
9
9
|
sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
|
10
10
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
11
11
|
sqlframe/base/function_alternatives.py,sha256=aTu3nQhIAkZoxrI1IpjpaHEAMxBNms0AnhS0EMR-TwY,51727
|
12
|
-
sqlframe/base/functions.py,sha256=
|
12
|
+
sqlframe/base/functions.py,sha256=QaCeMMBLz69LE-73x4ksXN6NbZlsshYADg-F8yRXTPA,228816
|
13
13
|
sqlframe/base/group.py,sha256=fBm8EUve7W7xz11nybTXr09ih-yZxL_vvEiZVE1eb_0,12025
|
14
14
|
sqlframe/base/normalize.py,sha256=YPeopWr8ZRjevArYfrM-DZBkQp4t4UfAEwynoj4VvcU,11773
|
15
15
|
sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
|
16
16
|
sqlframe/base/readerwriter.py,sha256=b1CZgOZv-8h0sC3PWqPVAwAwlDMjpmRys6FGhugKspU,31391
|
17
|
-
sqlframe/base/session.py,sha256=
|
17
|
+
sqlframe/base/session.py,sha256=jMm18v5MYW7Z61pXda-yd-WNYYwnYU9i2jIaT8gKSvA,27687
|
18
18
|
sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
|
19
19
|
sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
|
20
20
|
sqlframe/base/types.py,sha256=OktuJ5f7tEogOW0oupI0RBlHfzZMmKh7zGLke9cwllo,12305
|
@@ -55,7 +55,7 @@ sqlframe/databricks/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0
|
|
55
55
|
sqlframe/duckdb/__init__.py,sha256=KAw_uZEhFMwi3D9Wj6AgHAKqLNk-EAx2uDIYu56oL44,872
|
56
56
|
sqlframe/duckdb/catalog.py,sha256=89FCSJglMbOxonk3IXmlkMcdXCfMdePpGfqlbkkB_d0,5107
|
57
57
|
sqlframe/duckdb/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
|
58
|
-
sqlframe/duckdb/dataframe.py,sha256=
|
58
|
+
sqlframe/duckdb/dataframe.py,sha256=G99muPe0kUV6pBO4nx5zzcjb4H1s9JA0WzUUX8_vjE0,1920
|
59
59
|
sqlframe/duckdb/functions.py,sha256=ix2efGGD4HLaY1rtCtEd3IrsicGEVGiBAeKOo5OD8rA,424
|
60
60
|
sqlframe/duckdb/functions.pyi,sha256=hDjpT-tGDO8LyElcno5YYRUnJg1dXXbGcRjJ69Zqk_U,12542
|
61
61
|
sqlframe/duckdb/group.py,sha256=IkhbW42Ng1U5YT3FkIdiB4zBqRkW4QyTb-1detY1e_4,383
|
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
130
130
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
131
131
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
132
132
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
133
|
-
sqlframe-3.
|
134
|
-
sqlframe-3.
|
135
|
-
sqlframe-3.
|
136
|
-
sqlframe-3.
|
137
|
-
sqlframe-3.
|
133
|
+
sqlframe-3.43.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
134
|
+
sqlframe-3.43.0.dist-info/METADATA,sha256=-M-YLCPxdylEzhy7aPCDjVQc4lkJN47zxzJJdWmvVUo,9070
|
135
|
+
sqlframe-3.43.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
136
|
+
sqlframe-3.43.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
137
|
+
sqlframe-3.43.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|