sqlframe 3.42.0__py3-none-any.whl → 3.43.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +3 -3
- sqlframe/base/dataframe.py +27 -3
- sqlframe/base/functions.py +26 -6
- sqlframe/base/session.py +1 -1
- sqlframe/duckdb/dataframe.py +1 -1
- {sqlframe-3.42.0.dist-info → sqlframe-3.43.1.dist-info}/METADATA +4 -4
- {sqlframe-3.42.0.dist-info → sqlframe-3.43.1.dist-info}/RECORD +10 -10
- {sqlframe-3.42.0.dist-info → sqlframe-3.43.1.dist-info}/LICENSE +0 -0
- {sqlframe-3.42.0.dist-info → sqlframe-3.43.1.dist-info}/WHEEL +0 -0
- {sqlframe-3.42.0.dist-info → sqlframe-3.43.1.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
28
28
|
commit_id: COMMIT_ID
|
29
29
|
__commit_id__: COMMIT_ID
|
30
30
|
|
31
|
-
__version__ = version = '3.
|
32
|
-
__version_tuple__ = version_tuple = (3,
|
31
|
+
__version__ = version = '3.43.1'
|
32
|
+
__version_tuple__ = version_tuple = (3, 43, 1)
|
33
33
|
|
34
|
-
__commit_id__ = commit_id = '
|
34
|
+
__commit_id__ = commit_id = 'g07ceeb0ec'
|
sqlframe/base/dataframe.py
CHANGED
@@ -1665,10 +1665,34 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1665
1665
|
def should_drop_expression(expr: exp.Expression) -> bool:
|
1666
1666
|
# Check against fully qualified Column objects and
|
1667
1667
|
# Check against unqualified string column names (drop ALL columns with this name)
|
1668
|
-
if expr.sql() in drop_sql
|
1669
|
-
isinstance(expr, exp.Column) and expr.alias_or_name in column_names
|
1670
|
-
):
|
1668
|
+
if expr.sql() in drop_sql:
|
1671
1669
|
return True
|
1670
|
+
|
1671
|
+
if isinstance(expr, exp.Column) and (alias_or_name := expr.alias_or_name):
|
1672
|
+
# Check direct match first
|
1673
|
+
if alias_or_name in column_names:
|
1674
|
+
return True
|
1675
|
+
|
1676
|
+
# Handle string column references that contain aliases
|
1677
|
+
for col_name in column_names:
|
1678
|
+
if ("." in col_name) and alias_or_name == (col_name.split(".", maxsplit=1)[-1]):
|
1679
|
+
# Matched on the part of col_name after the first dot (split with maxsplit=1)
|
1680
|
+
return True
|
1681
|
+
|
1682
|
+
# Handle case where normalized columns have table qualifiers but actual expressions
|
1683
|
+
# are unqualified. This happens when using aliased column references like
|
1684
|
+
# f.col('df.foo')
|
1685
|
+
|
1686
|
+
# Check if any drop column matches by column name AND table qualifier
|
1687
|
+
for drop_col in drop_cols:
|
1688
|
+
if ((drop_expression := drop_col.expression).alias_or_name) == alias_or_name:
|
1689
|
+
if expr_table := expr.table:
|
1690
|
+
drop_table = drop_expression.args.get("table")
|
1691
|
+
if (not drop_table) or (expr_table == drop_table):
|
1692
|
+
return True
|
1693
|
+
else:
|
1694
|
+
return True
|
1695
|
+
|
1672
1696
|
return False
|
1673
1697
|
|
1674
1698
|
new_expressions = [expr for expr in current_expressions if not should_drop_expression(expr)]
|
sqlframe/base/functions.py
CHANGED
@@ -2729,7 +2729,7 @@ def aggregate(
|
|
2729
2729
|
return Column.invoke_expression_over_column(col, expression.Reduce, **kwargs)
|
2730
2730
|
|
2731
2731
|
|
2732
|
-
@meta(unsupported_engines="postgres")
|
2732
|
+
@meta(unsupported_engines=["bigquery", "postgres", "snowflake"])
|
2733
2733
|
def transform(
|
2734
2734
|
col: ColumnOrName,
|
2735
2735
|
f: t.Union[t.Callable[[Column], Column], t.Callable[[Column, Column], Column]],
|
@@ -7169,12 +7169,32 @@ def _lambda_quoted(value: str) -> t.Optional[bool]:
|
|
7169
7169
|
|
7170
7170
|
@meta()
|
7171
7171
|
def _get_lambda_from_func(lambda_expression: t.Callable):
|
7172
|
-
|
7173
|
-
|
7174
|
-
|
7175
|
-
|
7172
|
+
import inspect
|
7173
|
+
|
7174
|
+
# Get the function signature
|
7175
|
+
sig = inspect.signature(lambda_expression)
|
7176
|
+
param_names = list(sig.parameters.keys())
|
7177
|
+
|
7178
|
+
# Check if this looks like a column function (single 'col' parameter)
|
7179
|
+
if len(param_names) == 1 and param_names[0] in ["col", "column"]:
|
7180
|
+
# Wrap column functions to work with transform
|
7181
|
+
variables = [expression.to_identifier("x", quoted=_lambda_quoted("x"))]
|
7182
|
+
result = lambda_expression(Column("x"))
|
7183
|
+
return expression.Lambda(
|
7184
|
+
this=result.column_expression,
|
7185
|
+
expressions=variables,
|
7186
|
+
)
|
7187
|
+
|
7188
|
+
# Handle regular functions and lambdas
|
7189
|
+
var_names = lambda_expression.__code__.co_varnames[: lambda_expression.__code__.co_argcount]
|
7190
|
+
|
7191
|
+
variables = [expression.to_identifier(x, quoted=_lambda_quoted(x)) for x in var_names]
|
7192
|
+
|
7193
|
+
# Call with Column objects for each parameter
|
7194
|
+
result = lambda_expression(*[Column(x) for x in var_names])
|
7195
|
+
|
7176
7196
|
return expression.Lambda(
|
7177
|
-
this=
|
7197
|
+
this=result.column_expression,
|
7178
7198
|
expressions=variables,
|
7179
7199
|
)
|
7180
7200
|
|
sqlframe/base/session.py
CHANGED
@@ -437,7 +437,7 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
|
|
437
437
|
for cte in df.expression.ctes:
|
438
438
|
if cte.alias_or_name not in expression_ctes:
|
439
439
|
ctes_to_add.append(cte)
|
440
|
-
expression.set("with", exp.With(expressions=expression.ctes
|
440
|
+
expression.set("with", exp.With(expressions=ctes_to_add + expression.ctes)) # type: ignore
|
441
441
|
|
442
442
|
def replace_temp_view_name_with_cte(node: exp.Expression) -> exp.Expression:
|
443
443
|
if isinstance(node, exp.Table):
|
sqlframe/duckdb/dataframe.py
CHANGED
@@ -61,5 +61,5 @@ class DuckDBDataFrame(
|
|
61
61
|
def toArrow(self, batch_size: t.Optional[int] = None) -> t.Union[ArrowTable, RecordBatchReader]:
|
62
62
|
self._collect(skip_rows=True)
|
63
63
|
if not batch_size:
|
64
|
-
return self.session._last_result.
|
64
|
+
return self.session._last_result.fetch_arrow_table()
|
65
65
|
return self.session._last_result.fetch_record_batch(batch_size)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sqlframe
|
3
|
-
Version: 3.
|
3
|
+
Version: 3.43.1
|
4
4
|
Summary: Turning PySpark Into a Universal DataFrame API
|
5
5
|
Home-page: https://github.com/eakmanrq/sqlframe
|
6
6
|
Author: Ryan Eakman
|
@@ -18,7 +18,7 @@ Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
19
19
|
Requires-Dist: more-itertools
|
20
20
|
Requires-Dist: prettytable <4
|
21
|
-
Requires-Dist: sqlglot <27.
|
21
|
+
Requires-Dist: sqlglot <27.17,>=24.0.0
|
22
22
|
Requires-Dist: typing-extensions
|
23
23
|
Provides-Extra: bigquery
|
24
24
|
Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
|
@@ -26,7 +26,7 @@ Requires-Dist: google-cloud-bigquery[pandas] <4,>=3 ; extra == 'bigquery'
|
|
26
26
|
Provides-Extra: databricks
|
27
27
|
Requires-Dist: databricks-sql-connector[pyarrow] <5,>=3.6 ; extra == 'databricks'
|
28
28
|
Provides-Extra: dev
|
29
|
-
Requires-Dist: duckdb <1.
|
29
|
+
Requires-Dist: duckdb <1.5,>=1.2 ; extra == 'dev'
|
30
30
|
Requires-Dist: findspark <3,>=2 ; extra == 'dev'
|
31
31
|
Requires-Dist: mypy <1.19,>=1.10.0 ; extra == 'dev'
|
32
32
|
Requires-Dist: openai <2,>=1.30 ; extra == 'dev'
|
@@ -50,7 +50,7 @@ Requires-Dist: mkdocs-material ==9.0.5 ; extra == 'docs'
|
|
50
50
|
Requires-Dist: mkdocs ==1.4.2 ; extra == 'docs'
|
51
51
|
Requires-Dist: pymdown-extensions ; extra == 'docs'
|
52
52
|
Provides-Extra: duckdb
|
53
|
-
Requires-Dist: duckdb <1.
|
53
|
+
Requires-Dist: duckdb <1.5,>=1.2 ; extra == 'duckdb'
|
54
54
|
Requires-Dist: pandas <3,>=2 ; extra == 'duckdb'
|
55
55
|
Provides-Extra: openai
|
56
56
|
Requires-Dist: openai <2,>=1.30 ; extra == 'openai'
|
@@ -1,20 +1,20 @@
|
|
1
1
|
sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
|
2
|
-
sqlframe/_version.py,sha256=
|
2
|
+
sqlframe/_version.py,sha256=J2b9R7RcIQKVNt66tZoZMlGnx0Hhbva3g6YxYwpILD4,714
|
3
3
|
sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
|
4
4
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
6
6
|
sqlframe/base/catalog.py,sha256=-YulM2BMK8MoWbXi05AsJIPxd4AuiZDBCZuk4HoeMlE,38900
|
7
7
|
sqlframe/base/column.py,sha256=f6rK6-hTiNx9WwJP7t6tqL3xEC2gwERPDlhWCS5iCBw,21417
|
8
|
-
sqlframe/base/dataframe.py,sha256=
|
8
|
+
sqlframe/base/dataframe.py,sha256=wT3R2Qmq4edOefxXCRepiMDNJfVOIvSf6eHd6GOn4i0,88543
|
9
9
|
sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
|
10
10
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
11
11
|
sqlframe/base/function_alternatives.py,sha256=aTu3nQhIAkZoxrI1IpjpaHEAMxBNms0AnhS0EMR-TwY,51727
|
12
|
-
sqlframe/base/functions.py,sha256=
|
12
|
+
sqlframe/base/functions.py,sha256=bA6LuduIBcBgG9mE1GXmrO0zxcPV_Yx8pytRoqOZ0Zo,229602
|
13
13
|
sqlframe/base/group.py,sha256=fBm8EUve7W7xz11nybTXr09ih-yZxL_vvEiZVE1eb_0,12025
|
14
14
|
sqlframe/base/normalize.py,sha256=YPeopWr8ZRjevArYfrM-DZBkQp4t4UfAEwynoj4VvcU,11773
|
15
15
|
sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
|
16
16
|
sqlframe/base/readerwriter.py,sha256=b1CZgOZv-8h0sC3PWqPVAwAwlDMjpmRys6FGhugKspU,31391
|
17
|
-
sqlframe/base/session.py,sha256=
|
17
|
+
sqlframe/base/session.py,sha256=jMm18v5MYW7Z61pXda-yd-WNYYwnYU9i2jIaT8gKSvA,27687
|
18
18
|
sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
|
19
19
|
sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
|
20
20
|
sqlframe/base/types.py,sha256=OktuJ5f7tEogOW0oupI0RBlHfzZMmKh7zGLke9cwllo,12305
|
@@ -55,7 +55,7 @@ sqlframe/databricks/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0
|
|
55
55
|
sqlframe/duckdb/__init__.py,sha256=KAw_uZEhFMwi3D9Wj6AgHAKqLNk-EAx2uDIYu56oL44,872
|
56
56
|
sqlframe/duckdb/catalog.py,sha256=89FCSJglMbOxonk3IXmlkMcdXCfMdePpGfqlbkkB_d0,5107
|
57
57
|
sqlframe/duckdb/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
|
58
|
-
sqlframe/duckdb/dataframe.py,sha256=
|
58
|
+
sqlframe/duckdb/dataframe.py,sha256=G99muPe0kUV6pBO4nx5zzcjb4H1s9JA0WzUUX8_vjE0,1920
|
59
59
|
sqlframe/duckdb/functions.py,sha256=ix2efGGD4HLaY1rtCtEd3IrsicGEVGiBAeKOo5OD8rA,424
|
60
60
|
sqlframe/duckdb/functions.pyi,sha256=hDjpT-tGDO8LyElcno5YYRUnJg1dXXbGcRjJ69Zqk_U,12542
|
61
61
|
sqlframe/duckdb/group.py,sha256=IkhbW42Ng1U5YT3FkIdiB4zBqRkW4QyTb-1detY1e_4,383
|
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
130
130
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
131
131
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
132
132
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
133
|
-
sqlframe-3.
|
134
|
-
sqlframe-3.
|
135
|
-
sqlframe-3.
|
136
|
-
sqlframe-3.
|
137
|
-
sqlframe-3.
|
133
|
+
sqlframe-3.43.1.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
134
|
+
sqlframe-3.43.1.dist-info/METADATA,sha256=QRiDOgcTBZwCscawbAcGZzIXpzlWE1tfbysgGDLzOnU,9070
|
135
|
+
sqlframe-3.43.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
136
|
+
sqlframe-3.43.1.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
137
|
+
sqlframe-3.43.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|