sqlframe 3.36.0__py3-none-any.whl → 3.36.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/functions.py +7 -3
- sqlframe/base/group.py +51 -2
- sqlframe/base/mixins/table_mixins.py +18 -16
- {sqlframe-3.36.0.dist-info → sqlframe-3.36.2.dist-info}/METADATA +4 -4
- {sqlframe-3.36.0.dist-info → sqlframe-3.36.2.dist-info}/RECORD +9 -9
- {sqlframe-3.36.0.dist-info → sqlframe-3.36.2.dist-info}/LICENSE +0 -0
- {sqlframe-3.36.0.dist-info → sqlframe-3.36.2.dist-info}/WHEEL +0 -0
- {sqlframe-3.36.0.dist-info → sqlframe-3.36.2.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/functions.py
CHANGED
@@ -1796,7 +1796,9 @@ def substring_index(str: ColumnOrName, delim: str, count: int) -> Column:
|
|
1796
1796
|
if session._is_bigquery:
|
1797
1797
|
return substring_index_bgutil(str, delim, count)
|
1798
1798
|
|
1799
|
-
return Column.
|
1799
|
+
return Column.invoke_expression_over_column(
|
1800
|
+
str, expression.SubstringIndex, delimiter=lit(delim), count=lit(count)
|
1801
|
+
)
|
1800
1802
|
|
1801
1803
|
|
1802
1804
|
@meta(unsupported_engines="bigquery")
|
@@ -2205,7 +2207,9 @@ def slice(
|
|
2205
2207
|
|
2206
2208
|
start_col = lit(start) if isinstance(start, int) else start
|
2207
2209
|
length_col = lit(length) if isinstance(length, int) else length
|
2208
|
-
return Column.
|
2210
|
+
return Column.invoke_expression_over_column(
|
2211
|
+
x, expression.ArraySlice, start=start_col, end=length_col
|
2212
|
+
)
|
2209
2213
|
|
2210
2214
|
|
2211
2215
|
@meta()
|
@@ -2748,7 +2752,7 @@ def typeof(col: ColumnOrName) -> Column:
|
|
2748
2752
|
if session._is_snowflake:
|
2749
2753
|
return typeof_from_variant(col)
|
2750
2754
|
|
2751
|
-
return Column.
|
2755
|
+
return Column.invoke_expression_over_column(col, expression.Typeof)
|
2752
2756
|
|
2753
2757
|
|
2754
2758
|
@meta()
|
sqlframe/base/group.py
CHANGED
@@ -70,6 +70,11 @@ class _BaseGroupedData(t.Generic[DF]):
|
|
70
70
|
|
71
71
|
from sqlframe.base import functions as F
|
72
72
|
|
73
|
+
if self.session._is_snowflake and len(cols) > 1:
|
74
|
+
raise ValueError(
|
75
|
+
"Snowflake does not support multiple aggregation functions in a single group by operation."
|
76
|
+
)
|
77
|
+
|
73
78
|
# Build the pivot expression
|
74
79
|
# First, we need to convert the DataFrame to include the pivot logic
|
75
80
|
df = self._df.copy()
|
@@ -132,11 +137,55 @@ class _BaseGroupedData(t.Generic[DF]):
|
|
132
137
|
subquery.set("pivots", [pivot])
|
133
138
|
|
134
139
|
# Create the final select from the pivoted subquery
|
135
|
-
|
140
|
+
final_select_in_values = []
|
141
|
+
for col in in_values: # type: ignore
|
142
|
+
for agg_col in cols:
|
143
|
+
original_name = col.alias_or_name # type: ignore
|
144
|
+
if self.session._is_snowflake:
|
145
|
+
# Snowflake takes the provided values, like 'Java', and creates the column as "'Java'"
|
146
|
+
# Therefore, to select the column, the user would need to use "'Java'"
|
147
|
+
# This does not conform to the PySpark API, nor is it very user-friendly.
|
148
|
+
# Therefore, we select the column as expected, and tell SQLFrame it is case-sensitive, but then
|
149
|
+
# alias it to case-insensitive "Java" so that the user can select it without quotes.
|
150
|
+
# This has a downside that if a user really needed case-sensitive column names then it wouldn't work.
|
151
|
+
new_col = exp.to_column(
|
152
|
+
col.alias_or_name, # type: ignore
|
153
|
+
quoted=True,
|
154
|
+
dialect=self.session.execution_dialect,
|
155
|
+
)
|
156
|
+
new_col.this.set("this", f"'{new_col.this.this}'")
|
157
|
+
new_col = exp.alias_(new_col, original_name)
|
158
|
+
new_col.unalias()._meta = {"case_sensitive": True}
|
159
|
+
elif self.session._is_bigquery:
|
160
|
+
# BigQuery flips the alias order to <alias>_<value> instead of <value>_<alias>
|
161
|
+
new_col = exp.to_column(
|
162
|
+
f"{agg_col.alias_or_name}_{original_name}",
|
163
|
+
dialect=self.session.execution_dialect,
|
164
|
+
)
|
165
|
+
new_col = (
|
166
|
+
exp.alias_(new_col, original_name)
|
167
|
+
if len(cols) == 1
|
168
|
+
else exp.alias_(new_col, f"{original_name}_{agg_col.alias_or_name}")
|
169
|
+
)
|
170
|
+
elif self.session._is_duckdb:
|
171
|
+
# DuckDB always respects the alias, even if num_cols == 1
|
172
|
+
new_col = exp.column(f"{original_name}_{agg_col.expression.alias_or_name}")
|
173
|
+
if len(cols) == 1:
|
174
|
+
new_col = exp.alias_(new_col, original_name)
|
175
|
+
else:
|
176
|
+
new_col = (
|
177
|
+
exp.column(original_name)
|
178
|
+
if len(cols) == 1
|
179
|
+
else exp.column(f"{original_name}_{agg_col.expression.alias_or_name}")
|
180
|
+
)
|
181
|
+
final_select_in_values.append(new_col)
|
182
|
+
|
183
|
+
expression = exp.select(
|
184
|
+
*[x.column_expression for x in self.group_by_cols] + final_select_in_values # type: ignore
|
185
|
+
).from_(subquery)
|
136
186
|
|
137
187
|
return self._df.copy(expression=expression)
|
138
188
|
|
139
|
-
# Original non-pivot logic
|
140
189
|
if not self.group_by_cols or not isinstance(self.group_by_cols[0], (list, tuple, set)):
|
141
190
|
expression = self._df.expression.group_by(
|
142
191
|
# Use column_expression for group by to avoid alias in group by
|
sqlframe/base/mixins/table_mixins.py
CHANGED
@@ -3,6 +3,7 @@ import logging
|
|
3
3
|
import typing as t
|
4
4
|
|
5
5
|
from sqlglot import exp
|
6
|
+
from typing_extensions import ParamSpec
|
6
7
|
|
7
8
|
try:
|
8
9
|
from sqlglot.expressions import Whens
|
@@ -28,21 +29,22 @@ if t.TYPE_CHECKING:
|
|
28
29
|
logger = logging.getLogger(__name__)
|
29
30
|
|
30
31
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
def wrapper(self: _BaseTable, *args, **kwargs) -> t.Any:
|
35
|
-
if len(self.expression.ctes) > 0:
|
36
|
-
return func(self, *args, **kwargs) # type: ignore
|
37
|
-
self_class = self.__class__
|
38
|
-
self = self._convert_leaf_to_cte()
|
39
|
-
self = self_class(**object_to_dict(self))
|
40
|
-
return func(self, *args, **kwargs) # type: ignore
|
32
|
+
P = ParamSpec("P")
|
33
|
+
T = t.TypeVar("T")
|
34
|
+
|
41
35
|
|
42
|
-
|
43
|
-
|
36
|
+
def ensure_cte(func: t.Callable[P, T]) -> t.Callable[P, T]:
|
37
|
+
@functools.wraps(func)
|
38
|
+
def wrapper(self: _BaseTable, *args: P.args, **kwargs: P.kwargs) -> T:
|
39
|
+
if len(self.expression.ctes) > 0:
|
40
|
+
return func(self, *args, **kwargs) # type: ignore
|
41
|
+
self_class = self.__class__
|
42
|
+
self = self._convert_leaf_to_cte()
|
43
|
+
self = self_class(**object_to_dict(self))
|
44
|
+
return func(self, *args, **kwargs) # type: ignore
|
44
45
|
|
45
|
-
|
46
|
+
wrapper.__wrapped__ = func # type: ignore
|
47
|
+
return wrapper # type: ignore[return-value]
|
46
48
|
|
47
49
|
|
48
50
|
class _BaseTableMixins(_BaseTable, t.Generic[DF]):
|
@@ -68,7 +70,7 @@ class _BaseTableMixins(_BaseTable, t.Generic[DF]):
|
|
68
70
|
|
69
71
|
|
70
72
|
class UpdateSupportMixin(_BaseTableMixins, t.Generic[DF]):
|
71
|
-
@ensure_cte
|
73
|
+
@ensure_cte
|
72
74
|
def update(
|
73
75
|
self,
|
74
76
|
set_: t.Dict[t.Union[Column, str], t.Union[Column, "ColumnOrLiteral", exp.Expression]],
|
@@ -119,7 +121,7 @@ class UpdateSupportMixin(_BaseTableMixins, t.Generic[DF]):
|
|
119
121
|
|
120
122
|
|
121
123
|
class DeleteSupportMixin(_BaseTableMixins, t.Generic[DF]):
|
122
|
-
@ensure_cte
|
124
|
+
@ensure_cte
|
123
125
|
def delete(
|
124
126
|
self,
|
125
127
|
where: t.Optional[t.Union[Column, str, bool]] = None,
|
@@ -141,7 +143,7 @@ class MergeSupportMixin(_BaseTable, t.Generic[DF]):
|
|
141
143
|
]
|
142
144
|
_merge_support_star: bool
|
143
145
|
|
144
|
-
@ensure_cte
|
146
|
+
@ensure_cte
|
145
147
|
def merge(
|
146
148
|
self,
|
147
149
|
other_df: DF,
|
{sqlframe-3.36.0.dist-info → sqlframe-3.36.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sqlframe
|
3
|
-
Version: 3.36.0
|
3
|
+
Version: 3.36.2
|
4
4
|
Summary: Turning PySpark Into a Universal DataFrame API
|
5
5
|
Home-page: https://github.com/eakmanrq/sqlframe
|
6
6
|
Author: Ryan Eakman
|
@@ -17,7 +17,7 @@ Requires-Python: >=3.9
|
|
17
17
|
Description-Content-Type: text/markdown
|
18
18
|
License-File: LICENSE
|
19
19
|
Requires-Dist: prettytable <4
|
20
|
-
Requires-Dist: sqlglot <26.
|
20
|
+
Requires-Dist: sqlglot <26.34,>=24.0.0
|
21
21
|
Requires-Dist: typing-extensions
|
22
22
|
Provides-Extra: bigquery
|
23
23
|
Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
|
@@ -37,7 +37,7 @@ Requires-Dist: pyarrow <21,>=10 ; extra == 'dev'
|
|
37
37
|
Requires-Dist: pyspark <3.6,>=2 ; extra == 'dev'
|
38
38
|
Requires-Dist: pytest-forked ; extra == 'dev'
|
39
39
|
Requires-Dist: pytest-postgresql <8,>=6 ; extra == 'dev'
|
40
|
-
Requires-Dist: pytest-xdist <3.
|
40
|
+
Requires-Dist: pytest-xdist <3.9,>=3.6 ; extra == 'dev'
|
41
41
|
Requires-Dist: pytest <8.5,>=8.2.0 ; extra == 'dev'
|
42
42
|
Requires-Dist: ruff <0.13,>=0.4.4 ; extra == 'dev'
|
43
43
|
Requires-Dist: types-psycopg2 <3,>=2.9 ; extra == 'dev'
|
@@ -59,7 +59,7 @@ Requires-Dist: psycopg2 <3,>=2.8 ; extra == 'postgres'
|
|
59
59
|
Provides-Extra: redshift
|
60
60
|
Requires-Dist: redshift-connector <2.2.0,>=2.1.1 ; extra == 'redshift'
|
61
61
|
Provides-Extra: snowflake
|
62
|
-
Requires-Dist: snowflake-connector-python[secure-local-storage] <3.
|
62
|
+
Requires-Dist: snowflake-connector-python[secure-local-storage] <3.17,>=3.10.0 ; extra == 'snowflake'
|
63
63
|
Provides-Extra: spark
|
64
64
|
Requires-Dist: pyspark <3.6,>=2 ; extra == 'spark'
|
65
65
|
|
{sqlframe-3.36.0.dist-info → sqlframe-3.36.2.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
|
2
|
-
sqlframe/_version.py,sha256=
|
2
|
+
sqlframe/_version.py,sha256=8n9kfLZeiKlOVTLEVFfs0B2MLJQ8xc2SyKauUUuFT3s,513
|
3
3
|
sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
|
4
4
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
@@ -9,8 +9,8 @@ sqlframe/base/dataframe.py,sha256=0diYONDlet8iZt49LC3vcmfXHAAZ2MovPL2pTXYHj2U,85
|
|
9
9
|
sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
|
10
10
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
11
11
|
sqlframe/base/function_alternatives.py,sha256=aTu3nQhIAkZoxrI1IpjpaHEAMxBNms0AnhS0EMR-TwY,51727
|
12
|
-
sqlframe/base/functions.py,sha256=
|
13
|
-
sqlframe/base/group.py,sha256=
|
12
|
+
sqlframe/base/functions.py,sha256=n1MsfJt2WWUk7-YwbfByWG065g0W45AwJVIME5H-QJU,225875
|
13
|
+
sqlframe/base/group.py,sha256=fBm8EUve7W7xz11nybTXr09ih-yZxL_vvEiZVE1eb_0,12025
|
14
14
|
sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
|
15
15
|
sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
|
16
16
|
sqlframe/base/readerwriter.py,sha256=Nb2VJ_HBmLQp5mK8JhnFooZh2ydAaboCAFVPb-4MNX4,31241
|
@@ -25,7 +25,7 @@ sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
|
|
25
25
|
sqlframe/base/mixins/catalog_mixins.py,sha256=9fZGWToz9xMJSzUl1vsVtj6TH3TysP3fBCKJLnGUQzE,23353
|
26
26
|
sqlframe/base/mixins/dataframe_mixins.py,sha256=8D2AFtfc0tj9Q5qzlNAXdXOYw9RuD8kpe8wixo8pY5o,1534
|
27
27
|
sqlframe/base/mixins/readwriter_mixins.py,sha256=ItQ_0jZ5RljgmLjGDIzLMRP_NQdy3wAyKwJ6K5NjaqA,4954
|
28
|
-
sqlframe/base/mixins/table_mixins.py,sha256=
|
28
|
+
sqlframe/base/mixins/table_mixins.py,sha256=3MhsOARkplwED1GRD0wq1vR8GNuop34kt3Jg8MATIjc,13791
|
29
29
|
sqlframe/bigquery/__init__.py,sha256=kbaomhYAANPdxeDQhajv8IHfMg_ENKivtYK-rPwaV08,939
|
30
30
|
sqlframe/bigquery/catalog.py,sha256=Dcpp1JKftc3ukdYpn6M1ujqixA-6_1k8aY21Y5Johyc,11899
|
31
31
|
sqlframe/bigquery/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
|
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
130
130
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
131
131
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
132
132
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
133
|
-
sqlframe-3.36.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
134
|
-
sqlframe-3.36.
|
135
|
-
sqlframe-3.36.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
136
|
-
sqlframe-3.36.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
137
|
-
sqlframe-3.36.0.dist-info/RECORD,,
|
133
|
+
sqlframe-3.36.2.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
134
|
+
sqlframe-3.36.2.dist-info/METADATA,sha256=G40goRUAdQg115DFuLq6-RYZ_6OSyBJ0zRsPHu7mhMQ,8987
|
135
|
+
sqlframe-3.36.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
136
|
+
sqlframe-3.36.2.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
137
|
+
sqlframe-3.36.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|