sqlframe 3.36.1__py3-none-any.whl → 3.36.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/functions.py +33 -3
- sqlframe/base/group.py +51 -2
- {sqlframe-3.36.1.dist-info → sqlframe-3.36.3.dist-info}/METADATA +5 -4
- {sqlframe-3.36.1.dist-info → sqlframe-3.36.3.dist-info}/RECORD +8 -8
- {sqlframe-3.36.1.dist-info → sqlframe-3.36.3.dist-info}/LICENSE +0 -0
- {sqlframe-3.36.1.dist-info → sqlframe-3.36.3.dist-info}/WHEEL +0 -0
- {sqlframe-3.36.1.dist-info → sqlframe-3.36.3.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/functions.py
CHANGED
```diff
@@ -1475,6 +1475,32 @@ def timestamp_seconds(col: ColumnOrName) -> Column:
     return Column.invoke_expression_over_column(col, expression.UnixToTime)
 
 
+@meta()
+def timestamp_add(unit: str, quantity: ColumnOrName, ts: ColumnOrName) -> Column:
+    session = _get_session()
+
+    if session._is_duckdb or session._is_postgres:
+        quantity = lit(quantity) if isinstance(quantity, int) else quantity
+        if (
+            isinstance(quantity, Column)
+            and isinstance(quantity.expression, expression.Literal)
+            and quantity.expression.is_number
+            and int(quantity.expression.this) < 0
+        ):
+            # If quantity is a negative literal, we use DateSub
+            expr = expression.DateSub
+            quantity.expression.set("this", str(-int(quantity.expression.this)))
+        else:
+            expr = expression.DateAdd  # type: ignore
+        return Column.invoke_expression_over_column(
+            ts, expr, expression=quantity, unit=expression.Var(this=unit.upper())
+        )
+
+    return Column.invoke_expression_over_column(
+        ts, expression.TimestampAdd, expression=quantity, unit=expression.Var(this=unit.upper())
+    )
+
+
 @meta(unsupported_engines=["*", "spark"])
 def window(
     timeColumn: ColumnOrName,
```
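For context, a minimal sketch (not part of the diff) of how the two sqlglot node types involved here render to SQL; the exact output depends on the sqlglot version and dialect:

```python
# Hedged sketch: rendering sqlglot's DateAdd vs. TimestampAdd nodes,
# built the same way the new timestamp_add builds them.
from sqlglot import expressions as exp

date_add = exp.DateAdd(
    this=exp.column("ts"),
    expression=exp.Literal.number(2),
    unit=exp.Var(this="DAY"),
)
timestamp_add = exp.TimestampAdd(
    this=exp.column("ts"),
    expression=exp.Literal.number(2),
    unit=exp.Var(this="DAY"),
)

print(date_add.sql(dialect="duckdb"))      # e.g. ts + INTERVAL '2' DAY (version-dependent)
print(timestamp_add.sql(dialect="spark"))  # e.g. TIMESTAMPADD(DAY, 2, ts)
```

This is presumably why the DuckDB/Postgres branch flips a negative literal quantity into a positive `DateSub`: interval arithmetic generated from `DateAdd`/`DateSub` is portable on those engines, where a raw `TIMESTAMPADD` call may not be.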
```diff
@@ -1796,7 +1822,9 @@ def substring_index(str: ColumnOrName, delim: str, count: int) -> Column:
     if session._is_bigquery:
         return substring_index_bgutil(str, delim, count)
 
-    return Column.
+    return Column.invoke_expression_over_column(
+        str, expression.SubstringIndex, delimiter=lit(delim), count=lit(count)
+    )
 
 
 @meta(unsupported_engines="bigquery")
```
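A hedged usage sketch for the completed `substring_index`. PySpark semantics: return everything before the `count`-th occurrence of the delimiter (counting from the right when `count` is negative). The DuckDB session and per-engine `functions` module follow sqlframe's documented layout:

```python
# Hedged sketch, assuming an in-memory DuckDB-backed session.
from sqlframe.duckdb import DuckDBSession
from sqlframe.duckdb import functions as F

session = DuckDBSession()
df = session.createDataFrame([("a.b.c.d",)], ["s"])
# Everything before the second '.', per PySpark semantics: "a.b"
df.select(F.substring_index("s", ".", 2).alias("prefix")).show()
```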
```diff
@@ -2205,7 +2233,9 @@ def slice(
 
     start_col = lit(start) if isinstance(start, int) else start
     length_col = lit(length) if isinstance(length, int) else length
-    return Column.
+    return Column.invoke_expression_over_column(
+        x, expression.ArraySlice, start=start_col, end=length_col
+    )
 
 
 @meta()
```
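A hedged usage sketch for the completed `slice`. As in PySpark, `start` is 1-based and the third argument is a length, not an end index:

```python
# Hedged sketch, assuming an in-memory DuckDB-backed session.
from sqlframe.duckdb import DuckDBSession
from sqlframe.duckdb import functions as F

session = DuckDBSession()
df = session.createDataFrame([([1, 2, 3, 4, 5],)], ["arr"])
# Three elements starting at position 2 -> [2, 3, 4]
df.select(F.slice("arr", 2, 3).alias("window")).show()
```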
```diff
@@ -2748,7 +2778,7 @@ def typeof(col: ColumnOrName) -> Column:
     if session._is_snowflake:
         return typeof_from_variant(col)
 
-    return Column.
+    return Column.invoke_expression_over_column(col, expression.Typeof)
 
 
 @meta()
```
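A hedged sketch of the non-Snowflake path, which now emits a plain `Typeof` expression. This assumes `typeof` is re-exported by the engine's `functions` module like the other functions in this file; type names come back as engine-specific strings:

```python
# Hedged sketch, assuming an in-memory DuckDB-backed session.
from sqlframe.duckdb import DuckDBSession
from sqlframe.duckdb import functions as F

session = DuckDBSession()
df = session.createDataFrame([(1, "a")], ["i", "s"])
df.select(F.typeof("i").alias("i_type"), F.typeof("s").alias("s_type")).show()
# On DuckDB this should report native type names, e.g. BIGINT and VARCHAR.
```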
sqlframe/base/group.py
CHANGED
```diff
@@ -70,6 +70,11 @@ class _BaseGroupedData(t.Generic[DF]):
 
         from sqlframe.base import functions as F
 
+        if self.session._is_snowflake and len(cols) > 1:
+            raise ValueError(
+                "Snowflake does not support multiple aggregation functions in a single group by operation."
+            )
+
         # Build the pivot expression
         # First, we need to convert the DataFrame to include the pivot logic
         df = self._df.copy()
```
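A hedged sketch of the call shape the new guard rejects. Session construction is illustrative only (a real Snowflake session needs credentials); the data and column names are the usual PySpark pivot example:

```python
# Hedged sketch, assuming an already-configured Snowflake-backed session.
from sqlframe.snowflake import SnowflakeSession
from sqlframe.snowflake import functions as F

session = SnowflakeSession()  # normally built from a snowflake-connector connection
df = session.createDataFrame(
    [(2012, "Java", 20000), (2012, "dotNET", 5000)],
    ["year", "course", "earnings"],
)

df.groupBy("year").pivot("course").agg(F.sum("earnings"))  # fine: one aggregation
df.groupBy("year").pivot("course").agg(F.sum("earnings"), F.avg("earnings"))
# -> ValueError: Snowflake does not support multiple aggregation functions
#    in a single group by operation.
```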
```diff
@@ -132,11 +137,55 @@ class _BaseGroupedData(t.Generic[DF]):
         subquery.set("pivots", [pivot])
 
         # Create the final select from the pivoted subquery
-
+        final_select_in_values = []
+        for col in in_values:  # type: ignore
+            for agg_col in cols:
+                original_name = col.alias_or_name  # type: ignore
+                if self.session._is_snowflake:
+                    # Snowflake takes the provided values, like 'Java', and creates the column as "'Java'"
+                    # Therefore the user to select the column would need to use "'Java'"
+                    # This does not conform to the PySpark API, nor is it very user-friendly.
+                    # Therefore, we select the column as expected, and tell SQLFrame it is case-sensitive, but then
+                    # alias is to case-insensitive "Java" so that the user can select it without quotes.
+                    # This has a downside that if a user really needed case-sensitive column names then it wouldn't work.
+                    new_col = exp.to_column(
+                        col.alias_or_name,  # type: ignore
+                        quoted=True,
+                        dialect=self.session.execution_dialect,
+                    )
+                    new_col.this.set("this", f"'{new_col.this.this}'")
+                    new_col = exp.alias_(new_col, original_name)
+                    new_col.unalias()._meta = {"case_sensitive": True}
+                elif self.session._is_bigquery:
+                    # BigQuery flips the alias order to <alias>_<value> instead of <value>_<alias>
+                    new_col = exp.to_column(
+                        f"{agg_col.alias_or_name}_{original_name}",
+                        dialect=self.session.execution_dialect,
+                    )
+                    new_col = (
+                        exp.alias_(new_col, original_name)
+                        if len(cols) == 1
+                        else exp.alias_(new_col, f"{original_name}_{agg_col.alias_or_name}")
+                    )
+                elif self.session._is_duckdb:
+                    # DuckDB always respects the alias if num_cols == 1
+                    new_col = exp.column(f"{original_name}_{agg_col.expression.alias_or_name}")
+                    if len(cols) == 1:
+                        new_col = exp.alias_(new_col, original_name)
+                else:
+                    new_col = (
+                        exp.column(original_name)
+                        if len(cols) == 1
+                        else exp.column(f"{original_name}_{agg_col.expression.alias_or_name}")
+                    )
+                final_select_in_values.append(new_col)
+
+        expression = exp.select(
+            *[x.column_expression for x in self.group_by_cols] + final_select_in_values  # type: ignore
+        ).from_(subquery)
 
         return self._df.copy(expression=expression)
 
-        # Original non-pivot logic
         if not self.group_by_cols or not isinstance(self.group_by_cols[0], (list, tuple, set)):
             expression = self._df.expression.group_by(
                 # User column_expression for group by to avoid alias in group by
```
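The net effect of this loop is that pivoted column names are normalized to PySpark's convention on every engine: just the pivot value when there is a single aggregation, and `<value>_<agg alias>` when there are several. A hedged sketch of what that should look like (column names depend on how each aggregation is aliased):

```python
# Hedged sketch, assuming an in-memory DuckDB-backed session.
from sqlframe.duckdb import DuckDBSession
from sqlframe.duckdb import functions as F

session = DuckDBSession()
df = session.createDataFrame(
    [(2012, "Java", 20000), (2012, "dotNET", 5000)],
    ["year", "course", "earnings"],
)

single = df.groupBy("year").pivot("course").agg(F.sum("earnings"))
print(single.columns)  # expected PySpark-style: ['year', 'Java', 'dotNET']

multi = df.groupBy("year").pivot("course").agg(F.sum("earnings"), F.avg("earnings"))
print(multi.columns)   # expected: value-then-agg names, e.g. 'Java_sum(earnings)'
```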
{sqlframe-3.36.1.dist-info → sqlframe-3.36.3.dist-info}/METADATA
CHANGED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sqlframe
-Version: 3.36.
+Version: 3.36.3
 Summary: Turning PySpark Into a Universal DataFrame API
 Home-page: https://github.com/eakmanrq/sqlframe
 Author: Ryan Eakman
@@ -17,7 +17,7 @@ Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: prettytable <4
-Requires-Dist: sqlglot <26.
+Requires-Dist: sqlglot <26.34,>=24.0.0
 Requires-Dist: typing-extensions
 Provides-Extra: bigquery
 Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
@@ -37,7 +37,8 @@ Requires-Dist: pyarrow <21,>=10 ; extra == 'dev'
 Requires-Dist: pyspark <3.6,>=2 ; extra == 'dev'
 Requires-Dist: pytest-forked ; extra == 'dev'
 Requires-Dist: pytest-postgresql <8,>=6 ; extra == 'dev'
-Requires-Dist: pytest-
+Requires-Dist: pytest-rerunfailures ; extra == 'dev'
+Requires-Dist: pytest-xdist <3.9,>=3.6 ; extra == 'dev'
 Requires-Dist: pytest <8.5,>=8.2.0 ; extra == 'dev'
 Requires-Dist: ruff <0.13,>=0.4.4 ; extra == 'dev'
 Requires-Dist: types-psycopg2 <3,>=2.9 ; extra == 'dev'
@@ -59,7 +60,7 @@ Requires-Dist: psycopg2 <3,>=2.8 ; extra == 'postgres'
 Provides-Extra: redshift
 Requires-Dist: redshift-connector <2.2.0,>=2.1.1 ; extra == 'redshift'
 Provides-Extra: snowflake
-Requires-Dist: snowflake-connector-python[secure-local-storage] <3.
+Requires-Dist: snowflake-connector-python[secure-local-storage] <3.17,>=3.10.0 ; extra == 'snowflake'
 Provides-Extra: spark
 Requires-Dist: pyspark <3.6,>=2 ; extra == 'spark'
 
```
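The dependency changes can be checked against an installed copy with the standard library; a hedged sketch (requirement strings may be formatted slightly differently depending on the metadata version):

```python
# Hedged sketch: confirm the installed wheel matches the new pins.
from importlib.metadata import requires, version

print(version("sqlframe"))  # expect "3.36.3"
for req in requires("sqlframe") or []:
    if req.startswith("sqlglot"):
        print(req)  # e.g. "sqlglot<26.34,>=24.0.0"
```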
{sqlframe-3.36.1.dist-info → sqlframe-3.36.3.dist-info}/RECORD
CHANGED

```diff
@@ -1,5 +1,5 @@
 sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
-sqlframe/_version.py,sha256=
+sqlframe/_version.py,sha256=k0__IYrsLFXLDTAPbWVSipvAv_-Gzp6mw1szlQWJb_o,513
 sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
 sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
@@ -9,8 +9,8 @@ sqlframe/base/dataframe.py,sha256=0diYONDlet8iZt49LC3vcmfXHAAZ2MovPL2pTXYHj2U,85
 sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
 sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
 sqlframe/base/function_alternatives.py,sha256=aTu3nQhIAkZoxrI1IpjpaHEAMxBNms0AnhS0EMR-TwY,51727
-sqlframe/base/functions.py,sha256=
-sqlframe/base/group.py,sha256=
+sqlframe/base/functions.py,sha256=jcZZZkylIy6jktXkyqVgSpJgUSgA0g8qjO0SGDwQD30,226902
+sqlframe/base/group.py,sha256=fBm8EUve7W7xz11nybTXr09ih-yZxL_vvEiZVE1eb_0,12025
 sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
 sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
 sqlframe/base/readerwriter.py,sha256=Nb2VJ_HBmLQp5mK8JhnFooZh2ydAaboCAFVPb-4MNX4,31241
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
 sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
 sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
 sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
-sqlframe-3.36.
-sqlframe-3.36.
-sqlframe-3.36.
-sqlframe-3.36.
-sqlframe-3.36.
+sqlframe-3.36.3.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
+sqlframe-3.36.3.dist-info/METADATA,sha256=1G_ICerBb0qaFNlT8OdNQrmHtkI_gIbW8xOQyPlrAO0,9040
+sqlframe-3.36.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+sqlframe-3.36.3.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
+sqlframe-3.36.3.dist-info/RECORD,,
```
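RECORD digests follow the wheel spec (PEP 376/627): the unpadded urlsafe base64 of each file's SHA-256. A hedged sketch of verifying one entry against an installed copy:

```python
# Hedged sketch: recompute a RECORD-style digest for an installed file.
import base64
import hashlib
from pathlib import Path

def record_digest(path: Path) -> str:
    # RECORD stores sha256 digests as unpadded urlsafe base64.
    digest = hashlib.sha256(path.read_bytes()).digest()
    return base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

# Example: compare against the RECORD entry for sqlframe/_version.py,
# "k0__IYrsLFXLDTAPbWVSipvAv_-Gzp6mw1szlQWJb_o"
import sqlframe
print(record_digest(Path(sqlframe.__file__).with_name("_version.py")))
```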
{sqlframe-3.36.1.dist-info → sqlframe-3.36.3.dist-info}/LICENSE
File without changes

{sqlframe-3.36.1.dist-info → sqlframe-3.36.3.dist-info}/WHEEL
File without changes

{sqlframe-3.36.1.dist-info → sqlframe-3.36.3.dist-info}/top_level.txt
File without changes