sqlframe 3.36.1__py3-none-any.whl → 3.36.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '3.36.1'
21
- __version_tuple__ = version_tuple = (3, 36, 1)
20
+ __version__ = version = '3.36.2'
21
+ __version_tuple__ = version_tuple = (3, 36, 2)
@@ -1796,7 +1796,9 @@ def substring_index(str: ColumnOrName, delim: str, count: int) -> Column:
1796
1796
  if session._is_bigquery:
1797
1797
  return substring_index_bgutil(str, delim, count)
1798
1798
 
1799
- return Column.invoke_anonymous_function(str, "SUBSTRING_INDEX", lit(delim), lit(count))
1799
+ return Column.invoke_expression_over_column(
1800
+ str, expression.SubstringIndex, delimiter=lit(delim), count=lit(count)
1801
+ )
1800
1802
 
1801
1803
 
1802
1804
  @meta(unsupported_engines="bigquery")
@@ -2205,7 +2207,9 @@ def slice(
2205
2207
 
2206
2208
  start_col = lit(start) if isinstance(start, int) else start
2207
2209
  length_col = lit(length) if isinstance(length, int) else length
2208
- return Column.invoke_anonymous_function(x, "SLICE", start_col, length_col)
2210
+ return Column.invoke_expression_over_column(
2211
+ x, expression.ArraySlice, start=start_col, end=length_col
2212
+ )
2209
2213
 
2210
2214
 
2211
2215
  @meta()
@@ -2748,7 +2752,7 @@ def typeof(col: ColumnOrName) -> Column:
2748
2752
  if session._is_snowflake:
2749
2753
  return typeof_from_variant(col)
2750
2754
 
2751
- return Column.invoke_anonymous_function(col, "TYPEOF")
2755
+ return Column.invoke_expression_over_column(col, expression.Typeof)
2752
2756
 
2753
2757
 
2754
2758
  @meta()
sqlframe/base/group.py CHANGED
@@ -70,6 +70,11 @@ class _BaseGroupedData(t.Generic[DF]):
70
70
 
71
71
  from sqlframe.base import functions as F
72
72
 
73
+ if self.session._is_snowflake and len(cols) > 1:
74
+ raise ValueError(
75
+ "Snowflake does not support multiple aggregation functions in a single group by operation."
76
+ )
77
+
73
78
  # Build the pivot expression
74
79
  # First, we need to convert the DataFrame to include the pivot logic
75
80
  df = self._df.copy()
@@ -132,11 +137,55 @@ class _BaseGroupedData(t.Generic[DF]):
132
137
  subquery.set("pivots", [pivot])
133
138
 
134
139
  # Create the final select from the pivoted subquery
135
- expression = exp.select("*").from_(subquery)
140
+ final_select_in_values = []
141
+ for col in in_values: # type: ignore
142
+ for agg_col in cols:
143
+ original_name = col.alias_or_name # type: ignore
144
+ if self.session._is_snowflake:
145
+ # Snowflake takes the provided values, like 'Java', and creates the column as "'Java'"
146
+ # Therefore, to select the column, the user would need to use "'Java'"
147
+ # This does not conform to the PySpark API, nor is it very user-friendly.
148
+ # Therefore, we select the column as expected, and tell SQLFrame it is case-sensitive, but then
149
+ # alias it to case-insensitive "Java" so that the user can select it without quotes.
150
+ # This has a downside that if a user really needed case-sensitive column names then it wouldn't work.
151
+ new_col = exp.to_column(
152
+ col.alias_or_name, # type: ignore
153
+ quoted=True,
154
+ dialect=self.session.execution_dialect,
155
+ )
156
+ new_col.this.set("this", f"'{new_col.this.this}'")
157
+ new_col = exp.alias_(new_col, original_name)
158
+ new_col.unalias()._meta = {"case_sensitive": True}
159
+ elif self.session._is_bigquery:
160
+ # BigQuery flips the alias order to <alias>_<value> instead of <value>_<alias>
161
+ new_col = exp.to_column(
162
+ f"{agg_col.alias_or_name}_{original_name}",
163
+ dialect=self.session.execution_dialect,
164
+ )
165
+ new_col = (
166
+ exp.alias_(new_col, original_name)
167
+ if len(cols) == 1
168
+ else exp.alias_(new_col, f"{original_name}_{agg_col.alias_or_name}")
169
+ )
170
+ elif self.session._is_duckdb:
171
+ # DuckDB always respects the alias even if num_cols == 1
172
+ new_col = exp.column(f"{original_name}_{agg_col.expression.alias_or_name}")
173
+ if len(cols) == 1:
174
+ new_col = exp.alias_(new_col, original_name)
175
+ else:
176
+ new_col = (
177
+ exp.column(original_name)
178
+ if len(cols) == 1
179
+ else exp.column(f"{original_name}_{agg_col.expression.alias_or_name}")
180
+ )
181
+ final_select_in_values.append(new_col)
182
+
183
+ expression = exp.select(
184
+ *[x.column_expression for x in self.group_by_cols] + final_select_in_values # type: ignore
185
+ ).from_(subquery)
136
186
 
137
187
  return self._df.copy(expression=expression)
138
188
 
139
- # Original non-pivot logic
140
189
  if not self.group_by_cols or not isinstance(self.group_by_cols[0], (list, tuple, set)):
141
190
  expression = self._df.expression.group_by(
142
191
  # Use column_expression for group by to avoid alias in group by
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.36.1
3
+ Version: 3.36.2
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -17,7 +17,7 @@ Requires-Python: >=3.9
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
19
  Requires-Dist: prettytable <4
20
- Requires-Dist: sqlglot <26.32,>=24.0.0
20
+ Requires-Dist: sqlglot <26.34,>=24.0.0
21
21
  Requires-Dist: typing-extensions
22
22
  Provides-Extra: bigquery
23
23
  Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
@@ -37,7 +37,7 @@ Requires-Dist: pyarrow <21,>=10 ; extra == 'dev'
37
37
  Requires-Dist: pyspark <3.6,>=2 ; extra == 'dev'
38
38
  Requires-Dist: pytest-forked ; extra == 'dev'
39
39
  Requires-Dist: pytest-postgresql <8,>=6 ; extra == 'dev'
40
- Requires-Dist: pytest-xdist <3.8,>=3.6 ; extra == 'dev'
40
+ Requires-Dist: pytest-xdist <3.9,>=3.6 ; extra == 'dev'
41
41
  Requires-Dist: pytest <8.5,>=8.2.0 ; extra == 'dev'
42
42
  Requires-Dist: ruff <0.13,>=0.4.4 ; extra == 'dev'
43
43
  Requires-Dist: types-psycopg2 <3,>=2.9 ; extra == 'dev'
@@ -59,7 +59,7 @@ Requires-Dist: psycopg2 <3,>=2.8 ; extra == 'postgres'
59
59
  Provides-Extra: redshift
60
60
  Requires-Dist: redshift-connector <2.2.0,>=2.1.1 ; extra == 'redshift'
61
61
  Provides-Extra: snowflake
62
- Requires-Dist: snowflake-connector-python[secure-local-storage] <3.16,>=3.10.0 ; extra == 'snowflake'
62
+ Requires-Dist: snowflake-connector-python[secure-local-storage] <3.17,>=3.10.0 ; extra == 'snowflake'
63
63
  Provides-Extra: spark
64
64
  Requires-Dist: pyspark <3.6,>=2 ; extra == 'spark'
65
65
 
@@ -1,5 +1,5 @@
1
1
  sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
2
- sqlframe/_version.py,sha256=aFVkO79_dcSzv2bBviVfMu6iWSclcSYamQO4TGPQNMo,513
2
+ sqlframe/_version.py,sha256=8n9kfLZeiKlOVTLEVFfs0B2MLJQ8xc2SyKauUUuFT3s,513
3
3
  sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
4
4
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
@@ -9,8 +9,8 @@ sqlframe/base/dataframe.py,sha256=0diYONDlet8iZt49LC3vcmfXHAAZ2MovPL2pTXYHj2U,85
9
9
  sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
10
10
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
11
11
  sqlframe/base/function_alternatives.py,sha256=aTu3nQhIAkZoxrI1IpjpaHEAMxBNms0AnhS0EMR-TwY,51727
12
- sqlframe/base/functions.py,sha256=qyV-4R4CPSkuS-0S3dPza0BZykoKAanxjQq83tu8L34,225778
13
- sqlframe/base/group.py,sha256=PGxUAnZkNlYKBIVNzoEDtoHbsP9Rhy1bGcSg2eYuWF4,9015
12
+ sqlframe/base/functions.py,sha256=n1MsfJt2WWUk7-YwbfByWG065g0W45AwJVIME5H-QJU,225875
13
+ sqlframe/base/group.py,sha256=fBm8EUve7W7xz11nybTXr09ih-yZxL_vvEiZVE1eb_0,12025
14
14
  sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
15
15
  sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
16
16
  sqlframe/base/readerwriter.py,sha256=Nb2VJ_HBmLQp5mK8JhnFooZh2ydAaboCAFVPb-4MNX4,31241
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
130
130
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
131
131
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
132
132
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
133
- sqlframe-3.36.1.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
134
- sqlframe-3.36.1.dist-info/METADATA,sha256=9Osg-ZJKc1nPTLTUkZ6Xz45RmGRqPoHA-0FYCLXv_7U,8987
135
- sqlframe-3.36.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
136
- sqlframe-3.36.1.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
137
- sqlframe-3.36.1.dist-info/RECORD,,
133
+ sqlframe-3.36.2.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
134
+ sqlframe-3.36.2.dist-info/METADATA,sha256=G40goRUAdQg115DFuLq6-RYZ_6OSyBJ0zRsPHu7mhMQ,8987
135
+ sqlframe-3.36.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
136
+ sqlframe-3.36.2.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
137
+ sqlframe-3.36.2.dist-info/RECORD,,