sqlframe 3.36.0__py3-none-any.whl → 3.36.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE

-__version__ = version = '3.36.0'
-__version_tuple__ = version_tuple = (3, 36, 0)
+__version__ = version = '3.36.2'
+__version_tuple__ = version_tuple = (3, 36, 2)
sqlframe/base/functions.py CHANGED
@@ -1796,7 +1796,9 @@ def substring_index(str: ColumnOrName, delim: str, count: int) -> Column:
     if session._is_bigquery:
         return substring_index_bgutil(str, delim, count)

-    return Column.invoke_anonymous_function(str, "SUBSTRING_INDEX", lit(delim), lit(count))
+    return Column.invoke_expression_over_column(
+        str, expression.SubstringIndex, delimiter=lit(delim), count=lit(count)
+    )


 @meta(unsupported_engines="bigquery")
@@ -2205,7 +2207,9 @@ def slice(

     start_col = lit(start) if isinstance(start, int) else start
     length_col = lit(length) if isinstance(length, int) else length
-    return Column.invoke_anonymous_function(x, "SLICE", start_col, length_col)
+    return Column.invoke_expression_over_column(
+        x, expression.ArraySlice, start=start_col, end=length_col
+    )


 @meta()
@@ -2748,7 +2752,7 @@ def typeof(col: ColumnOrName) -> Column:
     if session._is_snowflake:
         return typeof_from_variant(col)

-    return Column.invoke_anonymous_function(col, "TYPEOF")
+    return Column.invoke_expression_over_column(col, expression.Typeof)


 @meta()
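All three hunks above swap `Column.invoke_anonymous_function` for `Column.invoke_expression_over_column`, so `substring_index`, `slice`, and `typeof` are now built from typed sqlglot expressions (`SubstringIndex`, `ArraySlice`, `Typeof`) and can be rendered per dialect instead of being emitted verbatim. A minimal sketch of the user-facing call sites, assuming sqlframe's PySpark-style API with a local DuckDB session (the data, column names, and the `.sql()` call are illustrative assumptions, not taken from this diff):

```python
# Sketch only: assumes a DuckDBSession and sqlframe's PySpark-style functions module.
from sqlframe.duckdb import DuckDBSession
from sqlframe.duckdb import functions as F

session = DuckDBSession()
df = session.createDataFrame([("a.b.c.d", [1, 2, 3, 4])], ["host", "nums"])

result = df.select(
    F.substring_index("host", ".", 2).alias("prefix"),  # now lowers to exp.SubstringIndex
    F.slice("nums", 2, 2).alias("middle"),               # now lowers to exp.ArraySlice
    F.typeof("host").alias("host_type"),                 # now lowers to exp.Typeof
)
print(result.sql())  # renders the dialect-specific SQL; result.show() would execute it
```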
sqlframe/base/group.py CHANGED
@@ -70,6 +70,11 @@ class _BaseGroupedData(t.Generic[DF]):

         from sqlframe.base import functions as F

+        if self.session._is_snowflake and len(cols) > 1:
+            raise ValueError(
+                "Snowflake does not support multiple aggregation functions in a single group by operation."
+            )
+
         # Build the pivot expression
         # First, we need to convert the DataFrame to include the pivot logic
         df = self._df.copy()
@@ -132,11 +137,55 @@ class _BaseGroupedData(t.Generic[DF]):
         subquery.set("pivots", [pivot])

         # Create the final select from the pivoted subquery
-        expression = exp.select("*").from_(subquery)
+        final_select_in_values = []
+        for col in in_values:  # type: ignore
+            for agg_col in cols:
+                original_name = col.alias_or_name  # type: ignore
+                if self.session._is_snowflake:
+                    # Snowflake takes the provided values, like 'Java', and creates the column as "'Java'"
+                    # Therefore the user to select the column would need to use "'Java'"
+                    # This does not conform to the PySpark API, nor is it very user-friendly.
+                    # Therefore, we select the column as expected, and tell SQLFrame it is case-sensitive, but then
+                    # alias is to case-insensitive "Java" so that the user can select it without quotes.
+                    # This has a downside that if a user really needed case-sensitive column names then it wouldn't work.
+                    new_col = exp.to_column(
+                        col.alias_or_name,  # type: ignore
+                        quoted=True,
+                        dialect=self.session.execution_dialect,
+                    )
+                    new_col.this.set("this", f"'{new_col.this.this}'")
+                    new_col = exp.alias_(new_col, original_name)
+                    new_col.unalias()._meta = {"case_sensitive": True}
+                elif self.session._is_bigquery:
+                    # BigQuery flips the alias order to <alias>_<value> instead of <value>_<alias>
+                    new_col = exp.to_column(
+                        f"{agg_col.alias_or_name}_{original_name}",
+                        dialect=self.session.execution_dialect,
+                    )
+                    new_col = (
+                        exp.alias_(new_col, original_name)
+                        if len(cols) == 1
+                        else exp.alias_(new_col, f"{original_name}_{agg_col.alias_or_name}")
+                    )
+                elif self.session._is_duckdb:
+                    # DuckDB always respects the alias if num_cols == 1
+                    new_col = exp.column(f"{original_name}_{agg_col.expression.alias_or_name}")
+                    if len(cols) == 1:
+                        new_col = exp.alias_(new_col, original_name)
+                else:
+                    new_col = (
+                        exp.column(original_name)
+                        if len(cols) == 1
+                        else exp.column(f"{original_name}_{agg_col.expression.alias_or_name}")
+                    )
+                final_select_in_values.append(new_col)
+
+        expression = exp.select(
+            *[x.column_expression for x in self.group_by_cols] + final_select_in_values  # type: ignore
+        ).from_(subquery)

         return self._df.copy(expression=expression)

-        # Original non-pivot logic
         if not self.group_by_cols or not isinstance(self.group_by_cols[0], (list, tuple, set)):
             expression = self._df.expression.group_by(
                 # User column_expression for group by to avoid alias in group by
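Together these two hunks change pivot handling: a Snowflake pivot with more than one aggregation is rejected up front, and the final projection now lists the group-by columns plus one explicitly aliased column per pivot value (with engine-specific naming fixes for Snowflake, BigQuery, and DuckDB) instead of `SELECT *` over the pivoted subquery. A rough usage sketch, assuming the PySpark-style pivot API that sqlframe mirrors (engine choice, data, and column names are illustrative only):

```python
# Sketch only: illustrative data run through the PySpark-style pivot API sqlframe mirrors.
from sqlframe.duckdb import DuckDBSession
from sqlframe.duckdb import functions as F

session = DuckDBSession()
df = session.createDataFrame(
    [(2012, "Java", 20000), (2012, "dotNET", 5000), (2013, "Java", 30000)],
    ["year", "course", "earnings"],
)

# Single aggregation: the final select now projects year, Java, dotNET explicitly,
# aliased back to the plain value names on every engine.
single = df.groupBy("year").pivot("course", ["Java", "dotNET"]).agg(F.sum("earnings"))
single.show()

# Multiple aggregations: columns follow a <value>_<aggregation alias> pattern;
# on a Snowflake session this combination now raises ValueError instead of emitting bad SQL.
multi = (
    df.groupBy("year")
    .pivot("course", ["Java", "dotNET"])
    .agg(F.sum("earnings").alias("sum"), F.avg("earnings").alias("avg"))
)
multi.show()
```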
sqlframe/base/mixins/table_mixins.py CHANGED
@@ -3,6 +3,7 @@ import logging
 import typing as t

 from sqlglot import exp
+from typing_extensions import ParamSpec

 try:
     from sqlglot.expressions import Whens
@@ -28,21 +29,22 @@ if t.TYPE_CHECKING:
 logger = logging.getLogger(__name__)


-def ensure_cte() -> t.Callable[[t.Callable], t.Callable]:
-    def decorator(func: t.Callable) -> t.Callable:
-        @functools.wraps(func)
-        def wrapper(self: _BaseTable, *args, **kwargs) -> t.Any:
-            if len(self.expression.ctes) > 0:
-                return func(self, *args, **kwargs)  # type: ignore
-            self_class = self.__class__
-            self = self._convert_leaf_to_cte()
-            self = self_class(**object_to_dict(self))
-            return func(self, *args, **kwargs)  # type: ignore
+P = ParamSpec("P")
+T = t.TypeVar("T")
+

-        wrapper.__wrapped__ = func  # type: ignore
-        return wrapper
+def ensure_cte(func: t.Callable[P, T]) -> t.Callable[P, T]:
+    @functools.wraps(func)
+    def wrapper(self: _BaseTable, *args: P.args, **kwargs: P.kwargs) -> T:
+        if len(self.expression.ctes) > 0:
+            return func(self, *args, **kwargs)  # type: ignore
+        self_class = self.__class__
+        self = self._convert_leaf_to_cte()
+        self = self_class(**object_to_dict(self))
+        return func(self, *args, **kwargs)  # type: ignore

-    return decorator
+    wrapper.__wrapped__ = func  # type: ignore
+    return wrapper  # type: ignore[return-value]


 class _BaseTableMixins(_BaseTable, t.Generic[DF]):
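`ensure_cte` is now a plain decorator rather than a decorator factory, so the call sites in the hunks below change from `@ensure_cte()` to `@ensure_cte`, and the `ParamSpec`/`TypeVar` pair lets type checkers see the decorated method's real signature instead of a bare `t.Callable`. A standalone sketch of the same pattern, using a hypothetical `logged` decorator rather than sqlframe's code:

```python
# Standalone illustration of the decorator pattern used above (not sqlframe code).
import functools
import typing as t

from typing_extensions import ParamSpec

P = ParamSpec("P")
T = t.TypeVar("T")


def logged(func: t.Callable[P, T]) -> t.Callable[P, T]:
    """Plain decorator: P and T preserve the wrapped function's signature for type checkers."""

    @functools.wraps(func)
    def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
        print(f"calling {func.__name__}")
        return func(*args, **kwargs)

    wrapper.__wrapped__ = func  # mirrors the explicit assignment in the hunk above
    return wrapper


@logged  # applied bare, with no trailing parentheses, unlike a decorator factory
def add(a: int, b: int) -> int:
    return a + b


assert add(1, 2) == 3
```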
@@ -68,7 +70,7 @@ class _BaseTableMixins(_BaseTable, t.Generic[DF]):


 class UpdateSupportMixin(_BaseTableMixins, t.Generic[DF]):
-    @ensure_cte()
+    @ensure_cte
     def update(
         self,
         set_: t.Dict[t.Union[Column, str], t.Union[Column, "ColumnOrLiteral", exp.Expression]],
@@ -119,7 +121,7 @@ class UpdateSupportMixin(_BaseTableMixins, t.Generic[DF]):


 class DeleteSupportMixin(_BaseTableMixins, t.Generic[DF]):
-    @ensure_cte()
+    @ensure_cte
     def delete(
         self,
         where: t.Optional[t.Union[Column, str, bool]] = None,
@@ -141,7 +143,7 @@ class MergeSupportMixin(_BaseTable, t.Generic[DF]):
     ]
     _merge_support_star: bool

-    @ensure_cte()
+    @ensure_cte
     def merge(
         self,
         other_df: DF,
sqlframe-3.36.0.dist-info/METADATA → sqlframe-3.36.2.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sqlframe
-Version: 3.36.0
+Version: 3.36.2
 Summary: Turning PySpark Into a Universal DataFrame API
 Home-page: https://github.com/eakmanrq/sqlframe
 Author: Ryan Eakman
@@ -17,7 +17,7 @@ Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: prettytable <4
-Requires-Dist: sqlglot <26.32,>=24.0.0
+Requires-Dist: sqlglot <26.34,>=24.0.0
 Requires-Dist: typing-extensions
 Provides-Extra: bigquery
 Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
@@ -37,7 +37,7 @@ Requires-Dist: pyarrow <21,>=10 ; extra == 'dev'
 Requires-Dist: pyspark <3.6,>=2 ; extra == 'dev'
 Requires-Dist: pytest-forked ; extra == 'dev'
 Requires-Dist: pytest-postgresql <8,>=6 ; extra == 'dev'
-Requires-Dist: pytest-xdist <3.8,>=3.6 ; extra == 'dev'
+Requires-Dist: pytest-xdist <3.9,>=3.6 ; extra == 'dev'
 Requires-Dist: pytest <8.5,>=8.2.0 ; extra == 'dev'
 Requires-Dist: ruff <0.13,>=0.4.4 ; extra == 'dev'
 Requires-Dist: types-psycopg2 <3,>=2.9 ; extra == 'dev'
@@ -59,7 +59,7 @@ Requires-Dist: psycopg2 <3,>=2.8 ; extra == 'postgres'
 Provides-Extra: redshift
 Requires-Dist: redshift-connector <2.2.0,>=2.1.1 ; extra == 'redshift'
 Provides-Extra: snowflake
-Requires-Dist: snowflake-connector-python[secure-local-storage] <3.16,>=3.10.0 ; extra == 'snowflake'
+Requires-Dist: snowflake-connector-python[secure-local-storage] <3.17,>=3.10.0 ; extra == 'snowflake'
 Provides-Extra: spark
 Requires-Dist: pyspark <3.6,>=2 ; extra == 'spark'
sqlframe-3.36.0.dist-info/RECORD → sqlframe-3.36.2.dist-info/RECORD RENAMED
@@ -1,5 +1,5 @@
 sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
-sqlframe/_version.py,sha256=bkUPQ6OdlXKrD5knIV3EChl0OWjLm_VJDu9m0db4vwg,513
+sqlframe/_version.py,sha256=8n9kfLZeiKlOVTLEVFfs0B2MLJQ8xc2SyKauUUuFT3s,513
 sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
 sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
@@ -9,8 +9,8 @@ sqlframe/base/dataframe.py,sha256=0diYONDlet8iZt49LC3vcmfXHAAZ2MovPL2pTXYHj2U,85
 sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
 sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
 sqlframe/base/function_alternatives.py,sha256=aTu3nQhIAkZoxrI1IpjpaHEAMxBNms0AnhS0EMR-TwY,51727
-sqlframe/base/functions.py,sha256=qyV-4R4CPSkuS-0S3dPza0BZykoKAanxjQq83tu8L34,225778
-sqlframe/base/group.py,sha256=PGxUAnZkNlYKBIVNzoEDtoHbsP9Rhy1bGcSg2eYuWF4,9015
+sqlframe/base/functions.py,sha256=n1MsfJt2WWUk7-YwbfByWG065g0W45AwJVIME5H-QJU,225875
+sqlframe/base/group.py,sha256=fBm8EUve7W7xz11nybTXr09ih-yZxL_vvEiZVE1eb_0,12025
 sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
 sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
 sqlframe/base/readerwriter.py,sha256=Nb2VJ_HBmLQp5mK8JhnFooZh2ydAaboCAFVPb-4MNX4,31241
@@ -25,7 +25,7 @@ sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
 sqlframe/base/mixins/catalog_mixins.py,sha256=9fZGWToz9xMJSzUl1vsVtj6TH3TysP3fBCKJLnGUQzE,23353
 sqlframe/base/mixins/dataframe_mixins.py,sha256=8D2AFtfc0tj9Q5qzlNAXdXOYw9RuD8kpe8wixo8pY5o,1534
 sqlframe/base/mixins/readwriter_mixins.py,sha256=ItQ_0jZ5RljgmLjGDIzLMRP_NQdy3wAyKwJ6K5NjaqA,4954
-sqlframe/base/mixins/table_mixins.py,sha256=2TnGFpbDSGw_NswpZwLACqvdD4zCA7hXekQ9IEkoTOk,13784
+sqlframe/base/mixins/table_mixins.py,sha256=3MhsOARkplwED1GRD0wq1vR8GNuop34kt3Jg8MATIjc,13791
 sqlframe/bigquery/__init__.py,sha256=kbaomhYAANPdxeDQhajv8IHfMg_ENKivtYK-rPwaV08,939
 sqlframe/bigquery/catalog.py,sha256=Dcpp1JKftc3ukdYpn6M1ujqixA-6_1k8aY21Y5Johyc,11899
 sqlframe/bigquery/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
 sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
 sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
 sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
-sqlframe-3.36.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
-sqlframe-3.36.0.dist-info/METADATA,sha256=F56M3UKMA8CZN2Ps3dAkputINvX8rhBcPKTiAuC5iEs,8987
-sqlframe-3.36.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-sqlframe-3.36.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
-sqlframe-3.36.0.dist-info/RECORD,,
+sqlframe-3.36.2.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
+sqlframe-3.36.2.dist-info/METADATA,sha256=G40goRUAdQg115DFuLq6-RYZ_6OSyBJ0zRsPHu7mhMQ,8987
+sqlframe-3.36.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+sqlframe-3.36.2.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
+sqlframe-3.36.2.dist-info/RECORD,,