sqlframe 3.27.1__py3-none-any.whl → 3.28.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '3.27.1'
21
- __version_tuple__ = version_tuple = (3, 27, 1)
20
+ __version__ = version = '3.28.1'
21
+ __version_tuple__ = version_tuple = (3, 28, 1)
@@ -910,6 +910,8 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
910
910
 
911
911
  @operation(Operation.GROUP_BY)
912
912
  def groupBy(self, *cols, **kwargs) -> GROUP_DATA:
913
+ if cols and isinstance(cols[0], list):
914
+ cols = cols[0] # type: ignore
913
915
  columns = self._ensure_and_normalize_cols(cols)
914
916
  return self._group_data(self, columns, self.last_op)
915
917
 
@@ -1084,7 +1086,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1084
1086
  for left_col, right_col in join_column_pairs
1085
1087
  ]
1086
1088
  # To match spark behavior only the join clause gets deduplicated and it gets put in the front of the column list
1087
- select_column_names = [
1089
+ select_column_names: list[str | Column] = [
1088
1090
  (
1089
1091
  column.alias_or_name
1090
1092
  if not isinstance(column.expression.this, exp.Star)
@@ -1,19 +1,23 @@
1
- from __future__ import annotations
2
-
1
+ import functools
3
2
  import re
4
3
  import typing as t
5
4
 
6
5
  from sqlglot import exp
7
6
  from sqlglot.helper import ensure_list
7
+ from typing_extensions import ParamSpec
8
8
 
9
9
  from sqlframe.base.column import Column
10
10
 
11
- CALLING_CLASS = t.TypeVar("CALLING_CLASS")
11
+ P = ParamSpec("P")
12
+ T = t.TypeVar("T")
12
13
 
13
14
 
14
- def func_metadata(unsupported_engines: t.Optional[t.Union[str, t.List[str]]] = None) -> t.Callable:
15
- def _metadata(func: t.Callable) -> t.Callable:
16
- def wrapper(*args, **kwargs):
15
+ def func_metadata(
16
+ unsupported_engines: t.Optional[t.Union[str, t.List[str]]] = None,
17
+ ) -> t.Callable[[t.Callable[P, T]], t.Callable[P, T]]:
18
+ def _metadata(func: t.Callable[P, T]) -> t.Callable[P, T]:
19
+ @functools.wraps(func)
20
+ def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
17
21
  funcs_to_not_auto_alias = [
18
22
  "posexplode",
19
23
  "explode_outer",
@@ -34,14 +38,17 @@ def func_metadata(unsupported_engines: t.Optional[t.Union[str, t.List[str]]] = N
34
38
  and not isinstance(result.expression, exp.Alias)
35
39
  and func.__name__ not in funcs_to_not_auto_alias
36
40
  ):
37
- col_name = result.column_expression.find(exp.Identifier)
38
- if col_name:
39
- col_name = col_name.name
41
+ col_name = ""
42
+ col_name_exp: t.Optional[exp.Expression] = result.column_expression.find(
43
+ exp.Identifier
44
+ )
45
+ if col_name_exp:
46
+ col_name = col_name_exp.name
40
47
  else:
41
- col_name = result.column_expression.find(exp.Literal)
42
- if col_name:
43
- col_name = col_name.this
44
- alias_name = f"{func.__name__}__{col_name or ''}__"
48
+ col_name_exp = result.column_expression.find(exp.Literal)
49
+ if col_name_exp:
50
+ col_name = col_name_exp.this
51
+ alias_name = f"{func.__name__}__{col_name}__"
45
52
  # BigQuery has restrictions on alias names so we constrain it to alphanumeric characters and underscores
46
53
  return result.alias(re.sub(r"\W", "_", alias_name)) # type: ignore
47
54
  return result
@@ -3481,12 +3481,9 @@ def hll_union(
3481
3481
  +------------+
3482
3482
  """
3483
3483
  if allowDifferentLgConfigK is not None:
3484
- allowDifferentLgConfigK = (
3485
- lit(allowDifferentLgConfigK)
3486
- if isinstance(allowDifferentLgConfigK, bool)
3487
- else allowDifferentLgConfigK
3484
+ return Column.invoke_anonymous_function(
3485
+ col1, "hll_union", col2, lit(allowDifferentLgConfigK)
3488
3486
  )
3489
- return Column.invoke_anonymous_function(col1, "hll_union", col2, allowDifferentLgConfigK) # type: ignore
3490
3487
  else:
3491
3488
  return Column.invoke_anonymous_function(col1, "hll_union", col2)
3492
3489
 
sqlframe/base/group.py CHANGED
@@ -47,7 +47,10 @@ class _BaseGroupedData(t.Generic[DF]):
47
47
  from sqlframe.base.column import Column
48
48
 
49
49
  columns = (
50
- [Column(f"{agg_func}({column_name})") for column_name, agg_func in exprs[0].items()]
50
+ [
51
+ self._get_function_applied_columns(agg_func, (column_name,))[0]
52
+ for column_name, agg_func in exprs[0].items()
53
+ ]
51
54
  if isinstance(exprs[0], dict)
52
55
  else exprs
53
56
  )
@@ -55,7 +58,8 @@ class _BaseGroupedData(t.Generic[DF]):
55
58
 
56
59
  if not self.group_by_cols or not isinstance(self.group_by_cols[0], (list, tuple, set)):
57
60
  expression = self._df.expression.group_by(
58
- *[x.expression for x in self.group_by_cols] # type: ignore
61
+ # User column_expression for group by to avoid alias in group by
62
+ *[x.column_expression for x in self.group_by_cols] # type: ignore
59
63
  ).select(*[x.expression for x in self.group_by_cols + cols], append=False) # type: ignore
60
64
  group_by_cols = self.group_by_cols
61
65
  else:
@@ -66,7 +70,7 @@ class _BaseGroupedData(t.Generic[DF]):
66
70
  group_by_cols = []
67
71
  for grouping_set in self.group_by_cols:
68
72
  all_grouping_sets.append(
69
- exp.Tuple(expressions=[x.expression for x in grouping_set]) # type: ignore
73
+ exp.Tuple(expressions=[x.column_expression for x in grouping_set]) # type: ignore
70
74
  )
71
75
  group_by_cols.extend(grouping_set) # type: ignore
72
76
  group_by_cols = list(dict.fromkeys(group_by_cols))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.27.1
3
+ Version: 3.28.1
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -17,7 +17,7 @@ Requires-Python: >=3.9
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
19
  Requires-Dist: prettytable <4
20
- Requires-Dist: sqlglot <26.12,>=24.0.0
20
+ Requires-Dist: sqlglot <26.13,>=24.0.0
21
21
  Requires-Dist: typing-extensions
22
22
  Provides-Extra: bigquery
23
23
  Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
@@ -1,16 +1,16 @@
1
1
  sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
2
- sqlframe/_version.py,sha256=ZAp4ahp_nzqXiTlSnHrC3b9ZrxOfEiZs09wNO5x1QHo,513
2
+ sqlframe/_version.py,sha256=A-DozXRgJ1GLBl_eHEhQbTd1M98mV_YRq5bK5pd8Y5s,513
3
3
  sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
4
4
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
6
6
  sqlframe/base/catalog.py,sha256=ZuU_qmt4yjSoTYgecSGnOhitOdh3rJbGCUjnUBp5mlc,38564
7
7
  sqlframe/base/column.py,sha256=AG9Z_6RNhVxLhLU29kRCgzMgDNSm-_GFg96xLqk1-bs,19838
8
- sqlframe/base/dataframe.py,sha256=OgEUlDI5Y4rWrVngW5LttCUMC40WR-Pyr5af6aqbMNU,84028
9
- sqlframe/base/decorators.py,sha256=ms-CvDOIW3T8IVB9VqDmLwAiaEsqXLYRXEqVQaxktiM,1890
8
+ sqlframe/base/dataframe.py,sha256=YaXf2rixLRz8CIqyH__XZNz6zhtW3cLwJcHMezUv_BU,84138
9
+ sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
10
10
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
11
11
  sqlframe/base/function_alternatives.py,sha256=Bs1bwl25fN3Yy9rb4GnUWBGunQ1C_yelkb2yV9DSZIY,53918
12
- sqlframe/base/functions.py,sha256=mazXkUuyvs_8YWD_ssesDXPO0TZ2sIjvUsaeUpIeNE0,224343
13
- sqlframe/base/group.py,sha256=4R9sOZm4ZRlTfShq2j3_HQOiL_Tj1bYkouenYsgnlII,4115
12
+ sqlframe/base/functions.py,sha256=Rc9GfD3tIDg73FPqBAgqIDlrxH-A1c-ZWO76bcViX3w,224169
13
+ sqlframe/base/group.py,sha256=OY4w1WRsCqLgW-Pi7DjF63zbbxSLISCF3qjAbzI2CQ4,4283
14
14
  sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
15
15
  sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
16
16
  sqlframe/base/readerwriter.py,sha256=Nb2VJ_HBmLQp5mK8JhnFooZh2ydAaboCAFVPb-4MNX4,31241
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
130
130
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
131
131
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
132
132
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
133
- sqlframe-3.27.1.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
134
- sqlframe-3.27.1.dist-info/METADATA,sha256=WxKSgFqAdUeUmo7Jtln6JLRZsz950oGjDzM7uyFJUig,8971
135
- sqlframe-3.27.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
136
- sqlframe-3.27.1.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
137
- sqlframe-3.27.1.dist-info/RECORD,,
133
+ sqlframe-3.28.1.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
134
+ sqlframe-3.28.1.dist-info/METADATA,sha256=Uj6Y7-cFVwC36CpmSaZLn3Ual76hfPkqr2jjF7TqFLg,8971
135
+ sqlframe-3.28.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
136
+ sqlframe-3.28.1.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
137
+ sqlframe-3.28.1.dist-info/RECORD,,