sqlframe 3.27.1__py3-none-any.whl → 3.28.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/dataframe.py +3 -1
- sqlframe/base/decorators.py +20 -13
- sqlframe/base/functions.py +2 -5
- sqlframe/base/group.py +7 -3
- {sqlframe-3.27.1.dist-info → sqlframe-3.28.1.dist-info}/METADATA +2 -2
- {sqlframe-3.27.1.dist-info → sqlframe-3.28.1.dist-info}/RECORD +10 -10
- {sqlframe-3.27.1.dist-info → sqlframe-3.28.1.dist-info}/LICENSE +0 -0
- {sqlframe-3.27.1.dist-info → sqlframe-3.28.1.dist-info}/WHEEL +0 -0
- {sqlframe-3.27.1.dist-info → sqlframe-3.28.1.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/dataframe.py
CHANGED
@@ -910,6 +910,8 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
910
910
|
|
911
911
|
@operation(Operation.GROUP_BY)
|
912
912
|
def groupBy(self, *cols, **kwargs) -> GROUP_DATA:
|
913
|
+
if cols and isinstance(cols[0], list):
|
914
|
+
cols = cols[0] # type: ignore
|
913
915
|
columns = self._ensure_and_normalize_cols(cols)
|
914
916
|
return self._group_data(self, columns, self.last_op)
|
915
917
|
|
@@ -1084,7 +1086,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1084
1086
|
for left_col, right_col in join_column_pairs
|
1085
1087
|
]
|
1086
1088
|
# To match spark behavior only the join clause gets deduplicated and it gets put in the front of the column list
|
1087
|
-
select_column_names = [
|
1089
|
+
select_column_names: list[str | Column] = [
|
1088
1090
|
(
|
1089
1091
|
column.alias_or_name
|
1090
1092
|
if not isinstance(column.expression.this, exp.Star)
|
sqlframe/base/decorators.py
CHANGED
@@ -1,19 +1,23 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
import functools
|
3
2
|
import re
|
4
3
|
import typing as t
|
5
4
|
|
6
5
|
from sqlglot import exp
|
7
6
|
from sqlglot.helper import ensure_list
|
7
|
+
from typing_extensions import ParamSpec
|
8
8
|
|
9
9
|
from sqlframe.base.column import Column
|
10
10
|
|
11
|
-
|
11
|
+
P = ParamSpec("P")
|
12
|
+
T = t.TypeVar("T")
|
12
13
|
|
13
14
|
|
14
|
-
def func_metadata(
|
15
|
-
|
16
|
-
|
15
|
+
def func_metadata(
|
16
|
+
unsupported_engines: t.Optional[t.Union[str, t.List[str]]] = None,
|
17
|
+
) -> t.Callable[[t.Callable[P, T]], t.Callable[P, T]]:
|
18
|
+
def _metadata(func: t.Callable[P, T]) -> t.Callable[P, T]:
|
19
|
+
@functools.wraps(func)
|
20
|
+
def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
|
17
21
|
funcs_to_not_auto_alias = [
|
18
22
|
"posexplode",
|
19
23
|
"explode_outer",
|
@@ -34,14 +38,17 @@ def func_metadata(unsupported_engines: t.Optional[t.Union[str, t.List[str]]] = N
|
|
34
38
|
and not isinstance(result.expression, exp.Alias)
|
35
39
|
and func.__name__ not in funcs_to_not_auto_alias
|
36
40
|
):
|
37
|
-
col_name =
|
38
|
-
|
39
|
-
|
41
|
+
col_name = ""
|
42
|
+
col_name_exp: t.Optional[exp.Expression] = result.column_expression.find(
|
43
|
+
exp.Identifier
|
44
|
+
)
|
45
|
+
if col_name_exp:
|
46
|
+
col_name = col_name_exp.name
|
40
47
|
else:
|
41
|
-
|
42
|
-
if
|
43
|
-
col_name =
|
44
|
-
alias_name = f"{func.__name__}__{col_name
|
48
|
+
col_name_exp = result.column_expression.find(exp.Literal)
|
49
|
+
if col_name_exp:
|
50
|
+
col_name = col_name_exp.this
|
51
|
+
alias_name = f"{func.__name__}__{col_name}__"
|
45
52
|
# BigQuery has restrictions on alias names so we constrain it to alphanumeric characters and underscores
|
46
53
|
return result.alias(re.sub(r"\W", "_", alias_name)) # type: ignore
|
47
54
|
return result
|
sqlframe/base/functions.py
CHANGED
@@ -3481,12 +3481,9 @@ def hll_union(
|
|
3481
3481
|
+------------+
|
3482
3482
|
"""
|
3483
3483
|
if allowDifferentLgConfigK is not None:
|
3484
|
-
|
3485
|
-
lit(allowDifferentLgConfigK)
|
3486
|
-
if isinstance(allowDifferentLgConfigK, bool)
|
3487
|
-
else allowDifferentLgConfigK
|
3484
|
+
return Column.invoke_anonymous_function(
|
3485
|
+
col1, "hll_union", col2, lit(allowDifferentLgConfigK)
|
3488
3486
|
)
|
3489
|
-
return Column.invoke_anonymous_function(col1, "hll_union", col2, allowDifferentLgConfigK) # type: ignore
|
3490
3487
|
else:
|
3491
3488
|
return Column.invoke_anonymous_function(col1, "hll_union", col2)
|
3492
3489
|
|
sqlframe/base/group.py
CHANGED
@@ -47,7 +47,10 @@ class _BaseGroupedData(t.Generic[DF]):
|
|
47
47
|
from sqlframe.base.column import Column
|
48
48
|
|
49
49
|
columns = (
|
50
|
-
[
|
50
|
+
[
|
51
|
+
self._get_function_applied_columns(agg_func, (column_name,))[0]
|
52
|
+
for column_name, agg_func in exprs[0].items()
|
53
|
+
]
|
51
54
|
if isinstance(exprs[0], dict)
|
52
55
|
else exprs
|
53
56
|
)
|
@@ -55,7 +58,8 @@ class _BaseGroupedData(t.Generic[DF]):
|
|
55
58
|
|
56
59
|
if not self.group_by_cols or not isinstance(self.group_by_cols[0], (list, tuple, set)):
|
57
60
|
expression = self._df.expression.group_by(
|
58
|
-
|
61
|
+
# Use column_expression for group by to avoid alias in group by
|
62
|
+
*[x.column_expression for x in self.group_by_cols] # type: ignore
|
59
63
|
).select(*[x.expression for x in self.group_by_cols + cols], append=False) # type: ignore
|
60
64
|
group_by_cols = self.group_by_cols
|
61
65
|
else:
|
@@ -66,7 +70,7 @@ class _BaseGroupedData(t.Generic[DF]):
|
|
66
70
|
group_by_cols = []
|
67
71
|
for grouping_set in self.group_by_cols:
|
68
72
|
all_grouping_sets.append(
|
69
|
-
exp.Tuple(expressions=[x.
|
73
|
+
exp.Tuple(expressions=[x.column_expression for x in grouping_set]) # type: ignore
|
70
74
|
)
|
71
75
|
group_by_cols.extend(grouping_set) # type: ignore
|
72
76
|
group_by_cols = list(dict.fromkeys(group_by_cols))
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sqlframe
|
3
|
-
Version: 3.27.1
|
3
|
+
Version: 3.28.1
|
4
4
|
Summary: Turning PySpark Into a Universal DataFrame API
|
5
5
|
Home-page: https://github.com/eakmanrq/sqlframe
|
6
6
|
Author: Ryan Eakman
|
@@ -17,7 +17,7 @@ Requires-Python: >=3.9
|
|
17
17
|
Description-Content-Type: text/markdown
|
18
18
|
License-File: LICENSE
|
19
19
|
Requires-Dist: prettytable <4
|
20
|
-
Requires-Dist: sqlglot <26.
|
20
|
+
Requires-Dist: sqlglot <26.13,>=24.0.0
|
21
21
|
Requires-Dist: typing-extensions
|
22
22
|
Provides-Extra: bigquery
|
23
23
|
Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
|
@@ -1,16 +1,16 @@
|
|
1
1
|
sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
|
2
|
-
sqlframe/_version.py,sha256=
|
2
|
+
sqlframe/_version.py,sha256=A-DozXRgJ1GLBl_eHEhQbTd1M98mV_YRq5bK5pd8Y5s,513
|
3
3
|
sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
|
4
4
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
6
6
|
sqlframe/base/catalog.py,sha256=ZuU_qmt4yjSoTYgecSGnOhitOdh3rJbGCUjnUBp5mlc,38564
|
7
7
|
sqlframe/base/column.py,sha256=AG9Z_6RNhVxLhLU29kRCgzMgDNSm-_GFg96xLqk1-bs,19838
|
8
|
-
sqlframe/base/dataframe.py,sha256=
|
9
|
-
sqlframe/base/decorators.py,sha256=
|
8
|
+
sqlframe/base/dataframe.py,sha256=YaXf2rixLRz8CIqyH__XZNz6zhtW3cLwJcHMezUv_BU,84138
|
9
|
+
sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
|
10
10
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
11
11
|
sqlframe/base/function_alternatives.py,sha256=Bs1bwl25fN3Yy9rb4GnUWBGunQ1C_yelkb2yV9DSZIY,53918
|
12
|
-
sqlframe/base/functions.py,sha256=
|
13
|
-
sqlframe/base/group.py,sha256=
|
12
|
+
sqlframe/base/functions.py,sha256=Rc9GfD3tIDg73FPqBAgqIDlrxH-A1c-ZWO76bcViX3w,224169
|
13
|
+
sqlframe/base/group.py,sha256=OY4w1WRsCqLgW-Pi7DjF63zbbxSLISCF3qjAbzI2CQ4,4283
|
14
14
|
sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
|
15
15
|
sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
|
16
16
|
sqlframe/base/readerwriter.py,sha256=Nb2VJ_HBmLQp5mK8JhnFooZh2ydAaboCAFVPb-4MNX4,31241
|
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
130
130
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
131
131
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
132
132
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
133
|
-
sqlframe-3.
|
134
|
-
sqlframe-3.
|
135
|
-
sqlframe-3.
|
136
|
-
sqlframe-3.
|
137
|
-
sqlframe-3.
|
133
|
+
sqlframe-3.28.1.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
134
|
+
sqlframe-3.28.1.dist-info/METADATA,sha256=Uj6Y7-cFVwC36CpmSaZLn3Ual76hfPkqr2jjF7TqFLg,8971
|
135
|
+
sqlframe-3.28.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
136
|
+
sqlframe-3.28.1.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
137
|
+
sqlframe-3.28.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|