sqlframe 3.21.0__py3-none-any.whl → 3.21.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/dataframe.py +23 -14
- sqlframe/base/function_alternatives.py +0 -4
- sqlframe/base/functions.py +22 -4
- sqlframe/base/util.py +1 -5
- {sqlframe-3.21.0.dist-info → sqlframe-3.21.1.dist-info}/METADATA +1 -1
- {sqlframe-3.21.0.dist-info → sqlframe-3.21.1.dist-info}/RECORD +10 -10
- {sqlframe-3.21.0.dist-info → sqlframe-3.21.1.dist-info}/LICENSE +0 -0
- {sqlframe-3.21.0.dist-info → sqlframe-3.21.1.dist-info}/WHEEL +0 -0
- {sqlframe-3.21.0.dist-info → sqlframe-3.21.1.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/dataframe.py
CHANGED
@@ -296,6 +296,12 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
296
296
|
|
297
297
|
@property
|
298
298
|
def columns(self) -> t.List[str]:
|
299
|
+
expression_display_names = self.expression.copy()
|
300
|
+
self._set_display_names(expression_display_names)
|
301
|
+
return expression_display_names.named_selects
|
302
|
+
|
303
|
+
@property
|
304
|
+
def _columns(self) -> t.List[str]:
|
299
305
|
return self.expression.named_selects
|
300
306
|
|
301
307
|
@property
|
@@ -611,6 +617,18 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
611
617
|
}
|
612
618
|
self.display_name_mapping.update(zipped)
|
613
619
|
|
620
|
+
def _set_display_names(self, select_expression: exp.Select) -> None:
|
621
|
+
for index, column in enumerate(select_expression.expressions):
|
622
|
+
column_name = quote_preserving_alias_or_name(column)
|
623
|
+
if column_name in self.display_name_mapping:
|
624
|
+
display_name_identifier = exp.to_identifier(
|
625
|
+
self.display_name_mapping[column_name], quoted=True
|
626
|
+
)
|
627
|
+
display_name_identifier._meta = {"case_sensitive": True, **(column._meta or {})}
|
628
|
+
select_expression.expressions[index] = exp.alias_(
|
629
|
+
column.unalias(), display_name_identifier, quoted=True
|
630
|
+
)
|
631
|
+
|
614
632
|
def _get_expressions(
|
615
633
|
self,
|
616
634
|
optimize: bool = True,
|
@@ -631,16 +649,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
631
649
|
select_expression = select_expression.transform(
|
632
650
|
replace_id_value, replacement_mapping
|
633
651
|
).assert_is(exp.Select)
|
634
|
-
for index, column in enumerate(select_expression.expressions):
|
635
|
-
column_name = quote_preserving_alias_or_name(column)
|
636
|
-
if column_name in self.display_name_mapping:
|
637
|
-
display_name_identifier = exp.to_identifier(
|
638
|
-
self.display_name_mapping[column_name], quoted=True
|
639
|
-
)
|
640
|
-
display_name_identifier._meta = {"case_sensitive": True, **(column._meta or {})}
|
641
|
-
select_expression.expressions[index] = exp.alias_(
|
642
|
-
column.unalias(), display_name_identifier, quoted=True
|
643
|
-
)
|
652
|
+
self._set_display_names(select_expression)
|
644
653
|
if optimize:
|
645
654
|
select_expression = t.cast(
|
646
655
|
exp.Select,
|
@@ -1158,8 +1167,8 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1158
1167
|
|
1159
1168
|
@operation(Operation.FROM)
|
1160
1169
|
def unionByName(self, other: Self, allowMissingColumns: bool = False) -> Self:
|
1161
|
-
l_columns = self.columns
|
1162
|
-
r_columns = other.columns
|
1170
|
+
l_columns = self._columns
|
1171
|
+
r_columns = other._columns
|
1163
1172
|
if not allowMissingColumns:
|
1164
1173
|
l_expressions = l_columns
|
1165
1174
|
r_expressions = l_columns
|
@@ -1619,9 +1628,9 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1619
1628
|
| 16| Bob|
|
1620
1629
|
+---+-----+
|
1621
1630
|
"""
|
1622
|
-
if len(cols) != len(self.columns):
|
1631
|
+
if len(cols) != len(self._columns):
|
1623
1632
|
raise ValueError(
|
1624
|
-
f"Number of column names does not match number of columns: {len(cols)} != {len(self.columns)}"
|
1633
|
+
f"Number of column names does not match number of columns: {len(cols)} != {len(self._columns)}"
|
1625
1634
|
)
|
1626
1635
|
expression = self.expression.copy()
|
1627
1636
|
expression = expression.select(
|
sqlframe/base/function_alternatives.py
CHANGED
@@ -193,10 +193,6 @@ def factorial_ensure_int(col: ColumnOrName) -> Column:
|
|
193
193
|
return Column.invoke_anonymous_function(col_func(col).cast("integer"), "FACTORIAL")
|
194
194
|
|
195
195
|
|
196
|
-
def skewness_from_skew(col: ColumnOrName) -> Column:
|
197
|
-
return Column.invoke_anonymous_function(col, "SKEW")
|
198
|
-
|
199
|
-
|
200
196
|
def isnan_using_equal(col: ColumnOrName) -> Column:
|
201
197
|
lit = get_func_from_session("lit")
|
202
198
|
return Column(
|
sqlframe/base/functions.py
CHANGED
@@ -486,14 +486,32 @@ def var_pop(col: ColumnOrName) -> Column:
|
|
486
486
|
|
487
487
|
@meta(unsupported_engines=["bigquery", "postgres"])
|
488
488
|
def skewness(col: ColumnOrName) -> Column:
|
489
|
-
from sqlframe.base.function_alternatives import skewness_from_skew
|
490
|
-
|
491
489
|
session = _get_session()
|
492
490
|
|
491
|
+
func_name = "SKEWNESS"
|
492
|
+
|
493
493
|
if session._is_snowflake:
|
494
|
-
return skewness_from_skew(col)
|
494
|
+
func_name = "SKEW"
|
495
|
+
|
496
|
+
if session._is_duckdb or session._is_snowflake:
|
497
|
+
when_func = get_func_from_session("when")
|
498
|
+
count_func = get_func_from_session("count")
|
499
|
+
count_star = count_func("*")
|
500
|
+
lit_func = get_func_from_session("lit")
|
501
|
+
sqrt_func = get_func_from_session("sqrt")
|
502
|
+
col = Column.ensure_col(col)
|
503
|
+
return (
|
504
|
+
when_func(count_star == lit_func(0), lit_func(None))
|
505
|
+
.when(count_star == lit_func(1), lit_func(float("nan")))
|
506
|
+
.when(count_star == lit_func(2), lit_func(0.0))
|
507
|
+
.otherwise(
|
508
|
+
Column.invoke_anonymous_function(col, func_name)
|
509
|
+
* (count_star - lit_func(2))
|
510
|
+
/ (sqrt_func(count_star * (count_star - lit_func(1))))
|
511
|
+
)
|
512
|
+
)
|
495
513
|
|
496
|
-
return Column.invoke_anonymous_function(col, "SKEWNESS")
|
514
|
+
return Column.invoke_anonymous_function(col, func_name)
|
497
515
|
|
498
516
|
|
499
517
|
@meta(unsupported_engines=["bigquery", "postgres"])
|
sqlframe/base/util.py
CHANGED
@@ -97,12 +97,8 @@ def get_column_mapping_from_schema_input(
|
|
97
97
|
else:
|
98
98
|
value = {x.strip(): None for x in schema}
|
99
99
|
return {
|
100
|
-
k: exp.DataType.build(v, dialect=dialect)
|
101
|
-
if v is not None
|
102
|
-
else v
|
103
|
-
for k, v in value.items()
|
100
|
+
k: exp.DataType.build(v, dialect=dialect) if v is not None else v for k, v in value.items()
|
104
101
|
}
|
105
|
-
# return {x.strip(): None for x in schema} # type: ignore
|
106
102
|
|
107
103
|
|
108
104
|
def get_tables_from_expression_with_join(expression: exp.Select) -> t.List[exp.Table]:
|
{sqlframe-3.21.0.dist-info → sqlframe-3.21.1.dist-info}/RECORD
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
|
2
|
-
sqlframe/_version.py,sha256=
|
2
|
+
sqlframe/_version.py,sha256=fmhKf9XPZdwZdKpQ-ESJ_LGssm7Q8K_NJEGVKwXLGQM,413
|
3
3
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
5
5
|
sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
|
6
6
|
sqlframe/base/column.py,sha256=oHVwkSWABO3ZlAbgBShsxSSlgbI06BOup5XJrRhgqJI,18097
|
7
|
-
sqlframe/base/dataframe.py,sha256=
|
7
|
+
sqlframe/base/dataframe.py,sha256=FOgLdCpscLsBntkRvutcgSVqXqMgXo9DYa892mXu00E,83907
|
8
8
|
sqlframe/base/decorators.py,sha256=ms-CvDOIW3T8IVB9VqDmLwAiaEsqXLYRXEqVQaxktiM,1890
|
9
9
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
10
|
-
sqlframe/base/function_alternatives.py,sha256=
|
11
|
-
sqlframe/base/functions.py,sha256=
|
10
|
+
sqlframe/base/function_alternatives.py,sha256=KFkEm0aIHzajvQmiPZnzTLh-Ud9wjeg4lJ4Rk0vk-YU,53674
|
11
|
+
sqlframe/base/functions.py,sha256=jfLgboldiTB9CPkoZMtKUAwx6XSvFnEOIpCZQfoEJJU,223060
|
12
12
|
sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
|
13
13
|
sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
|
14
14
|
sqlframe/base/operations.py,sha256=xSPw74e59wYvNd6U1AlwziNCTG6Aftrbl4SybN9u9VE,3450
|
@@ -18,7 +18,7 @@ sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
|
|
18
18
|
sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
|
19
19
|
sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
|
20
20
|
sqlframe/base/udf.py,sha256=O6hMhBUy9NVv-mhJRtfFhXTIa_-Z8Y_FkmmuOHu0l90,1117
|
21
|
-
sqlframe/base/util.py,sha256=
|
21
|
+
sqlframe/base/util.py,sha256=_s2M-qHzTLgyGu1v8laRHJorUpUO6-fr3kk7CsvcuXw,15161
|
22
22
|
sqlframe/base/window.py,sha256=8hOv-ignPPIsZA9FzvYzcLE9J_glalVaYjIAUdRUX3o,4943
|
23
23
|
sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
24
24
|
sqlframe/base/mixins/catalog_mixins.py,sha256=9tn0mK8oPoqIIjNItystD5tdBMdK9YpkxTG7G9KQl8k,18619
|
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
129
129
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
130
130
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
131
131
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
132
|
-
sqlframe-3.21.
|
133
|
-
sqlframe-3.21.
|
134
|
-
sqlframe-3.21.
|
135
|
-
sqlframe-3.21.
|
136
|
-
sqlframe-3.21.
|
132
|
+
sqlframe-3.21.1.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
133
|
+
sqlframe-3.21.1.dist-info/METADATA,sha256=AauznGD-zSbh2cqT63w2MIrg_-0SlewyyRMNElL5O2I,8970
|
134
|
+
sqlframe-3.21.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
135
|
+
sqlframe-3.21.1.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
136
|
+
sqlframe-3.21.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|