sqlframe 3.21.0__py3-none-any.whl → 3.21.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.21.0'
16
- __version_tuple__ = version_tuple = (3, 21, 0)
15
+ __version__ = version = '3.21.1'
16
+ __version_tuple__ = version_tuple = (3, 21, 1)
@@ -296,6 +296,12 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
296
296
 
297
297
  @property
298
298
  def columns(self) -> t.List[str]:
299
+ expression_display_names = self.expression.copy()
300
+ self._set_display_names(expression_display_names)
301
+ return expression_display_names.named_selects
302
+
303
+ @property
304
+ def _columns(self) -> t.List[str]:
299
305
  return self.expression.named_selects
300
306
 
301
307
  @property
@@ -611,6 +617,18 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
611
617
  }
612
618
  self.display_name_mapping.update(zipped)
613
619
 
620
+ def _set_display_names(self, select_expression: exp.Select) -> None:
621
+ for index, column in enumerate(select_expression.expressions):
622
+ column_name = quote_preserving_alias_or_name(column)
623
+ if column_name in self.display_name_mapping:
624
+ display_name_identifier = exp.to_identifier(
625
+ self.display_name_mapping[column_name], quoted=True
626
+ )
627
+ display_name_identifier._meta = {"case_sensitive": True, **(column._meta or {})}
628
+ select_expression.expressions[index] = exp.alias_(
629
+ column.unalias(), display_name_identifier, quoted=True
630
+ )
631
+
614
632
  def _get_expressions(
615
633
  self,
616
634
  optimize: bool = True,
@@ -631,16 +649,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
631
649
  select_expression = select_expression.transform(
632
650
  replace_id_value, replacement_mapping
633
651
  ).assert_is(exp.Select)
634
- for index, column in enumerate(select_expression.expressions):
635
- column_name = quote_preserving_alias_or_name(column)
636
- if column_name in self.display_name_mapping:
637
- display_name_identifier = exp.to_identifier(
638
- self.display_name_mapping[column_name], quoted=True
639
- )
640
- display_name_identifier._meta = {"case_sensitive": True, **(column._meta or {})}
641
- select_expression.expressions[index] = exp.alias_(
642
- column.unalias(), display_name_identifier, quoted=True
643
- )
652
+ self._set_display_names(select_expression)
644
653
  if optimize:
645
654
  select_expression = t.cast(
646
655
  exp.Select,
@@ -1158,8 +1167,8 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1158
1167
 
1159
1168
  @operation(Operation.FROM)
1160
1169
  def unionByName(self, other: Self, allowMissingColumns: bool = False) -> Self:
1161
- l_columns = self.columns
1162
- r_columns = other.columns
1170
+ l_columns = self._columns
1171
+ r_columns = other._columns
1163
1172
  if not allowMissingColumns:
1164
1173
  l_expressions = l_columns
1165
1174
  r_expressions = l_columns
@@ -1619,9 +1628,9 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1619
1628
  | 16| Bob|
1620
1629
  +---+-----+
1621
1630
  """
1622
- if len(cols) != len(self.columns):
1631
+ if len(cols) != len(self._columns):
1623
1632
  raise ValueError(
1624
- f"Number of column names does not match number of columns: {len(cols)} != {len(self.columns)}"
1633
+ f"Number of column names does not match number of columns: {len(cols)} != {len(self._columns)}"
1625
1634
  )
1626
1635
  expression = self.expression.copy()
1627
1636
  expression = expression.select(
@@ -193,10 +193,6 @@ def factorial_ensure_int(col: ColumnOrName) -> Column:
193
193
  return Column.invoke_anonymous_function(col_func(col).cast("integer"), "FACTORIAL")
194
194
 
195
195
 
196
- def skewness_from_skew(col: ColumnOrName) -> Column:
197
- return Column.invoke_anonymous_function(col, "SKEW")
198
-
199
-
200
196
  def isnan_using_equal(col: ColumnOrName) -> Column:
201
197
  lit = get_func_from_session("lit")
202
198
  return Column(
@@ -486,14 +486,32 @@ def var_pop(col: ColumnOrName) -> Column:
486
486
 
487
487
  @meta(unsupported_engines=["bigquery", "postgres"])
488
488
  def skewness(col: ColumnOrName) -> Column:
489
- from sqlframe.base.function_alternatives import skewness_from_skew
490
-
491
489
  session = _get_session()
492
490
 
491
+ func_name = "SKEWNESS"
492
+
493
493
  if session._is_snowflake:
494
- return skewness_from_skew(col)
494
+ func_name = "SKEW"
495
+
496
+ if session._is_duckdb or session._is_snowflake:
497
+ when_func = get_func_from_session("when")
498
+ count_func = get_func_from_session("count")
499
+ count_star = count_func("*")
500
+ lit_func = get_func_from_session("lit")
501
+ sqrt_func = get_func_from_session("sqrt")
502
+ col = Column.ensure_col(col)
503
+ return (
504
+ when_func(count_star == lit_func(0), lit_func(None))
505
+ .when(count_star == lit_func(1), lit_func(float("nan")))
506
+ .when(count_star == lit_func(2), lit_func(0.0))
507
+ .otherwise(
508
+ Column.invoke_anonymous_function(col, func_name)
509
+ * (count_star - lit_func(2))
510
+ / (sqrt_func(count_star * (count_star - lit_func(1))))
511
+ )
512
+ )
495
513
 
496
- return Column.invoke_anonymous_function(col, "SKEWNESS")
514
+ return Column.invoke_anonymous_function(col, func_name)
497
515
 
498
516
 
499
517
  @meta(unsupported_engines=["bigquery", "postgres"])
sqlframe/base/util.py CHANGED
@@ -97,12 +97,8 @@ def get_column_mapping_from_schema_input(
97
97
  else:
98
98
  value = {x.strip(): None for x in schema}
99
99
  return {
100
- exp.to_column(k).sql(dialect=dialect): exp.DataType.build(v, dialect=dialect)
101
- if v is not None
102
- else v
103
- for k, v in value.items()
100
+ k: exp.DataType.build(v, dialect=dialect) if v is not None else v for k, v in value.items()
104
101
  }
105
- # return {x.strip(): None for x in schema} # type: ignore
106
102
 
107
103
 
108
104
  def get_tables_from_expression_with_join(expression: exp.Select) -> t.List[exp.Table]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.21.0
3
+ Version: 3.21.1
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -1,14 +1,14 @@
1
1
  sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
2
- sqlframe/_version.py,sha256=TJ7uVN2zVQAjIGbjv5aK_3Ly4C1owCoTivN0RRSBWsU,413
2
+ sqlframe/_version.py,sha256=fmhKf9XPZdwZdKpQ-ESJ_LGssm7Q8K_NJEGVKwXLGQM,413
3
3
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
5
5
  sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
6
6
  sqlframe/base/column.py,sha256=oHVwkSWABO3ZlAbgBShsxSSlgbI06BOup5XJrRhgqJI,18097
7
- sqlframe/base/dataframe.py,sha256=TGIU6VMjeDS1VxlC35XcPCmpNBPNFnTxl2IaatKzR-4,83590
7
+ sqlframe/base/dataframe.py,sha256=FOgLdCpscLsBntkRvutcgSVqXqMgXo9DYa892mXu00E,83907
8
8
  sqlframe/base/decorators.py,sha256=ms-CvDOIW3T8IVB9VqDmLwAiaEsqXLYRXEqVQaxktiM,1890
9
9
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
10
- sqlframe/base/function_alternatives.py,sha256=NV31IaEhVYmfUSWetAEFISAvLzs2DxQ7bp-iMNgj0hQ,53786
11
- sqlframe/base/functions.py,sha256=nfDf2oKoBq2hrutTfuVHKmGvkm_X_ZvhfnFPv1rn0oU,222350
10
+ sqlframe/base/function_alternatives.py,sha256=KFkEm0aIHzajvQmiPZnzTLh-Ud9wjeg4lJ4Rk0vk-YU,53674
11
+ sqlframe/base/functions.py,sha256=jfLgboldiTB9CPkoZMtKUAwx6XSvFnEOIpCZQfoEJJU,223060
12
12
  sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
13
13
  sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
14
14
  sqlframe/base/operations.py,sha256=xSPw74e59wYvNd6U1AlwziNCTG6Aftrbl4SybN9u9VE,3450
@@ -18,7 +18,7 @@ sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
18
18
  sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
19
19
  sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
20
20
  sqlframe/base/udf.py,sha256=O6hMhBUy9NVv-mhJRtfFhXTIa_-Z8Y_FkmmuOHu0l90,1117
21
- sqlframe/base/util.py,sha256=rdnH3Kg6gZVT3DehU_ZHjfum79vc-I5W_Il6OiCtWF4,15284
21
+ sqlframe/base/util.py,sha256=_s2M-qHzTLgyGu1v8laRHJorUpUO6-fr3kk7CsvcuXw,15161
22
22
  sqlframe/base/window.py,sha256=8hOv-ignPPIsZA9FzvYzcLE9J_glalVaYjIAUdRUX3o,4943
23
23
  sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  sqlframe/base/mixins/catalog_mixins.py,sha256=9tn0mK8oPoqIIjNItystD5tdBMdK9YpkxTG7G9KQl8k,18619
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
129
129
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
130
130
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
131
131
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
132
- sqlframe-3.21.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
133
- sqlframe-3.21.0.dist-info/METADATA,sha256=WijzmVzIYe83YaO5ZzqLG0mVl75PjYhhksrSMbDCCGw,8970
134
- sqlframe-3.21.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
135
- sqlframe-3.21.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
136
- sqlframe-3.21.0.dist-info/RECORD,,
132
+ sqlframe-3.21.1.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
133
+ sqlframe-3.21.1.dist-info/METADATA,sha256=AauznGD-zSbh2cqT63w2MIrg_-0SlewyyRMNElL5O2I,8970
134
+ sqlframe-3.21.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
135
+ sqlframe-3.21.1.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
136
+ sqlframe-3.21.1.dist-info/RECORD,,