sqlframe 3.21.0__py3-none-any.whl → 3.21.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.21.0'
16
- __version_tuple__ = version_tuple = (3, 21, 0)
15
+ __version__ = version = '3.21.1'
16
+ __version_tuple__ = version_tuple = (3, 21, 1)
sqlframe/base/dataframe.py CHANGED
@@ -296,6 +296,12 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
296
296
 
297
297
  @property
298
298
  def columns(self) -> t.List[str]:
299
+ expression_display_names = self.expression.copy()
300
+ self._set_display_names(expression_display_names)
301
+ return expression_display_names.named_selects
302
+
303
+ @property
304
+ def _columns(self) -> t.List[str]:
299
305
  return self.expression.named_selects
300
306
 
301
307
  @property
@@ -611,6 +617,18 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
611
617
  }
612
618
  self.display_name_mapping.update(zipped)
613
619
 
620
+ def _set_display_names(self, select_expression: exp.Select) -> None:
621
+ for index, column in enumerate(select_expression.expressions):
622
+ column_name = quote_preserving_alias_or_name(column)
623
+ if column_name in self.display_name_mapping:
624
+ display_name_identifier = exp.to_identifier(
625
+ self.display_name_mapping[column_name], quoted=True
626
+ )
627
+ display_name_identifier._meta = {"case_sensitive": True, **(column._meta or {})}
628
+ select_expression.expressions[index] = exp.alias_(
629
+ column.unalias(), display_name_identifier, quoted=True
630
+ )
631
+
614
632
  def _get_expressions(
615
633
  self,
616
634
  optimize: bool = True,
@@ -631,16 +649,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
631
649
  select_expression = select_expression.transform(
632
650
  replace_id_value, replacement_mapping
633
651
  ).assert_is(exp.Select)
634
- for index, column in enumerate(select_expression.expressions):
635
- column_name = quote_preserving_alias_or_name(column)
636
- if column_name in self.display_name_mapping:
637
- display_name_identifier = exp.to_identifier(
638
- self.display_name_mapping[column_name], quoted=True
639
- )
640
- display_name_identifier._meta = {"case_sensitive": True, **(column._meta or {})}
641
- select_expression.expressions[index] = exp.alias_(
642
- column.unalias(), display_name_identifier, quoted=True
643
- )
652
+ self._set_display_names(select_expression)
644
653
  if optimize:
645
654
  select_expression = t.cast(
646
655
  exp.Select,
@@ -1158,8 +1167,8 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1158
1167
 
1159
1168
  @operation(Operation.FROM)
1160
1169
  def unionByName(self, other: Self, allowMissingColumns: bool = False) -> Self:
1161
- l_columns = self.columns
1162
- r_columns = other.columns
1170
+ l_columns = self._columns
1171
+ r_columns = other._columns
1163
1172
  if not allowMissingColumns:
1164
1173
  l_expressions = l_columns
1165
1174
  r_expressions = l_columns
@@ -1619,9 +1628,9 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1619
1628
  | 16| Bob|
1620
1629
  +---+-----+
1621
1630
  """
1622
- if len(cols) != len(self.columns):
1631
+ if len(cols) != len(self._columns):
1623
1632
  raise ValueError(
1624
- f"Number of column names does not match number of columns: {len(cols)} != {len(self.columns)}"
1633
+ f"Number of column names does not match number of columns: {len(cols)} != {len(self._columns)}"
1625
1634
  )
1626
1635
  expression = self.expression.copy()
1627
1636
  expression = expression.select(
sqlframe/base/function_alternatives.py CHANGED
@@ -193,10 +193,6 @@ def factorial_ensure_int(col: ColumnOrName) -> Column:
193
193
  return Column.invoke_anonymous_function(col_func(col).cast("integer"), "FACTORIAL")
194
194
 
195
195
 
196
- def skewness_from_skew(col: ColumnOrName) -> Column:
197
- return Column.invoke_anonymous_function(col, "SKEW")
198
-
199
-
200
196
  def isnan_using_equal(col: ColumnOrName) -> Column:
201
197
  lit = get_func_from_session("lit")
202
198
  return Column(
sqlframe/base/functions.py CHANGED
@@ -486,14 +486,32 @@ def var_pop(col: ColumnOrName) -> Column:
486
486
 
487
487
  @meta(unsupported_engines=["bigquery", "postgres"])
488
488
  def skewness(col: ColumnOrName) -> Column:
489
- from sqlframe.base.function_alternatives import skewness_from_skew
490
-
491
489
  session = _get_session()
492
490
 
491
+ func_name = "SKEWNESS"
492
+
493
493
  if session._is_snowflake:
494
- return skewness_from_skew(col)
494
+ func_name = "SKEW"
495
+
496
+ if session._is_duckdb or session._is_snowflake:
497
+ when_func = get_func_from_session("when")
498
+ count_func = get_func_from_session("count")
499
+ count_star = count_func("*")
500
+ lit_func = get_func_from_session("lit")
501
+ sqrt_func = get_func_from_session("sqrt")
502
+ col = Column.ensure_col(col)
503
+ return (
504
+ when_func(count_star == lit_func(0), lit_func(None))
505
+ .when(count_star == lit_func(1), lit_func(float("nan")))
506
+ .when(count_star == lit_func(2), lit_func(0.0))
507
+ .otherwise(
508
+ Column.invoke_anonymous_function(col, func_name)
509
+ * (count_star - lit_func(2))
510
+ / (sqrt_func(count_star * (count_star - lit_func(1))))
511
+ )
512
+ )
495
513
 
496
- return Column.invoke_anonymous_function(col, "SKEWNESS")
514
+ return Column.invoke_anonymous_function(col, func_name)
497
515
 
498
516
 
499
517
  @meta(unsupported_engines=["bigquery", "postgres"])
sqlframe/base/util.py CHANGED
@@ -97,12 +97,8 @@ def get_column_mapping_from_schema_input(
97
97
  else:
98
98
  value = {x.strip(): None for x in schema}
99
99
  return {
100
- exp.to_column(k).sql(dialect=dialect): exp.DataType.build(v, dialect=dialect)
101
- if v is not None
102
- else v
103
- for k, v in value.items()
100
+ k: exp.DataType.build(v, dialect=dialect) if v is not None else v for k, v in value.items()
104
101
  }
105
- # return {x.strip(): None for x in schema} # type: ignore
106
102
 
107
103
 
108
104
  def get_tables_from_expression_with_join(expression: exp.Select) -> t.List[exp.Table]:
sqlframe-3.21.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.21.0
3
+ Version: 3.21.1
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
sqlframe-3.21.1.dist-info/RECORD CHANGED
@@ -1,14 +1,14 @@
1
1
  sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
2
- sqlframe/_version.py,sha256=TJ7uVN2zVQAjIGbjv5aK_3Ly4C1owCoTivN0RRSBWsU,413
2
+ sqlframe/_version.py,sha256=fmhKf9XPZdwZdKpQ-ESJ_LGssm7Q8K_NJEGVKwXLGQM,413
3
3
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
5
5
  sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
6
6
  sqlframe/base/column.py,sha256=oHVwkSWABO3ZlAbgBShsxSSlgbI06BOup5XJrRhgqJI,18097
7
- sqlframe/base/dataframe.py,sha256=TGIU6VMjeDS1VxlC35XcPCmpNBPNFnTxl2IaatKzR-4,83590
7
+ sqlframe/base/dataframe.py,sha256=FOgLdCpscLsBntkRvutcgSVqXqMgXo9DYa892mXu00E,83907
8
8
  sqlframe/base/decorators.py,sha256=ms-CvDOIW3T8IVB9VqDmLwAiaEsqXLYRXEqVQaxktiM,1890
9
9
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
10
- sqlframe/base/function_alternatives.py,sha256=NV31IaEhVYmfUSWetAEFISAvLzs2DxQ7bp-iMNgj0hQ,53786
11
- sqlframe/base/functions.py,sha256=nfDf2oKoBq2hrutTfuVHKmGvkm_X_ZvhfnFPv1rn0oU,222350
10
+ sqlframe/base/function_alternatives.py,sha256=KFkEm0aIHzajvQmiPZnzTLh-Ud9wjeg4lJ4Rk0vk-YU,53674
11
+ sqlframe/base/functions.py,sha256=jfLgboldiTB9CPkoZMtKUAwx6XSvFnEOIpCZQfoEJJU,223060
12
12
  sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
13
13
  sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
14
14
  sqlframe/base/operations.py,sha256=xSPw74e59wYvNd6U1AlwziNCTG6Aftrbl4SybN9u9VE,3450
@@ -18,7 +18,7 @@ sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
18
18
  sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
19
19
  sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
20
20
  sqlframe/base/udf.py,sha256=O6hMhBUy9NVv-mhJRtfFhXTIa_-Z8Y_FkmmuOHu0l90,1117
21
- sqlframe/base/util.py,sha256=rdnH3Kg6gZVT3DehU_ZHjfum79vc-I5W_Il6OiCtWF4,15284
21
+ sqlframe/base/util.py,sha256=_s2M-qHzTLgyGu1v8laRHJorUpUO6-fr3kk7CsvcuXw,15161
22
22
  sqlframe/base/window.py,sha256=8hOv-ignPPIsZA9FzvYzcLE9J_glalVaYjIAUdRUX3o,4943
23
23
  sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  sqlframe/base/mixins/catalog_mixins.py,sha256=9tn0mK8oPoqIIjNItystD5tdBMdK9YpkxTG7G9KQl8k,18619
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
129
129
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
130
130
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
131
131
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
132
- sqlframe-3.21.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
133
- sqlframe-3.21.0.dist-info/METADATA,sha256=WijzmVzIYe83YaO5ZzqLG0mVl75PjYhhksrSMbDCCGw,8970
134
- sqlframe-3.21.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
135
- sqlframe-3.21.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
136
- sqlframe-3.21.0.dist-info/RECORD,,
132
+ sqlframe-3.21.1.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
133
+ sqlframe-3.21.1.dist-info/METADATA,sha256=AauznGD-zSbh2cqT63w2MIrg_-0SlewyyRMNElL5O2I,8970
134
+ sqlframe-3.21.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
135
+ sqlframe-3.21.1.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
136
+ sqlframe-3.21.1.dist-info/RECORD,,