sqlframe 3.19.0__py3-none-any.whl → 3.21.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.19.0'
16
- __version_tuple__ = version_tuple = (3, 19, 0)
15
+ __version__ = version = '3.21.0'
16
+ __version_tuple__ = version_tuple = (3, 21, 0)
@@ -1718,6 +1718,114 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1718
1718
  grouping_columns.extend([list(x) for x in itertools.combinations(columns, i)])
1719
1719
  return self._group_data(self, grouping_columns, self.last_op)
1720
1720
 
1721
+ @operation(Operation.SELECT)
1722
+ def unpivot(
1723
+ self,
1724
+ ids: t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]],
1725
+ values: t.Optional[t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]]],
1726
+ variableColumnName: str,
1727
+ valueColumnName: str,
1728
+ ) -> Self:
1729
+ """
1730
+ Unpivot a DataFrame from wide format to long format, optionally leaving
1731
+ identifier columns set. This is the reverse to `groupBy(...).pivot(...).agg(...)`,
1732
+ except for the aggregation, which cannot be reversed.
1733
+
1734
+ This function is useful to massage a DataFrame into a format where some
1735
+ columns are identifier columns ("ids"), while all other columns ("values")
1736
+ are "unpivoted" to the rows, leaving just two non-id columns, named as given
1737
+ by `variableColumnName` and `valueColumnName`.
1738
+
1739
+ When no "id" columns are given, the unpivoted DataFrame consists of only the
1740
+ "variable" and "value" columns.
1741
+
1742
+ The `values` columns must not be empty so at least one value must be given to be unpivoted.
1743
+ When `values` is `None`, all non-id columns will be unpivoted.
1744
+
1745
+ All "value" columns must share a least common data type. Unless they are the same data type,
1746
+ all "value" columns are cast to the nearest common data type. For instance, types
1747
+ `IntegerType` and `LongType` are cast to `LongType`, while `IntegerType` and `StringType`
1748
+ do not have a common data type and `unpivot` fails.
1749
+
1750
+ .. versionadded:: 3.4.0
1751
+
1752
+ Parameters
1753
+ ----------
1754
+ ids : str, Column, tuple, list
1755
+ Column(s) to use as identifiers. Can be a single column or column name,
1756
+ or a list or tuple for multiple columns.
1757
+ values : str, Column, tuple, list, optional
1758
+ Column(s) to unpivot. Can be a single column or column name, or a list or tuple
1759
+ for multiple columns. If specified, must not be empty. If not specified, uses all
1760
+ columns that are not set as `ids`.
1761
+ variableColumnName : str
1762
+ Name of the variable column.
1763
+ valueColumnName : str
1764
+ Name of the value column.
1765
+
1766
+ Returns
1767
+ -------
1768
+ :class:`DataFrame`
1769
+ Unpivoted DataFrame.
1770
+
1771
+ Notes
1772
+ -----
1773
+ Supports Spark Connect.
1774
+
1775
+ Examples
1776
+ --------
1777
+ >>> df = spark.createDataFrame(
1778
+ ... [(1, 11, 1.1), (2, 12, 1.2)],
1779
+ ... ["id", "int", "double"],
1780
+ ... )
1781
+ >>> df.show()
1782
+ +---+---+------+
1783
+ | id|int|double|
1784
+ +---+---+------+
1785
+ | 1| 11| 1.1|
1786
+ | 2| 12| 1.2|
1787
+ +---+---+------+
1788
+
1789
+ >>> df.unpivot("id", ["int", "double"], "var", "val").show()
1790
+ +---+------+----+
1791
+ | id| var| val|
1792
+ +---+------+----+
1793
+ | 1| int|11.0|
1794
+ | 1|double| 1.1|
1795
+ | 2| int|12.0|
1796
+ | 2|double| 1.2|
1797
+ +---+------+----+
1798
+
1799
+ See Also
1800
+ --------
1801
+ DataFrame.melt
1802
+ """
1803
+ from sqlframe.base import functions as F
1804
+
1805
+ id_columns = self._ensure_and_normalize_cols(ids)
1806
+ if not values:
1807
+ outer_selects = self._get_outer_select_columns(self.expression)
1808
+ values = [
1809
+ column
1810
+ for column in outer_selects
1811
+ if column.alias_or_name not in {x.alias_or_name for x in id_columns}
1812
+ ]
1813
+ value_columns = self._ensure_and_normalize_cols(values)
1814
+
1815
+ df = self._convert_leaf_to_cte()
1816
+ selects = []
1817
+ for value in value_columns:
1818
+ selects.append(
1819
+ exp.select(
1820
+ *[x.column_expression for x in id_columns],
1821
+ F.lit(value.alias_or_name).alias(variableColumnName).expression,
1822
+ value.alias(valueColumnName).expression,
1823
+ ).from_(df.expression.ctes[-1].alias_or_name)
1824
+ )
1825
+ unioned_expression = functools.reduce(lambda x, y: x.union(y, distinct=False), selects) # type: ignore
1826
+ final_expression = self._add_ctes_to_expression(unioned_expression, df.expression.ctes)
1827
+ return self.copy(expression=final_expression)._convert_leaf_to_cte()
1828
+
1721
1829
  def collect(self) -> t.List[Row]:
1722
1830
  return self._collect()
1723
1831
 
@@ -3133,20 +3133,14 @@ def datepart(field: ColumnOrName, source: ColumnOrName) -> Column:
3133
3133
 
3134
3134
  @meta(unsupported_engines=["bigquery", "postgres", "snowflake"])
3135
3135
  def day(col: ColumnOrName) -> Column:
3136
- from sqlframe.base.function_alternatives import day_with_try_to_timestamp
3137
-
3138
3136
  session = _get_session()
3139
3137
 
3140
3138
  if session._is_duckdb:
3141
3139
  try_to_timestamp = get_func_from_session("try_to_timestamp")
3142
3140
  to_date = get_func_from_session("to_date")
3143
- when = get_func_from_session("when")
3144
3141
  _is_string = get_func_from_session("_is_string")
3145
3142
  coalesce = get_func_from_session("coalesce")
3146
- col = when(
3147
- _is_string(col),
3148
- coalesce(try_to_timestamp(col), to_date(col)),
3149
- ).otherwise(col)
3143
+ col = coalesce(try_to_timestamp(Column.ensure_col(col).cast("VARCHAR")), to_date(col))
3150
3144
 
3151
3145
  return Column.invoke_expression_over_column(col, expression.Day)
3152
3146
 
sqlframe/base/util.py CHANGED
@@ -316,6 +316,7 @@ def sqlglot_to_spark(sqlglot_dtype: exp.DataType) -> types.DataType:
316
316
  exp.DataType.Type.INT: types.IntegerType,
317
317
  exp.DataType.Type.BIGINT: types.LongType,
318
318
  exp.DataType.Type.SMALLINT: types.ShortType,
319
+ exp.DataType.Type.TINYINT: types.ByteType,
319
320
  exp.DataType.Type.FLOAT: types.FloatType,
320
321
  exp.DataType.Type.DOUBLE: types.DoubleType,
321
322
  exp.DataType.Type.DECIMAL: types.DecimalType,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.19.0
3
+ Version: 3.21.0
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -1,14 +1,14 @@
1
1
  sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
2
- sqlframe/_version.py,sha256=bRGLbmtauY86O6qq58KRvSDdCcwrGM24X-Zm0Elw0sU,413
2
+ sqlframe/_version.py,sha256=TJ7uVN2zVQAjIGbjv5aK_3Ly4C1owCoTivN0RRSBWsU,413
3
3
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
5
5
  sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
6
6
  sqlframe/base/column.py,sha256=oHVwkSWABO3ZlAbgBShsxSSlgbI06BOup5XJrRhgqJI,18097
7
- sqlframe/base/dataframe.py,sha256=mKXbIKYiKH5mh6qj0Dg7L_znmCL85q9kHlmHtCW4kJ4,79352
7
+ sqlframe/base/dataframe.py,sha256=TGIU6VMjeDS1VxlC35XcPCmpNBPNFnTxl2IaatKzR-4,83590
8
8
  sqlframe/base/decorators.py,sha256=ms-CvDOIW3T8IVB9VqDmLwAiaEsqXLYRXEqVQaxktiM,1890
9
9
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
10
10
  sqlframe/base/function_alternatives.py,sha256=NV31IaEhVYmfUSWetAEFISAvLzs2DxQ7bp-iMNgj0hQ,53786
11
- sqlframe/base/functions.py,sha256=j_Sh4qIcR-2lesJT_2TzBlTIM46os35AcmMuwBm86DE,222512
11
+ sqlframe/base/functions.py,sha256=nfDf2oKoBq2hrutTfuVHKmGvkm_X_ZvhfnFPv1rn0oU,222350
12
12
  sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
13
13
  sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
14
14
  sqlframe/base/operations.py,sha256=xSPw74e59wYvNd6U1AlwziNCTG6Aftrbl4SybN9u9VE,3450
@@ -18,7 +18,7 @@ sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
18
18
  sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
19
19
  sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
20
20
  sqlframe/base/udf.py,sha256=O6hMhBUy9NVv-mhJRtfFhXTIa_-Z8Y_FkmmuOHu0l90,1117
21
- sqlframe/base/util.py,sha256=ggiGdegJ-Re-xk6PLB5tt6yAW9S7pg3xsrFm0xU3XCc,15233
21
+ sqlframe/base/util.py,sha256=rdnH3Kg6gZVT3DehU_ZHjfum79vc-I5W_Il6OiCtWF4,15284
22
22
  sqlframe/base/window.py,sha256=8hOv-ignPPIsZA9FzvYzcLE9J_glalVaYjIAUdRUX3o,4943
23
23
  sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  sqlframe/base/mixins/catalog_mixins.py,sha256=9tn0mK8oPoqIIjNItystD5tdBMdK9YpkxTG7G9KQl8k,18619
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
129
129
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
130
130
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
131
131
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
132
- sqlframe-3.19.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
133
- sqlframe-3.19.0.dist-info/METADATA,sha256=t_G87pTEVYezUc-A5TIumPN-sHNsgTjW8vNgZ4Jvjpw,8970
134
- sqlframe-3.19.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
135
- sqlframe-3.19.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
136
- sqlframe-3.19.0.dist-info/RECORD,,
132
+ sqlframe-3.21.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
133
+ sqlframe-3.21.0.dist-info/METADATA,sha256=WijzmVzIYe83YaO5ZzqLG0mVl75PjYhhksrSMbDCCGw,8970
134
+ sqlframe-3.21.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
135
+ sqlframe-3.21.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
136
+ sqlframe-3.21.0.dist-info/RECORD,,