sqlframe 3.20.0__py3-none-any.whl → 3.21.0__py3-none-any.whl

This diff compares publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE
 
- __version__ = version = '3.20.0'
- __version_tuple__ = version_tuple = (3, 20, 0)
+ __version__ = version = '3.21.0'
+ __version_tuple__ = version_tuple = (3, 21, 0)
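
Since `__version__` and `version_tuple` are plain module-level attributes (as the hunk above shows), the installed build can be confirmed by reading them directly — a minimal sketch that relies only on the names visible in this diff:

    from sqlframe._version import __version__, version_tuple

    # With the 3.21.0 wheel installed this prints '3.21.0' and (3, 21, 0).
    print(__version__)
    print(version_tuple)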
sqlframe/base/dataframe.py CHANGED
@@ -1718,6 +1718,114 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
              grouping_columns.extend([list(x) for x in itertools.combinations(columns, i)])
          return self._group_data(self, grouping_columns, self.last_op)
 
+     @operation(Operation.SELECT)
+     def unpivot(
+         self,
+         ids: t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]],
+         values: t.Optional[t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]]],
+         variableColumnName: str,
+         valueColumnName: str,
+     ) -> Self:
+         """
+         Unpivot a DataFrame from wide format to long format, optionally leaving
+         identifier columns set. This is the reverse to `groupBy(...).pivot(...).agg(...)`,
+         except for the aggregation, which cannot be reversed.
+
+         This function is useful to massage a DataFrame into a format where some
+         columns are identifier columns ("ids"), while all other columns ("values")
+         are "unpivoted" to the rows, leaving just two non-id columns, named as given
+         by `variableColumnName` and `valueColumnName`.
+
+         When no "id" columns are given, the unpivoted DataFrame consists of only the
+         "variable" and "value" columns.
+
+         The `values` columns must not be empty so at least one value must be given to be unpivoted.
+         When `values` is `None`, all non-id columns will be unpivoted.
+
+         All "value" columns must share a least common data type. Unless they are the same data type,
+         all "value" columns are cast to the nearest common data type. For instance, types
+         `IntegerType` and `LongType` are cast to `LongType`, while `IntegerType` and `StringType`
+         do not have a common data type and `unpivot` fails.
+
+         .. versionadded:: 3.4.0
+
+         Parameters
+         ----------
+         ids : str, Column, tuple, list
+             Column(s) to use as identifiers. Can be a single column or column name,
+             or a list or tuple for multiple columns.
+         values : str, Column, tuple, list, optional
+             Column(s) to unpivot. Can be a single column or column name, or a list or tuple
+             for multiple columns. If specified, must not be empty. If not specified, uses all
+             columns that are not set as `ids`.
+         variableColumnName : str
+             Name of the variable column.
+         valueColumnName : str
+             Name of the value column.
+
+         Returns
+         -------
+         :class:`DataFrame`
+             Unpivoted DataFrame.
+
+         Notes
+         -----
+         Supports Spark Connect.
+
+         Examples
+         --------
+         >>> df = spark.createDataFrame(
+         ...     [(1, 11, 1.1), (2, 12, 1.2)],
+         ...     ["id", "int", "double"],
+         ... )
+         >>> df.show()
+         +---+---+------+
+         | id|int|double|
+         +---+---+------+
+         |  1| 11|   1.1|
+         |  2| 12|   1.2|
+         +---+---+------+
+
+         >>> df.unpivot("id", ["int", "double"], "var", "val").show()
+         +---+------+----+
+         | id|   var| val|
+         +---+------+----+
+         |  1|   int|11.0|
+         |  1|double| 1.1|
+         |  2|   int|12.0|
+         |  2|double| 1.2|
+         +---+------+----+
+
+         See Also
+         --------
+         DataFrame.melt
+         """
+         from sqlframe.base import functions as F
+
+         id_columns = self._ensure_and_normalize_cols(ids)
+         if not values:
+             outer_selects = self._get_outer_select_columns(self.expression)
+             values = [
+                 column
+                 for column in outer_selects
+                 if column.alias_or_name not in {x.alias_or_name for x in id_columns}
+             ]
+         value_columns = self._ensure_and_normalize_cols(values)
+
+         df = self._convert_leaf_to_cte()
+         selects = []
+         for value in value_columns:
+             selects.append(
+                 exp.select(
+                     *[x.column_expression for x in id_columns],
+                     F.lit(value.alias_or_name).alias(variableColumnName).expression,
+                     value.alias(valueColumnName).expression,
+                 ).from_(df.expression.ctes[-1].alias_or_name)
+             )
+         unioned_expression = functools.reduce(lambda x, y: x.union(y, distinct=False), selects)  # type: ignore
+         final_expression = self._add_ctes_to_expression(unioned_expression, df.expression.ctes)
+         return self.copy(expression=final_expression)._convert_leaf_to_cte()
+
      def collect(self) -> t.List[Row]:
          return self._collect()
 
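For context, the new `unpivot` builds one SELECT per value column — the id columns, the column's name as a string literal aliased to `variableColumnName`, and the column itself aliased to `valueColumnName` — and stacks them with UNION ALL over a CTE of the current DataFrame. A minimal usage sketch follows; it assumes the `StandaloneSession` entry point and `DataFrame.sql()` method that sqlframe exposes elsewhere, and the SQL in the comment only illustrates the shape of the output, not verbatim text:

    from sqlframe.standalone import StandaloneSession

    session = StandaloneSession()
    df = session.createDataFrame(
        [(1, 11, 1.1), (2, 12, 1.2)],
        ["id", "int", "double"],
    )

    # Roughly compiles to:
    #   SELECT id, 'int' AS var, int AS val FROM <cte>
    #   UNION ALL
    #   SELECT id, 'double' AS var, double AS val FROM <cte>
    long_df = df.unpivot("id", ["int", "double"], "var", "val")
    print(long_df.sql())

Note that the generated SELECTs do not appear to add explicit casts, so coercion of mixed value types is left to the target engine; the "least common data type" wording is inherited from the PySpark docstring.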
sqlframe-3.20.0.dist-info/METADATA → sqlframe-3.21.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sqlframe
- Version: 3.20.0
+ Version: 3.21.0
  Summary: Turning PySpark Into a Universal DataFrame API
  Home-page: https://github.com/eakmanrq/sqlframe
  Author: Ryan Eakman
sqlframe-3.20.0.dist-info/RECORD → sqlframe-3.21.0.dist-info/RECORD CHANGED
@@ -1,10 +1,10 @@
  sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
- sqlframe/_version.py,sha256=nzt1OjXbH5tyyHQvLpmIr9I_E9sBcud1ZUXFSGz-12c,413
+ sqlframe/_version.py,sha256=TJ7uVN2zVQAjIGbjv5aK_3Ly4C1owCoTivN0RRSBWsU,413
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
  sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
  sqlframe/base/column.py,sha256=oHVwkSWABO3ZlAbgBShsxSSlgbI06BOup5XJrRhgqJI,18097
- sqlframe/base/dataframe.py,sha256=mKXbIKYiKH5mh6qj0Dg7L_znmCL85q9kHlmHtCW4kJ4,79352
+ sqlframe/base/dataframe.py,sha256=TGIU6VMjeDS1VxlC35XcPCmpNBPNFnTxl2IaatKzR-4,83590
  sqlframe/base/decorators.py,sha256=ms-CvDOIW3T8IVB9VqDmLwAiaEsqXLYRXEqVQaxktiM,1890
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
  sqlframe/base/function_alternatives.py,sha256=NV31IaEhVYmfUSWetAEFISAvLzs2DxQ7bp-iMNgj0hQ,53786
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
- sqlframe-3.20.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
- sqlframe-3.20.0.dist-info/METADATA,sha256=vEauG8vJY6ak5FN5oJpsaGRKgzD7uaodpdlFFu3uN04,8970
- sqlframe-3.20.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
- sqlframe-3.20.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
- sqlframe-3.20.0.dist-info/RECORD,,
+ sqlframe-3.21.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
+ sqlframe-3.21.0.dist-info/METADATA,sha256=WijzmVzIYe83YaO5ZzqLG0mVl75PjYhhksrSMbDCCGw,8970
+ sqlframe-3.21.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+ sqlframe-3.21.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
+ sqlframe-3.21.0.dist-info/RECORD,,
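
For reference, the `sha256=` values in RECORD are urlsafe-base64 encodings of each file's SHA-256 digest with the trailing `=` padding stripped (the wheel RECORD format). That is why `sqlframe/_version.py` keeps its 413-byte size — '3.20.0' and '3.21.0' have the same length — while its hash changes. A small sketch to recompute an entry locally; the path argument is whichever installed file you want to check:

    import base64
    import hashlib
    from pathlib import Path

    def record_hash(path: str) -> str:
        # SHA-256 digest, urlsafe-base64 without '=' padding, as used in wheel RECORD files.
        digest = hashlib.sha256(Path(path).read_bytes()).digest()
        return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode()

    print(record_hash("sqlframe/_version.py"))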