sqlframe 3.20.0__py3-none-any.whl → 3.21.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.20.0'
16
- __version_tuple__ = version_tuple = (3, 20, 0)
15
+ __version__ = version = '3.21.0'
16
+ __version_tuple__ = version_tuple = (3, 21, 0)
@@ -1718,6 +1718,114 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1718
1718
  grouping_columns.extend([list(x) for x in itertools.combinations(columns, i)])
1719
1719
  return self._group_data(self, grouping_columns, self.last_op)
1720
1720
 
1721
@operation(Operation.SELECT)
def unpivot(
    self,
    ids: t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]],
    values: t.Optional[t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]]],
    variableColumnName: str,
    valueColumnName: str,
) -> Self:
    """
    Unpivot a DataFrame from wide format to long format, optionally leaving
    identifier columns set. This is the reverse of `groupBy(...).pivot(...).agg(...)`,
    except for the aggregation, which cannot be reversed.

    This function is useful to massage a DataFrame into a format where some
    columns are identifier columns ("ids"), while all other columns ("values")
    are "unpivoted" to the rows, leaving just two non-id columns, named as given
    by `variableColumnName` and `valueColumnName`.

    When no "id" columns are given, the unpivoted DataFrame consists of only the
    "variable" and "value" columns.

    When `values` is `None` (or empty), every column of the current outer select
    that is not one of the `ids` is unpivoted. At least one value column must
    remain, otherwise a `ValueError` is raised.

    All "value" columns should share a common data type. This implementation emits
    no explicit casts: each value column is selected as-is into one branch of a
    ``UNION ALL``, so whether mismatched types are coerced or rejected is decided
    by the SQL engine that executes the query.

    .. versionadded:: 3.21.0
        Mirrors the PySpark ``DataFrame.unpivot`` API (PySpark 3.4.0).

    Parameters
    ----------
    ids : str, Column, tuple, list
        Column(s) to use as identifiers. Can be a single column or column name,
        or a list or tuple for multiple columns.
    values : str, Column, tuple, list, optional
        Column(s) to unpivot. Can be a single column or column name, or a list or tuple
        for multiple columns. If `None` or empty, uses all columns that are not set
        as `ids`.
    variableColumnName : str
        Name of the variable column.
    valueColumnName : str
        Name of the value column.

    Returns
    -------
    :class:`DataFrame`
        Unpivoted DataFrame.

    Raises
    ------
    ValueError
        If there is no value column left to unpivot.

    Notes
    -----
    The result is built as one ``SELECT <ids>, '<name>' AS <variable>, <col> AS <value>``
    per value column, all reading from a CTE of the current DataFrame and combined
    with ``UNION ALL``. Row order is therefore whatever the engine returns for that
    union and may differ from the PySpark output shown below.

    Examples
    --------
    >>> df = spark.createDataFrame(
    ...     [(1, 11, 1.1), (2, 12, 1.2)],
    ...     ["id", "int", "double"],
    ... )
    >>> df.show()
    +---+---+------+
    | id|int|double|
    +---+---+------+
    |  1| 11|   1.1|
    |  2| 12|   1.2|
    +---+---+------+

    >>> df.unpivot("id", ["int", "double"], "var", "val").show()
    +---+------+----+
    | id|   var| val|
    +---+------+----+
    |  1|   int|11.0|
    |  1|double| 1.1|
    |  2|   int|12.0|
    |  2|double| 1.2|
    +---+------+----+

    See Also
    --------
    DataFrame.melt
    """
    # Imported locally, as in the original implementation.
    from sqlframe.base import functions as F

    id_columns = self._ensure_and_normalize_cols(ids)
    if not values:
        # Default to every outer-select column that is not an id column.
        # The id-name set is loop-invariant, so build it once up front.
        id_names = {x.alias_or_name for x in id_columns}
        values = [
            column
            for column in self._get_outer_select_columns(self.expression)
            if column.alias_or_name not in id_names
        ]
    value_columns = self._ensure_and_normalize_cols(values)
    if not value_columns:
        # Without this guard functools.reduce below fails with an opaque
        # "reduce() of empty iterable" TypeError.
        raise ValueError("unpivot requires at least one value column to unpivot")

    df = self._convert_leaf_to_cte()
    # Every branch of the union reads from the most recently added CTE.
    source_name = df.expression.ctes[-1].alias_or_name
    selects = [
        exp.select(
            *[x.column_expression for x in id_columns],
            # The value column's name becomes a literal in the "variable" column.
            F.lit(value.alias_or_name).alias(variableColumnName).expression,
            value.alias(valueColumnName).expression,
        ).from_(source_name)
        for value in value_columns
    ]
    # distinct=False -> UNION ALL, so duplicate rows are preserved.
    unioned_expression = functools.reduce(lambda x, y: x.union(y, distinct=False), selects)  # type: ignore
    final_expression = self._add_ctes_to_expression(unioned_expression, df.expression.ctes)
    return self.copy(expression=final_expression)._convert_leaf_to_cte()
1828
+
1721
1829
  def collect(self) -> t.List[Row]:
1722
1830
  return self._collect()
1723
1831
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.20.0
3
+ Version: 3.21.0
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -1,10 +1,10 @@
1
1
  sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
2
- sqlframe/_version.py,sha256=nzt1OjXbH5tyyHQvLpmIr9I_E9sBcud1ZUXFSGz-12c,413
2
+ sqlframe/_version.py,sha256=TJ7uVN2zVQAjIGbjv5aK_3Ly4C1owCoTivN0RRSBWsU,413
3
3
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
5
5
  sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
6
6
  sqlframe/base/column.py,sha256=oHVwkSWABO3ZlAbgBShsxSSlgbI06BOup5XJrRhgqJI,18097
7
- sqlframe/base/dataframe.py,sha256=mKXbIKYiKH5mh6qj0Dg7L_znmCL85q9kHlmHtCW4kJ4,79352
7
+ sqlframe/base/dataframe.py,sha256=TGIU6VMjeDS1VxlC35XcPCmpNBPNFnTxl2IaatKzR-4,83590
8
8
  sqlframe/base/decorators.py,sha256=ms-CvDOIW3T8IVB9VqDmLwAiaEsqXLYRXEqVQaxktiM,1890
9
9
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
10
10
  sqlframe/base/function_alternatives.py,sha256=NV31IaEhVYmfUSWetAEFISAvLzs2DxQ7bp-iMNgj0hQ,53786
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
129
129
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
130
130
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
131
131
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
132
- sqlframe-3.20.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
133
- sqlframe-3.20.0.dist-info/METADATA,sha256=vEauG8vJY6ak5FN5oJpsaGRKgzD7uaodpdlFFu3uN04,8970
134
- sqlframe-3.20.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
135
- sqlframe-3.20.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
136
- sqlframe-3.20.0.dist-info/RECORD,,
132
+ sqlframe-3.21.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
133
+ sqlframe-3.21.0.dist-info/METADATA,sha256=WijzmVzIYe83YaO5ZzqLG0mVl75PjYhhksrSMbDCCGw,8970
134
+ sqlframe-3.21.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
135
+ sqlframe-3.21.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
136
+ sqlframe-3.21.0.dist-info/RECORD,,