sqlframe 3.20.0__py3-none-any.whl → 3.21.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- sqlframe/_version.py +2 -2
- sqlframe/base/dataframe.py +108 -0
- {sqlframe-3.20.0.dist-info → sqlframe-3.21.0.dist-info}/METADATA +1 -1
- {sqlframe-3.20.0.dist-info → sqlframe-3.21.0.dist-info}/RECORD +7 -7
- {sqlframe-3.20.0.dist-info → sqlframe-3.21.0.dist-info}/LICENSE +0 -0
- {sqlframe-3.20.0.dist-info → sqlframe-3.21.0.dist-info}/WHEEL +0 -0
- {sqlframe-3.20.0.dist-info → sqlframe-3.21.0.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/dataframe.py
CHANGED
@@ -1718,6 +1718,114 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1718
1718
|
grouping_columns.extend([list(x) for x in itertools.combinations(columns, i)])
|
1719
1719
|
return self._group_data(self, grouping_columns, self.last_op)
|
1720
1720
|
|
1721
|
+
@operation(Operation.SELECT)
def unpivot(
    self,
    ids: t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]],
    values: t.Optional[t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]]],
    variableColumnName: str,
    valueColumnName: str,
) -> Self:
    """
    Unpivot a DataFrame from wide format to long format, optionally leaving
    identifier columns set. This is the reverse to `groupBy(...).pivot(...).agg(...)`,
    except for the aggregation, which cannot be reversed.

    This function is useful to massage a DataFrame into a format where some
    columns are identifier columns ("ids"), while all other columns ("values")
    are "unpivoted" to the rows, leaving just two non-id columns, named as given
    by `variableColumnName` and `valueColumnName`.

    When no "id" columns are given, the unpivoted DataFrame consists of only the
    "variable" and "value" columns.

    At least one value column is required. When `values` is `None` (or empty),
    all non-id columns of the current outer select are unpivoted.

    All "value" columns should share a common data type, since they end up in a
    single output column.

    Parameters
    ----------
    ids : str, Column, tuple, list
        Column(s) to use as identifiers. Can be a single column or column name,
        or a list or tuple for multiple columns.
    values : str, Column, tuple, list, optional
        Column(s) to unpivot. Can be a single column or column name, or a list or tuple
        for multiple columns. If not specified, uses all columns that are not set as `ids`.
    variableColumnName : str
        Name of the variable column.
    valueColumnName : str
        Name of the value column.

    Returns
    -------
    :class:`DataFrame`
        Unpivoted DataFrame.

    Raises
    ------
    ValueError
        If there is no value column left to unpivot, i.e. `values` was not given
        and every selected column is used as an id column.

    Notes
    -----
    The result is built as a ``UNION ALL`` of one ``SELECT`` per value column over
    the current DataFrame (materialised as a CTE). No explicit cast and no
    ``ORDER BY`` is added here.
    NOTE(review): reconciliation of differing value types and the resulting row
    order are therefore presumably left to the executing engine -- confirm per dialect.

    Examples
    --------
    >>> df = spark.createDataFrame(
    ...     [(1, 11, 1.1), (2, 12, 1.2)],
    ...     ["id", "int", "double"],
    ... )
    >>> df.show()
    +---+---+------+
    | id|int|double|
    +---+---+------+
    |  1| 11|   1.1|
    |  2| 12|   1.2|
    +---+---+------+

    >>> df.unpivot("id", ["int", "double"], "var", "val").show()
    +---+------+----+
    | id|   var| val|
    +---+------+----+
    |  1|   int|11.0|
    |  1|double| 1.1|
    |  2|   int|12.0|
    |  2|double| 1.2|
    +---+------+----+

    See Also
    --------
    DataFrame.melt
    """
    # Imported locally, as in the original implementation.
    from sqlframe.base import functions as F

    id_columns = self._ensure_and_normalize_cols(ids)
    if not values:
        # Build the id-name lookup once instead of once per candidate column.
        id_names = {x.alias_or_name for x in id_columns}
        values = [
            column
            for column in self._get_outer_select_columns(self.expression)
            if column.alias_or_name not in id_names
        ]
    value_columns = self._ensure_and_normalize_cols(values)
    if not value_columns:
        # Without this guard the reduce() below fails with an opaque
        # "reduce() of empty iterable" TypeError.
        raise ValueError(
            "unpivot requires at least one value column, but no columns remain "
            "after excluding the id columns"
        )

    df = self._convert_leaf_to_cte()
    # Every per-value SELECT reads from the CTE that was just created.
    source_name = df.expression.ctes[-1].alias_or_name
    selects = [
        exp.select(
            *[x.column_expression for x in id_columns],
            F.lit(value.alias_or_name).alias(variableColumnName).expression,
            value.alias(valueColumnName).expression,
        ).from_(source_name)
        for value in value_columns
    ]
    # distinct=False -> UNION ALL, so duplicate rows are preserved.
    unioned_expression = functools.reduce(lambda x, y: x.union(y, distinct=False), selects)  # type: ignore
    final_expression = self._add_ctes_to_expression(unioned_expression, df.expression.ctes)
    return self.copy(expression=final_expression)._convert_leaf_to_cte()
|
1828
|
+
|
1721
1829
|
def collect(self) -> t.List[Row]:
    """Return the rows of this DataFrame by delegating to ``self._collect()``."""
    rows = self._collect()
    return rows
|
1723
1831
|
|
@@ -1,10 +1,10 @@
|
|
1
1
|
sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
|
2
|
-
sqlframe/_version.py,sha256=
|
2
|
+
sqlframe/_version.py,sha256=TJ7uVN2zVQAjIGbjv5aK_3Ly4C1owCoTivN0RRSBWsU,413
|
3
3
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
5
5
|
sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
|
6
6
|
sqlframe/base/column.py,sha256=oHVwkSWABO3ZlAbgBShsxSSlgbI06BOup5XJrRhgqJI,18097
|
7
|
-
sqlframe/base/dataframe.py,sha256=
|
7
|
+
sqlframe/base/dataframe.py,sha256=TGIU6VMjeDS1VxlC35XcPCmpNBPNFnTxl2IaatKzR-4,83590
|
8
8
|
sqlframe/base/decorators.py,sha256=ms-CvDOIW3T8IVB9VqDmLwAiaEsqXLYRXEqVQaxktiM,1890
|
9
9
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
10
10
|
sqlframe/base/function_alternatives.py,sha256=NV31IaEhVYmfUSWetAEFISAvLzs2DxQ7bp-iMNgj0hQ,53786
|
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
129
129
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
130
130
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
131
131
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
132
|
-
sqlframe-3.
|
133
|
-
sqlframe-3.
|
134
|
-
sqlframe-3.
|
135
|
-
sqlframe-3.
|
136
|
-
sqlframe-3.
|
132
|
+
sqlframe-3.21.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
133
|
+
sqlframe-3.21.0.dist-info/METADATA,sha256=WijzmVzIYe83YaO5ZzqLG0mVl75PjYhhksrSMbDCCGw,8970
|
134
|
+
sqlframe-3.21.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
135
|
+
sqlframe-3.21.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
136
|
+
sqlframe-3.21.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|