sqlframe 3.20.0__py3-none-any.whl → 3.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/dataframe.py +108 -0
- {sqlframe-3.20.0.dist-info → sqlframe-3.21.0.dist-info}/METADATA +1 -1
- {sqlframe-3.20.0.dist-info → sqlframe-3.21.0.dist-info}/RECORD +7 -7
- {sqlframe-3.20.0.dist-info → sqlframe-3.21.0.dist-info}/LICENSE +0 -0
- {sqlframe-3.20.0.dist-info → sqlframe-3.21.0.dist-info}/WHEEL +0 -0
- {sqlframe-3.20.0.dist-info → sqlframe-3.21.0.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/dataframe.py
CHANGED
@@ -1718,6 +1718,114 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1718
1718
|
grouping_columns.extend([list(x) for x in itertools.combinations(columns, i)])
|
1719
1719
|
return self._group_data(self, grouping_columns, self.last_op)
|
1720
1720
|
|
1721
|
+
@operation(Operation.SELECT)
def unpivot(
    self,
    ids: t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]],
    values: t.Optional[t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]]],
    variableColumnName: str,
    valueColumnName: str,
) -> Self:
    """
    Unpivot a DataFrame from wide format to long format, optionally leaving
    identifier columns set. This is the reverse to `groupBy(...).pivot(...).agg(...)`,
    except for the aggregation, which cannot be reversed.

    This function is useful to massage a DataFrame into a format where some
    columns are identifier columns ("ids"), while all other columns ("values")
    are "unpivoted" to the rows, leaving just two non-id columns, named as given
    by `variableColumnName` and `valueColumnName`.

    When no "id" columns are given, the unpivoted DataFrame consists of only the
    "variable" and "value" columns.

    At least one value column is required. When `values` is `None` (or an empty
    string, list or tuple), all columns of the current selection that are not
    named in `ids` are unpivoted.

    All "value" columns must share a least common data type. Unless they are the same data type,
    all "value" columns are cast to the nearest common data type. For instance, types
    `IntegerType` and `LongType` are cast to `LongType`, while `IntegerType` and `StringType`
    do not have a common data type and `unpivot` fails.
    NOTE(review): this paragraph is inherited from the PySpark documentation; here no
    explicit cast is emitted, so type reconciliation is left to the executing engine's
    UNION ALL semantics -- confirm per engine.

    This mirrors the PySpark ``DataFrame.unpivot`` API (added to PySpark in 3.4.0).

    Parameters
    ----------
    ids : str, Column, tuple, list
        Column(s) to use as identifiers. Can be a single column or column name,
        or a list or tuple for multiple columns.
    values : str, Column, tuple, list, optional
        Column(s) to unpivot. Can be a single column or column name, or a list or tuple
        for multiple columns. If `None` or empty, uses all columns that are not set
        as `ids`.
    variableColumnName : str
        Name of the variable column.
    valueColumnName : str
        Name of the value column.

    Returns
    -------
    :class:`DataFrame`
        Unpivoted DataFrame.

    Raises
    ------
    ValueError
        If there is no column to unpivot, i.e. `values` resolves to nothing
        (for example when every selected column is listed in `ids`).

    Notes
    -----
    The result is built as a ``UNION ALL`` of one ``SELECT`` per value column over a
    CTE of the current DataFrame, so this method itself implies no particular row
    order. The ordering shown in the example below is illustrative.

    Examples
    --------
    >>> df = spark.createDataFrame(
    ...     [(1, 11, 1.1), (2, 12, 1.2)],
    ...     ["id", "int", "double"],
    ... )
    >>> df.show()
    +---+---+------+
    | id|int|double|
    +---+---+------+
    |  1| 11|   1.1|
    |  2| 12|   1.2|
    +---+---+------+

    >>> df.unpivot("id", ["int", "double"], "var", "val").show()
    +---+------+----+
    | id|   var| val|
    +---+------+----+
    |  1|   int|11.0|
    |  1|double| 1.1|
    |  2|   int|12.0|
    |  2|double| 1.2|
    +---+------+----+

    See Also
    --------
    DataFrame.melt
    """
    # Imported locally, as in the original implementation.
    from sqlframe.base import functions as F

    id_columns = self._ensure_and_normalize_cols(ids)

    # Only apply truthiness to plain containers/strings; a Column object is
    # always treated as an explicitly supplied value column.
    values_missing = values is None or (
        isinstance(values, (str, list, tuple)) and not values
    )
    if values_missing:
        # Build the id-name lookup once instead of once per candidate column.
        id_names = {x.alias_or_name for x in id_columns}
        values = [
            column
            for column in self._get_outer_select_columns(self.expression)
            if column.alias_or_name not in id_names
        ]
    value_columns = self._ensure_and_normalize_cols(values)

    if not value_columns:
        # Without this guard functools.reduce below fails with an opaque
        # "reduce() of empty iterable" TypeError.
        raise ValueError(
            "unpivot requires at least one value column, "
            "but no columns remain to unpivot after excluding the id columns"
        )

    df = self._convert_leaf_to_cte()
    # Every per-column SELECT reads from the CTE that now wraps the current frame.
    source_name = df.expression.ctes[-1].alias_or_name
    id_expressions = [x.column_expression for x in id_columns]

    selects = [
        exp.select(
            *id_expressions,
            # The variable column carries the source column's name as a literal.
            F.lit(value.alias_or_name).alias(variableColumnName).expression,
            value.alias(valueColumnName).expression,
        ).from_(source_name)
        for value in value_columns
    ]

    # distinct=False keeps duplicate rows (UNION ALL rather than UNION).
    unioned_expression = functools.reduce(lambda x, y: x.union(y, distinct=False), selects)  # type: ignore
    final_expression = self._add_ctes_to_expression(unioned_expression, df.expression.ctes)
    return self.copy(expression=final_expression)._convert_leaf_to_cte()
|
1828
|
+
|
1721
1829
|
def collect(self) -> t.List[Row]:
|
1722
1830
|
return self._collect()
|
1723
1831
|
|
@@ -1,10 +1,10 @@
|
|
1
1
|
sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
|
2
|
-
sqlframe/_version.py,sha256=
|
2
|
+
sqlframe/_version.py,sha256=TJ7uVN2zVQAjIGbjv5aK_3Ly4C1owCoTivN0RRSBWsU,413
|
3
3
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
5
5
|
sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
|
6
6
|
sqlframe/base/column.py,sha256=oHVwkSWABO3ZlAbgBShsxSSlgbI06BOup5XJrRhgqJI,18097
|
7
|
-
sqlframe/base/dataframe.py,sha256=
|
7
|
+
sqlframe/base/dataframe.py,sha256=TGIU6VMjeDS1VxlC35XcPCmpNBPNFnTxl2IaatKzR-4,83590
|
8
8
|
sqlframe/base/decorators.py,sha256=ms-CvDOIW3T8IVB9VqDmLwAiaEsqXLYRXEqVQaxktiM,1890
|
9
9
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
10
10
|
sqlframe/base/function_alternatives.py,sha256=NV31IaEhVYmfUSWetAEFISAvLzs2DxQ7bp-iMNgj0hQ,53786
|
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
129
129
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
130
130
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
131
131
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
132
|
-
sqlframe-3.
|
133
|
-
sqlframe-3.
|
134
|
-
sqlframe-3.
|
135
|
-
sqlframe-3.
|
136
|
-
sqlframe-3.
|
132
|
+
sqlframe-3.21.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
133
|
+
sqlframe-3.21.0.dist-info/METADATA,sha256=WijzmVzIYe83YaO5ZzqLG0mVl75PjYhhksrSMbDCCGw,8970
|
134
|
+
sqlframe-3.21.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
135
|
+
sqlframe-3.21.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
136
|
+
sqlframe-3.21.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|