sqlframe-3.19.0-py3-none-any.whl → sqlframe-3.21.0-py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- sqlframe/_version.py +2 -2
- sqlframe/base/dataframe.py +108 -0
- sqlframe/base/functions.py +1 -7
- sqlframe/base/util.py +1 -0
- {sqlframe-3.19.0.dist-info → sqlframe-3.21.0.dist-info}/METADATA +1 -1
- {sqlframe-3.19.0.dist-info → sqlframe-3.21.0.dist-info}/RECORD +9 -9
- {sqlframe-3.19.0.dist-info → sqlframe-3.21.0.dist-info}/LICENSE +0 -0
- {sqlframe-3.19.0.dist-info → sqlframe-3.21.0.dist-info}/WHEEL +0 -0
- {sqlframe-3.19.0.dist-info → sqlframe-3.21.0.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/dataframe.py
CHANGED
@@ -1718,6 +1718,114 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1718
1718
|
grouping_columns.extend([list(x) for x in itertools.combinations(columns, i)])
|
1719
1719
|
return self._group_data(self, grouping_columns, self.last_op)
|
1720
1720
|
|
1721
|
+
@operation(Operation.SELECT)
|
1722
|
+
def unpivot(
|
1723
|
+
self,
|
1724
|
+
ids: t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]],
|
1725
|
+
values: t.Optional[t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]]],
|
1726
|
+
variableColumnName: str,
|
1727
|
+
valueColumnName: str,
|
1728
|
+
) -> Self:
|
1729
|
+
"""
|
1730
|
+
Unpivot a DataFrame from wide format to long format, optionally leaving
|
1731
|
+
identifier columns set. This is the reverse to `groupBy(...).pivot(...).agg(...)`,
|
1732
|
+
except for the aggregation, which cannot be reversed.
|
1733
|
+
|
1734
|
+
This function is useful to massage a DataFrame into a format where some
|
1735
|
+
columns are identifier columns ("ids"), while all other columns ("values")
|
1736
|
+
are "unpivoted" to the rows, leaving just two non-id columns, named as given
|
1737
|
+
by `variableColumnName` and `valueColumnName`.
|
1738
|
+
|
1739
|
+
When no "id" columns are given, the unpivoted DataFrame consists of only the
|
1740
|
+
"variable" and "value" columns.
|
1741
|
+
|
1742
|
+
The `values` columns must not be empty so at least one value must be given to be unpivoted.
|
1743
|
+
When `values` is `None`, all non-id columns will be unpivoted.
|
1744
|
+
|
1745
|
+
All "value" columns must share a least common data type. Unless they are the same data type,
|
1746
|
+
all "value" columns are cast to the nearest common data type. For instance, types
|
1747
|
+
`IntegerType` and `LongType` are cast to `LongType`, while `IntegerType` and `StringType`
|
1748
|
+
do not have a common data type and `unpivot` fails.
|
1749
|
+
|
1750
|
+
.. versionadded:: 3.4.0
|
1751
|
+
|
1752
|
+
Parameters
|
1753
|
+
----------
|
1754
|
+
ids : str, Column, tuple, list
|
1755
|
+
Column(s) to use as identifiers. Can be a single column or column name,
|
1756
|
+
or a list or tuple for multiple columns.
|
1757
|
+
values : str, Column, tuple, list, optional
|
1758
|
+
Column(s) to unpivot. Can be a single column or column name, or a list or tuple
|
1759
|
+
for multiple columns. If specified, must not be empty. If not specified, uses all
|
1760
|
+
columns that are not set as `ids`.
|
1761
|
+
variableColumnName : str
|
1762
|
+
Name of the variable column.
|
1763
|
+
valueColumnName : str
|
1764
|
+
Name of the value column.
|
1765
|
+
|
1766
|
+
Returns
|
1767
|
+
-------
|
1768
|
+
:class:`DataFrame`
|
1769
|
+
Unpivoted DataFrame.
|
1770
|
+
|
1771
|
+
Notes
|
1772
|
+
-----
|
1773
|
+
Supports Spark Connect.
|
1774
|
+
|
1775
|
+
Examples
|
1776
|
+
--------
|
1777
|
+
>>> df = spark.createDataFrame(
|
1778
|
+
... [(1, 11, 1.1), (2, 12, 1.2)],
|
1779
|
+
... ["id", "int", "double"],
|
1780
|
+
... )
|
1781
|
+
>>> df.show()
|
1782
|
+
+---+---+------+
|
1783
|
+
| id|int|double|
|
1784
|
+
+---+---+------+
|
1785
|
+
| 1| 11| 1.1|
|
1786
|
+
| 2| 12| 1.2|
|
1787
|
+
+---+---+------+
|
1788
|
+
|
1789
|
+
>>> df.unpivot("id", ["int", "double"], "var", "val").show()
|
1790
|
+
+---+------+----+
|
1791
|
+
| id| var| val|
|
1792
|
+
+---+------+----+
|
1793
|
+
| 1| int|11.0|
|
1794
|
+
| 1|double| 1.1|
|
1795
|
+
| 2| int|12.0|
|
1796
|
+
| 2|double| 1.2|
|
1797
|
+
+---+------+----+
|
1798
|
+
|
1799
|
+
See Also
|
1800
|
+
--------
|
1801
|
+
DataFrame.melt
|
1802
|
+
"""
|
1803
|
+
from sqlframe.base import functions as F
|
1804
|
+
|
1805
|
+
id_columns = self._ensure_and_normalize_cols(ids)
|
1806
|
+
if not values:
|
1807
|
+
outer_selects = self._get_outer_select_columns(self.expression)
|
1808
|
+
values = [
|
1809
|
+
column
|
1810
|
+
for column in outer_selects
|
1811
|
+
if column.alias_or_name not in {x.alias_or_name for x in id_columns}
|
1812
|
+
]
|
1813
|
+
value_columns = self._ensure_and_normalize_cols(values)
|
1814
|
+
|
1815
|
+
df = self._convert_leaf_to_cte()
|
1816
|
+
selects = []
|
1817
|
+
for value in value_columns:
|
1818
|
+
selects.append(
|
1819
|
+
exp.select(
|
1820
|
+
*[x.column_expression for x in id_columns],
|
1821
|
+
F.lit(value.alias_or_name).alias(variableColumnName).expression,
|
1822
|
+
value.alias(valueColumnName).expression,
|
1823
|
+
).from_(df.expression.ctes[-1].alias_or_name)
|
1824
|
+
)
|
1825
|
+
unioned_expression = functools.reduce(lambda x, y: x.union(y, distinct=False), selects) # type: ignore
|
1826
|
+
final_expression = self._add_ctes_to_expression(unioned_expression, df.expression.ctes)
|
1827
|
+
return self.copy(expression=final_expression)._convert_leaf_to_cte()
|
1828
|
+
|
1721
1829
|
def collect(self) -> t.List[Row]:
|
1722
1830
|
return self._collect()
|
1723
1831
|
|
sqlframe/base/functions.py
CHANGED
@@ -3133,20 +3133,14 @@ def datepart(field: ColumnOrName, source: ColumnOrName) -> Column:
|
|
3133
3133
|
|
3134
3134
|
@meta(unsupported_engines=["bigquery", "postgres", "snowflake"])
|
3135
3135
|
def day(col: ColumnOrName) -> Column:
|
3136
|
-
from sqlframe.base.function_alternatives import day_with_try_to_timestamp
|
3137
|
-
|
3138
3136
|
session = _get_session()
|
3139
3137
|
|
3140
3138
|
if session._is_duckdb:
|
3141
3139
|
try_to_timestamp = get_func_from_session("try_to_timestamp")
|
3142
3140
|
to_date = get_func_from_session("to_date")
|
3143
|
-
when = get_func_from_session("when")
|
3144
3141
|
_is_string = get_func_from_session("_is_string")
|
3145
3142
|
coalesce = get_func_from_session("coalesce")
|
3146
|
-
col = when(
|
3147
|
-
_is_string(col),
|
3148
|
-
coalesce(try_to_timestamp(col), to_date(col)),
|
3149
|
-
).otherwise(col)
|
3143
|
+
col = coalesce(try_to_timestamp(Column.ensure_col(col).cast("VARCHAR")), to_date(col))
|
3150
3144
|
|
3151
3145
|
return Column.invoke_expression_over_column(col, expression.Day)
|
3152
3146
|
|
sqlframe/base/util.py
CHANGED
@@ -316,6 +316,7 @@ def sqlglot_to_spark(sqlglot_dtype: exp.DataType) -> types.DataType:
|
|
316
316
|
exp.DataType.Type.INT: types.IntegerType,
|
317
317
|
exp.DataType.Type.BIGINT: types.LongType,
|
318
318
|
exp.DataType.Type.SMALLINT: types.ShortType,
|
319
|
+
exp.DataType.Type.TINYINT: types.ByteType,
|
319
320
|
exp.DataType.Type.FLOAT: types.FloatType,
|
320
321
|
exp.DataType.Type.DOUBLE: types.DoubleType,
|
321
322
|
exp.DataType.Type.DECIMAL: types.DecimalType,
|
@@ -1,14 +1,14 @@
|
|
1
1
|
sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
|
2
|
-
sqlframe/_version.py,sha256=
|
2
|
+
sqlframe/_version.py,sha256=TJ7uVN2zVQAjIGbjv5aK_3Ly4C1owCoTivN0RRSBWsU,413
|
3
3
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
5
5
|
sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
|
6
6
|
sqlframe/base/column.py,sha256=oHVwkSWABO3ZlAbgBShsxSSlgbI06BOup5XJrRhgqJI,18097
|
7
|
-
sqlframe/base/dataframe.py,sha256=
|
7
|
+
sqlframe/base/dataframe.py,sha256=TGIU6VMjeDS1VxlC35XcPCmpNBPNFnTxl2IaatKzR-4,83590
|
8
8
|
sqlframe/base/decorators.py,sha256=ms-CvDOIW3T8IVB9VqDmLwAiaEsqXLYRXEqVQaxktiM,1890
|
9
9
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
10
10
|
sqlframe/base/function_alternatives.py,sha256=NV31IaEhVYmfUSWetAEFISAvLzs2DxQ7bp-iMNgj0hQ,53786
|
11
|
-
sqlframe/base/functions.py,sha256=
|
11
|
+
sqlframe/base/functions.py,sha256=nfDf2oKoBq2hrutTfuVHKmGvkm_X_ZvhfnFPv1rn0oU,222350
|
12
12
|
sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
|
13
13
|
sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
|
14
14
|
sqlframe/base/operations.py,sha256=xSPw74e59wYvNd6U1AlwziNCTG6Aftrbl4SybN9u9VE,3450
|
@@ -18,7 +18,7 @@ sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
|
|
18
18
|
sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
|
19
19
|
sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
|
20
20
|
sqlframe/base/udf.py,sha256=O6hMhBUy9NVv-mhJRtfFhXTIa_-Z8Y_FkmmuOHu0l90,1117
|
21
|
-
sqlframe/base/util.py,sha256=
|
21
|
+
sqlframe/base/util.py,sha256=rdnH3Kg6gZVT3DehU_ZHjfum79vc-I5W_Il6OiCtWF4,15284
|
22
22
|
sqlframe/base/window.py,sha256=8hOv-ignPPIsZA9FzvYzcLE9J_glalVaYjIAUdRUX3o,4943
|
23
23
|
sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
24
24
|
sqlframe/base/mixins/catalog_mixins.py,sha256=9tn0mK8oPoqIIjNItystD5tdBMdK9YpkxTG7G9KQl8k,18619
|
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
129
129
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
130
130
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
131
131
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
132
|
-
sqlframe-3.19.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
133
|
-
sqlframe-3.
|
134
|
-
sqlframe-3.19.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
135
|
-
sqlframe-3.19.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
136
|
-
sqlframe-3.19.0.dist-info/RECORD,,
|
132
|
+
sqlframe-3.21.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
133
|
+
sqlframe-3.21.0.dist-info/METADATA,sha256=WijzmVzIYe83YaO5ZzqLG0mVl75PjYhhksrSMbDCCGw,8970
|
134
|
+
sqlframe-3.21.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
135
|
+
sqlframe-3.21.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
136
|
+
sqlframe-3.21.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|