sqlframe 3.19.0__py3-none-any.whl → 3.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/dataframe.py +108 -0
- sqlframe/base/functions.py +1 -7
- sqlframe/base/util.py +1 -0
- {sqlframe-3.19.0.dist-info → sqlframe-3.21.0.dist-info}/METADATA +1 -1
- {sqlframe-3.19.0.dist-info → sqlframe-3.21.0.dist-info}/RECORD +9 -9
- {sqlframe-3.19.0.dist-info → sqlframe-3.21.0.dist-info}/LICENSE +0 -0
- {sqlframe-3.19.0.dist-info → sqlframe-3.21.0.dist-info}/WHEEL +0 -0
- {sqlframe-3.19.0.dist-info → sqlframe-3.21.0.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/dataframe.py
CHANGED
@@ -1718,6 +1718,114 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1718
1718
|
grouping_columns.extend([list(x) for x in itertools.combinations(columns, i)])
|
1719
1719
|
return self._group_data(self, grouping_columns, self.last_op)
|
1720
1720
|
|
1721
|
+
@operation(Operation.SELECT)
|
1722
|
+
def unpivot(
|
1723
|
+
self,
|
1724
|
+
ids: t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]],
|
1725
|
+
values: t.Optional[t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]]],
|
1726
|
+
variableColumnName: str,
|
1727
|
+
valueColumnName: str,
|
1728
|
+
) -> Self:
|
1729
|
+
"""
|
1730
|
+
Unpivot a DataFrame from wide format to long format, optionally leaving
|
1731
|
+
identifier columns set. This is the reverse to `groupBy(...).pivot(...).agg(...)`,
|
1732
|
+
except for the aggregation, which cannot be reversed.
|
1733
|
+
|
1734
|
+
This function is useful to massage a DataFrame into a format where some
|
1735
|
+
columns are identifier columns ("ids"), while all other columns ("values")
|
1736
|
+
are "unpivoted" to the rows, leaving just two non-id columns, named as given
|
1737
|
+
by `variableColumnName` and `valueColumnName`.
|
1738
|
+
|
1739
|
+
When no "id" columns are given, the unpivoted DataFrame consists of only the
|
1740
|
+
"variable" and "value" columns.
|
1741
|
+
|
1742
|
+
The `values` columns must not be empty so at least one value must be given to be unpivoted.
|
1743
|
+
When `values` is `None`, all non-id columns will be unpivoted.
|
1744
|
+
|
1745
|
+
All "value" columns must share a least common data type. Unless they are the same data type,
|
1746
|
+
all "value" columns are cast to the nearest common data type. For instance, types
|
1747
|
+
`IntegerType` and `LongType` are cast to `LongType`, while `IntegerType` and `StringType`
|
1748
|
+
do not have a common data type and `unpivot` fails.
|
1749
|
+
|
1750
|
+
.. versionadded:: 3.4.0
|
1751
|
+
|
1752
|
+
Parameters
|
1753
|
+
----------
|
1754
|
+
ids : str, Column, tuple, list
|
1755
|
+
Column(s) to use as identifiers. Can be a single column or column name,
|
1756
|
+
or a list or tuple for multiple columns.
|
1757
|
+
values : str, Column, tuple, list, optional
|
1758
|
+
Column(s) to unpivot. Can be a single column or column name, or a list or tuple
|
1759
|
+
for multiple columns. If specified, must not be empty. If not specified, uses all
|
1760
|
+
columns that are not set as `ids`.
|
1761
|
+
variableColumnName : str
|
1762
|
+
Name of the variable column.
|
1763
|
+
valueColumnName : str
|
1764
|
+
Name of the value column.
|
1765
|
+
|
1766
|
+
Returns
|
1767
|
+
-------
|
1768
|
+
:class:`DataFrame`
|
1769
|
+
Unpivoted DataFrame.
|
1770
|
+
|
1771
|
+
Notes
|
1772
|
+
-----
|
1773
|
+
Supports Spark Connect.
|
1774
|
+
|
1775
|
+
Examples
|
1776
|
+
--------
|
1777
|
+
>>> df = spark.createDataFrame(
|
1778
|
+
... [(1, 11, 1.1), (2, 12, 1.2)],
|
1779
|
+
... ["id", "int", "double"],
|
1780
|
+
... )
|
1781
|
+
>>> df.show()
|
1782
|
+
+---+---+------+
|
1783
|
+
| id|int|double|
|
1784
|
+
+---+---+------+
|
1785
|
+
| 1| 11| 1.1|
|
1786
|
+
| 2| 12| 1.2|
|
1787
|
+
+---+---+------+
|
1788
|
+
|
1789
|
+
>>> df.unpivot("id", ["int", "double"], "var", "val").show()
|
1790
|
+
+---+------+----+
|
1791
|
+
| id| var| val|
|
1792
|
+
+---+------+----+
|
1793
|
+
| 1| int|11.0|
|
1794
|
+
| 1|double| 1.1|
|
1795
|
+
| 2| int|12.0|
|
1796
|
+
| 2|double| 1.2|
|
1797
|
+
+---+------+----+
|
1798
|
+
|
1799
|
+
See Also
|
1800
|
+
--------
|
1801
|
+
DataFrame.melt
|
1802
|
+
"""
|
1803
|
+
from sqlframe.base import functions as F
|
1804
|
+
|
1805
|
+
id_columns = self._ensure_and_normalize_cols(ids)
|
1806
|
+
if not values:
|
1807
|
+
outer_selects = self._get_outer_select_columns(self.expression)
|
1808
|
+
values = [
|
1809
|
+
column
|
1810
|
+
for column in outer_selects
|
1811
|
+
if column.alias_or_name not in {x.alias_or_name for x in id_columns}
|
1812
|
+
]
|
1813
|
+
value_columns = self._ensure_and_normalize_cols(values)
|
1814
|
+
|
1815
|
+
df = self._convert_leaf_to_cte()
|
1816
|
+
selects = []
|
1817
|
+
for value in value_columns:
|
1818
|
+
selects.append(
|
1819
|
+
exp.select(
|
1820
|
+
*[x.column_expression for x in id_columns],
|
1821
|
+
F.lit(value.alias_or_name).alias(variableColumnName).expression,
|
1822
|
+
value.alias(valueColumnName).expression,
|
1823
|
+
).from_(df.expression.ctes[-1].alias_or_name)
|
1824
|
+
)
|
1825
|
+
unioned_expression = functools.reduce(lambda x, y: x.union(y, distinct=False), selects) # type: ignore
|
1826
|
+
final_expression = self._add_ctes_to_expression(unioned_expression, df.expression.ctes)
|
1827
|
+
return self.copy(expression=final_expression)._convert_leaf_to_cte()
|
1828
|
+
|
1721
1829
|
def collect(self) -> t.List[Row]:
|
1722
1830
|
return self._collect()
|
1723
1831
|
|
sqlframe/base/functions.py
CHANGED
@@ -3133,20 +3133,14 @@ def datepart(field: ColumnOrName, source: ColumnOrName) -> Column:
|
|
3133
3133
|
|
3134
3134
|
@meta(unsupported_engines=["bigquery", "postgres", "snowflake"])
|
3135
3135
|
def day(col: ColumnOrName) -> Column:
|
3136
|
-
from sqlframe.base.function_alternatives import day_with_try_to_timestamp
|
3137
|
-
|
3138
3136
|
session = _get_session()
|
3139
3137
|
|
3140
3138
|
if session._is_duckdb:
|
3141
3139
|
try_to_timestamp = get_func_from_session("try_to_timestamp")
|
3142
3140
|
to_date = get_func_from_session("to_date")
|
3143
|
-
when = get_func_from_session("when")
|
3144
3141
|
_is_string = get_func_from_session("_is_string")
|
3145
3142
|
coalesce = get_func_from_session("coalesce")
|
3146
|
-
col = when(
|
3147
|
-
_is_string(col),
|
3148
|
-
coalesce(try_to_timestamp(col), to_date(col)),
|
3149
|
-
).otherwise(col)
|
3143
|
+
col = coalesce(try_to_timestamp(Column.ensure_col(col).cast("VARCHAR")), to_date(col))
|
3150
3144
|
|
3151
3145
|
return Column.invoke_expression_over_column(col, expression.Day)
|
3152
3146
|
|
sqlframe/base/util.py
CHANGED
@@ -316,6 +316,7 @@ def sqlglot_to_spark(sqlglot_dtype: exp.DataType) -> types.DataType:
|
|
316
316
|
exp.DataType.Type.INT: types.IntegerType,
|
317
317
|
exp.DataType.Type.BIGINT: types.LongType,
|
318
318
|
exp.DataType.Type.SMALLINT: types.ShortType,
|
319
|
+
exp.DataType.Type.TINYINT: types.ByteType,
|
319
320
|
exp.DataType.Type.FLOAT: types.FloatType,
|
320
321
|
exp.DataType.Type.DOUBLE: types.DoubleType,
|
321
322
|
exp.DataType.Type.DECIMAL: types.DecimalType,
|
@@ -1,14 +1,14 @@
|
|
1
1
|
sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
|
2
|
-
sqlframe/_version.py,sha256=
|
2
|
+
sqlframe/_version.py,sha256=TJ7uVN2zVQAjIGbjv5aK_3Ly4C1owCoTivN0RRSBWsU,413
|
3
3
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
5
5
|
sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
|
6
6
|
sqlframe/base/column.py,sha256=oHVwkSWABO3ZlAbgBShsxSSlgbI06BOup5XJrRhgqJI,18097
|
7
|
-
sqlframe/base/dataframe.py,sha256=
|
7
|
+
sqlframe/base/dataframe.py,sha256=TGIU6VMjeDS1VxlC35XcPCmpNBPNFnTxl2IaatKzR-4,83590
|
8
8
|
sqlframe/base/decorators.py,sha256=ms-CvDOIW3T8IVB9VqDmLwAiaEsqXLYRXEqVQaxktiM,1890
|
9
9
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
10
10
|
sqlframe/base/function_alternatives.py,sha256=NV31IaEhVYmfUSWetAEFISAvLzs2DxQ7bp-iMNgj0hQ,53786
|
11
|
-
sqlframe/base/functions.py,sha256=
|
11
|
+
sqlframe/base/functions.py,sha256=nfDf2oKoBq2hrutTfuVHKmGvkm_X_ZvhfnFPv1rn0oU,222350
|
12
12
|
sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
|
13
13
|
sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
|
14
14
|
sqlframe/base/operations.py,sha256=xSPw74e59wYvNd6U1AlwziNCTG6Aftrbl4SybN9u9VE,3450
|
@@ -18,7 +18,7 @@ sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
|
|
18
18
|
sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
|
19
19
|
sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
|
20
20
|
sqlframe/base/udf.py,sha256=O6hMhBUy9NVv-mhJRtfFhXTIa_-Z8Y_FkmmuOHu0l90,1117
|
21
|
-
sqlframe/base/util.py,sha256=
|
21
|
+
sqlframe/base/util.py,sha256=rdnH3Kg6gZVT3DehU_ZHjfum79vc-I5W_Il6OiCtWF4,15284
|
22
22
|
sqlframe/base/window.py,sha256=8hOv-ignPPIsZA9FzvYzcLE9J_glalVaYjIAUdRUX3o,4943
|
23
23
|
sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
24
24
|
sqlframe/base/mixins/catalog_mixins.py,sha256=9tn0mK8oPoqIIjNItystD5tdBMdK9YpkxTG7G9KQl8k,18619
|
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
129
129
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
130
130
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
131
131
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
132
|
-
sqlframe-3.19.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
133
|
-
sqlframe-3.
|
134
|
-
sqlframe-3.19.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
135
|
-
sqlframe-3.19.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
136
|
-
sqlframe-3.19.0.dist-info/RECORD,,
|
132
|
+
sqlframe-3.21.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
133
|
+
sqlframe-3.21.0.dist-info/METADATA,sha256=WijzmVzIYe83YaO5ZzqLG0mVl75PjYhhksrSMbDCCGw,8970
|
134
|
+
sqlframe-3.21.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
135
|
+
sqlframe-3.21.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
136
|
+
sqlframe-3.21.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|