sqlframe 3.19.0__py3-none-any.whl → 3.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.19.0'
16
- __version_tuple__ = version_tuple = (3, 19, 0)
15
+ __version__ = version = '3.21.0'
16
+ __version_tuple__ = version_tuple = (3, 21, 0)
@@ -1718,6 +1718,114 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1718
1718
  grouping_columns.extend([list(x) for x in itertools.combinations(columns, i)])
1719
1719
  return self._group_data(self, grouping_columns, self.last_op)
1720
1720
 
1721
+ @operation(Operation.SELECT)
1722
+ def unpivot(
1723
+ self,
1724
+ ids: t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]],
1725
+ values: t.Optional[t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]]],
1726
+ variableColumnName: str,
1727
+ valueColumnName: str,
1728
+ ) -> Self:
1729
+ """
1730
+ Unpivot a DataFrame from wide format to long format, optionally leaving
1731
+ identifier columns set. This is the reverse of `groupBy(...).pivot(...).agg(...)`,
1732
+ except for the aggregation, which cannot be reversed.
1733
+
1734
+ This function is useful to massage a DataFrame into a format where some
1735
+ columns are identifier columns ("ids"), while all other columns ("values")
1736
+ are "unpivoted" to the rows, leaving just two non-id columns, named as given
1737
+ by `variableColumnName` and `valueColumnName`.
1738
+
1739
+ When no "id" columns are given, the unpivoted DataFrame consists of only the
1740
+ "variable" and "value" columns.
1741
+
1742
+ The `values` columns must not be empty, so at least one value must be given to be unpivoted.
1743
+ When `values` is `None`, all non-id columns will be unpivoted.
1744
+
1745
+ All "value" columns must share a least common data type. Unless they are the same data type,
1746
+ all "value" columns are cast to the nearest common data type. For instance, types
1747
+ `IntegerType` and `LongType` are cast to `LongType`, while `IntegerType` and `StringType`
1748
+ do not have a common data type and `unpivot` fails.
1749
+
1750
+ .. versionadded:: 3.4.0
1751
+
1752
+ Parameters
1753
+ ----------
1754
+ ids : str, Column, tuple, list
1755
+ Column(s) to use as identifiers. Can be a single column or column name,
1756
+ or a list or tuple for multiple columns.
1757
+ values : str, Column, tuple, list, optional
1758
+ Column(s) to unpivot. Can be a single column or column name, or a list or tuple
1759
+ for multiple columns. If specified, must not be empty. If not specified, uses all
1760
+ columns that are not set as `ids`.
1761
+ variableColumnName : str
1762
+ Name of the variable column.
1763
+ valueColumnName : str
1764
+ Name of the value column.
1765
+
1766
+ Returns
1767
+ -------
1768
+ :class:`DataFrame`
1769
+ Unpivoted DataFrame.
1770
+
1771
+ Notes
1772
+ -----
1773
+ Supports Spark Connect.
1774
+
1775
+ Examples
1776
+ --------
1777
+ >>> df = spark.createDataFrame(
1778
+ ... [(1, 11, 1.1), (2, 12, 1.2)],
1779
+ ... ["id", "int", "double"],
1780
+ ... )
1781
+ >>> df.show()
1782
+ +---+---+------+
1783
+ | id|int|double|
1784
+ +---+---+------+
1785
+ | 1| 11| 1.1|
1786
+ | 2| 12| 1.2|
1787
+ +---+---+------+
1788
+
1789
+ >>> df.unpivot("id", ["int", "double"], "var", "val").show()
1790
+ +---+------+----+
1791
+ | id| var| val|
1792
+ +---+------+----+
1793
+ | 1| int|11.0|
1794
+ | 1|double| 1.1|
1795
+ | 2| int|12.0|
1796
+ | 2|double| 1.2|
1797
+ +---+------+----+
1798
+
1799
+ See Also
1800
+ --------
1801
+ DataFrame.melt
1802
+ """
1803
+ from sqlframe.base import functions as F
1804
+
1805
+ id_columns = self._ensure_and_normalize_cols(ids)
1806
+ if not values:
1807
+ outer_selects = self._get_outer_select_columns(self.expression)
1808
+ values = [
1809
+ column
1810
+ for column in outer_selects
1811
+ if column.alias_or_name not in {x.alias_or_name for x in id_columns}
1812
+ ]
1813
+ value_columns = self._ensure_and_normalize_cols(values)
1814
+
1815
+ df = self._convert_leaf_to_cte()
1816
+ selects = []
1817
+ for value in value_columns:
1818
+ selects.append(
1819
+ exp.select(
1820
+ *[x.column_expression for x in id_columns],
1821
+ F.lit(value.alias_or_name).alias(variableColumnName).expression,
1822
+ value.alias(valueColumnName).expression,
1823
+ ).from_(df.expression.ctes[-1].alias_or_name)
1824
+ )
1825
+ unioned_expression = functools.reduce(lambda x, y: x.union(y, distinct=False), selects) # type: ignore
1826
+ final_expression = self._add_ctes_to_expression(unioned_expression, df.expression.ctes)
1827
+ return self.copy(expression=final_expression)._convert_leaf_to_cte()
1828
+
1721
1829
  def collect(self) -> t.List[Row]:
1722
1830
  return self._collect()
1723
1831
 
@@ -3133,20 +3133,14 @@ def datepart(field: ColumnOrName, source: ColumnOrName) -> Column:
3133
3133
 
3134
3134
  @meta(unsupported_engines=["bigquery", "postgres", "snowflake"])
3135
3135
  def day(col: ColumnOrName) -> Column:
3136
- from sqlframe.base.function_alternatives import day_with_try_to_timestamp
3137
-
3138
3136
  session = _get_session()
3139
3137
 
3140
3138
  if session._is_duckdb:
3141
3139
  try_to_timestamp = get_func_from_session("try_to_timestamp")
3142
3140
  to_date = get_func_from_session("to_date")
3143
- when = get_func_from_session("when")
3144
3141
  _is_string = get_func_from_session("_is_string")
3145
3142
  coalesce = get_func_from_session("coalesce")
3146
- col = when(
3147
- _is_string(col),
3148
- coalesce(try_to_timestamp(col), to_date(col)),
3149
- ).otherwise(col)
3143
+ col = coalesce(try_to_timestamp(Column.ensure_col(col).cast("VARCHAR")), to_date(col))
3150
3144
 
3151
3145
  return Column.invoke_expression_over_column(col, expression.Day)
3152
3146
 
sqlframe/base/util.py CHANGED
@@ -316,6 +316,7 @@ def sqlglot_to_spark(sqlglot_dtype: exp.DataType) -> types.DataType:
316
316
  exp.DataType.Type.INT: types.IntegerType,
317
317
  exp.DataType.Type.BIGINT: types.LongType,
318
318
  exp.DataType.Type.SMALLINT: types.ShortType,
319
+ exp.DataType.Type.TINYINT: types.ByteType,
319
320
  exp.DataType.Type.FLOAT: types.FloatType,
320
321
  exp.DataType.Type.DOUBLE: types.DoubleType,
321
322
  exp.DataType.Type.DECIMAL: types.DecimalType,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.19.0
3
+ Version: 3.21.0
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -1,14 +1,14 @@
1
1
  sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
2
- sqlframe/_version.py,sha256=bRGLbmtauY86O6qq58KRvSDdCcwrGM24X-Zm0Elw0sU,413
2
+ sqlframe/_version.py,sha256=TJ7uVN2zVQAjIGbjv5aK_3Ly4C1owCoTivN0RRSBWsU,413
3
3
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
5
5
  sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
6
6
  sqlframe/base/column.py,sha256=oHVwkSWABO3ZlAbgBShsxSSlgbI06BOup5XJrRhgqJI,18097
7
- sqlframe/base/dataframe.py,sha256=mKXbIKYiKH5mh6qj0Dg7L_znmCL85q9kHlmHtCW4kJ4,79352
7
+ sqlframe/base/dataframe.py,sha256=TGIU6VMjeDS1VxlC35XcPCmpNBPNFnTxl2IaatKzR-4,83590
8
8
  sqlframe/base/decorators.py,sha256=ms-CvDOIW3T8IVB9VqDmLwAiaEsqXLYRXEqVQaxktiM,1890
9
9
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
10
10
  sqlframe/base/function_alternatives.py,sha256=NV31IaEhVYmfUSWetAEFISAvLzs2DxQ7bp-iMNgj0hQ,53786
11
- sqlframe/base/functions.py,sha256=j_Sh4qIcR-2lesJT_2TzBlTIM46os35AcmMuwBm86DE,222512
11
+ sqlframe/base/functions.py,sha256=nfDf2oKoBq2hrutTfuVHKmGvkm_X_ZvhfnFPv1rn0oU,222350
12
12
  sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
13
13
  sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
14
14
  sqlframe/base/operations.py,sha256=xSPw74e59wYvNd6U1AlwziNCTG6Aftrbl4SybN9u9VE,3450
@@ -18,7 +18,7 @@ sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
18
18
  sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
19
19
  sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
20
20
  sqlframe/base/udf.py,sha256=O6hMhBUy9NVv-mhJRtfFhXTIa_-Z8Y_FkmmuOHu0l90,1117
21
- sqlframe/base/util.py,sha256=ggiGdegJ-Re-xk6PLB5tt6yAW9S7pg3xsrFm0xU3XCc,15233
21
+ sqlframe/base/util.py,sha256=rdnH3Kg6gZVT3DehU_ZHjfum79vc-I5W_Il6OiCtWF4,15284
22
22
  sqlframe/base/window.py,sha256=8hOv-ignPPIsZA9FzvYzcLE9J_glalVaYjIAUdRUX3o,4943
23
23
  sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  sqlframe/base/mixins/catalog_mixins.py,sha256=9tn0mK8oPoqIIjNItystD5tdBMdK9YpkxTG7G9KQl8k,18619
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
129
129
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
130
130
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
131
131
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
132
- sqlframe-3.19.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
133
- sqlframe-3.19.0.dist-info/METADATA,sha256=t_G87pTEVYezUc-A5TIumPN-sHNsgTjW8vNgZ4Jvjpw,8970
134
- sqlframe-3.19.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
135
- sqlframe-3.19.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
136
- sqlframe-3.19.0.dist-info/RECORD,,
132
+ sqlframe-3.21.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
133
+ sqlframe-3.21.0.dist-info/METADATA,sha256=WijzmVzIYe83YaO5ZzqLG0mVl75PjYhhksrSMbDCCGw,8970
134
+ sqlframe-3.21.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
135
+ sqlframe-3.21.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
136
+ sqlframe-3.21.0.dist-info/RECORD,,