PyPI - sqlframe - Versions diffs - 3.19.0__py3-none-any.whl → 3.21.0__py3-none-any.whl - Mend

sqlframe 3.19.0__py3-none-any.whl → 3.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

sqlframe/_version.py CHANGED Viewed

@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '3.19.0'
-__version_tuple__ = version_tuple = (3, 19, 0)
+__version__ = version = '3.21.0'
+__version_tuple__ = version_tuple = (3, 21, 0)

sqlframe/base/dataframe.py CHANGED Viewed

@@ -1718,6 +1718,114 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
             grouping_columns.extend([list(x) for x in itertools.combinations(columns, i)])
         return self._group_data(self, grouping_columns, self.last_op)
+    @operation(Operation.SELECT)
+    def unpivot(
+        self,
+        ids: t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]],
+        values: t.Optional[t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]]],
+        variableColumnName: str,
+        valueColumnName: str,
+    ) -> Self:
+        """
+        Unpivot a DataFrame from wide format to long format, optionally leaving
+        identifier columns set. This is the reverse to `groupBy(...).pivot(...).agg(...)`,
+        except for the aggregation, which cannot be reversed.
+        This function is useful to massage a DataFrame into a format where some
+        columns are identifier columns ("ids"), while all other columns ("values")
+        are "unpivoted" to the rows, leaving just two non-id columns, named as given
+        by `variableColumnName` and `valueColumnName`.
+        When no "id" columns are given, the unpivoted DataFrame consists of only the
+        "variable" and "value" columns.
+        The `values` columns must not be empty so at least one value must be given to be unpivoted.
+        When `values` is `None`, all non-id columns will be unpivoted.
+        All "value" columns must share a least common data type. Unless they are the same data type,
+        all "value" columns are cast to the nearest common data type. For instance, types
+        `IntegerType` and `LongType` are cast to `LongType`, while `IntegerType` and `StringType`
+        do not have a common data type and `unpivot` fails.
+        .. versionadded:: 3.4.0
+        Parameters
+        ----------
+        ids : str, Column, tuple, list
+            Column(s) to use as identifiers. Can be a single column or column name,
+            or a list or tuple for multiple columns.
+        values : str, Column, tuple, list, optional
+            Column(s) to unpivot. Can be a single column or column name, or a list or tuple
+            for multiple columns. If specified, must not be empty. If not specified, uses all
+            columns that are not set as `ids`.
+        variableColumnName : str
+            Name of the variable column.
+        valueColumnName : str
+            Name of the value column.
+        Returns
+        -------
+        :class:`DataFrame`
+            Unpivoted DataFrame.
+        Notes
+        -----
+        Supports Spark Connect.
+        Examples
+        --------
+        >>> df = spark.createDataFrame(
+        ...     [(1, 11, 1.1), (2, 12, 1.2)],
+        ...     ["id", "int", "double"],
+        ... )
+        >>> df.show()
+        +---+---+------+
+        | id|int|double|
+        +---+---+------+
+        |  1| 11|   1.1|
+        |  2| 12|   1.2|
+        +---+---+------+
+        >>> df.unpivot("id", ["int", "double"], "var", "val").show()
+        +---+------+----+
+        | id|   var| val|
+        +---+------+----+
+        |  1|   int|11.0|
+        |  1|double| 1.1|
+        |  2|   int|12.0|
+        |  2|double| 1.2|
+        +---+------+----+
+        See Also
+        --------
+        DataFrame.melt
+        """
+        from sqlframe.base import functions as F
+        id_columns = self._ensure_and_normalize_cols(ids)
+        if not values:
+            outer_selects = self._get_outer_select_columns(self.expression)
+            values = [
+                column
+                for column in outer_selects
+                if column.alias_or_name not in {x.alias_or_name for x in id_columns}
+            ]
+        value_columns = self._ensure_and_normalize_cols(values)
+        df = self._convert_leaf_to_cte()
+        selects = []
+        for value in value_columns:
+            selects.append(
+                exp.select(
+                    *[x.column_expression for x in id_columns],
+                    F.lit(value.alias_or_name).alias(variableColumnName).expression,
+                    value.alias(valueColumnName).expression,
+                ).from_(df.expression.ctes[-1].alias_or_name)
+            )
+        unioned_expression = functools.reduce(lambda x, y: x.union(y, distinct=False), selects)  # type: ignore
+        final_expression = self._add_ctes_to_expression(unioned_expression, df.expression.ctes)
+        return self.copy(expression=final_expression)._convert_leaf_to_cte()
     def collect(self) -> t.List[Row]:
         return self._collect()

sqlframe/base/functions.py CHANGED Viewed

@@ -3133,20 +3133,14 @@ def datepart(field: ColumnOrName, source: ColumnOrName) -> Column:
 @meta(unsupported_engines=["bigquery", "postgres", "snowflake"])
 def day(col: ColumnOrName) -> Column:
-    from sqlframe.base.function_alternatives import day_with_try_to_timestamp
     session = _get_session()
     if session._is_duckdb:
         try_to_timestamp = get_func_from_session("try_to_timestamp")
         to_date = get_func_from_session("to_date")
-        when = get_func_from_session("when")
         _is_string = get_func_from_session("_is_string")
         coalesce = get_func_from_session("coalesce")
-        col = when(
-            _is_string(col),
-            coalesce(try_to_timestamp(col), to_date(col)),
-        ).otherwise(col)
+        col = coalesce(try_to_timestamp(Column.ensure_col(col).cast("VARCHAR")), to_date(col))
     return Column.invoke_expression_over_column(col, expression.Day)

sqlframe/base/util.py CHANGED Viewed

@@ -316,6 +316,7 @@ def sqlglot_to_spark(sqlglot_dtype: exp.DataType) -> types.DataType:
         exp.DataType.Type.INT: types.IntegerType,
         exp.DataType.Type.BIGINT: types.LongType,
         exp.DataType.Type.SMALLINT: types.ShortType,
+        exp.DataType.Type.TINYINT: types.ByteType,
         exp.DataType.Type.FLOAT: types.FloatType,
         exp.DataType.Type.DOUBLE: types.DoubleType,
         exp.DataType.Type.DECIMAL: types.DecimalType,

{sqlframe-3.19.0.dist-info → sqlframe-3.21.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sqlframe
-Version: 3.19.0
+Version: 3.21.0
 Summary: Turning PySpark Into a Universal DataFrame API
 Home-page: https://github.com/eakmanrq/sqlframe
 Author: Ryan Eakman

{sqlframe-3.19.0.dist-info → sqlframe-3.21.0.dist-info}/RECORD RENAMED Viewed

@@ -1,14 +1,14 @@
 sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
-sqlframe/_version.py,sha256=bRGLbmtauY86O6qq58KRvSDdCcwrGM24X-Zm0Elw0sU,413
+sqlframe/_version.py,sha256=TJ7uVN2zVQAjIGbjv5aK_3Ly4C1owCoTivN0RRSBWsU,413
 sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
 sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
 sqlframe/base/column.py,sha256=oHVwkSWABO3ZlAbgBShsxSSlgbI06BOup5XJrRhgqJI,18097
-sqlframe/base/dataframe.py,sha256=mKXbIKYiKH5mh6qj0Dg7L_znmCL85q9kHlmHtCW4kJ4,79352
+sqlframe/base/dataframe.py,sha256=TGIU6VMjeDS1VxlC35XcPCmpNBPNFnTxl2IaatKzR-4,83590
 sqlframe/base/decorators.py,sha256=ms-CvDOIW3T8IVB9VqDmLwAiaEsqXLYRXEqVQaxktiM,1890
 sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
 sqlframe/base/function_alternatives.py,sha256=NV31IaEhVYmfUSWetAEFISAvLzs2DxQ7bp-iMNgj0hQ,53786
-sqlframe/base/functions.py,sha256=j_Sh4qIcR-2lesJT_2TzBlTIM46os35AcmMuwBm86DE,222512
+sqlframe/base/functions.py,sha256=nfDf2oKoBq2hrutTfuVHKmGvkm_X_ZvhfnFPv1rn0oU,222350
 sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
 sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
 sqlframe/base/operations.py,sha256=xSPw74e59wYvNd6U1AlwziNCTG6Aftrbl4SybN9u9VE,3450
@@ -18,7 +18,7 @@ sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
 sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
 sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
 sqlframe/base/udf.py,sha256=O6hMhBUy9NVv-mhJRtfFhXTIa_-Z8Y_FkmmuOHu0l90,1117
-sqlframe/base/util.py,sha256=ggiGdegJ-Re-xk6PLB5tt6yAW9S7pg3xsrFm0xU3XCc,15233
+sqlframe/base/util.py,sha256=rdnH3Kg6gZVT3DehU_ZHjfum79vc-I5W_Il6OiCtWF4,15284
 sqlframe/base/window.py,sha256=8hOv-ignPPIsZA9FzvYzcLE9J_glalVaYjIAUdRUX3o,4943
 sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sqlframe/base/mixins/catalog_mixins.py,sha256=9tn0mK8oPoqIIjNItystD5tdBMdK9YpkxTG7G9KQl8k,18619
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
 sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
 sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
 sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
-sqlframe-3.19.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
-sqlframe-3.19.0.dist-info/METADATA,sha256=t_G87pTEVYezUc-A5TIumPN-sHNsgTjW8vNgZ4Jvjpw,8970
-sqlframe-3.19.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-sqlframe-3.19.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
-sqlframe-3.19.0.dist-info/RECORD,,
+sqlframe-3.21.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
+sqlframe-3.21.0.dist-info/METADATA,sha256=WijzmVzIYe83YaO5ZzqLG0mVl75PjYhhksrSMbDCCGw,8970
+sqlframe-3.21.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+sqlframe-3.21.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
+sqlframe-3.21.0.dist-info/RECORD,,

{sqlframe-3.19.0.dist-info → sqlframe-3.21.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{sqlframe-3.19.0.dist-info → sqlframe-3.21.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{sqlframe-3.19.0.dist-info → sqlframe-3.21.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

sqlframe 3.19.0__py3-none-any.whl → 3.21.0__py3-none-any.whl

sqlframe 3.19.0__py3-none-any.whl → 3.21.0__py3-none-any.whl