sqlframe 3.35.1__py3-none-any.whl → 3.36.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/function_alternatives.py +0 -4
- sqlframe/base/functions.py +14 -17
- sqlframe/base/group.py +121 -2
- sqlframe/databricks/session.py +51 -2
- {sqlframe-3.35.1.dist-info → sqlframe-3.36.0.dist-info}/METADATA +3 -3
- {sqlframe-3.35.1.dist-info → sqlframe-3.36.0.dist-info}/RECORD +10 -10
- {sqlframe-3.35.1.dist-info → sqlframe-3.36.0.dist-info}/LICENSE +0 -0
- {sqlframe-3.35.1.dist-info → sqlframe-3.36.0.dist-info}/WHEEL +0 -0
- {sqlframe-3.35.1.dist-info → sqlframe-3.36.0.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED

sqlframe/base/function_alternatives.py
CHANGED

@@ -1300,10 +1300,6 @@ def day_with_try_to_timestamp(col: ColumnOrName) -> Column:
     )


-def endswith_with_underscore(str: ColumnOrName, suffix: ColumnOrName) -> Column:
-    return Column.invoke_anonymous_function(str, "ENDS_WITH", suffix)
-
-
 def endswith_using_like(str: ColumnOrName, suffix: ColumnOrName) -> Column:
     concat = get_func_from_session("concat")
     lit = get_func_from_session("lit")
sqlframe/base/functions.py
CHANGED

@@ -2288,14 +2288,14 @@ def array_distinct(col: ColumnOrName) -> Column:

 @meta(unsupported_engines=["bigquery", "postgres"])
 def array_intersect(col1: ColumnOrName, col2: ColumnOrName) -> Column:
-
-
-
-
-
-
-
-
+    return Column(
+        expression.ArrayIntersect(
+            expressions=[
+                Column.ensure_col(col1).column_expression,
+                Column.ensure_col(col2).column_expression,
+            ]
+        )
+    )


 @meta(unsupported_engines=["postgres"])

@@ -3226,18 +3226,16 @@ def elt(*inputs: ColumnOrName) -> Column:
 def endswith(str: ColumnOrName, suffix: ColumnOrName) -> Column:
     from sqlframe.base.function_alternatives import (
         endswith_using_like,
-        endswith_with_underscore,
     )

     session = _get_session()

-    if session._is_bigquery or session._is_duckdb:
-        return endswith_with_underscore(str, suffix)
-
     if session._is_postgres:
         return endswith_using_like(str, suffix)

-    return Column.
+    return Column.invoke_expression_over_column(
+        str, expression.EndsWith, expression=Column.ensure_col(suffix).column_expression
+    )


 @meta(unsupported_engines="*")

@@ -5655,10 +5653,9 @@ def replace(
     ):
         replace = expression.Literal.string("")  # type: ignore

-
-
-
-    return Column.invoke_anonymous_function(src, "replace", search)
+    return Column.invoke_expression_over_column(
+        src, expression.Replace, expression=search, replacement=replace
+    )


 @meta()
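The practical effect of the functions.py changes is that array_intersect, endswith, and replace now build typed sqlglot expression nodes (ArrayIntersect, EndsWith, Replace) instead of emitting anonymous SQL function calls, which lets each dialect render its own native syntax. Below is a minimal sketch of that idea at the sqlglot level; the column name and literal are illustrative, not from the diff, and it assumes the sqlglot version pinned by this release (which exposes exp.EndsWith, as the diff itself relies on).

```python
# Minimal sketch (illustrative values): a typed sqlglot node is transpiled
# per dialect, which is what replacing invoke_anonymous_function buys.
from sqlglot import exp

ends_with = exp.EndsWith(
    this=exp.column("file_name"),
    expression=exp.Literal.string(".csv"),
)

# The same node renders to each dialect's native form instead of a
# hard-coded ENDS_WITH(...) anonymous function.
for dialect in ("duckdb", "bigquery", "snowflake"):
    print(dialect, "->", ends_with.sql(dialect=dialect))
```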
sqlframe/base/group.py
CHANGED

@@ -2,10 +2,16 @@

 from __future__ import annotations

+import sys
 import typing as t

 from sqlframe.base.operations import Operation, group_operation, operation

+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+
 if t.TYPE_CHECKING:
     from sqlframe.base.column import Column
     from sqlframe.base.session import DF

@@ -28,6 +34,8 @@ class _BaseGroupedData(t.Generic[DF]):
         self.session = df.session
         self.last_op = last_op
         self.group_by_cols = group_by_cols
+        self.pivot_col: t.Optional[str] = None
+        self.pivot_values: t.Optional[t.List[t.Any]] = None

     def _get_function_applied_columns(
         self, func_name: str, cols: t.Tuple[str, ...]

@@ -56,6 +64,79 @@ class _BaseGroupedData(t.Generic[DF]):
         )
         cols = self._df._ensure_and_normalize_cols(columns)

+        # Handle pivot transformation
+        if self.pivot_col is not None and self.pivot_values is not None:
+            from sqlglot import exp
+
+            from sqlframe.base import functions as F
+
+            # Build the pivot expression
+            # First, we need to convert the DataFrame to include the pivot logic
+            df = self._df.copy()
+
+            # Create the base query with group by columns, pivot column, and aggregation columns
+            select_cols = []
+            # Add group by columns
+            for col in self.group_by_cols:
+                select_cols.append(col.expression)  # type: ignore
+            # Add pivot column
+            select_cols.append(Column.ensure_col(self.pivot_col).expression)
+            # Add the value columns that will be aggregated
+            for agg_col in cols:
+                # Extract the column being aggregated from the aggregation function
+                # For example, from SUM(earnings), we want to extract 'earnings'
+                if (
+                    isinstance(agg_col.column_expression, exp.AggFunc)
+                    and agg_col.column_expression.this
+                ):
+                    if agg_col.column_expression.this not in select_cols:
+                        select_cols.append(agg_col.column_expression.this)
+
+            # Create the base query
+            base_query = df.expression.select(*select_cols, append=False)
+
+            # Build pivot expression
+            pivot_expressions = []
+            for agg_col in cols:
+                if isinstance(agg_col.column_expression, exp.AggFunc):
+                    # Clone the aggregation function
+                    # Snowflake doesn't support alias in the pivot, so we need to use the column_expression
+                    agg_func = (
+                        agg_col.column_expression.copy()
+                        if self.session._is_snowflake
+                        else agg_col.expression.copy()
+                    )
+                    pivot_expressions.append(agg_func)
+
+            # Create the IN clause with pivot values
+            in_values = []
+            for v in self.pivot_values:
+                if isinstance(v, str):
+                    in_values.append(exp.Literal.string(v))
+                else:
+                    in_values.append(exp.Literal.number(v))
+
+            # Build the pivot node with the fields parameter
+            pivot = exp.Pivot(
+                expressions=pivot_expressions,
+                fields=[
+                    exp.In(
+                        this=Column.ensure_col(self.pivot_col).column_expression,
+                        expressions=in_values,
+                    )
+                ],
+            )
+
+            # Create a subquery with the pivot attached
+            subquery = base_query.subquery()
+            subquery.set("pivots", [pivot])
+
+            # Create the final select from the pivoted subquery
+            expression = exp.select("*").from_(subquery)
+
+            return self._df.copy(expression=expression)
+
+        # Original non-pivot logic
         if not self.group_by_cols or not isinstance(self.group_by_cols[0], (list, tuple, set)):
             expression = self._df.expression.group_by(
                 # User column_expression for group by to avoid alias in group by

@@ -104,5 +185,43 @@ class _BaseGroupedData(t.Generic[DF]):
     def sum(self, *cols: str) -> DF:
         return self.agg(*self._get_function_applied_columns("sum", cols))

-    def pivot(self,
-
+    def pivot(self, pivot_col: str, values: t.Optional[t.List[t.Any]] = None) -> Self:
+        """
+        Pivots a column of the current DataFrame and perform the specified aggregation.
+
+        There are two versions of the pivot function: one that requires the caller
+        to specify the list of distinct values to pivot on, and one that does not.
+        The latter is more concise but less efficient, because Spark needs to first
+        compute the list of distinct values internally.
+
+        Parameters
+        ----------
+        pivot_col : str
+            Name of the column to pivot.
+        values : list, optional
+            List of values that will be translated to columns in the output DataFrame.
+
+        Returns
+        -------
+        GroupedData
+            Returns self to allow chaining with aggregation methods.
+        """
+        if self.session._is_postgres:
+            raise NotImplementedError(
+                "Pivot operation is not supported in Postgres. Please create an issue if you would like a workaround implemented."
+            )
+
+        self.pivot_col = pivot_col
+
+        if values is None:
+            # Eagerly compute distinct values
+            from sqlframe.base.column import Column
+
+            distinct_df = self._df.select(pivot_col).distinct()
+            distinct_rows = distinct_df.collect()
+            # Sort to make the results deterministic
+            self.pivot_values = sorted([row[0] for row in distinct_rows])
+        else:
+            self.pivot_values = values
+
+        return self
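Taken together, the group.py changes add a PySpark-style pivot: pivot() records the pivot column and values (eagerly computing distinct values when none are given) and returns self, and agg() then wraps the grouped query in a sqlglot PIVOT subquery. A hedged usage sketch follows; the DuckDB session, column names, and data are illustrative assumptions, not from the diff.

```python
# Illustrative sketch of the new GroupedData.pivot() chaining, mirroring
# PySpark's API. Engine, data, and column names are assumptions.
from sqlframe.duckdb import DuckDBSession
from sqlframe.duckdb import functions as F

session = DuckDBSession()  # in-memory DuckDB
df = session.createDataFrame(
    [("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2013, 48000)],
    ["course", "year", "earnings"],
)

# Passing explicit values skips the eager distinct() query that the
# values=None path runs to discover the pivot columns.
pivoted = df.groupBy("year").pivot("course", ["dotNET", "Java"]).agg(F.sum("earnings"))
pivoted.show()
```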
sqlframe/databricks/session.py
CHANGED

@@ -1,7 +1,7 @@
 from __future__ import annotations

+import logging
 import typing as t
-import warnings

 from sqlframe.base.session import _BaseSession
 from sqlframe.databricks.catalog import DatabricksCatalog

@@ -19,6 +19,9 @@ else:
     DatabricksConnection = t.Any


+logger = logging.getLogger(__name__)
+
+
 class DatabricksSession(
     _BaseSession[  # type: ignore
         DatabricksCatalog,

@@ -43,14 +46,60 @@ class DatabricksSession(
         server_hostname: t.Optional[str] = None,
         http_path: t.Optional[str] = None,
         access_token: t.Optional[str] = None,
+        **kwargs: t.Any,
     ):
         from databricks import sql

+        self._conn_kwargs = (
+            {}
+            if conn
+            else {
+                "server_hostname": server_hostname,
+                "http_path": http_path,
+                "access_token": access_token,
+                "disable_pandas": True,
+                **kwargs,
+            }
+        )
+
         if not hasattr(self, "_conn"):
             super().__init__(
-                conn or sql.connect(
+                conn or sql.connect(**self._conn_kwargs),
             )

+    def _execute(self, sql: str) -> None:
+        from databricks.sql import connect
+        from databricks.sql.exc import DatabaseError, RequestError
+
+        try:
+            super()._execute(sql)
+        except (DatabaseError, RequestError) as e:
+            logger.warning("Failed to execute query")
+            if not self._is_session_expired_error(e):
+                logger.error("Error is not related to session expiration, re-raising")
+                raise e
+            if self._conn_kwargs:
+                logger.info("Attempting to reconnect with provided connection parameters")
+                self._connection = connect(**self._conn_kwargs)
+                # Clear the cached cursor
+                if hasattr(self, "_cur"):
+                    delattr(self, "_cur")
+                super()._execute(sql)
+            else:
+                logger.error("No connection parameters provided so could not reconnect")
+                raise
+
+    def _is_session_expired_error(self, error: Exception) -> bool:
+        error_str = str(error).lower()
+        session_keywords = [
+            "invalid sessionhandle",
+            "session is closed",
+            "session expired",
+            "session not found",
+            "sessionhandle",
+        ]
+        return any(keyword in error_str for keyword in session_keywords)
+
     @classmethod
     def _try_get_map(cls, value: t.Any) -> t.Optional[t.Dict[str, t.Any]]:
         if (
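The session.py changes mean DatabricksSession now remembers its connection parameters (including any extra **kwargs forwarded to databricks.sql.connect) and, when a statement fails with a session-expiration error, reconnects with those stored parameters and retries once. A hedged construction sketch follows; the hostname, path, token, and the extra session_configuration kwarg are placeholders/assumptions, not values from the diff.

```python
# Illustrative sketch; credentials are placeholders and the extra kwarg is an
# assumption about what you might forward to databricks.sql.connect().
from sqlframe.databricks import DatabricksSession

session = DatabricksSession(
    server_hostname="dbc-xxxxxxxx.cloud.databricks.com",
    http_path="/sql/1.0/warehouses/xxxxxxxxxxxx",
    access_token="dapiXXXXXXXX",
    # Any additional keyword arguments are passed straight through to
    # databricks.sql.connect() via **kwargs.
    session_configuration={"ansi_mode": "true"},
)

# If Databricks later reports an expired/invalid session, _execute() will
# reconnect with the stored kwargs and retry the statement once.
df = session.sql("SELECT 1 AS one")
df.show()
```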
{sqlframe-3.35.1.dist-info → sqlframe-3.36.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sqlframe
-Version: 3.35.1
+Version: 3.36.0
 Summary: Turning PySpark Into a Universal DataFrame API
 Home-page: https://github.com/eakmanrq/sqlframe
 Author: Ryan Eakman

@@ -17,7 +17,7 @@ Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: prettytable <4
-Requires-Dist: sqlglot <26.
+Requires-Dist: sqlglot <26.32,>=24.0.0
 Requires-Dist: typing-extensions
 Provides-Extra: bigquery
 Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'

@@ -39,7 +39,7 @@ Requires-Dist: pytest-forked ; extra == 'dev'
 Requires-Dist: pytest-postgresql <8,>=6 ; extra == 'dev'
 Requires-Dist: pytest-xdist <3.8,>=3.6 ; extra == 'dev'
 Requires-Dist: pytest <8.5,>=8.2.0 ; extra == 'dev'
-Requires-Dist: ruff <0.
+Requires-Dist: ruff <0.13,>=0.4.4 ; extra == 'dev'
 Requires-Dist: types-psycopg2 <3,>=2.9 ; extra == 'dev'
 Provides-Extra: docs
 Requires-Dist: mkdocs-include-markdown-plugin ==6.0.6 ; extra == 'docs'
{sqlframe-3.35.1.dist-info → sqlframe-3.36.0.dist-info}/RECORD
CHANGED

@@ -1,5 +1,5 @@
 sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
-sqlframe/_version.py,sha256=
+sqlframe/_version.py,sha256=bkUPQ6OdlXKrD5knIV3EChl0OWjLm_VJDu9m0db4vwg,513
 sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
 sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298

@@ -8,9 +8,9 @@ sqlframe/base/column.py,sha256=5ZnZcn6gCCrAL53-EEHxVQWXG2oijN3RCOhlWmsjbJM,21147
 sqlframe/base/dataframe.py,sha256=0diYONDlet8iZt49LC3vcmfXHAAZ2MovPL2pTXYHj2U,85974
 sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
 sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
-sqlframe/base/function_alternatives.py,sha256=
-sqlframe/base/functions.py,sha256=
-sqlframe/base/group.py,sha256=
+sqlframe/base/function_alternatives.py,sha256=aTu3nQhIAkZoxrI1IpjpaHEAMxBNms0AnhS0EMR-TwY,51727
+sqlframe/base/functions.py,sha256=qyV-4R4CPSkuS-0S3dPza0BZykoKAanxjQq83tu8L34,225778
+sqlframe/base/group.py,sha256=PGxUAnZkNlYKBIVNzoEDtoHbsP9Rhy1bGcSg2eYuWF4,9015
 sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
 sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
 sqlframe/base/readerwriter.py,sha256=Nb2VJ_HBmLQp5mK8JhnFooZh2ydAaboCAFVPb-4MNX4,31241

@@ -47,7 +47,7 @@ sqlframe/databricks/functions.py,sha256=La8rjAwO0hD4FBO0QxW5CtZtFAPvOrVc6lG4OtPG
 sqlframe/databricks/functions.pyi,sha256=FzVBpzXCJzxIp73sIAo_R8Wx8uOJrix-W12HsgyeTcQ,23799
 sqlframe/databricks/group.py,sha256=dU3g0DVLRlfOSCamKchQFXRd1WTFbdxoXkpEX8tPD6Y,399
 sqlframe/databricks/readwriter.py,sha256=cuGRI1G627JEZgGNtirrT8LAwT6xQCdgkSAETmLKNXU,14777
-sqlframe/databricks/session.py,sha256=
+sqlframe/databricks/session.py,sha256=i2CgrLIHJb53Cx1qu_rE1-cmmm19S-Sw1MhTISX1zYU,4013
 sqlframe/databricks/table.py,sha256=Q0Vnrl5aUqnqFTQpTwfWMRyQ9AQnagtpnSnXmP6IKRs,678
 sqlframe/databricks/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
 sqlframe/databricks/udf.py,sha256=3rmxv_6zSLfIxH8P8P050ZO-ki0aqBb9wWuUQBtl4m8,272

@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
 sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
 sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
 sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
-sqlframe-3.
-sqlframe-3.
-sqlframe-3.
-sqlframe-3.
-sqlframe-3.
+sqlframe-3.36.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
+sqlframe-3.36.0.dist-info/METADATA,sha256=F56M3UKMA8CZN2Ps3dAkputINvX8rhBcPKTiAuC5iEs,8987
+sqlframe-3.36.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+sqlframe-3.36.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
+sqlframe-3.36.0.dist-info/RECORD,,
{sqlframe-3.35.1.dist-info → sqlframe-3.36.0.dist-info}/LICENSE
File without changes

{sqlframe-3.35.1.dist-info → sqlframe-3.36.0.dist-info}/WHEEL
File without changes

{sqlframe-3.35.1.dist-info → sqlframe-3.36.0.dist-info}/top_level.txt
File without changes