PyPI - sqlframe - Versions diffs - 3.39.1__py3-none-any.whl → 3.39.3__py3-none-any.whl - Mend

sqlframe 3.39.1__py3-none-any.whl → 3.39.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

sqlframe/_version.py +3 -3
sqlframe/base/dataframe.py +53 -24
sqlframe/base/functions.py +12 -4
sqlframe/base/operations.py +4 -3
sqlframe/base/session.py +6 -1
sqlframe/base/util.py +15 -0
{sqlframe-3.39.1.dist-info → sqlframe-3.39.3.dist-info}/METADATA +3 -2
{sqlframe-3.39.1.dist-info → sqlframe-3.39.3.dist-info}/RECORD +11 -11
{sqlframe-3.39.1.dist-info → sqlframe-3.39.3.dist-info}/LICENSE +0 -0
{sqlframe-3.39.1.dist-info → sqlframe-3.39.3.dist-info}/WHEEL +0 -0
{sqlframe-3.39.1.dist-info → sqlframe-3.39.3.dist-info}/top_level.txt +0 -0

sqlframe/_version.py CHANGED Viewed

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
-__version__ = version = '3.39.1'
-__version_tuple__ = version_tuple = (3, 39, 1)
+__version__ = version = '3.39.3'
+__version_tuple__ = version_tuple = (3, 39, 3)
-__commit_id__ = commit_id = 'g0245f44bb'
+__commit_id__ = commit_id = 'g9d915cb1e'

sqlframe/base/dataframe.py CHANGED Viewed

@@ -16,6 +16,7 @@ from dataclasses import dataclass
 from uuid import uuid4
 import sqlglot
+from more_itertools import partition
 from prettytable import PrettyTable
 from sqlglot import Dialect, maybe_parse
 from sqlglot import expressions as exp
@@ -31,6 +32,7 @@ from sqlframe.base.util import (
     get_func_from_session,
     get_tables_from_expression_with_join,
     normalize_string,
+    partition_to,
     quote_preserving_alias_or_name,
     sqlglot_to_spark,
     verify_openai_installed,
@@ -540,16 +542,23 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         expression.set("with", exp.With(expressions=existing_ctes))
         return expression
+    @classmethod
+    def _get_outer_select_expressions(
+        cls, item: exp.Expression
+    ) -> t.List[t.Union[exp.Column, exp.Alias]]:
+        outer_select = item.find(exp.Select)
+        if outer_select:
+            return outer_select.expressions
+        return []
     @classmethod
     def _get_outer_select_columns(cls, item: exp.Expression) -> t.List[Column]:
         from sqlframe.base.session import _BaseSession
         col = get_func_from_session("col", _BaseSession())
-        outer_select = item.find(exp.Select)
-        if outer_select:
-            return [col(quote_preserving_alias_or_name(x)) for x in outer_select.expressions]
-        return []
+        outer_expressions = cls._get_outer_select_expressions(item)
+        return [col(quote_preserving_alias_or_name(x)) for x in outer_expressions]
     def _create_hash_from_expression(self, expression: exp.Expression) -> str:
         from sqlframe.base.session import _BaseSession
@@ -1503,20 +1512,23 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         """
         return func(self, *args, **kwargs)  # type: ignore
-    @operation(Operation.SELECT)
+    @operation(Operation.SELECT_CONSTRAINED)
     def withColumn(self, colName: str, col: Column) -> Self:
         return self.withColumns.__wrapped__(self, {colName: col})  # type: ignore
-    @operation(Operation.SELECT)
+    @operation(Operation.SELECT_CONSTRAINED)
     def withColumnRenamed(self, existing: str, new: str) -> Self:
+        col_func = get_func_from_session("col", self.session)
         expression = self.expression.copy()
         existing = self.session._normalize_string(existing)
-        columns = self._get_outer_select_columns(expression)
+        outer_expressions = self._get_outer_select_expressions(expression)
         results = []
         found_match = False
-        for column in columns:
-            if column.alias_or_name == existing:
-                column = column.alias(new)
+        for expr in outer_expressions:
+            column = col_func(expr.copy())
+            if existing == quote_preserving_alias_or_name(expr):
+                if isinstance(column.expression, exp.Alias):
+                    column.expression.set("alias", exp.to_identifier(new))
                 self._update_display_name_mapping([column], [new])
                 found_match = True
             results.append(column)
@@ -1524,7 +1536,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
             raise ValueError("Tried to rename a column that doesn't exist")
         return self.select.__wrapped__(self, *results, skip_update_display_name_mapping=True)  # type: ignore
-    @operation(Operation.SELECT)
+    @operation(Operation.SELECT_CONSTRAINED)
     def withColumnsRenamed(self, colsMap: t.Dict[str, str]) -> Self:
         """
         Returns a new :class:`DataFrame` by renaming multiple columns. If a non-existing column is
@@ -1570,7 +1582,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         return self.select.__wrapped__(self, *results, skip_update_display_name_mapping=True)  # type: ignore
-    @operation(Operation.SELECT)
+    @operation(Operation.SELECT_CONSTRAINED)
     def withColumns(self, *colsMap: t.Dict[str, Column]) -> Self:
         """
         Returns a new :class:`DataFrame` by adding multiple columns or replacing the
@@ -1608,13 +1620,14 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         """
         if len(colsMap) != 1:
             raise ValueError("Only a single map is supported")
+        col_func = get_func_from_session("col")
         col_map = {
             self._ensure_and_normalize_col(k): (self._ensure_and_normalize_col(v), k)
             for k, v in colsMap[0].items()
         }
-        existing_cols = self._get_outer_select_columns(self.expression)
-        existing_col_names = [x.alias_or_name for x in existing_cols]
-        select_columns = existing_cols
+        existing_expr = self._get_outer_select_expressions(self.expression)
+        existing_col_names = [x.alias_or_name for x in existing_expr]
+        select_columns = [col_func(x) for x in existing_expr]
         for col, (col_value, display_name) in col_map.items():
             column_name = col.alias_or_name
             existing_col_index = (
@@ -1631,16 +1644,32 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         )
         return self.select.__wrapped__(self, *select_columns, skip_update_display_name_mapping=True)  # type: ignore
-    @operation(Operation.SELECT)
+    @operation(Operation.SELECT_CONSTRAINED)
     def drop(self, *cols: t.Union[str, Column]) -> Self:
-        all_columns = self._get_outer_select_columns(self.expression)
-        drop_cols = self._ensure_and_normalize_cols(cols)
-        new_columns = [
-            col
-            for col in all_columns
-            if col.alias_or_name not in [drop_column.alias_or_name for drop_column in drop_cols]
-        ]
-        return self.copy().select(*new_columns, append=False)
+        # Separate string column names from Column objects for different handling
+        column_objs, column_names = partition_to(lambda x: isinstance(x, str), cols, list, set)
+        # Normalize only the Column objects (strings will be handled as unqualified)
+        drop_cols = self._ensure_and_normalize_cols(column_objs) if column_objs else []
+        # Work directly with the expression's select columns to preserve table qualifiers
+        current_expressions = self.expression.expressions
+        drop_sql = {drop_col.expression.sql() for drop_col in drop_cols}
+        # Create a more sophisticated matching function that considers table qualifiers
+        def should_drop_expression(expr: exp.Expression) -> bool:
+            # Check against fully qualified Column objects and
+            # Check against unqualified string column names (drop ALL columns with this name)
+            if expr.sql() in drop_sql or (
+                isinstance(expr, exp.Column) and expr.alias_or_name in column_names
+            ):
+                return True
+            return False
+        new_expressions = [expr for expr in current_expressions if not should_drop_expression(expr)]
+        return self.select.__wrapped__(  # type: ignore
+            self, *new_expressions, skip_update_display_name_mapping=True
+        )
     @operation(Operation.LIMIT)
     def limit(self, num: int) -> Self:

sqlframe/base/functions.py CHANGED Viewed

@@ -1450,6 +1450,9 @@ def unix_timestamp(
     session = _get_session()
+    if session._is_duckdb or session._is_postgres or session._is_snowflake or session._is_bigquery:
+        timestamp = Column.ensure_col(timestamp).cast("string")
     if session._is_bigquery:
         return unix_timestamp_bgutil(timestamp, format)
@@ -1984,7 +1987,7 @@ def initcap(col: ColumnOrName) -> Column:
 @meta()
 def soundex(col: ColumnOrName) -> Column:
-    return Column.invoke_anonymous_function(col, "SOUNDEX")
+    return Column.invoke_expression_over_column(col, expression.Soundex)
 @meta(unsupported_engines=["postgres", "snowflake"])
@@ -2053,7 +2056,11 @@ def bit_length(col: ColumnOrName) -> Column:
 @meta()
 def translate(srcCol: ColumnOrName, matching: str, replace: str) -> Column:
-    return Column.invoke_anonymous_function(srcCol, "TRANSLATE", lit(matching), lit(replace))
+    return Column.invoke_expression_over_column(
+        srcCol,
+        expression.Translate,
+        **{"from": lit(matching).column_expression, "to": lit(replace).column_expression},
+    )
 @meta()
@@ -3380,7 +3387,7 @@ def get_active_spark_context() -> SparkContext:
     return session.spark_session.sparkContext
-@meta(unsupported_engines="*")
+@meta()
 def grouping(col: ColumnOrName) -> Column:
     """
     Aggregate function: indicates whether a specified column in a GROUP BY list is aggregated
@@ -3413,7 +3420,7 @@ def grouping(col: ColumnOrName) -> Column:
     |  Bob|             0|       5|
     +-----+--------------+--------+
     """
-    return Column.invoke_anonymous_function(col, "grouping")
+    return Column(expression.Grouping(expressions=[Column.ensure_col(col).column_expression]))
 @meta(unsupported_engines="*")
@@ -6338,6 +6345,7 @@ def to_unix_timestamp(
     if session._is_duckdb:
         format = format or _BaseSession().default_time_format
+        timestamp = Column.ensure_col(timestamp).cast("string")
     if format is not None:
         return Column.invoke_expression_over_column(

sqlframe/base/operations.py CHANGED Viewed

@@ -27,9 +27,10 @@ class Operation(IntEnum):
     WHERE = 2
     GROUP_BY = 3
     HAVING = 4
-    SELECT = 5
-    ORDER_BY = 6
-    LIMIT = 7
+    SELECT_CONSTRAINED = 5
+    SELECT = 6
+    ORDER_BY = 7
+    LIMIT = 8
 # We want to decorate a function (self: DF, *args, **kwargs) -> T

sqlframe/base/session.py CHANGED Viewed

@@ -179,7 +179,7 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
         return self._table(self, *args, **kwargs)
     def __new__(cls, *args, **kwargs):
-        if _BaseSession._instance is None:
+        if _BaseSession._instance is None or not isinstance(_BaseSession._instance, cls):
             _BaseSession._instance = super().__new__(cls)
         return _BaseSession._instance
@@ -194,6 +194,11 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
     def getActiveSession(self) -> Self:
         return self
+    def stop(self) -> None:
+        if connection := getattr(self, "_connection", None):
+            connection.close()
+        _BaseSession._instance = None
     def range(
         self,
         start: int,

sqlframe/base/util.py CHANGED Viewed

@@ -6,6 +6,7 @@ import string
 import typing as t
 import unicodedata
+from more_itertools import partition
 from sqlglot import expressions as exp
 from sqlglot import parse_one, to_table
 from sqlglot.dialects import DuckDB
@@ -537,3 +538,17 @@ def is_relativedelta_like(value: t.Any) -> bool:
         and hasattr(value, "weeks")
         and hasattr(value, "leapdays")
     )
+T = t.TypeVar("T")
+R1 = t.TypeVar("R1")
+R2 = t.TypeVar("R2")
+def partition_to(
+    pred: t.Callable[[T], bool],
+    iterable: t.Iterable[T],
+    result1: t.Type[R1],
+    result2: t.Type[R2],
+) -> tuple[R1, R2]:
+    return (lambda x, y: (result1(x), result2(y)))(*partition(pred, iterable))  # type: ignore

{sqlframe-3.39.1.dist-info → sqlframe-3.39.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sqlframe
-Version: 3.39.1
+Version: 3.39.3
 Summary: Turning PySpark Into a Universal DataFrame API
 Home-page: https://github.com/eakmanrq/sqlframe
 Author: Ryan Eakman
@@ -16,8 +16,9 @@ Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: more-itertools
 Requires-Dist: prettytable <4
-Requires-Dist: sqlglot <27.8,>=24.0.0
+Requires-Dist: sqlglot <27.9,>=24.0.0
 Requires-Dist: typing-extensions
 Provides-Extra: bigquery
 Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'

{sqlframe-3.39.1.dist-info → sqlframe-3.39.3.dist-info}/RECORD RENAMED Viewed

@@ -1,25 +1,25 @@
 sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
-sqlframe/_version.py,sha256=Magv3v4P13Mop8QdiPMhhrwCJ78Gp6qb0vlJrw80tno,714
+sqlframe/_version.py,sha256=Vixv4hfZnHHXCXSmZD4wlHJUBkhCMzDLIyo5HqkJdes,714
 sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
 sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
 sqlframe/base/catalog.py,sha256=-YulM2BMK8MoWbXi05AsJIPxd4AuiZDBCZuk4HoeMlE,38900
 sqlframe/base/column.py,sha256=f6rK6-hTiNx9WwJP7t6tqL3xEC2gwERPDlhWCS5iCBw,21417
-sqlframe/base/dataframe.py,sha256=0diYONDlet8iZt49LC3vcmfXHAAZ2MovPL2pTXYHj2U,85974
+sqlframe/base/dataframe.py,sha256=HHjDaeap4_w4HRRj87lhQjFTczxLKhFD8b-9vhK2KsY,87592
 sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
 sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
 sqlframe/base/function_alternatives.py,sha256=aTu3nQhIAkZoxrI1IpjpaHEAMxBNms0AnhS0EMR-TwY,51727
-sqlframe/base/functions.py,sha256=Hd77xVVOBeD4wr08OeCwFJa89LHAZHsMjZXl3cg_RQs,227630
+sqlframe/base/functions.py,sha256=RVNoRzM19BUwypdc0izYrrQe2Fe4_e9SbtpDkdD2bec,227981
 sqlframe/base/group.py,sha256=fBm8EUve7W7xz11nybTXr09ih-yZxL_vvEiZVE1eb_0,12025
 sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
-sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
+sqlframe/base/operations.py,sha256=8dkMNqjG3xP1w_6euAj8FpwweD7t590HYjoeoCr5LqI,4465
 sqlframe/base/readerwriter.py,sha256=Nb2VJ_HBmLQp5mK8JhnFooZh2ydAaboCAFVPb-4MNX4,31241
-sqlframe/base/session.py,sha256=8oaEgGbyctKKEaI0GW6k7Praku7nwx3YRYgAW3mZNk0,27481
+sqlframe/base/session.py,sha256=99X-ShK9ohHCX6WdIJs0HhjfK23snaE3Gv6RYc5wqUI,27687
 sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
 sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
 sqlframe/base/types.py,sha256=OktuJ5f7tEogOW0oupI0RBlHfzZMmKh7zGLke9cwllo,12305
 sqlframe/base/udf.py,sha256=O6hMhBUy9NVv-mhJRtfFhXTIa_-Z8Y_FkmmuOHu0l90,1117
-sqlframe/base/util.py,sha256=D4HAhtu4DMz5mXyxlUHRP_GrsjLJACpBYlLriyGoT0g,19435
+sqlframe/base/util.py,sha256=11rBF_GBFXGBCllSdlWWWo8EiZZATJn4me3u7OUNIFg,19782
 sqlframe/base/window.py,sha256=7NaKDTlhun-95LEghukBCjFBwq0RHrPaajWQNCsLxok,4818
 sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sqlframe/base/mixins/catalog_mixins.py,sha256=9fZGWToz9xMJSzUl1vsVtj6TH3TysP3fBCKJLnGUQzE,23353
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
 sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
 sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
 sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
-sqlframe-3.39.1.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
-sqlframe-3.39.1.dist-info/METADATA,sha256=1WWnSl5RkOZOCniSSzeNgHSUztaC4FbMmCjxakLC6E0,9039
-sqlframe-3.39.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-sqlframe-3.39.1.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
-sqlframe-3.39.1.dist-info/RECORD,,
+sqlframe-3.39.3.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
+sqlframe-3.39.3.dist-info/METADATA,sha256=eyKm8nGawKAujUOiCBn4PEFpSh_UzsnEV7LpKQVecRM,9069
+sqlframe-3.39.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+sqlframe-3.39.3.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
+sqlframe-3.39.3.dist-info/RECORD,,

{sqlframe-3.39.1.dist-info → sqlframe-3.39.3.dist-info}/LICENSE RENAMED Viewed

File without changes

{sqlframe-3.39.1.dist-info → sqlframe-3.39.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{sqlframe-3.39.1.dist-info → sqlframe-3.39.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

sqlframe 3.39.1__py3-none-any.whl → 3.39.3__py3-none-any.whl

sqlframe 3.39.1__py3-none-any.whl → 3.39.3__py3-none-any.whl