sqlframe 3.15.1__py3-none-any.whl → 3.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE
 
- __version__ = version = '3.15.1'
- __version_tuple__ = version_tuple = (3, 15, 1)
+ __version__ = version = '3.16.0'
+ __version_tuple__ = version_tuple = (3, 16, 0)
sqlframe/base/dataframe.py CHANGED
@@ -391,7 +391,9 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
 
          cols = self._ensure_list_of_columns(cols)
          normalize(self.session, expression or self.expression, cols)
-         return list(flatten([self._expand_star(col) for col in cols]))
+         cols = list(flatten([self._expand_star(col) for col in cols]))
+         self._resolve_ambiguous_columns(cols)
+         return cols
 
      def _ensure_and_normalize_col(self, col):
          from sqlframe.base.column import Column
@@ -399,6 +401,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
 
          col = Column.ensure_col(col)
          normalize(self.session, self.expression, col)
+         self._resolve_ambiguous_columns(col)
          return col
 
      def _convert_leaf_to_cte(
@@ -745,10 +748,55 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
          kwargs["join_on_uuid"] = str(uuid4())
          return self.__class__(**object_to_dict(self, **kwargs))
 
+     def _resolve_ambiguous_columns(self, columns: t.Union[Column, t.List[Column]]) -> None:
+         if "joins" not in self.expression.args:
+             return
+
+         columns = ensure_list(columns)
+         ambiguous_cols: t.List[exp.Column] = list(
+             flatten(
+                 [
+                     sub_col
+                     for col in columns
+                     for sub_col in col.expression.find_all(exp.Column)
+                     if not sub_col.table
+                 ]
+             )
+         )
+         if ambiguous_cols:
+             join_table_identifiers = [
+                 x.this for x in get_tables_from_expression_with_join(self.expression)
+             ]
+             cte_names_in_join = [x.this for x in join_table_identifiers]
+             # If we have columns that resolve to multiple CTE expressions then we want to use each CTE left-to-right
+             # (or right to left if a right join) and therefore we allow multiple columns with the same
+             # name in the result. This matches the behavior of Spark.
+             resolved_column_position: t.Dict[exp.Column, int] = {
+                 col.copy(): -1 for col in ambiguous_cols
+             }
+             for ambiguous_col in ambiguous_cols:
+                 ctes = (
+                     list(reversed(self.expression.ctes))
+                     if self.expression.args["joins"][0].args.get("side", "") == "right"
+                     else self.expression.ctes
+                 )
+                 ctes_with_column = [
+                     cte
+                     for cte in ctes
+                     if cte.alias_or_name in cte_names_in_join
+                     and ambiguous_col.alias_or_name in cte.this.named_selects
+                 ]
+                 # Check if there is a CTE with this column that we haven't used before. If so, use it. Otherwise,
+                 # use the same CTE we used before
+                 cte = seq_get(ctes_with_column, resolved_column_position[ambiguous_col] + 1)
+                 if cte:
+                     resolved_column_position[ambiguous_col] += 1
+                 else:
+                     cte = ctes_with_column[resolved_column_position[ambiguous_col]]
+                 ambiguous_col.set("table", exp.to_identifier(cte.alias_or_name))
+
      @operation(Operation.SELECT)
      def select(self, *cols, **kwargs) -> Self:
-         from sqlframe.base.column import Column
-
          if not cols:
              return self
 
@@ -756,48 +804,6 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
              cols = cols[0]  # type: ignore
          columns = self._ensure_and_normalize_cols(cols)
          kwargs["append"] = kwargs.get("append", False)
-         if self.expression.args.get("joins"):
-             ambiguous_cols: t.List[exp.Column] = list(
-                 flatten(
-                     [
-                         sub_col
-                         for col in columns
-                         for sub_col in col.expression.find_all(exp.Column)
-                         if not sub_col.table
-                     ]
-                 )
-             )
-             if ambiguous_cols:
-                 join_table_identifiers = [
-                     x.this for x in get_tables_from_expression_with_join(self.expression)
-                 ]
-                 cte_names_in_join = [x.this for x in join_table_identifiers]
-                 # If we have columns that resolve to multiple CTE expressions then we want to use each CTE left-to-right
-                 # (or right to left if a right join) and therefore we allow multiple columns with the same
-                 # name in the result. This matches the behavior of Spark.
-                 resolved_column_position: t.Dict[exp.Column, int] = {
-                     col.copy(): -1 for col in ambiguous_cols
-                 }
-                 for ambiguous_col in ambiguous_cols:
-                     ctes = (
-                         list(reversed(self.expression.ctes))
-                         if self.expression.args["joins"][0].args.get("side", "") == "right"
-                         else self.expression.ctes
-                     )
-                     ctes_with_column = [
-                         cte
-                         for cte in ctes
-                         if cte.alias_or_name in cte_names_in_join
-                         and ambiguous_col.alias_or_name in cte.this.named_selects
-                     ]
-                     # Check if there is a CTE with this column that we haven't used before. If so, use it. Otherwise,
-                     # use the same CTE we used before
-                     cte = seq_get(ctes_with_column, resolved_column_position[ambiguous_col] + 1)
-                     if cte:
-                         resolved_column_position[ambiguous_col] += 1
-                     else:
-                         cte = ctes_with_column[resolved_column_position[ambiguous_col]]
-                     ambiguous_col.set("table", exp.to_identifier(cte.alias_or_name))
          # If an expression is `CAST(x AS DATETYPE)` then we want to alias so that `x` is the result column name
          columns = [
              col.alias(col.expression.alias_or_name)
sqlframe/base/decorators.py CHANGED
@@ -43,7 +43,7 @@ def func_metadata(unsupported_engines: t.Optional[t.Union[str, t.List[str]]] = N
                      col_name = col_name.this
                  alias_name = f"{func.__name__}__{col_name or ''}__"
                  # BigQuery has restrictions on alias names so we constrain it to alphanumeric characters and underscores
-                 return result.alias(re.sub("\W", "_", alias_name))  # type: ignore
+                 return result.alias(re.sub(r"\W", "_", alias_name))  # type: ignore
              return result
 
          wrapper.unsupported_engines = (  # type: ignore
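The change above only makes the pattern a raw string: "\W" contains no recognized escape sequence, so the compiled regex is unchanged and the raw string simply avoids Python's invalid-escape-sequence warning on newer interpreters. For illustration (the alias value below is made up), the sanitization keeps word characters and replaces everything else with underscores so the alias stays BigQuery-safe:

import re

# Hypothetical alias produced by func_metadata-style naming; spaces and
# punctuation are replaced with underscores.
alias_name = "btrim__trim col!__"
print(re.sub(r"\W", "_", alias_name))  # btrim__trim_col___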
sqlframe/base/functions.py CHANGED
@@ -2851,12 +2851,14 @@ def bool_or(col: ColumnOrName) -> Column:
      return Column.invoke_expression_over_column(col, expression.LogicalOr)
 
 
- @meta(unsupported_engines="*")
+ @meta()
  def btrim(str: ColumnOrName, trim: t.Optional[ColumnOrName] = None) -> Column:
      if trim is not None:
-         return Column.invoke_anonymous_function(str, "btrim", trim)
+         return Column.invoke_expression_over_column(
+             str, expression.Trim, expression=Column.ensure_col(trim).column_expression
+         )
      else:
-         return Column.invoke_anonymous_function(str, "btrim")
+         return Column.invoke_expression_over_column(str, expression.Trim)
 
 
  @meta(unsupported_engines="*")
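Because `btrim` now builds a sqlglot `Trim` expression instead of an anonymous `btrim()` call, the generated SQL can use each dialect's native trim syntax, which is also why the `unsupported_engines="*"` marker is dropped. A hedged usage sketch, assuming the DuckDB session and that the engine's `functions` module exposes `btrim` after this change (names are illustrative):

from sqlframe.duckdb import DuckDBSession
from sqlframe.duckdb import functions as F

session = DuckDBSession()  # in-memory DuckDB connection
df = session.createDataFrame([{"s": "xxhellox"}])

# Trim the character "x" from both ends of `s`; the query should now render a
# dialect-appropriate TRIM expression rather than a bare btrim() function call.
trimmed = df.select(F.btrim(F.col("s"), F.lit("x")).alias("trimmed"))
print(trimmed.sql())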
sqlframe-3.15.1.dist-info/METADATA → sqlframe-3.16.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sqlframe
- Version: 3.15.1
+ Version: 3.16.0
  Summary: Turning PySpark Into a Universal DataFrame API
  Home-page: https://github.com/eakmanrq/sqlframe
  Author: Ryan Eakman
sqlframe-3.15.1.dist-info/RECORD → sqlframe-3.16.0.dist-info/RECORD CHANGED
@@ -1,14 +1,14 @@
  sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
- sqlframe/_version.py,sha256=rNfI2qI8EULJid-fGjytQ8KiqfMi0Ktaq6sNSFSM_1s,413
+ sqlframe/_version.py,sha256=CtTis8a_OeN0EsLFoVgtqX-ARqHjuin2ATomgRROY1Y,413
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
  sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
  sqlframe/base/column.py,sha256=wRghgieYAA51aw4WuFQWOvl0TFOToZbBhBuIamEzxx4,18011
- sqlframe/base/dataframe.py,sha256=E1zWlB_a2FNOxjTcQ68MtL_A4c8fnLiHY3MeZttK4Xk,76570
- sqlframe/base/decorators.py,sha256=P56cgs8DANxGRIwVs5uOMnDy-BlXZZYMbf4fdnkpWPI,1889
+ sqlframe/base/dataframe.py,sha256=KKBwtn73xNGt2gRwUB8Vri7Ee6_ivP5a_qij4Eq96zE,76622
+ sqlframe/base/decorators.py,sha256=ms-CvDOIW3T8IVB9VqDmLwAiaEsqXLYRXEqVQaxktiM,1890
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
  sqlframe/base/function_alternatives.py,sha256=NV31IaEhVYmfUSWetAEFISAvLzs2DxQ7bp-iMNgj0hQ,53786
- sqlframe/base/functions.py,sha256=9mN54Nx6yqos1njfyW2-WRzfFUsA96P9z1ldJVtovSs,220543
+ sqlframe/base/functions.py,sha256=o8zwbS8zCsyNe5arcb6dbAGBL8a1tH99rGyRimwzzUk,220614
  sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
  sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
  sqlframe/base/operations.py,sha256=xSPw74e59wYvNd6U1AlwziNCTG6Aftrbl4SybN9u9VE,3450
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
- sqlframe-3.15.1.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
- sqlframe-3.15.1.dist-info/METADATA,sha256=-MxovSCoyQnT-6Ujd4BDA_yVpf9KWra2v1CQGN2TmG4,8970
- sqlframe-3.15.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
- sqlframe-3.15.1.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
- sqlframe-3.15.1.dist-info/RECORD,,
+ sqlframe-3.16.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
+ sqlframe-3.16.0.dist-info/METADATA,sha256=SMpgyXmxbVMqeeRuByF19qKm9iLDYubcniTCYBUmyNo,8970
+ sqlframe-3.16.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+ sqlframe-3.16.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
+ sqlframe-3.16.0.dist-info/RECORD,,