PyPI - sqlframe - Versions diffs - 3.4.0__py3-none-any.whl → 3.5.0__py3-none-any.whl - Mend

sqlframe 3.4.0__py3-none-any.whl → 3.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

sqlframe/_version.py +2 -2
sqlframe/base/column.py +4 -1
sqlframe/base/dataframe.py +33 -9
sqlframe/base/function_alternatives.py +2 -2
sqlframe/base/functions.py +6 -4
sqlframe/duckdb/functions.pyi +1 -0
sqlframe/snowflake/functions.pyi +1 -0
{sqlframe-3.4.0.dist-info → sqlframe-3.5.0.dist-info}/METADATA +6 -6
{sqlframe-3.4.0.dist-info → sqlframe-3.5.0.dist-info}/RECORD +12 -12
{sqlframe-3.4.0.dist-info → sqlframe-3.5.0.dist-info}/LICENSE +0 -0
{sqlframe-3.4.0.dist-info → sqlframe-3.5.0.dist-info}/WHEEL +0 -0
{sqlframe-3.4.0.dist-info → sqlframe-3.5.0.dist-info}/top_level.txt +0 -0

sqlframe/_version.py CHANGED Viewed

@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '3.4.0'
-__version_tuple__ = version_tuple = (3, 4, 0)
+__version__ = version = '3.5.0'
+__version_tuple__ = version_tuple = (3, 5, 0)

sqlframe/base/column.py CHANGED Viewed

@@ -338,6 +338,9 @@ class Column:
         new_expression = exp.Not(this=exp.Is(this=self.column_expression, expression=exp.Null()))
         return Column(new_expression)
+    def eqNullSafe(self, other: ColumnOrLiteral) -> Column:
+        return self.binary_op(exp.NullSafeEQ, other)
     def cast(
         self,
         dataType: t.Union[str, DataType, exp.DataType, exp.DataType.Type],
@@ -385,7 +388,7 @@ class Column:
     def isin(self, *cols: t.Union[ColumnOrLiteral, t.Iterable[ColumnOrLiteral]]):
         columns = flatten(cols) if isinstance(cols[0], (list, set, tuple)) else cols  # type: ignore
-        expressions = [self._lit(x).expression for x in columns]
+        expressions = [self._lit(x).expression for x in columns]  # type: ignore
         return Column.invoke_expression_over_column(self, exp.In, expressions=expressions)  # type: ignore
     def between(

sqlframe/base/dataframe.py CHANGED Viewed

@@ -15,7 +15,7 @@ from dataclasses import dataclass
 import sqlglot
 from prettytable import PrettyTable
-from sqlglot import Dialect
+from sqlglot import Dialect, maybe_parse
 from sqlglot import expressions as exp
 from sqlglot import lineage as sqlglot_lineage
 from sqlglot.helper import ensure_list, flatten, object_to_dict, seq_get
@@ -460,16 +460,40 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         df.expression.ctes[-1].set("cache_storage_level", storage_level)
         return df
-    @classmethod
-    def _add_ctes_to_expression(cls, expression: exp.Select, ctes: t.List[exp.CTE]) -> exp.Select:
+    def _add_ctes_to_expression(self, expression: exp.Select, ctes: t.List[exp.CTE]) -> exp.Select:
         expression = expression.copy()
         with_expression = expression.args.get("with")
         if with_expression:
             existing_ctes = with_expression.expressions
-            existsing_cte_names = {x.alias_or_name for x in existing_ctes}
+            existing_cte_counts = {x.alias_or_name: 0 for x in existing_ctes}
+            replaced_cte_names = {}  # type: ignore
             for cte in ctes:
-                if cte.alias_or_name not in existsing_cte_names:
-                    existing_ctes.append(cte)
+                if replaced_cte_names:
+                    cte = cte.transform(replace_id_value, replaced_cte_names)  # type: ignore
+                if cte.alias_or_name in existing_cte_counts:
+                    existing_cte_counts[cte.alias_or_name] += 10
+                    cte.set(
+                        "this",
+                        cte.this.where(
+                            exp.EQ(
+                                this=exp.Literal.number(existing_cte_counts[cte.alias_or_name]),
+                                expression=exp.Literal.number(
+                                    existing_cte_counts[cte.alias_or_name]
+                                ),
+                            )
+                        ),
+                    )
+                    new_cte_alias = self._create_hash_from_expression(cte.this)
+                    replaced_cte_names[cte.args["alias"].this] = maybe_parse(
+                        new_cte_alias, dialect=self.session.input_dialect, into=exp.Identifier
+                    )
+                    cte.set(
+                        "alias",
+                        maybe_parse(
+                            new_cte_alias, dialect=self.session.input_dialect, into=exp.TableAlias
+                        ),
+                    )
+                existing_ctes.append(cte)
         else:
             existing_ctes = ctes
         expression.set("with", exp.With(expressions=existing_ctes))
@@ -843,11 +867,11 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
             logger.warning("Got no value for on. This appears to change the join to a cross join.")
             how = "cross"
         other_df = other_df._convert_leaf_to_cte()
+        join_expression = self._add_ctes_to_expression(self.expression, other_df.expression.ctes)
         # We will determine actual "join on" expression later so we don't provide it at first
-        join_expression = self.expression.join(
-            other_df.latest_cte_name, join_type=how.replace("_", " ")
+        join_expression = join_expression.join(
+            join_expression.ctes[-1].alias, join_type=how.replace("_", " ")
         )
-        join_expression = self._add_ctes_to_expression(join_expression, other_df.expression.ctes)
         self_columns = self._get_outer_select_columns(join_expression)
         other_columns = self._get_outer_select_columns(other_df.expression)
         join_columns = self._ensure_and_normalize_cols(on)

sqlframe/base/function_alternatives.py CHANGED Viewed

@@ -1320,8 +1320,8 @@ def flatten_using_array_flatten(col: ColumnOrName) -> Column:
 def map_concat_using_map_cat(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column:
     columns = list(flatten(cols)) if not isinstance(cols[0], (str, Column)) else cols  # type: ignore
     if len(columns) == 1:
-        return Column.invoke_anonymous_function(columns[0], "MAP_CAT")
-    return Column.invoke_anonymous_function(columns[0], "MAP_CAT", *columns[1:])
+        return Column.invoke_anonymous_function(columns[0], "MAP_CAT")  # type: ignore
+    return Column.invoke_anonymous_function(columns[0], "MAP_CAT", *columns[1:])  # type: ignore
 def sequence_from_generate_series(

sqlframe/base/functions.py CHANGED Viewed

@@ -1652,8 +1652,8 @@ def arrays_zip(*cols: ColumnOrName) -> Column:
 def map_concat(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column:
     columns = list(flatten(cols)) if not isinstance(cols[0], (str, Column)) else cols  # type: ignore
     if len(columns) == 1:
-        return Column.invoke_anonymous_function(columns[0], "MAP_CONCAT")
-    return Column.invoke_anonymous_function(columns[0], "MAP_CONCAT", *columns[1:])
+        return Column.invoke_anonymous_function(columns[0], "MAP_CONCAT")  # type: ignore
+    return Column.invoke_anonymous_function(columns[0], "MAP_CONCAT", *columns[1:])  # type: ignore
 @meta(unsupported_engines="postgres")
@@ -4697,7 +4697,7 @@ def spark_partition_id() -> Column:
     return Column.invoke_anonymous_function(None, "spark_partition_id")
-@meta(unsupported_engines="*")
+@meta(unsupported_engines=["bigquery", "postgres"])
 def split_part(src: ColumnOrName, delimiter: ColumnOrName, partNum: ColumnOrName) -> Column:
     """
     Splits `str` by delimiter and return requested part of the split (1-based).
@@ -4723,7 +4723,9 @@ def split_part(src: ColumnOrName, delimiter: ColumnOrName, partNum: ColumnOrName
     >>> df.select(split_part(df.a, df.b, df.c).alias('r')).collect()
     [Row(r='13')]
     """
-    return Column.invoke_anonymous_function(src, "split_part", delimiter, partNum)
+    return Column.invoke_expression_over_column(
+        src, expression.SplitPart, delimiter=delimiter, part_index=partNum
+    )
 @meta()

sqlframe/duckdb/functions.pyi CHANGED Viewed

@@ -177,6 +177,7 @@ from sqlframe.base.functions import (
     skewness as skewness,
     sort_array as sort_array,
     soundex as soundex,
+    split_part as split_part,
     sqrt as sqrt,
     startswith as startswith,
     stddev as stddev,

sqlframe/snowflake/functions.pyi CHANGED Viewed

@@ -187,6 +187,7 @@ from sqlframe.base.functions import (
     sinh as sinh,
     size as size,
     soundex as soundex,
+    split_part as split_part,
     sqrt as sqrt,
     startswith as startswith,
     stddev as stddev,

{sqlframe-3.4.0.dist-info → sqlframe-3.5.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sqlframe
-Version: 3.4.0
+Version: 3.5.0
 Summary: Turning PySpark Into a Universal DataFrame API
 Home-page: https://github.com/eakmanrq/sqlframe
 Author: Ryan Eakman
@@ -18,7 +18,7 @@ Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: prettytable (<3.11.1)
-Requires-Dist: sqlglot (<25.25,>=24.0.0)
+Requires-Dist: sqlglot (<25.26,>=24.0.0)
 Requires-Dist: typing-extensions (<5,>=4.8)
 Provides-Extra: bigquery
 Requires-Dist: google-cloud-bigquery-storage (<3,>=2) ; extra == 'bigquery'
@@ -26,8 +26,8 @@ Requires-Dist: google-cloud-bigquery[pandas] (<4,>=3) ; extra == 'bigquery'
 Provides-Extra: dev
 Requires-Dist: duckdb (<1.2,>=0.9) ; extra == 'dev'
 Requires-Dist: findspark (<3,>=2) ; extra == 'dev'
-Requires-Dist: mypy (<1.12,>=1.10.0) ; extra == 'dev'
-Requires-Dist: openai (<1.52,>=1.30) ; extra == 'dev'
+Requires-Dist: mypy (<1.13,>=1.10.0) ; extra == 'dev'
+Requires-Dist: openai (<1.53,>=1.30) ; extra == 'dev'
 Requires-Dist: pandas-stubs (<3,>=2) ; extra == 'dev'
 Requires-Dist: pandas (<3,>=2) ; extra == 'dev'
 Requires-Dist: psycopg (<4,>=3.1) ; extra == 'dev'
@@ -36,7 +36,7 @@ Requires-Dist: pyspark (<3.6,>=2) ; extra == 'dev'
 Requires-Dist: pytest-postgresql (<7,>=6) ; extra == 'dev'
 Requires-Dist: pytest-xdist (<3.7,>=3.6) ; extra == 'dev'
 Requires-Dist: pytest (<8.4,>=8.2.0) ; extra == 'dev'
-Requires-Dist: ruff (<0.7,>=0.4.4) ; extra == 'dev'
+Requires-Dist: ruff (<0.8,>=0.4.4) ; extra == 'dev'
 Requires-Dist: types-psycopg2 (<3,>=2.9) ; extra == 'dev'
 Requires-Dist: pre-commit (>=3.5) ; (python_version == "3.8") and extra == 'dev'
 Requires-Dist: pre-commit (<4.1,>=3.7) ; (python_version >= "3.9") and extra == 'dev'
@@ -50,7 +50,7 @@ Provides-Extra: duckdb
 Requires-Dist: duckdb (<1.2,>=0.9) ; extra == 'duckdb'
 Requires-Dist: pandas (<3,>=2) ; extra == 'duckdb'
 Provides-Extra: openai
-Requires-Dist: openai (<1.52,>=1.30) ; extra == 'openai'
+Requires-Dist: openai (<1.53,>=1.30) ; extra == 'openai'
 Provides-Extra: pandas
 Requires-Dist: pandas (<3,>=2) ; extra == 'pandas'
 Provides-Extra: postgres

{sqlframe-3.4.0.dist-info → sqlframe-3.5.0.dist-info}/RECORD RENAMED Viewed

@@ -1,14 +1,14 @@
 sqlframe/__init__.py,sha256=E3qCJ4PSEgKz6Hg3ves6LWt3JrQOV8c9HVLSIUOzKNc,3106
-sqlframe/_version.py,sha256=YWGqQYvejjlymmjzg4jncyBgDC760jlRmyon_Rd-2uQ,411
+sqlframe/_version.py,sha256=wy34mXzQ8fLJu7i4fZikKwCDGQODEviQb-OrdMe9F4Q,411
 sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
 sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
-sqlframe/base/column.py,sha256=C2xj6OHMsJbEgjbI-m5HuIvqHYt2DbbUtCjssKpplNk,17748
-sqlframe/base/dataframe.py,sha256=uTM46auSPniww-ZMtI5s0hrrCG1nOICMEhdZZ8E4cu8,71636
+sqlframe/base/column.py,sha256=06fhVZ2nCn2QLxnfjdK-oYKeTFJC_smgSxu7u2UYlVg,17878
+sqlframe/base/dataframe.py,sha256=coeUwntwYbT1g6YKVwk3ZfWMfJqAzd1ECYabBSsNsV0,72892
 sqlframe/base/decorators.py,sha256=Jy4bf8MhZ-AJ6CWTj59bBJRqamtLbPC0USUMFrY6g0w,449
 sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
-sqlframe/base/function_alternatives.py,sha256=IxNBqplehkAEkpzA625Dif-9Xyi4Hrho81A9U262rV0,50714
-sqlframe/base/functions.py,sha256=b6rjrBHHpTMzBU2N5ubdXMJACM40qw4WnbI_RqusWIU,189878
+sqlframe/base/function_alternatives.py,sha256=Cqt-OjyWewCXZJ8PV-2vc8yrDG2ennLPm6yWaxWVXRc,50746
+sqlframe/base/functions.py,sha256=JDVULt3WI1cwWYboU8ybenIXZPdKSNtnzg7A4xC1Lao,189978
 sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
 sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
 sqlframe/base/operations.py,sha256=-AhNuEzcV7ZExoP1oY3blaKip-joQyJeQVvfBTs_2g4,3456
@@ -40,7 +40,7 @@ sqlframe/duckdb/catalog.py,sha256=YYYVmetLUaJOdObKw4AJ7L0P-msshkta4xHlcZQ9zEA,47
 sqlframe/duckdb/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
 sqlframe/duckdb/dataframe.py,sha256=HZg_uMAz4RsubZJT4-MslUQS_0-InF0_P5Yq5HyJ3wE,1708
 sqlframe/duckdb/functions.py,sha256=Ee8o6YFtRdEiq0jNLXxgu5lcbc7Tsg0-lK6oRyxdcjo,1920
-sqlframe/duckdb/functions.pyi,sha256=mKr9hI4snRyXEeWWTYfRNpBJmWd5T2EujdQ0oJxpEjY,5849
+sqlframe/duckdb/functions.pyi,sha256=bWfQl7Cm1eecI39LJAyyRcC4z7epDJ-h9JOozPsEc34,5879
 sqlframe/duckdb/group.py,sha256=IkhbW42Ng1U5YT3FkIdiB4zBqRkW4QyTb-1detY1e_4,383
 sqlframe/duckdb/readwriter.py,sha256=iY0Wsms35gymilETOhAQGQCHfH8LCj9_uY8aCdF77ds,4806
 sqlframe/duckdb/session.py,sha256=b5IrKbTkYUVNQGSG2EJPNV9MTdJw4onN-9aMrskjxck,2721
@@ -75,7 +75,7 @@ sqlframe/snowflake/catalog.py,sha256=Tp_B9oMTInnqLTiZNsT0UNc-a-aKXu3zFThlCwLeKCA
 sqlframe/snowflake/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
 sqlframe/snowflake/dataframe.py,sha256=ANDZ5Dpxz-WTlTtmKNcv-nJlzTeTsAOK0X0gR2euPqQ,2144
 sqlframe/snowflake/functions.py,sha256=iX67xU8Czy5k3HiNjNbN-x-via6yqGNaMPw1maV9HPs,2580
-sqlframe/snowflake/functions.pyi,sha256=DoRyF0zob8kV7qHkHonyyyPex-C_GIQvFDaSxf2lSa4,6207
+sqlframe/snowflake/functions.pyi,sha256=anVqp9rhkkD6uTTiQYGbScFed3dp_uRcyPwq1JnRmRI,6237
 sqlframe/snowflake/group.py,sha256=pPP1l2RRo_LgkXrji8a87n2PKo-63ZRPT-WUtvVcBME,395
 sqlframe/snowflake/readwriter.py,sha256=yhRc2HcMq6PwV3ghZWC-q-qaE7LE4aEjZEXCip4OOlQ,884
 sqlframe/snowflake/session.py,sha256=fnFYrJ9JxoOf4ZKTrQcSKjwr6wNp6A85cMqKczOi3vA,3320
@@ -107,8 +107,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
 sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
 sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
 sqlframe/testing/utils.py,sha256=9DDYVuocO7tygee3RaajuJNZ24sJwf_LY556kKg7kTw,13011
-sqlframe-3.4.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
-sqlframe-3.4.0.dist-info/METADATA,sha256=IDBAwEcCrJfWwGuAaa8kfV3Jxlte38uOYO87zcFWJfY,8639
-sqlframe-3.4.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
-sqlframe-3.4.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
-sqlframe-3.4.0.dist-info/RECORD,,
+sqlframe-3.5.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
+sqlframe-3.5.0.dist-info/METADATA,sha256=vH1Trx574zbKeuhU0oPb0Q7dxgGvHqaONgNiO3tcbkk,8639
+sqlframe-3.5.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+sqlframe-3.5.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
+sqlframe-3.5.0.dist-info/RECORD,,

{sqlframe-3.4.0.dist-info → sqlframe-3.5.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{sqlframe-3.4.0.dist-info → sqlframe-3.5.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{sqlframe-3.4.0.dist-info → sqlframe-3.5.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

sqlframe 3.4.0__py3-none-any.whl → 3.5.0__py3-none-any.whl

sqlframe 3.4.0__py3-none-any.whl → 3.5.0__py3-none-any.whl