PyPI - sqlframe - Versions diffs - 3.14.2__py3-none-any.whl → 3.15.1__py3-none-any.whl - Mend

sqlframe 3.14.2__py3-none-any.whl → 3.15.1__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

sqlframe/_version.py CHANGED Viewed

@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '3.14.2'
-__version_tuple__ = version_tuple = (3, 14, 2)
+__version__ = version = '3.15.1'
+__version_tuple__ = version_tuple = (3, 15, 1)

sqlframe/base/function_alternatives.py CHANGED Viewed

@@ -6,9 +6,7 @@ import re
 import typing as t
 from sqlglot import exp as expression
-from sqlglot.dialects.dialect import build_formatted_time
 from sqlglot.helper import ensure_list
-from sqlglot.helper import flatten as _flatten
 from sqlframe.base.column import Column
 from sqlframe.base.util import (
@@ -1410,6 +1408,14 @@ def regexp_replace_global_option(
     )
+def regexp_with_matches(str: ColumnOrName, regexp: ColumnOrName) -> Column:
+    return Column.invoke_anonymous_function(str, "REGEXP_MATCHES", regexp)
+def regexp_with_contains(str: ColumnOrName, regexp: ColumnOrName) -> Column:
+    return Column.invoke_anonymous_function(str, "REGEXP_CONTAINS", regexp)
 def degrees_bgutil(col: ColumnOrName) -> Column:
     return Column(
         expression.Anonymous(
@@ -1519,6 +1525,43 @@ def unix_timestamp_bgutil(
     )
+def unix_seconds_extract_epoch(col: ColumnOrName) -> Column:
+    return Column(
+        expression.Extract(
+            this=expression.Var(this="EPOCH"),
+            expression=Column.ensure_col(col).column_expression,
+        )
+    )
+def unix_millis_multiply_epoch(col: ColumnOrName) -> Column:
+    unix_seconds = get_func_from_session("unix_seconds")
+    return Column(
+        expression.Cast(
+            this=expression.Mul(
+                this=unix_seconds(col).column_expression,
+                expression=expression.Literal.number(1000),
+            ),
+            to=expression.DataType.build("bigint"),
+        )
+    )
+def unix_micros_multiply_epoch(col: ColumnOrName) -> Column:
+    unix_seconds = get_func_from_session("unix_seconds")
+    return Column(
+        expression.Cast(
+            this=expression.Mul(
+                this=unix_seconds(col).column_expression,
+                expression=expression.Literal.number(1000000),
+            ),
+            to=expression.DataType.build("bigint"),
+        )
+    )
 def format_number_bgutil(col: ColumnOrName, d: int) -> Column:
     round = get_func_from_session("round")
     lit = get_func_from_session("lit")

sqlframe/base/functions.py CHANGED Viewed

@@ -51,6 +51,8 @@ def col(column_name: t.Union[ColumnOrName, t.Any]) -> Column:
 def lit(value: t.Optional[t.Any] = None) -> Column:
     if isinstance(value, str):
         return Column(expression.Literal.string(value))
+    if isinstance(value, float) and value in {float("inf"), float("-inf")}:
+        return Column(expression.Literal.string(str(value)))
     return Column(value)
@@ -961,12 +963,15 @@ def dayofweek(col: ColumnOrName) -> Column:
         return dayofweek_from_extract(col)
     if session._is_postgres:
-        return dayofweek_from_extract_with_isodow(col)
+        return dayofweek_from_extract_with_isodow(col) + 1
-    return Column.invoke_expression_over_column(
+    result = Column.invoke_expression_over_column(
         Column(expression.TsOrDsToDate(this=Column.ensure_col(col).column_expression)),
         expression.DayOfWeek,
     )
+    if session._is_duckdb or session._is_snowflake:
+        return result + 1
+    return result
 @meta()
@@ -2962,14 +2967,14 @@ def char(col: ColumnOrName) -> Column:
     return Column(expression.Chr(expressions=Column.ensure_col(col).column_expression))
-@meta(unsupported_engines="*")
+@meta()
 def char_length(str: ColumnOrName) -> Column:
-    return Column.invoke_anonymous_function(str, "char_length")
+    return Column.invoke_expression_over_column(str, expression.Length)
-@meta(unsupported_engines="*")
+@meta()
 def character_length(str: ColumnOrName) -> Column:
-    return Column.invoke_anonymous_function(str, "character_length")
+    return Column.invoke_expression_over_column(str, expression.Length)
 @meta(unsupported_engines=["bigquery", "postgres"])
@@ -4946,7 +4951,7 @@ def reflect(*cols: ColumnOrName) -> Column:
     return Column.invoke_anonymous_function(cols[0], "reflect")
-@meta(unsupported_engines="*")
+@meta(unsupported_engines="snowflake")
 def regexp(str: ColumnOrName, regexp: ColumnOrName) -> Column:
     r"""Returns true if `str` matches the Java regex `regexp`, or false otherwise.
@@ -4996,12 +5001,21 @@ def regexp(str: ColumnOrName, regexp: ColumnOrName) -> Column:
     |               true|
     +-------------------+
     """
-    from sqlframe.base.function_alternatives import regexp_extract_only_one_group
+    from sqlframe.base.function_alternatives import (
+        regexp_with_contains,
+        regexp_with_matches,
+    )
     session = _get_session()
+    if session._is_duckdb:
+        return regexp_with_matches(str, regexp)
+    if session._is_postgres:
+        return Column.invoke_expression_over_column(str, expression.RegexpILike, expression=regexp)
     if session._is_bigquery:
-        return regexp_extract_only_one_group(str, regexp)  # type: ignore
+        return regexp_with_contains(str, regexp)
     return Column.invoke_anonymous_function(str, "regexp", regexp)
@@ -5485,7 +5499,7 @@ def regr_syy(y: ColumnOrName, x: ColumnOrName) -> Column:
     return Column.invoke_anonymous_function(y, "regr_syy", x)
-@meta(unsupported_engines="*")
+@meta()
 def replace(
     src: ColumnOrName, search: ColumnOrName, replace: t.Optional[ColumnOrName] = None
 ) -> Column:
@@ -5513,6 +5527,11 @@ def replace(
     >>> df.select(replace(df.a, df.b).alias('r')).collect()
     [Row(r='ABC')]
     """
+    if replace is None and (
+        _get_session()._is_duckdb or _get_session()._is_postgres or _get_session()._is_bigquery
+    ):
+        replace = expression.Literal.string("")  # type: ignore
     if replace is not None:
         return Column.invoke_anonymous_function(src, "replace", search, replace)
     else:
@@ -6397,7 +6416,7 @@ def unix_date(col: ColumnOrName) -> Column:
     return Column.invoke_expression_over_column(col, expression.UnixDate)
-@meta(unsupported_engines="*")
+@meta()
 def unix_micros(col: ColumnOrName) -> Column:
     """Returns the number of microseconds since 1970-01-01 00:00:00 UTC.
@@ -6411,10 +6430,20 @@ def unix_micros(col: ColumnOrName) -> Column:
     [Row(n=1437584400000000)]
     >>> spark.conf.unset("spark.sql.session.timeZone")
     """
+    from sqlframe.base.function_alternatives import unix_micros_multiply_epoch
+    if (
+        _get_session()._is_bigquery
+        or _get_session()._is_duckdb
+        or _get_session()._is_postgres
+        or _get_session()._is_snowflake
+    ):
+        return unix_micros_multiply_epoch(col)
     return Column.invoke_anonymous_function(col, "unix_micros")
-@meta(unsupported_engines="*")
+@meta()
 def unix_millis(col: ColumnOrName) -> Column:
     """Returns the number of milliseconds since 1970-01-01 00:00:00 UTC.
     Truncates higher levels of precision.
@@ -6429,10 +6458,20 @@ def unix_millis(col: ColumnOrName) -> Column:
     [Row(n=1437584400000)]
     >>> spark.conf.unset("spark.sql.session.timeZone")
     """
+    from sqlframe.base.function_alternatives import unix_millis_multiply_epoch
+    if (
+        _get_session()._is_bigquery
+        or _get_session()._is_duckdb
+        or _get_session()._is_postgres
+        or _get_session()._is_snowflake
+    ):
+        return unix_millis_multiply_epoch(col)
     return Column.invoke_anonymous_function(col, "unix_millis")
-@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
+@meta()
 def unix_seconds(col: ColumnOrName) -> Column:
     """Returns the number of seconds since 1970-01-01 00:00:00 UTC.
     Truncates higher levels of precision.
@@ -6447,6 +6486,27 @@ def unix_seconds(col: ColumnOrName) -> Column:
     [Row(n=1437584400)]
     >>> spark.conf.unset("spark.sql.session.timeZone")
     """
+    from sqlframe.base.function_alternatives import unix_seconds_extract_epoch
+    if _get_session()._is_postgres:
+        return unix_seconds_extract_epoch(col)
+    if _get_session()._is_bigquery:
+        return Column(
+            expression.Anonymous(
+                this="UNIX_SECONDS",
+                expressions=[
+                    expression.Anonymous(
+                        this="TIMESTAMP",
+                        expressions=[
+                            Column.ensure_col(col).column_expression,
+                            expression.Literal.string("UTC"),
+                        ],
+                    )
+                ],
+            )
+        )
     return Column.invoke_expression_over_column(col, expression.UnixSeconds)

sqlframe/base/session.py CHANGED Viewed

@@ -108,6 +108,11 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
         if not getattr(self, "schema", None) or schema:
             self._schema = schema
+    # https://github.com/eakmanrq/sqlframe/issues/262
+    @property
+    def execution_dialect_name(self) -> str:
+        return self.execution_dialect.__class__.__name__.lower()
     @property
     def read(self) -> READER:
         return self._reader(self)

{sqlframe-3.14.2.dist-info → sqlframe-3.15.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sqlframe
-Version: 3.14.2
+Version: 3.15.1
 Summary: Turning PySpark Into a Universal DataFrame API
 Home-page: https://github.com/eakmanrq/sqlframe
 Author: Ryan Eakman

{sqlframe-3.14.2.dist-info → sqlframe-3.15.1.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
-sqlframe/_version.py,sha256=jOmVUgfrjHuKbVMclbnyeOg5hq5CSFD43rU-r3QVgI8,413
+sqlframe/_version.py,sha256=rNfI2qI8EULJid-fGjytQ8KiqfMi0Ktaq6sNSFSM_1s,413
 sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
 sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
@@ -7,13 +7,13 @@ sqlframe/base/column.py,sha256=wRghgieYAA51aw4WuFQWOvl0TFOToZbBhBuIamEzxx4,18011
 sqlframe/base/dataframe.py,sha256=E1zWlB_a2FNOxjTcQ68MtL_A4c8fnLiHY3MeZttK4Xk,76570
 sqlframe/base/decorators.py,sha256=P56cgs8DANxGRIwVs5uOMnDy-BlXZZYMbf4fdnkpWPI,1889
 sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
-sqlframe/base/function_alternatives.py,sha256=8kDCh1cOXtdCcBPYBQ8byXxRAZvphS9N8GDs4txBzGg,52544
-sqlframe/base/functions.py,sha256=8gBaQGUnfbwtJk9sg87HQul8d4Q9lCw3rPU9koYWxE0,218776
+sqlframe/base/function_alternatives.py,sha256=NV31IaEhVYmfUSWetAEFISAvLzs2DxQ7bp-iMNgj0hQ,53786
+sqlframe/base/functions.py,sha256=9mN54Nx6yqos1njfyW2-WRzfFUsA96P9z1ldJVtovSs,220543
 sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
 sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
 sqlframe/base/operations.py,sha256=xSPw74e59wYvNd6U1AlwziNCTG6Aftrbl4SybN9u9VE,3450
 sqlframe/base/readerwriter.py,sha256=w8926cqIrXF7NGHiINw5UHzP_3xpjsqbijTBTzycBRM,26605
-sqlframe/base/session.py,sha256=LwGYgKOymzlX5CKl_vZG-J2j5fkuGO3uPIRKpMqB6MI,26190
+sqlframe/base/session.py,sha256=s9M9_nbtOQQgLyEBZs-ijkMeHkYkILHfBc8JsU2SLmU,26369
 sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
 sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
 sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
 sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
 sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
 sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
-sqlframe-3.14.2.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
-sqlframe-3.14.2.dist-info/METADATA,sha256=jaarma0pQSOhwGo8XtkdteTdJadSB4CIiVrjLLQovu0,8970
-sqlframe-3.14.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-sqlframe-3.14.2.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
-sqlframe-3.14.2.dist-info/RECORD,,
+sqlframe-3.15.1.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
+sqlframe-3.15.1.dist-info/METADATA,sha256=-MxovSCoyQnT-6Ujd4BDA_yVpf9KWra2v1CQGN2TmG4,8970
+sqlframe-3.15.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+sqlframe-3.15.1.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
+sqlframe-3.15.1.dist-info/RECORD,,

{sqlframe-3.14.2.dist-info → sqlframe-3.15.1.dist-info}/LICENSE RENAMED Viewed

File without changes

{sqlframe-3.14.2.dist-info → sqlframe-3.15.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{sqlframe-3.14.2.dist-info → sqlframe-3.15.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

sqlframe 3.14.2__py3-none-any.whl → 3.15.1__py3-none-any.whl

sqlframe 3.14.2__py3-none-any.whl → 3.15.1__py3-none-any.whl