sqlframe 3.14.2__py3-none-any.whl → 3.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.14.2'
16
- __version_tuple__ = version_tuple = (3, 14, 2)
15
+ __version__ = version = '3.15.1'
16
+ __version_tuple__ = version_tuple = (3, 15, 1)
@@ -6,9 +6,7 @@ import re
6
6
  import typing as t
7
7
 
8
8
  from sqlglot import exp as expression
9
- from sqlglot.dialects.dialect import build_formatted_time
10
9
  from sqlglot.helper import ensure_list
11
- from sqlglot.helper import flatten as _flatten
12
10
 
13
11
  from sqlframe.base.column import Column
14
12
  from sqlframe.base.util import (
@@ -1410,6 +1408,14 @@ def regexp_replace_global_option(
1410
1408
  )
1411
1409
 
1412
1410
 
1411
+ def regexp_with_matches(str: ColumnOrName, regexp: ColumnOrName) -> Column:
1412
+ return Column.invoke_anonymous_function(str, "REGEXP_MATCHES", regexp)
1413
+
1414
+
1415
+ def regexp_with_contains(str: ColumnOrName, regexp: ColumnOrName) -> Column:
1416
+ return Column.invoke_anonymous_function(str, "REGEXP_CONTAINS", regexp)
1417
+
1418
+
1413
1419
  def degrees_bgutil(col: ColumnOrName) -> Column:
1414
1420
  return Column(
1415
1421
  expression.Anonymous(
@@ -1519,6 +1525,43 @@ def unix_timestamp_bgutil(
1519
1525
  )
1520
1526
 
1521
1527
 
1528
+ def unix_seconds_extract_epoch(col: ColumnOrName) -> Column:
1529
+ return Column(
1530
+ expression.Extract(
1531
+ this=expression.Var(this="EPOCH"),
1532
+ expression=Column.ensure_col(col).column_expression,
1533
+ )
1534
+ )
1535
+
1536
+
1537
+ def unix_millis_multiply_epoch(col: ColumnOrName) -> Column:
1538
+ unix_seconds = get_func_from_session("unix_seconds")
1539
+
1540
+ return Column(
1541
+ expression.Cast(
1542
+ this=expression.Mul(
1543
+ this=unix_seconds(col).column_expression,
1544
+ expression=expression.Literal.number(1000),
1545
+ ),
1546
+ to=expression.DataType.build("bigint"),
1547
+ )
1548
+ )
1549
+
1550
+
1551
+ def unix_micros_multiply_epoch(col: ColumnOrName) -> Column:
1552
+ unix_seconds = get_func_from_session("unix_seconds")
1553
+
1554
+ return Column(
1555
+ expression.Cast(
1556
+ this=expression.Mul(
1557
+ this=unix_seconds(col).column_expression,
1558
+ expression=expression.Literal.number(1000000),
1559
+ ),
1560
+ to=expression.DataType.build("bigint"),
1561
+ )
1562
+ )
1563
+
1564
+
1522
1565
  def format_number_bgutil(col: ColumnOrName, d: int) -> Column:
1523
1566
  round = get_func_from_session("round")
1524
1567
  lit = get_func_from_session("lit")
@@ -51,6 +51,8 @@ def col(column_name: t.Union[ColumnOrName, t.Any]) -> Column:
51
51
  def lit(value: t.Optional[t.Any] = None) -> Column:
52
52
  if isinstance(value, str):
53
53
  return Column(expression.Literal.string(value))
54
+ if isinstance(value, float) and value in {float("inf"), float("-inf")}:
55
+ return Column(expression.Literal.string(str(value)))
54
56
  return Column(value)
55
57
 
56
58
 
@@ -961,12 +963,15 @@ def dayofweek(col: ColumnOrName) -> Column:
961
963
  return dayofweek_from_extract(col)
962
964
 
963
965
  if session._is_postgres:
964
- return dayofweek_from_extract_with_isodow(col)
966
+ return dayofweek_from_extract_with_isodow(col) + 1
965
967
 
966
- return Column.invoke_expression_over_column(
968
+ result = Column.invoke_expression_over_column(
967
969
  Column(expression.TsOrDsToDate(this=Column.ensure_col(col).column_expression)),
968
970
  expression.DayOfWeek,
969
971
  )
972
+ if session._is_duckdb or session._is_snowflake:
973
+ return result + 1
974
+ return result
970
975
 
971
976
 
972
977
  @meta()
@@ -2962,14 +2967,14 @@ def char(col: ColumnOrName) -> Column:
2962
2967
  return Column(expression.Chr(expressions=Column.ensure_col(col).column_expression))
2963
2968
 
2964
2969
 
2965
- @meta(unsupported_engines="*")
2970
+ @meta()
2966
2971
  def char_length(str: ColumnOrName) -> Column:
2967
- return Column.invoke_anonymous_function(str, "char_length")
2972
+ return Column.invoke_expression_over_column(str, expression.Length)
2968
2973
 
2969
2974
 
2970
- @meta(unsupported_engines="*")
2975
+ @meta()
2971
2976
  def character_length(str: ColumnOrName) -> Column:
2972
- return Column.invoke_anonymous_function(str, "character_length")
2977
+ return Column.invoke_expression_over_column(str, expression.Length)
2973
2978
 
2974
2979
 
2975
2980
  @meta(unsupported_engines=["bigquery", "postgres"])
@@ -4946,7 +4951,7 @@ def reflect(*cols: ColumnOrName) -> Column:
4946
4951
  return Column.invoke_anonymous_function(cols[0], "reflect")
4947
4952
 
4948
4953
 
4949
- @meta(unsupported_engines="*")
4954
+ @meta(unsupported_engines="snowflake")
4950
4955
  def regexp(str: ColumnOrName, regexp: ColumnOrName) -> Column:
4951
4956
  r"""Returns true if `str` matches the Java regex `regexp`, or false otherwise.
4952
4957
 
@@ -4996,12 +5001,21 @@ def regexp(str: ColumnOrName, regexp: ColumnOrName) -> Column:
4996
5001
  | true|
4997
5002
  +-------------------+
4998
5003
  """
4999
- from sqlframe.base.function_alternatives import regexp_extract_only_one_group
5004
+ from sqlframe.base.function_alternatives import (
5005
+ regexp_with_contains,
5006
+ regexp_with_matches,
5007
+ )
5000
5008
 
5001
5009
  session = _get_session()
5002
5010
 
5011
+ if session._is_duckdb:
5012
+ return regexp_with_matches(str, regexp)
5013
+
5014
+ if session._is_postgres:
5015
+ return Column.invoke_expression_over_column(str, expression.RegexpILike, expression=regexp)
5016
+
5003
5017
  if session._is_bigquery:
5004
- return regexp_extract_only_one_group(str, regexp) # type: ignore
5018
+ return regexp_with_contains(str, regexp)
5005
5019
 
5006
5020
  return Column.invoke_anonymous_function(str, "regexp", regexp)
5007
5021
 
@@ -5485,7 +5499,7 @@ def regr_syy(y: ColumnOrName, x: ColumnOrName) -> Column:
5485
5499
  return Column.invoke_anonymous_function(y, "regr_syy", x)
5486
5500
 
5487
5501
 
5488
- @meta(unsupported_engines="*")
5502
+ @meta()
5489
5503
  def replace(
5490
5504
  src: ColumnOrName, search: ColumnOrName, replace: t.Optional[ColumnOrName] = None
5491
5505
  ) -> Column:
@@ -5513,6 +5527,11 @@ def replace(
5513
5527
  >>> df.select(replace(df.a, df.b).alias('r')).collect()
5514
5528
  [Row(r='ABC')]
5515
5529
  """
5530
+ if replace is None and (
5531
+ _get_session()._is_duckdb or _get_session()._is_postgres or _get_session()._is_bigquery
5532
+ ):
5533
+ replace = expression.Literal.string("") # type: ignore
5534
+
5516
5535
  if replace is not None:
5517
5536
  return Column.invoke_anonymous_function(src, "replace", search, replace)
5518
5537
  else:
@@ -6397,7 +6416,7 @@ def unix_date(col: ColumnOrName) -> Column:
6397
6416
  return Column.invoke_expression_over_column(col, expression.UnixDate)
6398
6417
 
6399
6418
 
6400
- @meta(unsupported_engines="*")
6419
+ @meta()
6401
6420
  def unix_micros(col: ColumnOrName) -> Column:
6402
6421
  """Returns the number of microseconds since 1970-01-01 00:00:00 UTC.
6403
6422
 
@@ -6411,10 +6430,20 @@ def unix_micros(col: ColumnOrName) -> Column:
6411
6430
  [Row(n=1437584400000000)]
6412
6431
  >>> spark.conf.unset("spark.sql.session.timeZone")
6413
6432
  """
6433
+ from sqlframe.base.function_alternatives import unix_micros_multiply_epoch
6434
+
6435
+ if (
6436
+ _get_session()._is_bigquery
6437
+ or _get_session()._is_duckdb
6438
+ or _get_session()._is_postgres
6439
+ or _get_session()._is_snowflake
6440
+ ):
6441
+ return unix_micros_multiply_epoch(col)
6442
+
6414
6443
  return Column.invoke_anonymous_function(col, "unix_micros")
6415
6444
 
6416
6445
 
6417
- @meta(unsupported_engines="*")
6446
+ @meta()
6418
6447
  def unix_millis(col: ColumnOrName) -> Column:
6419
6448
  """Returns the number of milliseconds since 1970-01-01 00:00:00 UTC.
6420
6449
  Truncates higher levels of precision.
@@ -6429,10 +6458,20 @@ def unix_millis(col: ColumnOrName) -> Column:
6429
6458
  [Row(n=1437584400000)]
6430
6459
  >>> spark.conf.unset("spark.sql.session.timeZone")
6431
6460
  """
6461
+ from sqlframe.base.function_alternatives import unix_millis_multiply_epoch
6462
+
6463
+ if (
6464
+ _get_session()._is_bigquery
6465
+ or _get_session()._is_duckdb
6466
+ or _get_session()._is_postgres
6467
+ or _get_session()._is_snowflake
6468
+ ):
6469
+ return unix_millis_multiply_epoch(col)
6470
+
6432
6471
  return Column.invoke_anonymous_function(col, "unix_millis")
6433
6472
 
6434
6473
 
6435
- @meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
6474
+ @meta()
6436
6475
  def unix_seconds(col: ColumnOrName) -> Column:
6437
6476
  """Returns the number of seconds since 1970-01-01 00:00:00 UTC.
6438
6477
  Truncates higher levels of precision.
@@ -6447,6 +6486,27 @@ def unix_seconds(col: ColumnOrName) -> Column:
6447
6486
  [Row(n=1437584400)]
6448
6487
  >>> spark.conf.unset("spark.sql.session.timeZone")
6449
6488
  """
6489
+ from sqlframe.base.function_alternatives import unix_seconds_extract_epoch
6490
+
6491
+ if _get_session()._is_postgres:
6492
+ return unix_seconds_extract_epoch(col)
6493
+
6494
+ if _get_session()._is_bigquery:
6495
+ return Column(
6496
+ expression.Anonymous(
6497
+ this="UNIX_SECONDS",
6498
+ expressions=[
6499
+ expression.Anonymous(
6500
+ this="TIMESTAMP",
6501
+ expressions=[
6502
+ Column.ensure_col(col).column_expression,
6503
+ expression.Literal.string("UTC"),
6504
+ ],
6505
+ )
6506
+ ],
6507
+ )
6508
+ )
6509
+
6450
6510
  return Column.invoke_expression_over_column(col, expression.UnixSeconds)
6451
6511
 
6452
6512
 
sqlframe/base/session.py CHANGED
@@ -108,6 +108,11 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGIS
108
108
  if not getattr(self, "schema", None) or schema:
109
109
  self._schema = schema
110
110
 
111
+ # https://github.com/eakmanrq/sqlframe/issues/262
112
+ @property
113
+ def execution_dialect_name(self) -> str:
114
+ return self.execution_dialect.__class__.__name__.lower()
115
+
111
116
  @property
112
117
  def read(self) -> READER:
113
118
  return self._reader(self)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.14.2
3
+ Version: 3.15.1
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -1,5 +1,5 @@
1
1
  sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
2
- sqlframe/_version.py,sha256=jOmVUgfrjHuKbVMclbnyeOg5hq5CSFD43rU-r3QVgI8,413
2
+ sqlframe/_version.py,sha256=rNfI2qI8EULJid-fGjytQ8KiqfMi0Ktaq6sNSFSM_1s,413
3
3
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
5
5
  sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
@@ -7,13 +7,13 @@ sqlframe/base/column.py,sha256=wRghgieYAA51aw4WuFQWOvl0TFOToZbBhBuIamEzxx4,18011
7
7
  sqlframe/base/dataframe.py,sha256=E1zWlB_a2FNOxjTcQ68MtL_A4c8fnLiHY3MeZttK4Xk,76570
8
8
  sqlframe/base/decorators.py,sha256=P56cgs8DANxGRIwVs5uOMnDy-BlXZZYMbf4fdnkpWPI,1889
9
9
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
10
- sqlframe/base/function_alternatives.py,sha256=8kDCh1cOXtdCcBPYBQ8byXxRAZvphS9N8GDs4txBzGg,52544
11
- sqlframe/base/functions.py,sha256=8gBaQGUnfbwtJk9sg87HQul8d4Q9lCw3rPU9koYWxE0,218776
10
+ sqlframe/base/function_alternatives.py,sha256=NV31IaEhVYmfUSWetAEFISAvLzs2DxQ7bp-iMNgj0hQ,53786
11
+ sqlframe/base/functions.py,sha256=9mN54Nx6yqos1njfyW2-WRzfFUsA96P9z1ldJVtovSs,220543
12
12
  sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
13
13
  sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
14
14
  sqlframe/base/operations.py,sha256=xSPw74e59wYvNd6U1AlwziNCTG6Aftrbl4SybN9u9VE,3450
15
15
  sqlframe/base/readerwriter.py,sha256=w8926cqIrXF7NGHiINw5UHzP_3xpjsqbijTBTzycBRM,26605
16
- sqlframe/base/session.py,sha256=LwGYgKOymzlX5CKl_vZG-J2j5fkuGO3uPIRKpMqB6MI,26190
16
+ sqlframe/base/session.py,sha256=s9M9_nbtOQQgLyEBZs-ijkMeHkYkILHfBc8JsU2SLmU,26369
17
17
  sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
18
18
  sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
19
19
  sqlframe/base/types.py,sha256=iBNk9bpFtb2NBIogYS8i7OlQZMRvpR6XxqzBebsjQDU,12280
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
129
129
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
130
130
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
131
131
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
132
- sqlframe-3.14.2.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
133
- sqlframe-3.14.2.dist-info/METADATA,sha256=jaarma0pQSOhwGo8XtkdteTdJadSB4CIiVrjLLQovu0,8970
134
- sqlframe-3.14.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
135
- sqlframe-3.14.2.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
136
- sqlframe-3.14.2.dist-info/RECORD,,
132
+ sqlframe-3.15.1.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
133
+ sqlframe-3.15.1.dist-info/METADATA,sha256=-MxovSCoyQnT-6Ujd4BDA_yVpf9KWra2v1CQGN2TmG4,8970
134
+ sqlframe-3.15.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
135
+ sqlframe-3.15.1.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
136
+ sqlframe-3.15.1.dist-info/RECORD,,