sqlframe 3.43.5__py3-none-any.whl → 3.43.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/__init__.py CHANGED
@@ -6,6 +6,10 @@ import typing as t
6
6
  from contextlib import contextmanager
7
7
  from unittest.mock import MagicMock
8
8
 
9
+ from sqlglot.dialects import spark
10
+
11
+ spark.Spark.Generator.readparquet_sql = lambda self, x: x.sql(dialect="duckdb") # type: ignore
12
+
9
13
  if t.TYPE_CHECKING:
10
14
  from sqlframe.base.session import CONN
11
15
 
sqlframe/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '3.43.5'
32
- __version_tuple__ = version_tuple = (3, 43, 5)
31
+ __version__ = version = '3.43.7'
32
+ __version_tuple__ = version_tuple = (3, 43, 7)
33
33
 
34
- __commit_id__ = commit_id = 'ge9449259f'
34
+ __commit_id__ = commit_id = 'g745c96966'
sqlframe/base/dataframe.py CHANGED
@@ -361,15 +361,31 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
361
361
 
362
362
  def _replace_cte_names_with_hashes(self, expression: exp.Select):
363
363
  replacement_mapping = {}
364
- for cte in expression.ctes:
364
+ seen_hashes: t.Dict[str, exp.Identifier] = {}
365
+ cte_indices_to_remove = []
366
+
367
+ for i, cte in enumerate(expression.ctes):
365
368
  old_name_id = cte.args["alias"].this
366
- new_hashed_id = exp.to_identifier(
367
- self._create_hash_from_expression(cte.this), quoted=old_name_id.args["quoted"]
368
- )
369
- replacement_mapping[old_name_id] = new_hashed_id
369
+ cte_hash = self._create_hash_from_expression(cte.this)
370
+
371
+ if cte_hash in seen_hashes:
372
+ # Duplicate CTE found - map its old name to the existing hash
373
+ replacement_mapping[old_name_id] = seen_hashes[cte_hash]
374
+ cte_indices_to_remove.append(i)
375
+ else:
376
+ # New unique CTE - process normally
377
+ new_hashed_id = exp.to_identifier(cte_hash, quoted=old_name_id.args["quoted"])
378
+ seen_hashes[cte_hash] = new_hashed_id
379
+ replacement_mapping[old_name_id] = new_hashed_id
380
+
370
381
  expression = expression.transform(replace_id_value, replacement_mapping).assert_is(
371
382
  exp.Select
372
383
  )
384
+
385
+ # Remove duplicate CTEs by index in reverse order to avoid index shifting
386
+ for idx in reversed(cte_indices_to_remove):
387
+ del expression.args["with"].expressions[idx]
388
+
373
389
  return expression
374
390
 
375
391
  def _create_cte_from_expression(
sqlframe/base/functions.py CHANGED
@@ -385,9 +385,9 @@ def tan(col: ColumnOrName) -> Column:
385
385
  return Column.invoke_expression_over_column(col, expression.Tan)
386
386
 
387
387
 
388
- @meta(unsupported_engines="duckdb")
388
+ @meta()
389
389
  def tanh(col: ColumnOrName) -> Column:
390
- return Column.invoke_anonymous_function(col, "TANH")
390
+ return Column.invoke_expression_over_column(col, expression.Tanh)
391
391
 
392
392
 
393
393
  @meta()
@@ -414,7 +414,7 @@ def radians(col: ColumnOrName) -> Column:
414
414
  if session._is_bigquery:
415
415
  return radians_bgutil(col)
416
416
 
417
- return Column.invoke_anonymous_function(col, "RADIANS")
417
+ return Column.invoke_expression_over_column(col, expression.Radians)
418
418
 
419
419
 
420
420
  toRadians = radians
@@ -1078,7 +1078,7 @@ def hour(col: ColumnOrName) -> Column:
1078
1078
  if session._is_bigquery or session._is_postgres:
1079
1079
  return hour_from_extract(col)
1080
1080
 
1081
- return Column.invoke_anonymous_function(col, "HOUR")
1081
+ return Column.invoke_expression_over_column(col, expression.Hour)
1082
1082
 
1083
1083
 
1084
1084
  @meta()
@@ -1090,7 +1090,7 @@ def minute(col: ColumnOrName) -> Column:
1090
1090
  if session._is_bigquery or session._is_postgres:
1091
1091
  return minute_from_extract(col)
1092
1092
 
1093
- return Column.invoke_anonymous_function(col, "MINUTE")
1093
+ return Column.invoke_expression_over_column(col, expression.Minute)
1094
1094
 
1095
1095
 
1096
1096
  @meta()
@@ -1102,7 +1102,7 @@ def second(col: ColumnOrName) -> Column:
1102
1102
  if session._is_bigquery or session._is_postgres:
1103
1103
  return second_from_extract(col)
1104
1104
 
1105
- return Column.invoke_anonymous_function(col, "SECOND")
1105
+ return Column.invoke_expression_over_column(col, expression.Second)
1106
1106
 
1107
1107
 
1108
1108
  @meta()
@@ -1398,7 +1398,7 @@ def next_day(col: ColumnOrName, dayOfWeek: str) -> Column:
1398
1398
  if session._is_bigquery:
1399
1399
  return next_day_bgutil(col, dayOfWeek)
1400
1400
 
1401
- return Column.invoke_anonymous_function(col, "NEXT_DAY", lit(dayOfWeek))
1401
+ return Column.invoke_expression_over_column(col, expression.NextDay, expression=lit(dayOfWeek))
1402
1402
 
1403
1403
 
1404
1404
  @meta()
@@ -2160,7 +2160,7 @@ def bit_count(col: ColumnOrName) -> Column:
2160
2160
  if session._is_duckdb:
2161
2161
  return Column.invoke_anonymous_function(col, "BIT_COUNT")
2162
2162
 
2163
- return Column.invoke_expression_over_column(col, expression.BitwiseCountAgg)
2163
+ return Column.invoke_expression_over_column(col, expression.BitwiseCount)
2164
2164
 
2165
2165
 
2166
2166
  @meta(unsupported_engines="*")
@@ -4108,7 +4108,7 @@ def left(str: ColumnOrName, len: ColumnOrName) -> Column:
4108
4108
  return Column.invoke_expression_over_column(str, expression.Left, expression=len)
4109
4109
 
4110
4110
 
4111
- @meta(unsupported_engines="*")
4111
+ @meta(unsupported_engines="bigquery")
4112
4112
  def like(
4113
4113
  str: ColumnOrName, pattern: ColumnOrName, escapeChar: t.Optional["Column"] = None
4114
4114
  ) -> Column:
@@ -4886,7 +4886,7 @@ def parse_url(
4886
4886
  )
4887
4887
 
4888
4888
 
4889
- @meta(unsupported_engines="*")
4889
+ @meta(unsupported_engines=["bigquery", "snowflake"])
4890
4890
  def pi() -> Column:
4891
4891
  """Returns Pi.
4892
4892
 
@@ -4901,7 +4901,7 @@ def pi() -> Column:
4901
4901
  |3.141592653589793|
4902
4902
  +-----------------+
4903
4903
  """
4904
- return Column.invoke_anonymous_function(None, "pi")
4904
+ return Column.invoke_expression_over_column(None, expression.Pi)
4905
4905
 
4906
4906
 
4907
4907
  @meta(unsupported_engines="*")
@@ -6805,7 +6805,7 @@ def weekday(col: ColumnOrName) -> Column:
6805
6805
  return Column.invoke_anonymous_function(col, "weekday")
6806
6806
 
6807
6807
 
6808
- @meta(unsupported_engines="*")
6808
+ @meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
6809
6809
  def width_bucket(
6810
6810
  v: ColumnOrName,
6811
6811
  min: ColumnOrName,
@@ -6854,7 +6854,9 @@ def width_bucket(
6854
6854
  +----------------------------+
6855
6855
  """
6856
6856
  numBucket = lit(numBucket) if isinstance(numBucket, int) else numBucket
6857
- return Column.invoke_anonymous_function(v, "width_bucket", min, max, numBucket)
6857
+ return Column.invoke_expression_over_column(
6858
+ v, expression.WidthBucket, min_value=min, max_value=max, num_buckets=numBucket
6859
+ )
6858
6860
 
6859
6861
 
6860
6862
  @meta(unsupported_engines=["*", "spark"])
sqlframe/base/operations.py CHANGED
@@ -78,7 +78,7 @@ def operation(
78
78
 
79
79
  # Here decorate a function (self: _BaseGroupedData[DF], *args, **kwargs) -> DF
80
80
  # Hence we work with t.Callable[Concatenate[_BaseGroupedData[DF], P], DF]
81
- # We simplify the parameters, as Pyright (used for VSCode autocomplete) doesn't unterstand this
81
+ # We simplify the parameters, as Pyright (used for VSCode autocomplete) doesn't understand this
82
82
  def group_operation(
83
83
  op: Operation,
84
84
  ) -> t.Callable[[t.Callable[P, DF]], t.Callable[P, DF]]:
sqlframe/base/util.py CHANGED
@@ -239,7 +239,7 @@ def soundex(s):
239
239
  result = [s[0]]
240
240
  count = 1
241
241
 
242
- # find would-be replacment for first character
242
+ # find would-be replacement for first character
243
243
  for lset, sub in replacements:
244
244
  if s[0] in lset:
245
245
  last = sub
sqlframe/databricks/readwriter.py CHANGED
@@ -231,7 +231,7 @@ class DatabricksDataFrameWriter(
231
231
  elif mode == "ignore":
232
232
  pass
233
233
  else:
234
- raise RuntimeError(f"Unssuported mode: {mode}")
234
+ raise RuntimeError(f"Unsupported mode: {mode}")
235
235
 
236
236
  def insertInto(
237
237
  self,
sqlframe-3.43.7.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.43.5
3
+ Version: 3.43.7
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -18,7 +18,7 @@ Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
19
  Requires-Dist: more-itertools
20
20
  Requires-Dist: prettytable <4
21
- Requires-Dist: sqlglot <27.26,>=24.0.0
21
+ Requires-Dist: sqlglot <27.29,>=24.0.0
22
22
  Requires-Dist: typing-extensions
23
23
  Provides-Extra: bigquery
24
24
  Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
sqlframe-3.43.7.dist-info/RECORD CHANGED
@@ -1,25 +1,25 @@
1
- sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
2
- sqlframe/_version.py,sha256=IvY9cEQS0mR8Z_blqYVt_qrlrvrqKlUu84D9RGXXjQI,714
1
+ sqlframe/__init__.py,sha256=Et338oqN6tgrUNzSHpaIyjTiXcXS9lze7qeLFYdRNVc,3536
2
+ sqlframe/_version.py,sha256=rxxVqq9tM144VQj_9aHUEqtWXVIJXvf2oSYA3-oytsk,714
3
3
  sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
4
4
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
6
6
  sqlframe/base/catalog.py,sha256=-YulM2BMK8MoWbXi05AsJIPxd4AuiZDBCZuk4HoeMlE,38900
7
7
  sqlframe/base/column.py,sha256=f6rK6-hTiNx9WwJP7t6tqL3xEC2gwERPDlhWCS5iCBw,21417
8
- sqlframe/base/dataframe.py,sha256=3vlZij84GUKpS23DZSrTRm0mi5SRFjZv_BSn4rAJ0IE,89614
8
+ sqlframe/base/dataframe.py,sha256=iKdiJ9OnMEbL0GAwydP9hQz0XsTKs1mpKi_6ajrCZ9I,90286
9
9
  sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
10
10
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
11
11
  sqlframe/base/function_alternatives.py,sha256=aTu3nQhIAkZoxrI1IpjpaHEAMxBNms0AnhS0EMR-TwY,51727
12
- sqlframe/base/functions.py,sha256=6w-uUadya_Tih20uNW21M-UMQ2iM7VPgvZwIT-yn6Zg,229620
12
+ sqlframe/base/functions.py,sha256=FtX9TAoST0Z_OrIlS9bOJ3V--lGmDE8o8rAEiNIL-5Q,229808
13
13
  sqlframe/base/group.py,sha256=fBm8EUve7W7xz11nybTXr09ih-yZxL_vvEiZVE1eb_0,12025
14
14
  sqlframe/base/normalize.py,sha256=YPeopWr8ZRjevArYfrM-DZBkQp4t4UfAEwynoj4VvcU,11773
15
- sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
15
+ sqlframe/base/operations.py,sha256=v8BA80eDKBOOpfHB_Zxsdi2RrDkSl1-_gtA-GuwO8qY,4438
16
16
  sqlframe/base/readerwriter.py,sha256=b1CZgOZv-8h0sC3PWqPVAwAwlDMjpmRys6FGhugKspU,31391
17
17
  sqlframe/base/session.py,sha256=jMm18v5MYW7Z61pXda-yd-WNYYwnYU9i2jIaT8gKSvA,27687
18
18
  sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
19
19
  sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
20
20
  sqlframe/base/types.py,sha256=OktuJ5f7tEogOW0oupI0RBlHfzZMmKh7zGLke9cwllo,12305
21
21
  sqlframe/base/udf.py,sha256=O6hMhBUy9NVv-mhJRtfFhXTIa_-Z8Y_FkmmuOHu0l90,1117
22
- sqlframe/base/util.py,sha256=11rBF_GBFXGBCllSdlWWWo8EiZZATJn4me3u7OUNIFg,19782
22
+ sqlframe/base/util.py,sha256=THc2Xpa3oo-cKohFFQ1KFTMn8WSrSD58sTjNMAtr7wM,19783
23
23
  sqlframe/base/window.py,sha256=7NaKDTlhun-95LEghukBCjFBwq0RHrPaajWQNCsLxok,4818
24
24
  sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
25
  sqlframe/base/mixins/catalog_mixins.py,sha256=9fZGWToz9xMJSzUl1vsVtj6TH3TysP3fBCKJLnGUQzE,23353
@@ -46,7 +46,7 @@ sqlframe/databricks/dataframe.py,sha256=8kwT1kWU2TwGjR9zDrGdmkvabiBCivA_Mcg06r2X
46
46
  sqlframe/databricks/functions.py,sha256=La8rjAwO0hD4FBO0QxW5CtZtFAPvOrVc6lG4OtPGgbc,336
47
47
  sqlframe/databricks/functions.pyi,sha256=FzVBpzXCJzxIp73sIAo_R8Wx8uOJrix-W12HsgyeTcQ,23799
48
48
  sqlframe/databricks/group.py,sha256=dU3g0DVLRlfOSCamKchQFXRd1WTFbdxoXkpEX8tPD6Y,399
49
- sqlframe/databricks/readwriter.py,sha256=u2-0j_gXB4JikMxLBzUWhJZhJ5tYbGJpIGTqnWuDKqk,14521
49
+ sqlframe/databricks/readwriter.py,sha256=RHwhfnYXC8LOCE_WSKufIB5CvHWeSiaKg5DUHjfGDTY,14521
50
50
  sqlframe/databricks/session.py,sha256=i2CgrLIHJb53Cx1qu_rE1-cmmm19S-Sw1MhTISX1zYU,4013
51
51
  sqlframe/databricks/table.py,sha256=Q0Vnrl5aUqnqFTQpTwfWMRyQ9AQnagtpnSnXmP6IKRs,678
52
52
  sqlframe/databricks/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
130
130
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
131
131
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
132
132
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
133
- sqlframe-3.43.5.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
134
- sqlframe-3.43.5.dist-info/METADATA,sha256=30lHlixHE3YTNzzWqPsYLh00DzthRRTU3KbM96TE8Cc,9070
135
- sqlframe-3.43.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
136
- sqlframe-3.43.5.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
137
- sqlframe-3.43.5.dist-info/RECORD,,
133
+ sqlframe-3.43.7.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
134
+ sqlframe-3.43.7.dist-info/METADATA,sha256=pOTnxd_hgmoNGSbRzFF-4fWMETe2sq8VKRL3_HhMg0o,9070
135
+ sqlframe-3.43.7.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
136
+ sqlframe-3.43.7.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
137
+ sqlframe-3.43.7.dist-info/RECORD,,