sqlframe 3.43.5__py3-none-any.whl → 3.43.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/__init__.py +4 -0
- sqlframe/_version.py +3 -3
- sqlframe/base/dataframe.py +21 -5
- sqlframe/base/functions.py +15 -13
- sqlframe/base/operations.py +1 -1
- sqlframe/base/util.py +1 -1
- sqlframe/databricks/readwriter.py +1 -1
- {sqlframe-3.43.5.dist-info → sqlframe-3.43.7.dist-info}/METADATA +2 -2
- {sqlframe-3.43.5.dist-info → sqlframe-3.43.7.dist-info}/RECORD +12 -12
- {sqlframe-3.43.5.dist-info → sqlframe-3.43.7.dist-info}/LICENSE +0 -0
- {sqlframe-3.43.5.dist-info → sqlframe-3.43.7.dist-info}/WHEEL +0 -0
- {sqlframe-3.43.5.dist-info → sqlframe-3.43.7.dist-info}/top_level.txt +0 -0
sqlframe/__init__.py
CHANGED
|
@@ -6,6 +6,10 @@ import typing as t
|
|
|
6
6
|
from contextlib import contextmanager
|
|
7
7
|
from unittest.mock import MagicMock
|
|
8
8
|
|
|
9
|
+
from sqlglot.dialects import spark
|
|
10
|
+
|
|
11
|
+
spark.Spark.Generator.readparquet_sql = lambda self, x: x.sql(dialect="duckdb") # type: ignore
|
|
12
|
+
|
|
9
13
|
if t.TYPE_CHECKING:
|
|
10
14
|
from sqlframe.base.session import CONN
|
|
11
15
|
|
sqlframe/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '3.43.5'
|
|
32
|
-
__version_tuple__ = version_tuple = (3, 43, 5)
|
|
31
|
+
__version__ = version = '3.43.7'
|
|
32
|
+
__version_tuple__ = version_tuple = (3, 43, 7)
|
|
33
33
|
|
|
34
|
-
__commit_id__ = commit_id = '
|
|
34
|
+
__commit_id__ = commit_id = 'g745c96966'
|
sqlframe/base/dataframe.py
CHANGED
|
@@ -361,15 +361,31 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
361
361
|
|
|
362
362
|
def _replace_cte_names_with_hashes(self, expression: exp.Select):
|
|
363
363
|
replacement_mapping = {}
|
|
364
|
-
|
|
364
|
+
seen_hashes: t.Dict[str, exp.Identifier] = {}
|
|
365
|
+
cte_indices_to_remove = []
|
|
366
|
+
|
|
367
|
+
for i, cte in enumerate(expression.ctes):
|
|
365
368
|
old_name_id = cte.args["alias"].this
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
369
|
+
cte_hash = self._create_hash_from_expression(cte.this)
|
|
370
|
+
|
|
371
|
+
if cte_hash in seen_hashes:
|
|
372
|
+
# Duplicate CTE found - map its old name to the existing hash
|
|
373
|
+
replacement_mapping[old_name_id] = seen_hashes[cte_hash]
|
|
374
|
+
cte_indices_to_remove.append(i)
|
|
375
|
+
else:
|
|
376
|
+
# New unique CTE - process normally
|
|
377
|
+
new_hashed_id = exp.to_identifier(cte_hash, quoted=old_name_id.args["quoted"])
|
|
378
|
+
seen_hashes[cte_hash] = new_hashed_id
|
|
379
|
+
replacement_mapping[old_name_id] = new_hashed_id
|
|
380
|
+
|
|
370
381
|
expression = expression.transform(replace_id_value, replacement_mapping).assert_is(
|
|
371
382
|
exp.Select
|
|
372
383
|
)
|
|
384
|
+
|
|
385
|
+
# Remove duplicate CTEs by index in reverse order to avoid index shifting
|
|
386
|
+
for idx in reversed(cte_indices_to_remove):
|
|
387
|
+
del expression.args["with"].expressions[idx]
|
|
388
|
+
|
|
373
389
|
return expression
|
|
374
390
|
|
|
375
391
|
def _create_cte_from_expression(
|
sqlframe/base/functions.py
CHANGED
|
@@ -385,9 +385,9 @@ def tan(col: ColumnOrName) -> Column:
|
|
|
385
385
|
return Column.invoke_expression_over_column(col, expression.Tan)
|
|
386
386
|
|
|
387
387
|
|
|
388
|
-
@meta(
|
|
388
|
+
@meta()
|
|
389
389
|
def tanh(col: ColumnOrName) -> Column:
|
|
390
|
-
return Column.
|
|
390
|
+
return Column.invoke_expression_over_column(col, expression.Tanh)
|
|
391
391
|
|
|
392
392
|
|
|
393
393
|
@meta()
|
|
@@ -414,7 +414,7 @@ def radians(col: ColumnOrName) -> Column:
|
|
|
414
414
|
if session._is_bigquery:
|
|
415
415
|
return radians_bgutil(col)
|
|
416
416
|
|
|
417
|
-
return Column.
|
|
417
|
+
return Column.invoke_expression_over_column(col, expression.Radians)
|
|
418
418
|
|
|
419
419
|
|
|
420
420
|
toRadians = radians
|
|
@@ -1078,7 +1078,7 @@ def hour(col: ColumnOrName) -> Column:
|
|
|
1078
1078
|
if session._is_bigquery or session._is_postgres:
|
|
1079
1079
|
return hour_from_extract(col)
|
|
1080
1080
|
|
|
1081
|
-
return Column.
|
|
1081
|
+
return Column.invoke_expression_over_column(col, expression.Hour)
|
|
1082
1082
|
|
|
1083
1083
|
|
|
1084
1084
|
@meta()
|
|
@@ -1090,7 +1090,7 @@ def minute(col: ColumnOrName) -> Column:
|
|
|
1090
1090
|
if session._is_bigquery or session._is_postgres:
|
|
1091
1091
|
return minute_from_extract(col)
|
|
1092
1092
|
|
|
1093
|
-
return Column.
|
|
1093
|
+
return Column.invoke_expression_over_column(col, expression.Minute)
|
|
1094
1094
|
|
|
1095
1095
|
|
|
1096
1096
|
@meta()
|
|
@@ -1102,7 +1102,7 @@ def second(col: ColumnOrName) -> Column:
|
|
|
1102
1102
|
if session._is_bigquery or session._is_postgres:
|
|
1103
1103
|
return second_from_extract(col)
|
|
1104
1104
|
|
|
1105
|
-
return Column.
|
|
1105
|
+
return Column.invoke_expression_over_column(col, expression.Second)
|
|
1106
1106
|
|
|
1107
1107
|
|
|
1108
1108
|
@meta()
|
|
@@ -1398,7 +1398,7 @@ def next_day(col: ColumnOrName, dayOfWeek: str) -> Column:
|
|
|
1398
1398
|
if session._is_bigquery:
|
|
1399
1399
|
return next_day_bgutil(col, dayOfWeek)
|
|
1400
1400
|
|
|
1401
|
-
return Column.
|
|
1401
|
+
return Column.invoke_expression_over_column(col, expression.NextDay, expression=lit(dayOfWeek))
|
|
1402
1402
|
|
|
1403
1403
|
|
|
1404
1404
|
@meta()
|
|
@@ -2160,7 +2160,7 @@ def bit_count(col: ColumnOrName) -> Column:
|
|
|
2160
2160
|
if session._is_duckdb:
|
|
2161
2161
|
return Column.invoke_anonymous_function(col, "BIT_COUNT")
|
|
2162
2162
|
|
|
2163
|
-
return Column.invoke_expression_over_column(col, expression.
|
|
2163
|
+
return Column.invoke_expression_over_column(col, expression.BitwiseCount)
|
|
2164
2164
|
|
|
2165
2165
|
|
|
2166
2166
|
@meta(unsupported_engines="*")
|
|
@@ -4108,7 +4108,7 @@ def left(str: ColumnOrName, len: ColumnOrName) -> Column:
|
|
|
4108
4108
|
return Column.invoke_expression_over_column(str, expression.Left, expression=len)
|
|
4109
4109
|
|
|
4110
4110
|
|
|
4111
|
-
@meta(unsupported_engines="
|
|
4111
|
+
@meta(unsupported_engines="bigquery")
|
|
4112
4112
|
def like(
|
|
4113
4113
|
str: ColumnOrName, pattern: ColumnOrName, escapeChar: t.Optional["Column"] = None
|
|
4114
4114
|
) -> Column:
|
|
@@ -4886,7 +4886,7 @@ def parse_url(
|
|
|
4886
4886
|
)
|
|
4887
4887
|
|
|
4888
4888
|
|
|
4889
|
-
@meta(unsupported_engines="
|
|
4889
|
+
@meta(unsupported_engines=["bigquery", "snowflake"])
|
|
4890
4890
|
def pi() -> Column:
|
|
4891
4891
|
"""Returns Pi.
|
|
4892
4892
|
|
|
@@ -4901,7 +4901,7 @@ def pi() -> Column:
|
|
|
4901
4901
|
|3.141592653589793|
|
|
4902
4902
|
+-----------------+
|
|
4903
4903
|
"""
|
|
4904
|
-
return Column.
|
|
4904
|
+
return Column.invoke_expression_over_column(None, expression.Pi)
|
|
4905
4905
|
|
|
4906
4906
|
|
|
4907
4907
|
@meta(unsupported_engines="*")
|
|
@@ -6805,7 +6805,7 @@ def weekday(col: ColumnOrName) -> Column:
|
|
|
6805
6805
|
return Column.invoke_anonymous_function(col, "weekday")
|
|
6806
6806
|
|
|
6807
6807
|
|
|
6808
|
-
@meta(unsupported_engines="
|
|
6808
|
+
@meta(unsupported_engines=["bigquery", "duckdb", "postgres"])
|
|
6809
6809
|
def width_bucket(
|
|
6810
6810
|
v: ColumnOrName,
|
|
6811
6811
|
min: ColumnOrName,
|
|
@@ -6854,7 +6854,9 @@ def width_bucket(
|
|
|
6854
6854
|
+----------------------------+
|
|
6855
6855
|
"""
|
|
6856
6856
|
numBucket = lit(numBucket) if isinstance(numBucket, int) else numBucket
|
|
6857
|
-
return Column.
|
|
6857
|
+
return Column.invoke_expression_over_column(
|
|
6858
|
+
v, expression.WidthBucket, min_value=min, max_value=max, num_buckets=numBucket
|
|
6859
|
+
)
|
|
6858
6860
|
|
|
6859
6861
|
|
|
6860
6862
|
@meta(unsupported_engines=["*", "spark"])
|
sqlframe/base/operations.py
CHANGED
|
@@ -78,7 +78,7 @@ def operation(
|
|
|
78
78
|
|
|
79
79
|
# Here decorate a function (self: _BaseGroupedData[DF], *args, **kwargs) -> DF
|
|
80
80
|
# Hence we work with t.Callable[Concatenate[_BaseGroupedData[DF], P], DF]
|
|
81
|
-
# We simplify the parameters, as Pyright (used for VSCode autocomplete) doesn't
|
|
81
|
+
# We simplify the parameters, as Pyright (used for VSCode autocomplete) doesn't understand this
|
|
82
82
|
def group_operation(
|
|
83
83
|
op: Operation,
|
|
84
84
|
) -> t.Callable[[t.Callable[P, DF]], t.Callable[P, DF]]:
|
sqlframe/base/util.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sqlframe
|
|
3
|
-
Version: 3.43.5
|
|
3
|
+
Version: 3.43.7
|
|
4
4
|
Summary: Turning PySpark Into a Universal DataFrame API
|
|
5
5
|
Home-page: https://github.com/eakmanrq/sqlframe
|
|
6
6
|
Author: Ryan Eakman
|
|
@@ -18,7 +18,7 @@ Description-Content-Type: text/markdown
|
|
|
18
18
|
License-File: LICENSE
|
|
19
19
|
Requires-Dist: more-itertools
|
|
20
20
|
Requires-Dist: prettytable <4
|
|
21
|
-
Requires-Dist: sqlglot <27.
|
|
21
|
+
Requires-Dist: sqlglot <27.29,>=24.0.0
|
|
22
22
|
Requires-Dist: typing-extensions
|
|
23
23
|
Provides-Extra: bigquery
|
|
24
24
|
Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
|
|
@@ -1,25 +1,25 @@
|
|
|
1
|
-
sqlframe/__init__.py,sha256=
|
|
2
|
-
sqlframe/_version.py,sha256=
|
|
1
|
+
sqlframe/__init__.py,sha256=Et338oqN6tgrUNzSHpaIyjTiXcXS9lze7qeLFYdRNVc,3536
|
|
2
|
+
sqlframe/_version.py,sha256=rxxVqq9tM144VQj_9aHUEqtWXVIJXvf2oSYA3-oytsk,714
|
|
3
3
|
sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
|
|
4
4
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
|
6
6
|
sqlframe/base/catalog.py,sha256=-YulM2BMK8MoWbXi05AsJIPxd4AuiZDBCZuk4HoeMlE,38900
|
|
7
7
|
sqlframe/base/column.py,sha256=f6rK6-hTiNx9WwJP7t6tqL3xEC2gwERPDlhWCS5iCBw,21417
|
|
8
|
-
sqlframe/base/dataframe.py,sha256=
|
|
8
|
+
sqlframe/base/dataframe.py,sha256=iKdiJ9OnMEbL0GAwydP9hQz0XsTKs1mpKi_6ajrCZ9I,90286
|
|
9
9
|
sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
|
|
10
10
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
|
11
11
|
sqlframe/base/function_alternatives.py,sha256=aTu3nQhIAkZoxrI1IpjpaHEAMxBNms0AnhS0EMR-TwY,51727
|
|
12
|
-
sqlframe/base/functions.py,sha256=
|
|
12
|
+
sqlframe/base/functions.py,sha256=FtX9TAoST0Z_OrIlS9bOJ3V--lGmDE8o8rAEiNIL-5Q,229808
|
|
13
13
|
sqlframe/base/group.py,sha256=fBm8EUve7W7xz11nybTXr09ih-yZxL_vvEiZVE1eb_0,12025
|
|
14
14
|
sqlframe/base/normalize.py,sha256=YPeopWr8ZRjevArYfrM-DZBkQp4t4UfAEwynoj4VvcU,11773
|
|
15
|
-
sqlframe/base/operations.py,sha256=
|
|
15
|
+
sqlframe/base/operations.py,sha256=v8BA80eDKBOOpfHB_Zxsdi2RrDkSl1-_gtA-GuwO8qY,4438
|
|
16
16
|
sqlframe/base/readerwriter.py,sha256=b1CZgOZv-8h0sC3PWqPVAwAwlDMjpmRys6FGhugKspU,31391
|
|
17
17
|
sqlframe/base/session.py,sha256=jMm18v5MYW7Z61pXda-yd-WNYYwnYU9i2jIaT8gKSvA,27687
|
|
18
18
|
sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
|
|
19
19
|
sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
|
|
20
20
|
sqlframe/base/types.py,sha256=OktuJ5f7tEogOW0oupI0RBlHfzZMmKh7zGLke9cwllo,12305
|
|
21
21
|
sqlframe/base/udf.py,sha256=O6hMhBUy9NVv-mhJRtfFhXTIa_-Z8Y_FkmmuOHu0l90,1117
|
|
22
|
-
sqlframe/base/util.py,sha256=
|
|
22
|
+
sqlframe/base/util.py,sha256=THc2Xpa3oo-cKohFFQ1KFTMn8WSrSD58sTjNMAtr7wM,19783
|
|
23
23
|
sqlframe/base/window.py,sha256=7NaKDTlhun-95LEghukBCjFBwq0RHrPaajWQNCsLxok,4818
|
|
24
24
|
sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
25
|
sqlframe/base/mixins/catalog_mixins.py,sha256=9fZGWToz9xMJSzUl1vsVtj6TH3TysP3fBCKJLnGUQzE,23353
|
|
@@ -46,7 +46,7 @@ sqlframe/databricks/dataframe.py,sha256=8kwT1kWU2TwGjR9zDrGdmkvabiBCivA_Mcg06r2X
|
|
|
46
46
|
sqlframe/databricks/functions.py,sha256=La8rjAwO0hD4FBO0QxW5CtZtFAPvOrVc6lG4OtPGgbc,336
|
|
47
47
|
sqlframe/databricks/functions.pyi,sha256=FzVBpzXCJzxIp73sIAo_R8Wx8uOJrix-W12HsgyeTcQ,23799
|
|
48
48
|
sqlframe/databricks/group.py,sha256=dU3g0DVLRlfOSCamKchQFXRd1WTFbdxoXkpEX8tPD6Y,399
|
|
49
|
-
sqlframe/databricks/readwriter.py,sha256=
|
|
49
|
+
sqlframe/databricks/readwriter.py,sha256=RHwhfnYXC8LOCE_WSKufIB5CvHWeSiaKg5DUHjfGDTY,14521
|
|
50
50
|
sqlframe/databricks/session.py,sha256=i2CgrLIHJb53Cx1qu_rE1-cmmm19S-Sw1MhTISX1zYU,4013
|
|
51
51
|
sqlframe/databricks/table.py,sha256=Q0Vnrl5aUqnqFTQpTwfWMRyQ9AQnagtpnSnXmP6IKRs,678
|
|
52
52
|
sqlframe/databricks/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
|
|
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
|
130
130
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
|
131
131
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
|
132
132
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
|
133
|
-
sqlframe-3.43.
|
|
134
|
-
sqlframe-3.43.
|
|
135
|
-
sqlframe-3.43.
|
|
136
|
-
sqlframe-3.43.
|
|
137
|
-
sqlframe-3.43.
|
|
133
|
+
sqlframe-3.43.7.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
|
134
|
+
sqlframe-3.43.7.dist-info/METADATA,sha256=pOTnxd_hgmoNGSbRzFF-4fWMETe2sq8VKRL3_HhMg0o,9070
|
|
135
|
+
sqlframe-3.43.7.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
|
136
|
+
sqlframe-3.43.7.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
|
137
|
+
sqlframe-3.43.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|