sqlframe 3.4.0__py3-none-any.whl → 3.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/column.py +4 -1
- sqlframe/base/dataframe.py +33 -9
- sqlframe/base/function_alternatives.py +2 -2
- sqlframe/base/functions.py +6 -4
- sqlframe/duckdb/functions.pyi +1 -0
- sqlframe/snowflake/functions.pyi +1 -0
- {sqlframe-3.4.0.dist-info → sqlframe-3.5.0.dist-info}/METADATA +6 -6
- {sqlframe-3.4.0.dist-info → sqlframe-3.5.0.dist-info}/RECORD +12 -12
- {sqlframe-3.4.0.dist-info → sqlframe-3.5.0.dist-info}/LICENSE +0 -0
- {sqlframe-3.4.0.dist-info → sqlframe-3.5.0.dist-info}/WHEEL +0 -0
- {sqlframe-3.4.0.dist-info → sqlframe-3.5.0.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/column.py
CHANGED
|
@@ -338,6 +338,9 @@ class Column:
|
|
|
338
338
|
new_expression = exp.Not(this=exp.Is(this=self.column_expression, expression=exp.Null()))
|
|
339
339
|
return Column(new_expression)
|
|
340
340
|
|
|
341
|
+
def eqNullSafe(self, other: ColumnOrLiteral) -> Column:
|
|
342
|
+
return self.binary_op(exp.NullSafeEQ, other)
|
|
343
|
+
|
|
341
344
|
def cast(
|
|
342
345
|
self,
|
|
343
346
|
dataType: t.Union[str, DataType, exp.DataType, exp.DataType.Type],
|
|
@@ -385,7 +388,7 @@ class Column:
|
|
|
385
388
|
|
|
386
389
|
def isin(self, *cols: t.Union[ColumnOrLiteral, t.Iterable[ColumnOrLiteral]]):
|
|
387
390
|
columns = flatten(cols) if isinstance(cols[0], (list, set, tuple)) else cols # type: ignore
|
|
388
|
-
expressions = [self._lit(x).expression for x in columns]
|
|
391
|
+
expressions = [self._lit(x).expression for x in columns] # type: ignore
|
|
389
392
|
return Column.invoke_expression_over_column(self, exp.In, expressions=expressions) # type: ignore
|
|
390
393
|
|
|
391
394
|
def between(
|
sqlframe/base/dataframe.py
CHANGED
|
@@ -15,7 +15,7 @@ from dataclasses import dataclass
|
|
|
15
15
|
|
|
16
16
|
import sqlglot
|
|
17
17
|
from prettytable import PrettyTable
|
|
18
|
-
from sqlglot import Dialect
|
|
18
|
+
from sqlglot import Dialect, maybe_parse
|
|
19
19
|
from sqlglot import expressions as exp
|
|
20
20
|
from sqlglot import lineage as sqlglot_lineage
|
|
21
21
|
from sqlglot.helper import ensure_list, flatten, object_to_dict, seq_get
|
|
@@ -460,16 +460,40 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
460
460
|
df.expression.ctes[-1].set("cache_storage_level", storage_level)
|
|
461
461
|
return df
|
|
462
462
|
|
|
463
|
-
|
|
464
|
-
def _add_ctes_to_expression(cls, expression: exp.Select, ctes: t.List[exp.CTE]) -> exp.Select:
|
|
463
|
+
def _add_ctes_to_expression(self, expression: exp.Select, ctes: t.List[exp.CTE]) -> exp.Select:
|
|
465
464
|
expression = expression.copy()
|
|
466
465
|
with_expression = expression.args.get("with")
|
|
467
466
|
if with_expression:
|
|
468
467
|
existing_ctes = with_expression.expressions
|
|
469
|
-
|
|
468
|
+
existing_cte_counts = {x.alias_or_name: 0 for x in existing_ctes}
|
|
469
|
+
replaced_cte_names = {} # type: ignore
|
|
470
470
|
for cte in ctes:
|
|
471
|
-
if
|
|
472
|
-
|
|
471
|
+
if replaced_cte_names:
|
|
472
|
+
cte = cte.transform(replace_id_value, replaced_cte_names) # type: ignore
|
|
473
|
+
if cte.alias_or_name in existing_cte_counts:
|
|
474
|
+
existing_cte_counts[cte.alias_or_name] += 1
|
|
475
|
+
cte.set(
|
|
476
|
+
"this",
|
|
477
|
+
cte.this.where(
|
|
478
|
+
exp.EQ(
|
|
479
|
+
this=exp.Literal.number(existing_cte_counts[cte.alias_or_name]),
|
|
480
|
+
expression=exp.Literal.number(
|
|
481
|
+
existing_cte_counts[cte.alias_or_name]
|
|
482
|
+
),
|
|
483
|
+
)
|
|
484
|
+
),
|
|
485
|
+
)
|
|
486
|
+
new_cte_alias = self._create_hash_from_expression(cte.this)
|
|
487
|
+
replaced_cte_names[cte.args["alias"].this] = maybe_parse(
|
|
488
|
+
new_cte_alias, dialect=self.session.input_dialect, into=exp.Identifier
|
|
489
|
+
)
|
|
490
|
+
cte.set(
|
|
491
|
+
"alias",
|
|
492
|
+
maybe_parse(
|
|
493
|
+
new_cte_alias, dialect=self.session.input_dialect, into=exp.TableAlias
|
|
494
|
+
),
|
|
495
|
+
)
|
|
496
|
+
existing_ctes.append(cte)
|
|
473
497
|
else:
|
|
474
498
|
existing_ctes = ctes
|
|
475
499
|
expression.set("with", exp.With(expressions=existing_ctes))
|
|
@@ -843,11 +867,11 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
843
867
|
logger.warning("Got no value for on. This appears to change the join to a cross join.")
|
|
844
868
|
how = "cross"
|
|
845
869
|
other_df = other_df._convert_leaf_to_cte()
|
|
870
|
+
join_expression = self._add_ctes_to_expression(self.expression, other_df.expression.ctes)
|
|
846
871
|
# We will determine actual "join on" expression later so we don't provide it at first
|
|
847
|
-
join_expression =
|
|
848
|
-
|
|
872
|
+
join_expression = join_expression.join(
|
|
873
|
+
join_expression.ctes[-1].alias, join_type=how.replace("_", " ")
|
|
849
874
|
)
|
|
850
|
-
join_expression = self._add_ctes_to_expression(join_expression, other_df.expression.ctes)
|
|
851
875
|
self_columns = self._get_outer_select_columns(join_expression)
|
|
852
876
|
other_columns = self._get_outer_select_columns(other_df.expression)
|
|
853
877
|
join_columns = self._ensure_and_normalize_cols(on)
|
|
sqlframe/base/function_alternatives.py
CHANGED
|
@@ -1320,8 +1320,8 @@ def flatten_using_array_flatten(col: ColumnOrName) -> Column:
|
|
|
1320
1320
|
def map_concat_using_map_cat(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column:
|
|
1321
1321
|
columns = list(flatten(cols)) if not isinstance(cols[0], (str, Column)) else cols # type: ignore
|
|
1322
1322
|
if len(columns) == 1:
|
|
1323
|
-
return Column.invoke_anonymous_function(columns[0], "MAP_CAT")
|
|
1324
|
-
return Column.invoke_anonymous_function(columns[0], "MAP_CAT", *columns[1:])
|
|
1323
|
+
return Column.invoke_anonymous_function(columns[0], "MAP_CAT") # type: ignore
|
|
1324
|
+
return Column.invoke_anonymous_function(columns[0], "MAP_CAT", *columns[1:]) # type: ignore
|
|
1325
1325
|
|
|
1326
1326
|
|
|
1327
1327
|
def sequence_from_generate_series(
|
sqlframe/base/functions.py
CHANGED
|
@@ -1652,8 +1652,8 @@ def arrays_zip(*cols: ColumnOrName) -> Column:
|
|
|
1652
1652
|
def map_concat(*cols: t.Union[ColumnOrName, t.Iterable[ColumnOrName]]) -> Column:
|
|
1653
1653
|
columns = list(flatten(cols)) if not isinstance(cols[0], (str, Column)) else cols # type: ignore
|
|
1654
1654
|
if len(columns) == 1:
|
|
1655
|
-
return Column.invoke_anonymous_function(columns[0], "MAP_CONCAT")
|
|
1656
|
-
return Column.invoke_anonymous_function(columns[0], "MAP_CONCAT", *columns[1:])
|
|
1655
|
+
return Column.invoke_anonymous_function(columns[0], "MAP_CONCAT") # type: ignore
|
|
1656
|
+
return Column.invoke_anonymous_function(columns[0], "MAP_CONCAT", *columns[1:]) # type: ignore
|
|
1657
1657
|
|
|
1658
1658
|
|
|
1659
1659
|
@meta(unsupported_engines="postgres")
|
|
@@ -4697,7 +4697,7 @@ def spark_partition_id() -> Column:
|
|
|
4697
4697
|
return Column.invoke_anonymous_function(None, "spark_partition_id")
|
|
4698
4698
|
|
|
4699
4699
|
|
|
4700
|
-
@meta(unsupported_engines="
|
|
4700
|
+
@meta(unsupported_engines=["bigquery", "postgres"])
|
|
4701
4701
|
def split_part(src: ColumnOrName, delimiter: ColumnOrName, partNum: ColumnOrName) -> Column:
|
|
4702
4702
|
"""
|
|
4703
4703
|
Splits `str` by delimiter and return requested part of the split (1-based).
|
|
@@ -4723,7 +4723,9 @@ def split_part(src: ColumnOrName, delimiter: ColumnOrName, partNum: ColumnOrName
|
|
|
4723
4723
|
>>> df.select(split_part(df.a, df.b, df.c).alias('r')).collect()
|
|
4724
4724
|
[Row(r='13')]
|
|
4725
4725
|
"""
|
|
4726
|
-
return Column.
|
|
4726
|
+
return Column.invoke_expression_over_column(
|
|
4727
|
+
src, expression.SplitPart, delimiter=delimiter, part_index=partNum
|
|
4728
|
+
)
|
|
4727
4729
|
|
|
4728
4730
|
|
|
4729
4731
|
@meta()
|
sqlframe/duckdb/functions.pyi
CHANGED
sqlframe/snowflake/functions.pyi
CHANGED
|
{sqlframe-3.4.0.dist-info → sqlframe-3.5.0.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sqlframe
|
|
3
|
-
Version: 3.4.0
|
|
3
|
+
Version: 3.5.0
|
|
4
4
|
Summary: Turning PySpark Into a Universal DataFrame API
|
|
5
5
|
Home-page: https://github.com/eakmanrq/sqlframe
|
|
6
6
|
Author: Ryan Eakman
|
|
@@ -18,7 +18,7 @@ Requires-Python: >=3.8
|
|
|
18
18
|
Description-Content-Type: text/markdown
|
|
19
19
|
License-File: LICENSE
|
|
20
20
|
Requires-Dist: prettytable (<3.11.1)
|
|
21
|
-
Requires-Dist: sqlglot (<25.
|
|
21
|
+
Requires-Dist: sqlglot (<25.26,>=24.0.0)
|
|
22
22
|
Requires-Dist: typing-extensions (<5,>=4.8)
|
|
23
23
|
Provides-Extra: bigquery
|
|
24
24
|
Requires-Dist: google-cloud-bigquery-storage (<3,>=2) ; extra == 'bigquery'
|
|
@@ -26,8 +26,8 @@ Requires-Dist: google-cloud-bigquery[pandas] (<4,>=3) ; extra == 'bigquery'
|
|
|
26
26
|
Provides-Extra: dev
|
|
27
27
|
Requires-Dist: duckdb (<1.2,>=0.9) ; extra == 'dev'
|
|
28
28
|
Requires-Dist: findspark (<3,>=2) ; extra == 'dev'
|
|
29
|
-
Requires-Dist: mypy (<1.
|
|
30
|
-
Requires-Dist: openai (<1.
|
|
29
|
+
Requires-Dist: mypy (<1.13,>=1.10.0) ; extra == 'dev'
|
|
30
|
+
Requires-Dist: openai (<1.53,>=1.30) ; extra == 'dev'
|
|
31
31
|
Requires-Dist: pandas-stubs (<3,>=2) ; extra == 'dev'
|
|
32
32
|
Requires-Dist: pandas (<3,>=2) ; extra == 'dev'
|
|
33
33
|
Requires-Dist: psycopg (<4,>=3.1) ; extra == 'dev'
|
|
@@ -36,7 +36,7 @@ Requires-Dist: pyspark (<3.6,>=2) ; extra == 'dev'
|
|
|
36
36
|
Requires-Dist: pytest-postgresql (<7,>=6) ; extra == 'dev'
|
|
37
37
|
Requires-Dist: pytest-xdist (<3.7,>=3.6) ; extra == 'dev'
|
|
38
38
|
Requires-Dist: pytest (<8.4,>=8.2.0) ; extra == 'dev'
|
|
39
|
-
Requires-Dist: ruff (<0.
|
|
39
|
+
Requires-Dist: ruff (<0.8,>=0.4.4) ; extra == 'dev'
|
|
40
40
|
Requires-Dist: types-psycopg2 (<3,>=2.9) ; extra == 'dev'
|
|
41
41
|
Requires-Dist: pre-commit (>=3.5) ; (python_version == "3.8") and extra == 'dev'
|
|
42
42
|
Requires-Dist: pre-commit (<4.1,>=3.7) ; (python_version >= "3.9") and extra == 'dev'
|
|
@@ -50,7 +50,7 @@ Provides-Extra: duckdb
|
|
|
50
50
|
Requires-Dist: duckdb (<1.2,>=0.9) ; extra == 'duckdb'
|
|
51
51
|
Requires-Dist: pandas (<3,>=2) ; extra == 'duckdb'
|
|
52
52
|
Provides-Extra: openai
|
|
53
|
-
Requires-Dist: openai (<1.
|
|
53
|
+
Requires-Dist: openai (<1.53,>=1.30) ; extra == 'openai'
|
|
54
54
|
Provides-Extra: pandas
|
|
55
55
|
Requires-Dist: pandas (<3,>=2) ; extra == 'pandas'
|
|
56
56
|
Provides-Extra: postgres
|
|
{sqlframe-3.4.0.dist-info → sqlframe-3.5.0.dist-info}/RECORD
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
sqlframe/__init__.py,sha256=E3qCJ4PSEgKz6Hg3ves6LWt3JrQOV8c9HVLSIUOzKNc,3106
|
|
2
|
-
sqlframe/_version.py,sha256=
|
|
2
|
+
sqlframe/_version.py,sha256=wy34mXzQ8fLJu7i4fZikKwCDGQODEviQb-OrdMe9F4Q,411
|
|
3
3
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
|
5
5
|
sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
|
|
6
|
-
sqlframe/base/column.py,sha256=
|
|
7
|
-
sqlframe/base/dataframe.py,sha256=
|
|
6
|
+
sqlframe/base/column.py,sha256=06fhVZ2nCn2QLxnfjdK-oYKeTFJC_smgSxu7u2UYlVg,17878
|
|
7
|
+
sqlframe/base/dataframe.py,sha256=coeUwntwYbT1g6YKVwk3ZfWMfJqAzd1ECYabBSsNsV0,72892
|
|
8
8
|
sqlframe/base/decorators.py,sha256=Jy4bf8MhZ-AJ6CWTj59bBJRqamtLbPC0USUMFrY6g0w,449
|
|
9
9
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
|
10
|
-
sqlframe/base/function_alternatives.py,sha256=
|
|
11
|
-
sqlframe/base/functions.py,sha256=
|
|
10
|
+
sqlframe/base/function_alternatives.py,sha256=Cqt-OjyWewCXZJ8PV-2vc8yrDG2ennLPm6yWaxWVXRc,50746
|
|
11
|
+
sqlframe/base/functions.py,sha256=JDVULt3WI1cwWYboU8ybenIXZPdKSNtnzg7A4xC1Lao,189978
|
|
12
12
|
sqlframe/base/group.py,sha256=fsyG5990_Pd7gFPjTFrH9IEoAquL_wEkVpIlBAIkZJU,4091
|
|
13
13
|
sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
|
|
14
14
|
sqlframe/base/operations.py,sha256=-AhNuEzcV7ZExoP1oY3blaKip-joQyJeQVvfBTs_2g4,3456
|
|
@@ -40,7 +40,7 @@ sqlframe/duckdb/catalog.py,sha256=YYYVmetLUaJOdObKw4AJ7L0P-msshkta4xHlcZQ9zEA,47
|
|
|
40
40
|
sqlframe/duckdb/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
|
|
41
41
|
sqlframe/duckdb/dataframe.py,sha256=HZg_uMAz4RsubZJT4-MslUQS_0-InF0_P5Yq5HyJ3wE,1708
|
|
42
42
|
sqlframe/duckdb/functions.py,sha256=Ee8o6YFtRdEiq0jNLXxgu5lcbc7Tsg0-lK6oRyxdcjo,1920
|
|
43
|
-
sqlframe/duckdb/functions.pyi,sha256=
|
|
43
|
+
sqlframe/duckdb/functions.pyi,sha256=bWfQl7Cm1eecI39LJAyyRcC4z7epDJ-h9JOozPsEc34,5879
|
|
44
44
|
sqlframe/duckdb/group.py,sha256=IkhbW42Ng1U5YT3FkIdiB4zBqRkW4QyTb-1detY1e_4,383
|
|
45
45
|
sqlframe/duckdb/readwriter.py,sha256=iY0Wsms35gymilETOhAQGQCHfH8LCj9_uY8aCdF77ds,4806
|
|
46
46
|
sqlframe/duckdb/session.py,sha256=b5IrKbTkYUVNQGSG2EJPNV9MTdJw4onN-9aMrskjxck,2721
|
|
@@ -75,7 +75,7 @@ sqlframe/snowflake/catalog.py,sha256=Tp_B9oMTInnqLTiZNsT0UNc-a-aKXu3zFThlCwLeKCA
|
|
|
75
75
|
sqlframe/snowflake/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
|
|
76
76
|
sqlframe/snowflake/dataframe.py,sha256=ANDZ5Dpxz-WTlTtmKNcv-nJlzTeTsAOK0X0gR2euPqQ,2144
|
|
77
77
|
sqlframe/snowflake/functions.py,sha256=iX67xU8Czy5k3HiNjNbN-x-via6yqGNaMPw1maV9HPs,2580
|
|
78
|
-
sqlframe/snowflake/functions.pyi,sha256=
|
|
78
|
+
sqlframe/snowflake/functions.pyi,sha256=anVqp9rhkkD6uTTiQYGbScFed3dp_uRcyPwq1JnRmRI,6237
|
|
79
79
|
sqlframe/snowflake/group.py,sha256=pPP1l2RRo_LgkXrji8a87n2PKo-63ZRPT-WUtvVcBME,395
|
|
80
80
|
sqlframe/snowflake/readwriter.py,sha256=yhRc2HcMq6PwV3ghZWC-q-qaE7LE4aEjZEXCip4OOlQ,884
|
|
81
81
|
sqlframe/snowflake/session.py,sha256=fnFYrJ9JxoOf4ZKTrQcSKjwr6wNp6A85cMqKczOi3vA,3320
|
|
@@ -107,8 +107,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
|
107
107
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
|
108
108
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
|
109
109
|
sqlframe/testing/utils.py,sha256=9DDYVuocO7tygee3RaajuJNZ24sJwf_LY556kKg7kTw,13011
|
|
110
|
-
sqlframe-3.
|
|
111
|
-
sqlframe-3.
|
|
112
|
-
sqlframe-3.
|
|
113
|
-
sqlframe-3.
|
|
114
|
-
sqlframe-3.
|
|
110
|
+
sqlframe-3.5.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
|
111
|
+
sqlframe-3.5.0.dist-info/METADATA,sha256=vH1Trx574zbKeuhU0oPb0Q7dxgGvHqaONgNiO3tcbkk,8639
|
|
112
|
+
sqlframe-3.5.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
|
|
113
|
+
sqlframe-3.5.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
|
114
|
+
sqlframe-3.5.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|