sqlframe 1.6.1__py3-none-any.whl → 1.6.3__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/dataframe.py +31 -13
- {sqlframe-1.6.1.dist-info → sqlframe-1.6.3.dist-info}/METADATA +4 -4
- {sqlframe-1.6.1.dist-info → sqlframe-1.6.3.dist-info}/RECORD +7 -7
- {sqlframe-1.6.1.dist-info → sqlframe-1.6.3.dist-info}/LICENSE +0 -0
- {sqlframe-1.6.1.dist-info → sqlframe-1.6.3.dist-info}/WHEEL +0 -0
- {sqlframe-1.6.1.dist-info → sqlframe-1.6.3.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/dataframe.py
CHANGED
|
@@ -17,7 +17,7 @@ import sqlglot
|
|
|
17
17
|
from prettytable import PrettyTable
|
|
18
18
|
from sqlglot import Dialect
|
|
19
19
|
from sqlglot import expressions as exp
|
|
20
|
-
from sqlglot.helper import ensure_list, object_to_dict, seq_get
|
|
20
|
+
from sqlglot.helper import ensure_list, flatten, object_to_dict, seq_get
|
|
21
21
|
from sqlglot.optimizer.pushdown_projections import pushdown_projections
|
|
22
22
|
from sqlglot.optimizer.qualify import qualify
|
|
23
23
|
from sqlglot.optimizer.qualify_columns import quote_identifiers
|
|
@@ -649,11 +649,16 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
649
649
|
columns = self._ensure_and_normalize_cols(cols)
|
|
650
650
|
kwargs["append"] = kwargs.get("append", False)
|
|
651
651
|
if self.expression.args.get("joins"):
|
|
652
|
-
ambiguous_cols =
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
652
|
+
ambiguous_cols: t.List[exp.Column] = list(
|
|
653
|
+
flatten(
|
|
654
|
+
[
|
|
655
|
+
sub_col
|
|
656
|
+
for col in columns
|
|
657
|
+
for sub_col in col.expression.find_all(exp.Column)
|
|
658
|
+
if not sub_col.table
|
|
659
|
+
]
|
|
660
|
+
)
|
|
661
|
+
)
|
|
657
662
|
if ambiguous_cols:
|
|
658
663
|
join_table_identifiers = [
|
|
659
664
|
x.this for x in get_tables_from_expression_with_join(self.expression)
|
|
@@ -662,13 +667,15 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
662
667
|
# If we have columns that resolve to multiple CTE expressions then we want to use each CTE left-to-right
|
|
663
668
|
# and therefore we allow multiple columns with the same name in the result. This matches the behavior
|
|
664
669
|
# of Spark.
|
|
665
|
-
resolved_column_position: t.Dict[Column, int] = {
|
|
670
|
+
resolved_column_position: t.Dict[exp.Column, int] = {
|
|
671
|
+
col.copy(): -1 for col in ambiguous_cols
|
|
672
|
+
}
|
|
666
673
|
for ambiguous_col in ambiguous_cols:
|
|
667
674
|
ctes_with_column = [
|
|
668
675
|
cte
|
|
669
676
|
for cte in self.expression.ctes
|
|
670
677
|
if cte.alias_or_name in cte_names_in_join
|
|
671
|
-
and ambiguous_col.
|
|
678
|
+
and ambiguous_col.alias_or_name in cte.this.named_selects
|
|
672
679
|
]
|
|
673
680
|
# Check if there is a CTE with this column that we haven't used before. If so, use it. Otherwise,
|
|
674
681
|
# use the same CTE we used before
|
|
@@ -677,9 +684,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
677
684
|
resolved_column_position[ambiguous_col] += 1
|
|
678
685
|
else:
|
|
679
686
|
cte = ctes_with_column[resolved_column_position[ambiguous_col]]
|
|
680
|
-
ambiguous_col.
|
|
681
|
-
"table", exp.to_identifier(cte.alias_or_name)
|
|
682
|
-
)
|
|
687
|
+
ambiguous_col.set("table", exp.to_identifier(cte.alias_or_name))
|
|
683
688
|
# If an expression is `CAST(x AS DATETYPE)` then we want to alias so that `x` is the result column name
|
|
684
689
|
columns = [
|
|
685
690
|
col.alias(col.expression.alias_or_name)
|
|
@@ -776,6 +781,8 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
776
781
|
how: str = "inner",
|
|
777
782
|
**kwargs,
|
|
778
783
|
) -> Self:
|
|
784
|
+
from sqlframe.base.functions import coalesce
|
|
785
|
+
|
|
779
786
|
if on is None:
|
|
780
787
|
logger.warning("Got no value for on. This appears change the join to a cross join.")
|
|
781
788
|
how = "cross"
|
|
@@ -835,7 +842,15 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
835
842
|
for left_column, right_column in join_column_pairs
|
|
836
843
|
],
|
|
837
844
|
)
|
|
838
|
-
join_column_names = [
|
|
845
|
+
join_column_names = [
|
|
846
|
+
coalesce(
|
|
847
|
+
left_col.sql(dialect=self.session.input_dialect),
|
|
848
|
+
right_col.sql(dialect=self.session.input_dialect),
|
|
849
|
+
).alias(left_col.alias_or_name)
|
|
850
|
+
if how == "full"
|
|
851
|
+
else left_col.alias_or_name
|
|
852
|
+
for left_col, right_col in join_column_pairs
|
|
853
|
+
]
|
|
839
854
|
# To match spark behavior only the join clause gets deduplicated and it gets put in the front of the column list
|
|
840
855
|
select_column_names = [
|
|
841
856
|
(
|
|
@@ -848,7 +863,10 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
848
863
|
select_column_names = [
|
|
849
864
|
column_name
|
|
850
865
|
for column_name in select_column_names
|
|
851
|
-
if column_name
|
|
866
|
+
if column_name
|
|
867
|
+
not in [
|
|
868
|
+
x.alias_or_name if not isinstance(x, str) else x for x in join_column_names
|
|
869
|
+
]
|
|
852
870
|
]
|
|
853
871
|
select_column_names = join_column_names + select_column_names
|
|
854
872
|
else:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sqlframe
|
|
3
|
-
Version: 1.6.1
|
|
3
|
+
Version: 1.6.3
|
|
4
4
|
Summary: Taking the Spark out of PySpark by converting to SQL
|
|
5
5
|
Home-page: https://github.com/eakmanrq/sqlframe
|
|
6
6
|
Author: Ryan Eakman
|
|
@@ -18,14 +18,14 @@ Requires-Python: >=3.8
|
|
|
18
18
|
Description-Content-Type: text/markdown
|
|
19
19
|
License-File: LICENSE
|
|
20
20
|
Requires-Dist: prettytable (<3.11.0)
|
|
21
|
-
Requires-Dist: sqlglot (<
|
|
21
|
+
Requires-Dist: sqlglot (<25.1,>=24.0.0)
|
|
22
22
|
Provides-Extra: bigquery
|
|
23
23
|
Requires-Dist: google-cloud-bigquery-storage (<3,>=2) ; extra == 'bigquery'
|
|
24
24
|
Requires-Dist: google-cloud-bigquery[pandas] (<4,>=3) ; extra == 'bigquery'
|
|
25
25
|
Provides-Extra: dev
|
|
26
26
|
Requires-Dist: duckdb (<1.1,>=0.9) ; extra == 'dev'
|
|
27
27
|
Requires-Dist: mypy (<1.11,>=1.10.0) ; extra == 'dev'
|
|
28
|
-
Requires-Dist: openai (<1.
|
|
28
|
+
Requires-Dist: openai (<1.33,>=1.30) ; extra == 'dev'
|
|
29
29
|
Requires-Dist: pandas-stubs (<3,>=2) ; extra == 'dev'
|
|
30
30
|
Requires-Dist: pandas (<3,>=2) ; extra == 'dev'
|
|
31
31
|
Requires-Dist: psycopg (<4,>=3.1) ; extra == 'dev'
|
|
@@ -49,7 +49,7 @@ Provides-Extra: duckdb
|
|
|
49
49
|
Requires-Dist: duckdb (<1.1,>=0.9) ; extra == 'duckdb'
|
|
50
50
|
Requires-Dist: pandas (<3,>=2) ; extra == 'duckdb'
|
|
51
51
|
Provides-Extra: openai
|
|
52
|
-
Requires-Dist: openai (<1.
|
|
52
|
+
Requires-Dist: openai (<1.33,>=1.30) ; extra == 'openai'
|
|
53
53
|
Provides-Extra: pandas
|
|
54
54
|
Requires-Dist: pandas (<3,>=2) ; extra == 'pandas'
|
|
55
55
|
Provides-Extra: postgres
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
sqlframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
sqlframe/_version.py,sha256=
|
|
2
|
+
sqlframe/_version.py,sha256=mQ_8947spH9F9E4bJgRMJ3LZK_sGORi1ak9UVDzTrr8,411
|
|
3
3
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
sqlframe/base/_typing.py,sha256=DuTay8-o9W-pw3RPZCgLunKNJLS9PkaV11G_pxXp9NY,1256
|
|
5
5
|
sqlframe/base/catalog.py,sha256=ATDGirouUjal05P4ymL-wIi8rgjg_8w4PoACamiO64A,37245
|
|
6
6
|
sqlframe/base/column.py,sha256=0WgIRBfF8Fkbx_OtiaUB1-BsX3qCd4W5IL534Q2BkCA,16137
|
|
7
|
-
sqlframe/base/dataframe.py,sha256=
|
|
7
|
+
sqlframe/base/dataframe.py,sha256=uL4neDTMy1a9XJH46YLQryzdDci4iDxNXBtiJOzfHfs,67718
|
|
8
8
|
sqlframe/base/decorators.py,sha256=I5osMgx9BuCgbtp4jVM2DNwYJVLzCv-OtTedhQEik0g,1882
|
|
9
9
|
sqlframe/base/exceptions.py,sha256=pCB9hXX4jxZWzNg3JN1i38cv3BmpUlee5NoLYx3YXIQ,208
|
|
10
10
|
sqlframe/base/function_alternatives.py,sha256=NDXs2igY7PBsStzTSRZvJcCshBOJkPQl2GbhpVFU6To,42931
|
|
@@ -96,8 +96,8 @@ sqlframe/standalone/readwriter.py,sha256=EZNyDJ4ID6sGNog3uP4-e9RvchX4biJJDNtc5hk
|
|
|
96
96
|
sqlframe/standalone/session.py,sha256=wQmdu2sv6KMTAv0LRFk7TY7yzlh3xvmsyqilEtRecbY,1191
|
|
97
97
|
sqlframe/standalone/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
|
|
98
98
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
|
99
|
-
sqlframe-1.6.
|
|
100
|
-
sqlframe-1.6.
|
|
101
|
-
sqlframe-1.6.
|
|
102
|
-
sqlframe-1.6.
|
|
103
|
-
sqlframe-1.6.
|
|
99
|
+
sqlframe-1.6.3.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
|
100
|
+
sqlframe-1.6.3.dist-info/METADATA,sha256=edYCJbKjAmY0rrInh7BRmOhNHFr36bSsXtaPsD9AzFI,7330
|
|
101
|
+
sqlframe-1.6.3.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
|
|
102
|
+
sqlframe-1.6.3.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
|
103
|
+
sqlframe-1.6.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|