sqlframe 3.14.0__py3-none-any.whl → 3.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.14.0'
16
- __version_tuple__ = version_tuple = (3, 14, 0)
15
+ __version__ = version = '3.14.1'
16
+ __version_tuple__ = version_tuple = (3, 14, 1)
@@ -79,6 +79,23 @@ JOIN_HINTS = {
79
79
  "SHUFFLE_REPLICATE_NL",
80
80
  }
81
81
 
82
+ JOIN_TYPE_MAPPING = {
83
+ "inner": "inner",
84
+ "cross": "cross",
85
+ "outer": "full_outer",
86
+ "full": "full_outer",
87
+ "fullouter": "full_outer",
88
+ "left": "left_outer",
89
+ "leftouter": "left_outer",
90
+ "right": "right_outer",
91
+ "rightouter": "right_outer",
92
+ "semi": "left_semi",
93
+ "leftsemi": "left_semi",
94
+ "left_semi": "left_semi",
95
+ "anti": "left_anti",
96
+ "leftanti": "left_anti",
97
+ "left_anti": "left_anti",
98
+ }
82
99
 
83
100
  DF = t.TypeVar("DF", bound="BaseDataFrame")
84
101
 
@@ -944,16 +961,20 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
944
961
  ) -> Self:
945
962
  from sqlframe.base.functions import coalesce
946
963
 
947
- if on is None:
964
+ if (on is None) and ("cross" not in how):
948
965
  logger.warning("Got no value for on. This appears to change the join to a cross join.")
949
966
  how = "cross"
967
+ if (on is not None) and ("cross" in how):
968
+ # Not a lot of doc, but Spark handles cross with predicate as an inner join
969
+ # https://learn.microsoft.com/en-us/dotnet/api/microsoft.spark.sql.dataframe.join
970
+ logger.warning("Got cross join with an 'on' value. This will result in an inner join.")
971
+ how = "inner"
950
972
 
951
973
  other_df = other_df._convert_leaf_to_cte()
952
974
  join_expression = self._add_ctes_to_expression(self.expression, other_df.expression.ctes)
953
975
  # We will determine actual "join on" expression later so we don't provide it at first
954
- join_expression = join_expression.join(
955
- join_expression.ctes[-1].alias, join_type=how.replace("_", " ")
956
- )
976
+ join_type = JOIN_TYPE_MAPPING.get(how, how).replace("_", " ")
977
+ join_expression = join_expression.join(join_expression.ctes[-1].alias, join_type=join_type)
957
978
  self_columns = self._get_outer_select_columns(join_expression)
958
979
  other_columns = self._get_outer_select_columns(other_df.expression)
959
980
  join_columns = self._ensure_and_normalize_cols(on)
@@ -961,7 +982,12 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
961
982
 
962
983
  # Determines the join clause and select columns to be used passed on what type of columns were provided for
963
984
  # the join. The columns returned changes based on how the on expression is provided.
964
- if how != "cross":
985
+ select_columns = (
986
+ self_columns
987
+ if join_type in ["left anti", "left semi"]
988
+ else self_columns + other_columns
989
+ )
990
+ if join_type != "cross":
965
991
  if isinstance(join_columns[0].expression, exp.Column):
966
992
  """
967
993
  Unique characteristics of join on column names only:
@@ -992,7 +1018,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
992
1018
  if not isinstance(column.expression.this, exp.Star)
993
1019
  else column.sql()
994
1020
  )
995
- for column in self_columns + other_columns
1021
+ for column in select_columns
996
1022
  ]
997
1023
  select_column_names = [
998
1024
  column_name
@@ -1010,13 +1036,11 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1010
1036
  * The left join dataframe columns go first and right come after. No sort preference is given to join columns
1011
1037
  """
1012
1038
  join_clause = self._normalize_join_clause(join_columns, join_expression)
1013
- select_column_names = [
1014
- column.alias_or_name for column in self_columns + other_columns
1015
- ]
1039
+ select_column_names = [column.alias_or_name for column in select_columns]
1016
1040
 
1017
1041
  # Update the on expression with the actual join clause to replace the dummy one from before
1018
1042
  else:
1019
- select_column_names = [column.alias_or_name for column in self_columns + other_columns]
1043
+ select_column_names = [column.alias_or_name for column in select_columns]
1020
1044
  join_clause = None
1021
1045
  join_expression.args["joins"][-1].set("on", join_clause.expression if join_clause else None)
1022
1046
  new_df = self.copy(expression=join_expression)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.14.0
3
+ Version: 3.14.1
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -17,13 +17,13 @@ Requires-Python: >=3.9
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
19
  Requires-Dist: prettytable <4
20
- Requires-Dist: sqlglot <26.3,>=24.0.0
20
+ Requires-Dist: sqlglot <26.4,>=24.0.0
21
21
  Requires-Dist: typing-extensions
22
22
  Provides-Extra: bigquery
23
23
  Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
24
24
  Requires-Dist: google-cloud-bigquery[pandas] <4,>=3 ; extra == 'bigquery'
25
25
  Provides-Extra: databricks
26
- Requires-Dist: databricks-sql-connector <4,>=3.6 ; extra == 'databricks'
26
+ Requires-Dist: databricks-sql-connector <5,>=3.6 ; extra == 'databricks'
27
27
  Provides-Extra: dev
28
28
  Requires-Dist: duckdb <1.2,>=0.9 ; extra == 'dev'
29
29
  Requires-Dist: findspark <3,>=2 ; extra == 'dev'
@@ -59,7 +59,7 @@ Requires-Dist: psycopg2 <3,>=2.8 ; extra == 'postgres'
59
59
  Provides-Extra: redshift
60
60
  Requires-Dist: redshift-connector <2.2.0,>=2.1.1 ; extra == 'redshift'
61
61
  Provides-Extra: snowflake
62
- Requires-Dist: snowflake-connector-python[secure-local-storage] <3.13,>=3.10.0 ; extra == 'snowflake'
62
+ Requires-Dist: snowflake-connector-python[secure-local-storage] <3.14,>=3.10.0 ; extra == 'snowflake'
63
63
  Provides-Extra: spark
64
64
  Requires-Dist: pyspark <3.6,>=2 ; extra == 'spark'
65
65
 
@@ -1,10 +1,10 @@
1
1
  sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
2
- sqlframe/_version.py,sha256=Ipjekae6alpGZC2b94mJAE2S2ZyJybTBe3oNCWsIFS4,413
2
+ sqlframe/_version.py,sha256=TK0msIUEaLkvUHhRM_fUlCaK-1zpNU289vecbm9mQbk,413
3
3
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
5
5
  sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
6
6
  sqlframe/base/column.py,sha256=wRghgieYAA51aw4WuFQWOvl0TFOToZbBhBuIamEzxx4,18011
7
- sqlframe/base/dataframe.py,sha256=DuvAT_xBqhiOVZgyYCXL5J01ahHEPp_qvx_62uHqbu4,75768
7
+ sqlframe/base/dataframe.py,sha256=7PMHXTaYDpXXLUE4wLaEUS9mCQnuspZI3MAIGO08pzk,76716
8
8
  sqlframe/base/decorators.py,sha256=P56cgs8DANxGRIwVs5uOMnDy-BlXZZYMbf4fdnkpWPI,1889
9
9
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
10
10
  sqlframe/base/function_alternatives.py,sha256=8kDCh1cOXtdCcBPYBQ8byXxRAZvphS9N8GDs4txBzGg,52544
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
129
129
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
130
130
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
131
131
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
132
- sqlframe-3.14.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
133
- sqlframe-3.14.0.dist-info/METADATA,sha256=Gvp37AedPVOp_1Rh4qf5B4s8fkReVysUqCySQesSl6s,8970
134
- sqlframe-3.14.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
135
- sqlframe-3.14.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
136
- sqlframe-3.14.0.dist-info/RECORD,,
132
+ sqlframe-3.14.1.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
133
+ sqlframe-3.14.1.dist-info/METADATA,sha256=35g-WMlgVgbqNTvaWacRcdKRlCbprzZj390VrP0___w,8970
134
+ sqlframe-3.14.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
135
+ sqlframe-3.14.1.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
136
+ sqlframe-3.14.1.dist-info/RECORD,,