sqlframe 3.14.0__py3-none-any.whl → 3.14.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.14.0'
16
- __version_tuple__ = version_tuple = (3, 14, 0)
15
+ __version__ = version = '3.14.2'
16
+ __version_tuple__ = version_tuple = (3, 14, 2)
@@ -9,6 +9,7 @@ import json
9
9
  import logging
10
10
  import sys
11
11
  import typing as t
12
+ import uuid
12
13
  import zlib
13
14
  from copy import copy
14
15
  from dataclasses import dataclass
@@ -79,6 +80,23 @@ JOIN_HINTS = {
79
80
  "SHUFFLE_REPLICATE_NL",
80
81
  }
81
82
 
83
+ JOIN_TYPE_MAPPING = {
84
+ "inner": "inner",
85
+ "cross": "cross",
86
+ "outer": "full_outer",
87
+ "full": "full_outer",
88
+ "fullouter": "full_outer",
89
+ "left": "left_outer",
90
+ "leftouter": "left_outer",
91
+ "right": "right_outer",
92
+ "rightouter": "right_outer",
93
+ "semi": "left_semi",
94
+ "leftsemi": "left_semi",
95
+ "left_semi": "left_semi",
96
+ "anti": "left_anti",
97
+ "leftanti": "left_anti",
98
+ "left_anti": "left_anti",
99
+ }
82
100
 
83
101
  DF = t.TypeVar("DF", bound="BaseDataFrame")
84
102
 
@@ -474,22 +492,20 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
474
492
  with_expression = expression.args.get("with")
475
493
  if with_expression:
476
494
  existing_ctes = with_expression.expressions
477
- existing_cte_counts = {x.alias_or_name: 0 for x in existing_ctes}
495
+ existing_cte_names = {x.alias_or_name for x in existing_ctes}
478
496
  replaced_cte_names = {} # type: ignore
479
497
  for cte in ctes:
480
498
  if replaced_cte_names:
481
499
  cte = cte.transform(replace_id_value, replaced_cte_names) # type: ignore
482
- if cte.alias_or_name in existing_cte_counts:
483
- existing_cte_counts[cte.alias_or_name] += 10
500
+ if cte.alias_or_name in existing_cte_names:
501
+ random_filter = exp.Literal.string(uuid.uuid4().hex)
484
502
  # Add unique where filter to ensure that the hash of the CTE is unique
485
503
  cte.set(
486
504
  "this",
487
505
  cte.this.where(
488
506
  exp.EQ(
489
- this=exp.Literal.number(existing_cte_counts[cte.alias_or_name]),
490
- expression=exp.Literal.number(
491
- existing_cte_counts[cte.alias_or_name]
492
- ),
507
+ this=random_filter,
508
+ expression=random_filter,
493
509
  )
494
510
  ),
495
511
  )
@@ -503,7 +519,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
503
519
  new_cte_alias, dialect=self.session.input_dialect, into=exp.TableAlias
504
520
  ),
505
521
  )
506
- existing_cte_counts[new_cte_alias] = 0
522
+ existing_cte_names.add(new_cte_alias)
507
523
  existing_ctes.append(cte)
508
524
  else:
509
525
  existing_ctes = ctes
@@ -944,16 +960,20 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
944
960
  ) -> Self:
945
961
  from sqlframe.base.functions import coalesce
946
962
 
947
- if on is None:
963
+ if (on is None) and ("cross" not in how):
948
964
  logger.warning("Got no value for on. This appears to change the join to a cross join.")
949
965
  how = "cross"
966
+ if (on is not None) and ("cross" in how):
967
+ # Not a lot of doc, but Spark handles cross with predicate as an inner join
968
+ # https://learn.microsoft.com/en-us/dotnet/api/microsoft.spark.sql.dataframe.join
969
+ logger.warning("Got cross join with an 'on' value. This will result in an inner join.")
970
+ how = "inner"
950
971
 
951
972
  other_df = other_df._convert_leaf_to_cte()
952
973
  join_expression = self._add_ctes_to_expression(self.expression, other_df.expression.ctes)
953
974
  # We will determine actual "join on" expression later so we don't provide it at first
954
- join_expression = join_expression.join(
955
- join_expression.ctes[-1].alias, join_type=how.replace("_", " ")
956
- )
975
+ join_type = JOIN_TYPE_MAPPING.get(how, how).replace("_", " ")
976
+ join_expression = join_expression.join(join_expression.ctes[-1].alias, join_type=join_type)
957
977
  self_columns = self._get_outer_select_columns(join_expression)
958
978
  other_columns = self._get_outer_select_columns(other_df.expression)
959
979
  join_columns = self._ensure_and_normalize_cols(on)
@@ -961,7 +981,12 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
961
981
 
962
982
  # Determines the join clause and select columns to be used passed on what type of columns were provided for
963
983
  # the join. The columns returned changes based on how the on expression is provided.
964
- if how != "cross":
984
+ select_columns = (
985
+ self_columns
986
+ if join_type in ["left anti", "left semi"]
987
+ else self_columns + other_columns
988
+ )
989
+ if join_type != "cross":
965
990
  if isinstance(join_columns[0].expression, exp.Column):
966
991
  """
967
992
  Unique characteristics of join on column names only:
@@ -992,7 +1017,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
992
1017
  if not isinstance(column.expression.this, exp.Star)
993
1018
  else column.sql()
994
1019
  )
995
- for column in self_columns + other_columns
1020
+ for column in select_columns
996
1021
  ]
997
1022
  select_column_names = [
998
1023
  column_name
@@ -1010,13 +1035,11 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
1010
1035
  * The left join dataframe columns go first and right come after. No sort preference is given to join columns
1011
1036
  """
1012
1037
  join_clause = self._normalize_join_clause(join_columns, join_expression)
1013
- select_column_names = [
1014
- column.alias_or_name for column in self_columns + other_columns
1015
- ]
1038
+ select_column_names = [column.alias_or_name for column in select_columns]
1016
1039
 
1017
1040
  # Update the on expression with the actual join clause to replace the dummy one from before
1018
1041
  else:
1019
- select_column_names = [column.alias_or_name for column in self_columns + other_columns]
1042
+ select_column_names = [column.alias_or_name for column in select_columns]
1020
1043
  join_clause = None
1021
1044
  join_expression.args["joins"][-1].set("on", join_clause.expression if join_clause else None)
1022
1045
  new_df = self.copy(expression=join_expression)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.14.0
3
+ Version: 3.14.2
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -17,13 +17,13 @@ Requires-Python: >=3.9
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
19
  Requires-Dist: prettytable <4
20
- Requires-Dist: sqlglot <26.3,>=24.0.0
20
+ Requires-Dist: sqlglot <26.4,>=24.0.0
21
21
  Requires-Dist: typing-extensions
22
22
  Provides-Extra: bigquery
23
23
  Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
24
24
  Requires-Dist: google-cloud-bigquery[pandas] <4,>=3 ; extra == 'bigquery'
25
25
  Provides-Extra: databricks
26
- Requires-Dist: databricks-sql-connector <4,>=3.6 ; extra == 'databricks'
26
+ Requires-Dist: databricks-sql-connector <5,>=3.6 ; extra == 'databricks'
27
27
  Provides-Extra: dev
28
28
  Requires-Dist: duckdb <1.2,>=0.9 ; extra == 'dev'
29
29
  Requires-Dist: findspark <3,>=2 ; extra == 'dev'
@@ -59,7 +59,7 @@ Requires-Dist: psycopg2 <3,>=2.8 ; extra == 'postgres'
59
59
  Provides-Extra: redshift
60
60
  Requires-Dist: redshift-connector <2.2.0,>=2.1.1 ; extra == 'redshift'
61
61
  Provides-Extra: snowflake
62
- Requires-Dist: snowflake-connector-python[secure-local-storage] <3.13,>=3.10.0 ; extra == 'snowflake'
62
+ Requires-Dist: snowflake-connector-python[secure-local-storage] <3.14,>=3.10.0 ; extra == 'snowflake'
63
63
  Provides-Extra: spark
64
64
  Requires-Dist: pyspark <3.6,>=2 ; extra == 'spark'
65
65
 
@@ -1,10 +1,10 @@
1
1
  sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
2
- sqlframe/_version.py,sha256=Ipjekae6alpGZC2b94mJAE2S2ZyJybTBe3oNCWsIFS4,413
2
+ sqlframe/_version.py,sha256=jOmVUgfrjHuKbVMclbnyeOg5hq5CSFD43rU-r3QVgI8,413
3
3
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
5
5
  sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
6
6
  sqlframe/base/column.py,sha256=wRghgieYAA51aw4WuFQWOvl0TFOToZbBhBuIamEzxx4,18011
7
- sqlframe/base/dataframe.py,sha256=DuvAT_xBqhiOVZgyYCXL5J01ahHEPp_qvx_62uHqbu4,75768
7
+ sqlframe/base/dataframe.py,sha256=E1zWlB_a2FNOxjTcQ68MtL_A4c8fnLiHY3MeZttK4Xk,76570
8
8
  sqlframe/base/decorators.py,sha256=P56cgs8DANxGRIwVs5uOMnDy-BlXZZYMbf4fdnkpWPI,1889
9
9
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
10
10
  sqlframe/base/function_alternatives.py,sha256=8kDCh1cOXtdCcBPYBQ8byXxRAZvphS9N8GDs4txBzGg,52544
@@ -129,8 +129,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
129
129
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
130
130
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
131
131
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
132
- sqlframe-3.14.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
133
- sqlframe-3.14.0.dist-info/METADATA,sha256=Gvp37AedPVOp_1Rh4qf5B4s8fkReVysUqCySQesSl6s,8970
134
- sqlframe-3.14.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
135
- sqlframe-3.14.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
136
- sqlframe-3.14.0.dist-info/RECORD,,
132
+ sqlframe-3.14.2.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
133
+ sqlframe-3.14.2.dist-info/METADATA,sha256=jaarma0pQSOhwGo8XtkdteTdJadSB4CIiVrjLLQovu0,8970
134
+ sqlframe-3.14.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
135
+ sqlframe-3.14.2.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
136
+ sqlframe-3.14.2.dist-info/RECORD,,