sqlframe 3.9.0__py3-none-any.whl → 3.9.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.9.0'
16
- __version_tuple__ = version_tuple = (3, 9, 0)
15
+ __version__ = version = '3.9.2'
16
+ __version_tuple__ = version_tuple = (3, 9, 2)
@@ -12,6 +12,7 @@ import typing as t
12
12
  import zlib
13
13
  from copy import copy
14
14
  from dataclasses import dataclass
15
+ from uuid import uuid4
15
16
 
16
17
  import sqlglot
17
18
  from prettytable import PrettyTable
@@ -208,6 +209,8 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
208
209
  expression: exp.Select,
209
210
  branch_id: t.Optional[str] = None,
210
211
  sequence_id: t.Optional[str] = None,
212
+ join_on_uuid: t.Optional[str] = None,
213
+ known_uuids: t.Optional[t.Set[str]] = None,
211
214
  last_op: Operation = Operation.INIT,
212
215
  pending_hints: t.Optional[t.List[exp.Expression]] = None,
213
216
  output_expression_container: t.Optional[OutputExpressionContainer] = None,
@@ -217,6 +220,9 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
217
220
  self.expression: exp.Select = expression
218
221
  self.branch_id = branch_id or self.session._random_branch_id
219
222
  self.sequence_id = sequence_id or self.session._random_sequence_id
223
+ self.join_on_uuid = join_on_uuid or str(uuid4())
224
+ self.known_uuids = known_uuids or set()
225
+ self.known_uuids.add(self.join_on_uuid)
220
226
  self.last_op = last_op
221
227
  self.pending_hints = pending_hints or []
222
228
  self.output_expression_container = output_expression_container or exp.Select()
@@ -228,10 +234,12 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
228
234
  def __getitem__(self, column_name: str) -> Column:
229
235
  from sqlframe.base.util import get_func_from_session
230
236
 
231
- col = get_func_from_session("col", self.session)
237
+ col_func = get_func_from_session("col", self.session)
232
238
 
233
239
  column_name = f"{self.branch_id}.{column_name}"
234
- return col(column_name)
240
+ col = col_func(column_name)
241
+ col.expression.meta["join_on_uuid"] = self.join_on_uuid
242
+ return col
235
243
 
236
244
  def __copy__(self):
237
245
  return self.copy()
@@ -715,6 +723,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
715
723
  return ";\n".join(results)
716
724
 
717
725
  def copy(self, **kwargs) -> Self:
726
+ kwargs["join_on_uuid"] = str(uuid4())
718
727
  return self.__class__(**object_to_dict(self, **kwargs))
719
728
 
720
729
  @operation(Operation.SELECT)
@@ -866,6 +875,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
866
875
  if on is None:
867
876
  logger.warning("Got no value for on. This appears to change the join to a cross join.")
868
877
  how = "cross"
878
+
869
879
  other_df = other_df._convert_leaf_to_cte()
870
880
  join_expression = self._add_ctes_to_expression(self.expression, other_df.expression.ctes)
871
881
  # We will determine actual "join on" expression later so we don't provide it at first
@@ -875,6 +885,21 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
875
885
  self_columns = self._get_outer_select_columns(join_expression)
876
886
  other_columns = self._get_outer_select_columns(other_df.expression)
877
887
  join_columns = self._ensure_and_normalize_cols(on)
888
+ # If the two dataframes being joined come from the same branch, we then check if they have any columns that
889
+ # were created using the "branch_id" (df["column_name"]). If so, we know that we need to differentiate
890
+ # the two columns since they would end up with the same table name. We do this by checking for the unique
891
+ # uuids in the other df and finding columns that have metadata on them that match the uuids. If so, we know
892
+ # it comes from the other df and we change the table name to the other df's table name.
893
+ # See `test_self_join` for an example of this.
894
+ if self.branch_id == other_df.branch_id:
895
+ other_df_unique_uuids = other_df.known_uuids - self.known_uuids
896
+ for col in join_columns:
897
+ for col_expr in col.expression.find_all(exp.Column):
898
+ if (
899
+ "join_on_uuid" in col_expr.meta
900
+ and col_expr.meta["join_on_uuid"] in other_df_unique_uuids
901
+ ):
902
+ col_expr.set("table", exp.to_identifier(other_df.latest_cte_name))
878
903
  # Determines the join clause and select columns to be used based on what type of columns were provided for
879
904
  # the join. The columns returned changes based on how the on expression is provided.
880
905
  if how != "cross":
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlframe
3
- Version: 3.9.0
3
+ Version: 3.9.2
4
4
  Summary: Turning PySpark Into a Universal DataFrame API
5
5
  Home-page: https://github.com/eakmanrq/sqlframe
6
6
  Author: Ryan Eakman
@@ -1,10 +1,10 @@
1
1
  sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
2
- sqlframe/_version.py,sha256=nlCEABnIq3wuDiPbHxDLhorQ-m5w3H6kBSUgZhHE6gc,411
2
+ sqlframe/_version.py,sha256=QJm9ayY7R0okky0MsvD6a8gDu2IP6eTPk4n5rk7LAGs,411
3
3
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
5
5
  sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
6
6
  sqlframe/base/column.py,sha256=06fhVZ2nCn2QLxnfjdK-oYKeTFJC_smgSxu7u2UYlVg,17878
7
- sqlframe/base/dataframe.py,sha256=DtSeTMNdvfF7ItAIIOoZQlsW4J-GZKmmx3-pz7T9e90,72924
7
+ sqlframe/base/dataframe.py,sha256=ICW9eJElRsVIRutuu2aVJmP9k1n4oi6MfcLR0IrsBIs,74454
8
8
  sqlframe/base/decorators.py,sha256=Jy4bf8MhZ-AJ6CWTj59bBJRqamtLbPC0USUMFrY6g0w,449
9
9
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
10
10
  sqlframe/base/function_alternatives.py,sha256=jofb2-nweefqcjUsd4xVqfRmJSZ-T_0Iq5roW2pL0OA,50768
@@ -119,8 +119,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
119
119
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
120
120
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
121
121
  sqlframe/testing/utils.py,sha256=9DDYVuocO7tygee3RaajuJNZ24sJwf_LY556kKg7kTw,13011
122
- sqlframe-3.9.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
123
- sqlframe-3.9.0.dist-info/METADATA,sha256=AKqgRmEJB00qxx_FXfzKHeFS4346nx_W3i6jP62o7mo,9142
124
- sqlframe-3.9.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
125
- sqlframe-3.9.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
126
- sqlframe-3.9.0.dist-info/RECORD,,
122
+ sqlframe-3.9.2.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
123
+ sqlframe-3.9.2.dist-info/METADATA,sha256=5j2ptOPa6jrnQxNPvl9qxXqIeloc-tT4AxY32cf9CRc,9142
124
+ sqlframe-3.9.2.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
125
+ sqlframe-3.9.2.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
126
+ sqlframe-3.9.2.dist-info/RECORD,,