sqlframe 3.9.1__py3-none-any.whl → 3.9.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/dataframe.py +24 -7
- {sqlframe-3.9.1.dist-info → sqlframe-3.9.2.dist-info}/METADATA +1 -1
- {sqlframe-3.9.1.dist-info → sqlframe-3.9.2.dist-info}/RECORD +7 -7
- {sqlframe-3.9.1.dist-info → sqlframe-3.9.2.dist-info}/LICENSE +0 -0
- {sqlframe-3.9.1.dist-info → sqlframe-3.9.2.dist-info}/WHEEL +0 -0
- {sqlframe-3.9.1.dist-info → sqlframe-3.9.2.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/dataframe.py
CHANGED
|
@@ -12,6 +12,7 @@ import typing as t
|
|
|
12
12
|
import zlib
|
|
13
13
|
from copy import copy
|
|
14
14
|
from dataclasses import dataclass
|
|
15
|
+
from uuid import uuid4
|
|
15
16
|
|
|
16
17
|
import sqlglot
|
|
17
18
|
from prettytable import PrettyTable
|
|
@@ -208,6 +209,8 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
208
209
|
expression: exp.Select,
|
|
209
210
|
branch_id: t.Optional[str] = None,
|
|
210
211
|
sequence_id: t.Optional[str] = None,
|
|
212
|
+
join_on_uuid: t.Optional[str] = None,
|
|
213
|
+
known_uuids: t.Optional[t.Set[str]] = None,
|
|
211
214
|
last_op: Operation = Operation.INIT,
|
|
212
215
|
pending_hints: t.Optional[t.List[exp.Expression]] = None,
|
|
213
216
|
output_expression_container: t.Optional[OutputExpressionContainer] = None,
|
|
@@ -217,6 +220,9 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
217
220
|
self.expression: exp.Select = expression
|
|
218
221
|
self.branch_id = branch_id or self.session._random_branch_id
|
|
219
222
|
self.sequence_id = sequence_id or self.session._random_sequence_id
|
|
223
|
+
self.join_on_uuid = join_on_uuid or str(uuid4())
|
|
224
|
+
self.known_uuids = known_uuids or set()
|
|
225
|
+
self.known_uuids.add(self.join_on_uuid)
|
|
220
226
|
self.last_op = last_op
|
|
221
227
|
self.pending_hints = pending_hints or []
|
|
222
228
|
self.output_expression_container = output_expression_container or exp.Select()
|
|
@@ -228,10 +234,12 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
228
234
|
def __getitem__(self, column_name: str) -> Column:
|
|
229
235
|
from sqlframe.base.util import get_func_from_session
|
|
230
236
|
|
|
231
|
-
|
|
237
|
+
col_func = get_func_from_session("col", self.session)
|
|
232
238
|
|
|
233
239
|
column_name = f"{self.branch_id}.{column_name}"
|
|
234
|
-
|
|
240
|
+
col = col_func(column_name)
|
|
241
|
+
col.expression.meta["join_on_uuid"] = self.join_on_uuid
|
|
242
|
+
return col
|
|
235
243
|
|
|
236
244
|
def __copy__(self):
|
|
237
245
|
return self.copy()
|
|
@@ -715,6 +723,7 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
715
723
|
return ";\n".join(results)
|
|
716
724
|
|
|
717
725
|
def copy(self, **kwargs) -> Self:
|
|
726
|
+
kwargs["join_on_uuid"] = str(uuid4())
|
|
718
727
|
return self.__class__(**object_to_dict(self, **kwargs))
|
|
719
728
|
|
|
720
729
|
@operation(Operation.SELECT)
|
|
@@ -876,13 +885,21 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
|
876
885
|
self_columns = self._get_outer_select_columns(join_expression)
|
|
877
886
|
other_columns = self._get_outer_select_columns(other_df.expression)
|
|
878
887
|
join_columns = self._ensure_and_normalize_cols(on)
|
|
879
|
-
# If the two dataframes being joined come from the same branch
|
|
880
|
-
#
|
|
888
|
+
# If the two dataframes being joined come from the same branch, we then check if they have any columns that
|
|
889
|
+
# were created using the "branch_id" (df["column_name"]). If so, we know that we need to differentiate
|
|
890
|
+
# the two columns since they would end up with the same table name. We do this by checking for the unique
|
|
891
|
+
# uuids in the other df and finding columns that have metadata on them that match the uuids. If so, we know
|
|
892
|
+
# it comes from the other df and we change the table name to the other df's table name.
|
|
893
|
+
# See `test_self_join` for an example of this.
|
|
881
894
|
if self.branch_id == other_df.branch_id:
|
|
895
|
+
other_df_unique_uuids = other_df.known_uuids - self.known_uuids
|
|
882
896
|
for col in join_columns:
|
|
883
|
-
for
|
|
884
|
-
if
|
|
885
|
-
|
|
897
|
+
for col_expr in col.expression.find_all(exp.Column):
|
|
898
|
+
if (
|
|
899
|
+
"join_on_uuid" in col_expr.meta
|
|
900
|
+
and col_expr.meta["join_on_uuid"] in other_df_unique_uuids
|
|
901
|
+
):
|
|
902
|
+
col_expr.set("table", exp.to_identifier(other_df.latest_cte_name))
|
|
886
903
|
# Determines the join clause and select columns to be used based on what type of columns were provided for
|
|
887
904
|
# the join. The columns returned changes based on how the on expression is provided.
|
|
888
905
|
if how != "cross":
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
sqlframe/__init__.py,sha256=wfqm98eLoLid9oV_FzzpG5loKC6LxOhj2lXpfN7SARo,3138
|
|
2
|
-
sqlframe/_version.py,sha256=
|
|
2
|
+
sqlframe/_version.py,sha256=QJm9ayY7R0okky0MsvD6a8gDu2IP6eTPk4n5rk7LAGs,411
|
|
3
3
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
|
5
5
|
sqlframe/base/catalog.py,sha256=SzFQalTWdhWzxUY-4ut1f9TfOECp_JmJEgNPfrRKCe0,38457
|
|
6
6
|
sqlframe/base/column.py,sha256=06fhVZ2nCn2QLxnfjdK-oYKeTFJC_smgSxu7u2UYlVg,17878
|
|
7
|
-
sqlframe/base/dataframe.py,sha256=
|
|
7
|
+
sqlframe/base/dataframe.py,sha256=ICW9eJElRsVIRutuu2aVJmP9k1n4oi6MfcLR0IrsBIs,74454
|
|
8
8
|
sqlframe/base/decorators.py,sha256=Jy4bf8MhZ-AJ6CWTj59bBJRqamtLbPC0USUMFrY6g0w,449
|
|
9
9
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
|
10
10
|
sqlframe/base/function_alternatives.py,sha256=jofb2-nweefqcjUsd4xVqfRmJSZ-T_0Iq5roW2pL0OA,50768
|
|
@@ -119,8 +119,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
|
119
119
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
|
120
120
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
|
121
121
|
sqlframe/testing/utils.py,sha256=9DDYVuocO7tygee3RaajuJNZ24sJwf_LY556kKg7kTw,13011
|
|
122
|
-
sqlframe-3.9.
|
|
123
|
-
sqlframe-3.9.
|
|
124
|
-
sqlframe-3.9.
|
|
125
|
-
sqlframe-3.9.
|
|
126
|
-
sqlframe-3.9.
|
|
122
|
+
sqlframe-3.9.2.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
|
123
|
+
sqlframe-3.9.2.dist-info/METADATA,sha256=5j2ptOPa6jrnQxNPvl9qxXqIeloc-tT4AxY32cf9CRc,9142
|
|
124
|
+
sqlframe-3.9.2.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
|
|
125
|
+
sqlframe-3.9.2.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
|
126
|
+
sqlframe-3.9.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|