sqlframe 3.39.4__py3-none-any.whl → 3.40.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +3 -3
- sqlframe/base/dataframe.py +22 -5
- sqlframe/base/functions.py +20 -22
- sqlframe/base/mixins/table_mixins.py +6 -2
- sqlframe/base/normalize.py +197 -8
- {sqlframe-3.39.4.dist-info → sqlframe-3.40.1.dist-info}/METADATA +2 -2
- {sqlframe-3.39.4.dist-info → sqlframe-3.40.1.dist-info}/RECORD +10 -10
- {sqlframe-3.39.4.dist-info → sqlframe-3.40.1.dist-info}/LICENSE +0 -0
- {sqlframe-3.39.4.dist-info → sqlframe-3.40.1.dist-info}/WHEEL +0 -0
- {sqlframe-3.39.4.dist-info → sqlframe-3.40.1.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
28
28
|
commit_id: COMMIT_ID
|
29
29
|
__commit_id__: COMMIT_ID
|
30
30
|
|
31
|
-
__version__ = version = '3.
|
32
|
-
__version_tuple__ = version_tuple = (3,
|
31
|
+
__version__ = version = '3.40.1'
|
32
|
+
__version_tuple__ = version_tuple = (3, 40, 1)
|
33
33
|
|
34
|
-
__commit_id__ = commit_id = '
|
34
|
+
__commit_id__ = commit_id = 'g032251aae'
|
sqlframe/base/dataframe.py
CHANGED
@@ -396,12 +396,21 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
396
396
|
return Column.ensure_cols(ensure_list(cols)) # type: ignore
|
397
397
|
|
398
398
|
def _ensure_and_normalize_cols(
|
399
|
-
self,
|
399
|
+
self,
|
400
|
+
cols,
|
401
|
+
expression: t.Optional[exp.Select] = None,
|
402
|
+
skip_star_expansion: bool = False,
|
403
|
+
remove_identifier_if_possible: bool = True,
|
400
404
|
) -> t.List[Column]:
|
401
405
|
from sqlframe.base.normalize import normalize
|
402
406
|
|
403
407
|
cols = self._ensure_list_of_columns(cols)
|
404
|
-
normalize(
|
408
|
+
normalize(
|
409
|
+
self.session,
|
410
|
+
expression or self.expression,
|
411
|
+
cols,
|
412
|
+
remove_identifier_if_possible=remove_identifier_if_possible,
|
413
|
+
)
|
405
414
|
if not skip_star_expansion:
|
406
415
|
cols = list(flatten([self._expand_star(col) for col in cols]))
|
407
416
|
self._resolve_ambiguous_columns(cols)
|
@@ -635,7 +644,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
635
644
|
|
636
645
|
def _get_expressions(
|
637
646
|
self,
|
638
|
-
optimize: bool =
|
647
|
+
optimize: bool = False,
|
639
648
|
openai_config: t.Optional[t.Union[t.Dict[str, t.Any], OpenAIConfig]] = None,
|
640
649
|
quote_identifiers: bool = True,
|
641
650
|
) -> t.List[exp.Expression]:
|
@@ -1017,9 +1026,17 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
|
|
1017
1026
|
return join_column_pairs, join_clause
|
1018
1027
|
|
1019
1028
|
def _normalize_join_clause(
|
1020
|
-
self,
|
1029
|
+
self,
|
1030
|
+
join_columns: t.List[Column],
|
1031
|
+
join_expression: t.Optional[exp.Select],
|
1032
|
+
*,
|
1033
|
+
remove_identifier_if_possible: bool = True,
|
1021
1034
|
) -> Column:
|
1022
|
-
join_columns = self._ensure_and_normalize_cols(
|
1035
|
+
join_columns = self._ensure_and_normalize_cols(
|
1036
|
+
join_columns,
|
1037
|
+
join_expression,
|
1038
|
+
remove_identifier_if_possible=remove_identifier_if_possible,
|
1039
|
+
)
|
1023
1040
|
if len(join_columns) > 1:
|
1024
1041
|
join_columns = [functools.reduce(lambda x, y: x & y, join_columns)]
|
1025
1042
|
join_clause = join_columns[0]
|
sqlframe/base/functions.py
CHANGED
@@ -190,27 +190,27 @@ sum_distinct = sumDistinct
|
|
190
190
|
|
191
191
|
@meta()
|
192
192
|
def acos(col: ColumnOrName) -> Column:
|
193
|
-
return Column.
|
193
|
+
return Column.invoke_expression_over_column(col, expression.Acos)
|
194
194
|
|
195
195
|
|
196
196
|
@meta(unsupported_engines="duckdb")
|
197
197
|
def acosh(col: ColumnOrName) -> Column:
|
198
|
-
return Column.
|
198
|
+
return Column.invoke_expression_over_column(col, expression.Acosh)
|
199
199
|
|
200
200
|
|
201
201
|
@meta()
|
202
202
|
def asin(col: ColumnOrName) -> Column:
|
203
|
-
return Column.
|
203
|
+
return Column.invoke_expression_over_column(col, expression.Asin)
|
204
204
|
|
205
205
|
|
206
206
|
@meta(unsupported_engines="duckdb")
|
207
207
|
def asinh(col: ColumnOrName) -> Column:
|
208
|
-
return Column.
|
208
|
+
return Column.invoke_expression_over_column(col, expression.Asinh)
|
209
209
|
|
210
210
|
|
211
211
|
@meta()
|
212
212
|
def atan(col: ColumnOrName) -> Column:
|
213
|
-
return Column.
|
213
|
+
return Column.invoke_expression_over_column(col, expression.Atan)
|
214
214
|
|
215
215
|
|
216
216
|
@meta()
|
@@ -218,12 +218,12 @@ def atan2(col1: t.Union[ColumnOrName, float], col2: t.Union[ColumnOrName, float]
|
|
218
218
|
col1_value = lit(col1) if isinstance(col1, (int, float)) else col1
|
219
219
|
col2_value = lit(col2) if isinstance(col2, (int, float)) else col2
|
220
220
|
|
221
|
-
return Column.
|
221
|
+
return Column.invoke_expression_over_column(col1_value, expression.Atan2, expression=col2_value)
|
222
222
|
|
223
223
|
|
224
224
|
@meta(unsupported_engines="duckdb")
|
225
225
|
def atanh(col: ColumnOrName) -> Column:
|
226
|
-
return Column.
|
226
|
+
return Column.invoke_expression_over_column(col, expression.Atanh)
|
227
227
|
|
228
228
|
|
229
229
|
@meta()
|
@@ -251,12 +251,12 @@ def cosh(col: ColumnOrName) -> Column:
|
|
251
251
|
|
252
252
|
@meta()
|
253
253
|
def cot(col: ColumnOrName) -> Column:
|
254
|
-
return Column.
|
254
|
+
return Column.invoke_expression_over_column(col, expression.Cot)
|
255
255
|
|
256
256
|
|
257
257
|
@meta(unsupported_engines=["duckdb", "postgres", "snowflake"])
|
258
258
|
def csc(col: ColumnOrName) -> Column:
|
259
|
-
return Column.
|
259
|
+
return Column.invoke_expression_over_column(col, expression.Csc)
|
260
260
|
|
261
261
|
|
262
262
|
@meta()
|
@@ -362,7 +362,7 @@ def rint(col: ColumnOrName) -> Column:
|
|
362
362
|
|
363
363
|
@meta(unsupported_engines=["duckdb", "postgres", "snowflake"])
|
364
364
|
def sec(col: ColumnOrName) -> Column:
|
365
|
-
return Column.
|
365
|
+
return Column.invoke_expression_over_column(col, expression.Sec)
|
366
366
|
|
367
367
|
|
368
368
|
@meta()
|
@@ -372,12 +372,12 @@ def signum(col: ColumnOrName) -> Column:
|
|
372
372
|
|
373
373
|
@meta()
|
374
374
|
def sin(col: ColumnOrName) -> Column:
|
375
|
-
return Column.
|
375
|
+
return Column.invoke_expression_over_column(col, expression.Sin)
|
376
376
|
|
377
377
|
|
378
378
|
@meta(unsupported_engines="duckdb")
|
379
379
|
def sinh(col: ColumnOrName) -> Column:
|
380
|
-
return Column.
|
380
|
+
return Column.invoke_expression_over_column(col, expression.Sinh)
|
381
381
|
|
382
382
|
|
383
383
|
@meta()
|
@@ -940,7 +940,7 @@ def nth_value(
|
|
940
940
|
|
941
941
|
@meta()
|
942
942
|
def ntile(n: int) -> Column:
|
943
|
-
return Column.
|
943
|
+
return Column.invoke_expression_over_column(lit(n), expression.Ntile)
|
944
944
|
|
945
945
|
|
946
946
|
@meta()
|
@@ -2826,7 +2826,7 @@ def make_interval(
|
|
2826
2826
|
|
2827
2827
|
@meta(unsupported_engines="*")
|
2828
2828
|
def try_add(left: ColumnOrName, right: ColumnOrName) -> Column:
|
2829
|
-
return Column.
|
2829
|
+
return Column.invoke_expression_over_column(left, expression.SafeAdd, expression=right)
|
2830
2830
|
|
2831
2831
|
|
2832
2832
|
@meta(unsupported_engines="*")
|
@@ -2843,12 +2843,12 @@ def try_divide(left: ColumnOrName, right: ColumnOrName) -> Column:
|
|
2843
2843
|
|
2844
2844
|
@meta(unsupported_engines="*")
|
2845
2845
|
def try_multiply(left: ColumnOrName, right: ColumnOrName) -> Column:
|
2846
|
-
return Column.
|
2846
|
+
return Column.invoke_expression_over_column(left, expression.SafeMultiply, expression=right)
|
2847
2847
|
|
2848
2848
|
|
2849
2849
|
@meta(unsupported_engines="*")
|
2850
2850
|
def try_subtract(left: ColumnOrName, right: ColumnOrName) -> Column:
|
2851
|
-
return Column.
|
2851
|
+
return Column.invoke_expression_over_column(left, expression.SafeSubtract, expression=right)
|
2852
2852
|
|
2853
2853
|
|
2854
2854
|
@meta(unsupported_engines="*")
|
@@ -5256,7 +5256,7 @@ def regexp_extract_all(
|
|
5256
5256
|
)
|
5257
5257
|
|
5258
5258
|
|
5259
|
-
@meta(unsupported_engines="
|
5259
|
+
@meta(unsupported_engines=["duckdb", "bigquery", "postgres", "snowflake"])
|
5260
5260
|
def regexp_instr(
|
5261
5261
|
str: ColumnOrName, regexp: ColumnOrName, idx: t.Optional[t.Union[int, Column]] = None
|
5262
5262
|
) -> Column:
|
@@ -5291,11 +5291,9 @@ def regexp_instr(
|
|
5291
5291
|
>>> df.select(regexp_instr('str', col("regexp")).alias('d')).collect()
|
5292
5292
|
[Row(d=1)]
|
5293
5293
|
"""
|
5294
|
-
|
5295
|
-
|
5296
|
-
|
5297
|
-
idx = lit(idx) if isinstance(idx, int) else idx
|
5298
|
-
return Column.invoke_anonymous_function(str, "regexp_instr", regexp, idx)
|
5294
|
+
return Column.invoke_expression_over_column(
|
5295
|
+
str, expression.RegexpInstr, expression=regexp, group=idx
|
5296
|
+
)
|
5299
5297
|
|
5300
5298
|
|
5301
5299
|
@meta(unsupported_engines="snowflake")
|
@@ -275,7 +275,9 @@ class MergeSupportMixin(_BaseTable, t.Generic[DF]):
|
|
275
275
|
join_expression = self._add_ctes_to_expression(
|
276
276
|
self.expression, other_df.expression.copy().ctes
|
277
277
|
)
|
278
|
-
condition = self._ensure_and_normalize_cols(
|
278
|
+
condition = self._ensure_and_normalize_cols(
|
279
|
+
condition, self.expression, remove_identifier_if_possible=False
|
280
|
+
)
|
279
281
|
self._handle_self_join(other_df, condition)
|
280
282
|
|
281
283
|
if isinstance(condition[0].expression, exp.Column) and not clause:
|
@@ -291,7 +293,9 @@ class MergeSupportMixin(_BaseTable, t.Generic[DF]):
|
|
291
293
|
condition, join_expression, other_df, table_names
|
292
294
|
)
|
293
295
|
else:
|
294
|
-
join_clause = self._normalize_join_clause(
|
296
|
+
join_clause = self._normalize_join_clause(
|
297
|
+
condition, join_expression, remove_identifier_if_possible=False
|
298
|
+
)
|
295
299
|
return join_clause
|
296
300
|
|
297
301
|
def _ensure_and_normalize_assignments(
|
sqlframe/base/normalize.py
CHANGED
@@ -16,33 +16,103 @@ if t.TYPE_CHECKING:
|
|
16
16
|
NORMALIZE_INPUT = t.TypeVar("NORMALIZE_INPUT", bound=t.Union[str, exp.Expression, Column])
|
17
17
|
|
18
18
|
|
19
|
-
def normalize(
|
19
|
+
def normalize(
|
20
|
+
session: SESSION,
|
21
|
+
expression_context: exp.Select,
|
22
|
+
expr: t.List[NORMALIZE_INPUT],
|
23
|
+
*,
|
24
|
+
remove_identifier_if_possible: bool = True,
|
25
|
+
):
|
20
26
|
expr = ensure_list(expr)
|
21
27
|
expressions = _ensure_expressions(expr)
|
22
28
|
for expression in expressions:
|
23
29
|
identifiers = expression.find_all(exp.Identifier)
|
24
30
|
for identifier in identifiers:
|
25
31
|
identifier.transform(session.input_dialect.normalize_identifier)
|
26
|
-
replace_alias_name_with_cte_name(
|
27
|
-
|
32
|
+
replace_alias_name_with_cte_name(
|
33
|
+
session,
|
34
|
+
expression_context,
|
35
|
+
identifier,
|
36
|
+
remove_identifier_if_possible=remove_identifier_if_possible,
|
37
|
+
)
|
38
|
+
replace_branch_and_sequence_ids_with_cte_name(
|
39
|
+
session,
|
40
|
+
expression_context,
|
41
|
+
identifier,
|
42
|
+
remove_identifier_if_possible=remove_identifier_if_possible,
|
43
|
+
)
|
28
44
|
|
29
45
|
|
30
46
|
def replace_alias_name_with_cte_name(
|
31
|
-
session: SESSION,
|
47
|
+
session: SESSION,
|
48
|
+
expression_context: exp.Select,
|
49
|
+
id: exp.Identifier,
|
50
|
+
*,
|
51
|
+
remove_identifier_if_possible: bool,
|
32
52
|
):
|
33
53
|
normalized_id = session._normalize_string(id.alias_or_name)
|
34
54
|
if normalized_id in session.name_to_sequence_id_mapping:
|
35
|
-
|
55
|
+
# Get CTEs that are referenced in the FROM clause
|
56
|
+
referenced_cte_names = get_cte_names_from_from_clause(expression_context)
|
57
|
+
|
58
|
+
# Filter CTEs to only include those defined and referenced by the FROM clause
|
59
|
+
filtered_ctes = [
|
60
|
+
cte
|
61
|
+
for cte in reversed(expression_context.ctes)
|
62
|
+
if cte.alias_or_name in referenced_cte_names
|
63
|
+
]
|
64
|
+
|
65
|
+
for cte in filtered_ctes:
|
36
66
|
if cte.args["sequence_id"] in session.name_to_sequence_id_mapping[normalized_id]:
|
37
67
|
_set_alias_name(id, cte.alias_or_name)
|
38
68
|
break
|
69
|
+
else:
|
70
|
+
# Fallback: If not found in filtered CTEs, search through ALL CTEs unfiltered
|
71
|
+
for cte in reversed(expression_context.ctes):
|
72
|
+
if cte.args["sequence_id"] in session.name_to_sequence_id_mapping[normalized_id]:
|
73
|
+
_set_alias_name(id, cte.alias_or_name)
|
74
|
+
break
|
75
|
+
else:
|
76
|
+
# Final fallback: If this is a qualified column reference (table.column)
|
77
|
+
# and the table doesn't exist in FROM clause, remove the qualifier IF the column is unambiguously available
|
78
|
+
parent = id.parent
|
79
|
+
if parent and isinstance(parent, exp.Column) and remove_identifier_if_possible:
|
80
|
+
# Check if this table is not available in current context
|
81
|
+
current_tables = get_cte_names_from_from_clause(expression_context)
|
82
|
+
if normalized_id not in current_tables:
|
83
|
+
# Check if this table ID matches any CTE name directly (cross-context CTE reference)
|
84
|
+
cte_exists = any(
|
85
|
+
cte.alias_or_name == normalized_id for cte in expression_context.ctes
|
86
|
+
)
|
87
|
+
|
88
|
+
if cte_exists:
|
89
|
+
# This is a reference to a CTE that exists but is not in the current FROM clause
|
90
|
+
# Get the column name being referenced
|
91
|
+
column_name = (
|
92
|
+
_extract_column_name(parent.this)
|
93
|
+
if hasattr(parent, "this")
|
94
|
+
else None
|
95
|
+
)
|
96
|
+
|
97
|
+
# Only remove qualifier if the column is unambiguously available in current context
|
98
|
+
if column_name and is_column_unambiguously_available(
|
99
|
+
expression_context, column_name
|
100
|
+
):
|
101
|
+
parent.set("table", None)
|
39
102
|
|
40
103
|
|
41
104
|
def replace_branch_and_sequence_ids_with_cte_name(
|
42
|
-
session: SESSION,
|
105
|
+
session: SESSION,
|
106
|
+
expression_context: exp.Select,
|
107
|
+
id: exp.Identifier,
|
108
|
+
*,
|
109
|
+
remove_identifier_if_possible: bool,
|
43
110
|
):
|
44
111
|
normalized_id = session._normalize_string(id.alias_or_name)
|
45
112
|
if normalized_id in session.known_ids:
|
113
|
+
# Get CTEs that are referenced in the FROM clause
|
114
|
+
referenced_cte_names = get_cte_names_from_from_clause(expression_context)
|
115
|
+
|
46
116
|
# Check if we have a join and if both the tables in that join share a common branch id
|
47
117
|
# If so we need to have this reference the left table by default unless the id is a sequence
|
48
118
|
# id then it keeps that reference. This handles the weird edge case in spark that shouldn't
|
@@ -51,19 +121,138 @@ def replace_branch_and_sequence_ids_with_cte_name(
|
|
51
121
|
join_table_aliases = [
|
52
122
|
x.alias_or_name for x in get_tables_from_expression_with_join(expression_context)
|
53
123
|
]
|
124
|
+
# Filter CTEs to only include those referenced in the FROM clause
|
54
125
|
ctes_in_join = [
|
55
|
-
cte
|
126
|
+
cte
|
127
|
+
for cte in expression_context.ctes
|
128
|
+
if cte.alias_or_name in join_table_aliases
|
129
|
+
and cte.alias_or_name in referenced_cte_names
|
56
130
|
]
|
57
|
-
if
|
131
|
+
if (
|
132
|
+
len(ctes_in_join) >= 2
|
133
|
+
and ctes_in_join[0].args["branch_id"] == ctes_in_join[1].args["branch_id"]
|
134
|
+
):
|
58
135
|
assert len(ctes_in_join) == 2
|
59
136
|
_set_alias_name(id, ctes_in_join[0].alias_or_name)
|
60
137
|
return
|
61
138
|
|
139
|
+
# Filter CTEs to only include those defined and referenced by the FROM clause
|
140
|
+
filtered_ctes = [
|
141
|
+
cte
|
142
|
+
for cte in reversed(expression_context.ctes)
|
143
|
+
if cte.alias_or_name in referenced_cte_names
|
144
|
+
]
|
145
|
+
|
146
|
+
for cte in filtered_ctes:
|
147
|
+
if normalized_id in (cte.args["branch_id"], cte.args["sequence_id"]):
|
148
|
+
_set_alias_name(id, cte.alias_or_name)
|
149
|
+
return
|
150
|
+
|
151
|
+
# Fallback: If not found in filtered CTEs, search through ALL CTEs unfiltered
|
62
152
|
for cte in reversed(expression_context.ctes):
|
63
153
|
if normalized_id in (cte.args["branch_id"], cte.args["sequence_id"]):
|
64
154
|
_set_alias_name(id, cte.alias_or_name)
|
65
155
|
return
|
66
156
|
|
157
|
+
# Final fallback: If this is a qualified column reference (table.column)
|
158
|
+
# and the table doesn't exist in FROM clause, remove the qualifier IF the column is unambiguously available
|
159
|
+
parent = id.parent
|
160
|
+
if parent and isinstance(parent, exp.Column) and remove_identifier_if_possible:
|
161
|
+
# Check if this table is not available in current context
|
162
|
+
current_tables = get_cte_names_from_from_clause(expression_context)
|
163
|
+
if normalized_id not in current_tables:
|
164
|
+
# Check if this table ID matches any CTE name directly (cross-context CTE reference)
|
165
|
+
cte_exists = any(cte.alias_or_name == normalized_id for cte in expression_context.ctes)
|
166
|
+
|
167
|
+
if cte_exists:
|
168
|
+
# This is a reference to a CTE that exists but is not in the current FROM clause
|
169
|
+
# Get the column name being referenced
|
170
|
+
column_name = _extract_column_name(parent.this) if hasattr(parent, "this") else None
|
171
|
+
|
172
|
+
# Only remove qualifier if the column is unambiguously available in current context
|
173
|
+
if column_name and is_column_unambiguously_available(
|
174
|
+
expression_context, column_name
|
175
|
+
):
|
176
|
+
parent.set("table", None)
|
177
|
+
|
178
|
+
|
179
|
+
def is_column_unambiguously_available(expression_context: exp.Select, column_name: str) -> bool:
|
180
|
+
"""
|
181
|
+
Check if a column name is unambiguously available in the current context.
|
182
|
+
Returns True if the column appears exactly once across all accessible CTEs.
|
183
|
+
|
184
|
+
Enhanced to handle more column expression types and edge cases.
|
185
|
+
"""
|
186
|
+
current_tables = get_cte_names_from_from_clause(expression_context)
|
187
|
+
column_count_in_from = 0
|
188
|
+
|
189
|
+
# If no tables in FROM clause, be conservative
|
190
|
+
if not current_tables:
|
191
|
+
return False
|
192
|
+
|
193
|
+
# Count how many times this column appears in accessible CTEs
|
194
|
+
for cte in expression_context.ctes:
|
195
|
+
if cte.alias_or_name in current_tables:
|
196
|
+
if hasattr(cte, "this") and hasattr(cte.this, "expressions"):
|
197
|
+
for expr in cte.this.expressions:
|
198
|
+
expr_column_name = _extract_column_name(expr)
|
199
|
+
|
200
|
+
# Case-insensitive comparison for robustness
|
201
|
+
if expr_column_name and expr_column_name.lower() == column_name.lower():
|
202
|
+
column_count_in_from += 1
|
203
|
+
|
204
|
+
# Column is unambiguous if it appears exactly once in the FROM clause CTEs
|
205
|
+
return column_count_in_from == 1
|
206
|
+
|
207
|
+
|
208
|
+
def _extract_column_name(expr) -> str:
|
209
|
+
"""
|
210
|
+
Extract column name from various expression types.
|
211
|
+
Enhanced to handle more SQLGlot expression types.
|
212
|
+
"""
|
213
|
+
if hasattr(expr, "alias_or_name") and expr.alias_or_name:
|
214
|
+
return expr.alias_or_name
|
215
|
+
elif hasattr(expr, "this"):
|
216
|
+
if hasattr(expr.this, "this"):
|
217
|
+
return str(expr.this.this)
|
218
|
+
elif hasattr(expr.this, "name"):
|
219
|
+
return str(expr.this.name)
|
220
|
+
else:
|
221
|
+
return str(expr.this)
|
222
|
+
elif hasattr(expr, "name"):
|
223
|
+
return str(expr.name)
|
224
|
+
else:
|
225
|
+
return str(expr)
|
226
|
+
|
227
|
+
|
228
|
+
def get_cte_names_from_from_clause(expression_context: exp.Select) -> t.Set[str]:
|
229
|
+
"""
|
230
|
+
Get the set of CTE names that are referenced in the FROM clause of the expression.
|
231
|
+
|
232
|
+
Args:
|
233
|
+
expression_context: The SELECT expression to analyze
|
234
|
+
|
235
|
+
Returns:
|
236
|
+
Set of CTE alias names referenced in the FROM clause (including joins)
|
237
|
+
"""
|
238
|
+
referenced_cte_names = set()
|
239
|
+
|
240
|
+
# Get the main table from FROM clause
|
241
|
+
from_clause = expression_context.args.get("from")
|
242
|
+
if from_clause and from_clause.this:
|
243
|
+
main_table = from_clause.this
|
244
|
+
if hasattr(main_table, "alias_or_name") and main_table.alias_or_name:
|
245
|
+
referenced_cte_names.add(main_table.alias_or_name)
|
246
|
+
|
247
|
+
# Get tables from joins
|
248
|
+
if expression_context.args.get("joins"):
|
249
|
+
join_tables = get_tables_from_expression_with_join(expression_context)
|
250
|
+
for table in join_tables:
|
251
|
+
if hasattr(table, "alias_or_name") and table.alias_or_name:
|
252
|
+
referenced_cte_names.add(table.alias_or_name)
|
253
|
+
|
254
|
+
return referenced_cte_names
|
255
|
+
|
67
256
|
|
68
257
|
def normalize_dict(session: SESSION, data: t.Dict) -> t.Dict:
|
69
258
|
if isinstance(data, dict):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sqlframe
|
3
|
-
Version: 3.
|
3
|
+
Version: 3.40.1
|
4
4
|
Summary: Turning PySpark Into a Universal DataFrame API
|
5
5
|
Home-page: https://github.com/eakmanrq/sqlframe
|
6
6
|
Author: Ryan Eakman
|
@@ -18,7 +18,7 @@ Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
19
19
|
Requires-Dist: more-itertools
|
20
20
|
Requires-Dist: prettytable <4
|
21
|
-
Requires-Dist: sqlglot <27.
|
21
|
+
Requires-Dist: sqlglot <27.13,>=24.0.0
|
22
22
|
Requires-Dist: typing-extensions
|
23
23
|
Provides-Extra: bigquery
|
24
24
|
Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
|
@@ -1,17 +1,17 @@
|
|
1
1
|
sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
|
2
|
-
sqlframe/_version.py,sha256=
|
2
|
+
sqlframe/_version.py,sha256=JI7kHkVj37MZpxBOd6a_jH2r_NuV5xRglZHG8VT7YO4,714
|
3
3
|
sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
|
4
4
|
sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
|
6
6
|
sqlframe/base/catalog.py,sha256=-YulM2BMK8MoWbXi05AsJIPxd4AuiZDBCZuk4HoeMlE,38900
|
7
7
|
sqlframe/base/column.py,sha256=f6rK6-hTiNx9WwJP7t6tqL3xEC2gwERPDlhWCS5iCBw,21417
|
8
|
-
sqlframe/base/dataframe.py,sha256
|
8
|
+
sqlframe/base/dataframe.py,sha256=Kl3WycARIWBBIze0enmZDGkfOt65mZDQ2hx_6pxRsxI,87329
|
9
9
|
sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
|
10
10
|
sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
|
11
11
|
sqlframe/base/function_alternatives.py,sha256=aTu3nQhIAkZoxrI1IpjpaHEAMxBNms0AnhS0EMR-TwY,51727
|
12
|
-
sqlframe/base/functions.py,sha256=
|
12
|
+
sqlframe/base/functions.py,sha256=fc3jLuPAIJ3Hl4Bezm9Kgzsk4e5uFfgMgfajUCBKQG0,227919
|
13
13
|
sqlframe/base/group.py,sha256=fBm8EUve7W7xz11nybTXr09ih-yZxL_vvEiZVE1eb_0,12025
|
14
|
-
sqlframe/base/normalize.py,sha256=
|
14
|
+
sqlframe/base/normalize.py,sha256=YPeopWr8ZRjevArYfrM-DZBkQp4t4UfAEwynoj4VvcU,11773
|
15
15
|
sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
|
16
16
|
sqlframe/base/readerwriter.py,sha256=Nb2VJ_HBmLQp5mK8JhnFooZh2ydAaboCAFVPb-4MNX4,31241
|
17
17
|
sqlframe/base/session.py,sha256=99X-ShK9ohHCX6WdIJs0HhjfK23snaE3Gv6RYc5wqUI,27687
|
@@ -25,7 +25,7 @@ sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
|
|
25
25
|
sqlframe/base/mixins/catalog_mixins.py,sha256=9fZGWToz9xMJSzUl1vsVtj6TH3TysP3fBCKJLnGUQzE,23353
|
26
26
|
sqlframe/base/mixins/dataframe_mixins.py,sha256=8D2AFtfc0tj9Q5qzlNAXdXOYw9RuD8kpe8wixo8pY5o,1534
|
27
27
|
sqlframe/base/mixins/readwriter_mixins.py,sha256=ItQ_0jZ5RljgmLjGDIzLMRP_NQdy3wAyKwJ6K5NjaqA,4954
|
28
|
-
sqlframe/base/mixins/table_mixins.py,sha256=
|
28
|
+
sqlframe/base/mixins/table_mixins.py,sha256=zoqrgaH1fOgnHkC6C4L8IUyspDa5SETP3OXVdKWxcUM,13917
|
29
29
|
sqlframe/bigquery/__init__.py,sha256=kbaomhYAANPdxeDQhajv8IHfMg_ENKivtYK-rPwaV08,939
|
30
30
|
sqlframe/bigquery/catalog.py,sha256=Dcpp1JKftc3ukdYpn6M1ujqixA-6_1k8aY21Y5Johyc,11899
|
31
31
|
sqlframe/bigquery/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
|
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
|
|
130
130
|
sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
|
131
131
|
sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
|
132
132
|
sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
|
133
|
-
sqlframe-3.
|
134
|
-
sqlframe-3.
|
135
|
-
sqlframe-3.
|
136
|
-
sqlframe-3.
|
137
|
-
sqlframe-3.
|
133
|
+
sqlframe-3.40.1.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
|
134
|
+
sqlframe-3.40.1.dist-info/METADATA,sha256=vcuX8bS_t6YMPxX-bRUz8mlrne6wXwiqo4dS0AZzNtI,9070
|
135
|
+
sqlframe-3.40.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
136
|
+
sqlframe-3.40.1.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
|
137
|
+
sqlframe-3.40.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|