snowpark-connect 0.30.1-py3-none-any.whl → 0.32.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/column_name_handler.py +200 -102
- snowflake/snowpark_connect/column_qualifier.py +47 -0
- snowflake/snowpark_connect/config.py +51 -16
- snowflake/snowpark_connect/dataframe_container.py +3 -2
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +53 -8
- snowflake/snowpark_connect/expression/map_extension.py +37 -11
- snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
- snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
- snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +6 -1
- snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
- snowflake/snowpark_connect/relation/map_extension.py +38 -17
- snowflake/snowpark_connect/relation/map_join.py +26 -12
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
- snowflake/snowpark_connect/relation/map_sql.py +124 -25
- snowflake/snowpark_connect/relation/map_stats.py +5 -1
- snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +160 -48
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +73 -21
- snowflake/snowpark_connect/type_mapping.py +90 -20
- snowflake/snowpark_connect/typed_column.py +8 -6
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +24 -4
- snowflake/snowpark_connect/utils/telemetry.py +6 -0
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +1 -1
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/__init__.py
@@ -10,6 +10,7 @@ sys.path.append(str(pathlib.Path(__file__).parent / "includes/python"))
 
 from .server import get_session  # noqa: E402, F401
 from .server import start_session  # noqa: E402, F401
+from .utils.session import skip_session_configuration  # noqa: E402, F401
 
 # Turn off catalog warning for Snowpark
 sp_logger = logging.getLogger("snowflake.snowpark")
snowflake/snowpark_connect/column_name_handler.py
@@ -13,14 +13,17 @@ from functools import cached_property
 from pyspark.errors.exceptions.base import AnalysisException
 
 from snowflake.snowpark import DataFrame
-from snowflake.snowpark._internal.analyzer.analyzer_utils import (
-    quote_name_without_upper_casing,
-    unquote_if_quoted,
-)
+from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
 from snowflake.snowpark._internal.utils import quote_name
 from snowflake.snowpark.types import StructType
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.config import global_config
-from snowflake.snowpark_connect.
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+from snowflake.snowpark_connect.utils.context import (
+    get_current_operation_scope,
+    get_is_processing_order_by,
+)
 from snowflake.snowpark_connect.utils.identifiers import (
     split_fully_qualified_spark_name,
 )
@@ -92,31 +95,15 @@ def make_column_names_snowpark_compatible(
 class ColumnNames:
     spark_name: str
     snowpark_name: str
-    qualifiers:
+    qualifiers: set[ColumnQualifier]
     catalog_info: str | None = None  # Catalog from fully qualified name
     database_info: str | None = None  # Database from fully qualified name
 
-
-
-
-
-
-
-    For example, if the column name is 'id' and the qualifiers are ['db', 'table'],
-    then the possible Spark names are:
-    ['id', 'db.table.id', 'table.id']
-    """
-    spark_name = column_names.spark_name
-    qualifiers = column_names.qualifiers
-
-    qualifier_suffixes_list = [
-        ".".join(quote_name_without_upper_casing(x) for x in qualifiers[i:])
-        for i in range(len(qualifiers))
-    ]
-    return [spark_name] + [
-        f"{qualifier_suffix}.{spark_name}"
-        for qualifier_suffix in qualifier_suffixes_list
-    ]
+    def all_spark_names_including_qualified_names(self):
+        all_names = [self.spark_name]
+        for qualifier in self.qualifiers:
+            all_names.extend(qualifier.all_qualified_names(self.spark_name))
+        return all_names
 
 
 class ColumnNameMap:
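The new method folds the removed module-level helper into ColumnNames: each qualifier suffix becomes a prefix of the bare column name. A minimal sketch of the expansion, with the Snowpark identifier quoting (quote_name_without_upper_casing) left out for readability:

def all_qualified_names(parts, name):
    # Simplified version of ColumnQualifier.all_qualified_names:
    # every suffix of the qualifier parts prefixes the column name.
    return [".".join(parts[i:]) + "." + name for i in range(len(parts))]

# A column 'id' qualified by ('db', 'table') is reachable under every
# qualifier suffix plus the bare name, as the removed docstring noted:
print(["id"] + all_qualified_names(("db", "table"), "id"))
# ['id', 'db.table.id', 'table.id']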
@@ -128,13 +115,13 @@ class ColumnNameMap:
             [], bool
         ] = lambda: global_config.spark_sql_caseSensitive,
         column_metadata: dict | None = None,
-        column_qualifiers: list[
+        column_qualifiers: list[set[ColumnQualifier]] = None,
         parent_column_name_map: ColumnNameMap | None = None,
     ) -> None:
         """
         spark_column_names: Original spark column names
         snowpark_column_names: Snowpark column names
-        column_metadata: This field is used to store metadata related to columns. Since Snowpark
+        column_metadata: This field is used to store metadata related to columns. Since Snowpark's Struct type does not support metadata,
         we use this attribute to store any metadata related to the columns.
         The key is the original Spark column name, and the value is the metadata.
         example: Dict('age', {'foo': 'bar'})
@@ -142,7 +129,7 @@ class ColumnNameMap:
         parent_column_name_map: parent ColumnNameMap
         """
         self.columns: list[ColumnNames] = []
-        self.spark_to_col = defaultdict(list)
+        self.spark_to_col: defaultdict[str, list[ColumnNames]] = defaultdict(list)
         self.uppercase_spark_to_col = defaultdict(list)
         self.snowpark_to_col = defaultdict(list)
         self.is_case_sensitive = is_case_sensitive
@@ -181,21 +168,18 @@ class ColumnNameMap:
             c = ColumnNames(
                 spark_name=spark_name,
                 snowpark_name=snowpark_column_names[i],
-                qualifiers=column_qualifiers[i]
+                qualifiers=column_qualifiers[i]
+                if column_qualifiers and column_qualifiers[i]
+                else {ColumnQualifier.no_qualifier()},
                 catalog_info=catalog_info,
                 database_info=database_info,
             )
             self.columns.append(c)
 
-
-            spark_names_including_qualifier = get_list_of_spark_names_for_column(c)
-
-            for spark_name_including_qualifier in spark_names_including_qualifier:
+            for spark_name in c.all_spark_names_including_qualified_names():
                 # the same spark name can map to multiple snowpark names
-                self.spark_to_col[
-                self.uppercase_spark_to_col[
-                    spark_name_including_qualifier.upper()
-                ].append(c)
+                self.spark_to_col[spark_name].append(c)
+                self.uppercase_spark_to_col[spark_name.upper()].append(c)
 
             # the same snowpark name can map to multiple spark column
             # e.g. df.select(date_format('dt', 'yyy'), date_format('dt', 'yyyy')) ->
@@ -353,18 +337,77 @@ class ColumnNameMap:
 
         snowpark_names_len = len(snowpark_names)
         if snowpark_names_len > 1:
-
-
-
+            # Check if this is a case where we have identical expressions that can be safely resolved to the first one
+            # This commonly happens with GROUP BY expressions that also appear in SELECT clauses
+            if (
+                get_is_processing_order_by()
+                and self._can_resolve_ambiguous_identical_expressions(
+                    resolved_name, snowpark_names
+                )
+            ):
+                # All the ambiguous columns represent the same expression, so we can safely use the first one
+                return snowpark_names[0]
+            else:
+                exception = AnalysisException(
+                    f"Ambiguous spark column name {spark_column_name}, potential snowpark column names {snowpark_names}"
+                )
+                attach_custom_error_code(exception, ErrorCodes.AMBIGUOUS_COLUMN_NAME)
+                raise exception
         elif snowpark_names_len == 0:
             if allow_non_exists:
                 return None
             else:
-
+                exception = AnalysisException(
                     f"Spark column name {spark_column_name} does not exist"
                 )
+                attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                raise exception
         return snowpark_names[0]
 
+    def _can_resolve_ambiguous_identical_expressions(
+        self, spark_column_name: str, snowpark_names: list[str]
+    ) -> bool:
+        """
+        Determine if ambiguous columns represent identical expressions that can be safely resolved to the first one.
+
+        This handles the common case where the same expression (like a UDF call) appears multiple times
+        in a SELECT clause within a GROUP BY query. Since they're the same expression operating on the
+        same grouped data, they will have identical values, so we can safely resolve to any of them.
+
+        Args:
+            spark_column_name: The Spark column name that has multiple mappings, make sure resolve this reforehand
+            snowpark_names: List of Snowpark column names that map to this Spark column name
+
+        Returns:
+            True if we can safely resolve to the first snowpark column, False otherwise
+        """
+        if spark_column_name not in self.spark_to_col:
+            return False
+
+        columns: list[ColumnNames] = self.spark_to_col[spark_column_name]
+
+        # If we don't have multiple columns, there's no ambiguity to resolve
+        if len(columns) <= 1:
+            return False
+
+        # Check if all the snowpark names correspond to columns that have identical underlying expressions
+        # We'll compare the actual column objects to see if they represent the same computation
+        first_column = columns[0]
+
+        for column in columns[1:]:
+            if first_column.qualifiers != column.qualifiers:
+                return False
+
+        # Additional safety check: ensure all snowpark names are actually in our mapping
+        for snowpark_name in snowpark_names:
+            if snowpark_name not in self.snowpark_to_col:
+                return False
+
+        # If we reach here, the columns appear to be identical expressions from the same context
+        # This commonly happens in GROUP BY scenarios where the same expression appears in both
+        # the grouping clause and the select clause
+        return True
+
     def get_spark_column_names_from_snowpark_column_names(
         self,
         snowpark_column_names: list[str],
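The get_is_processing_order_by() guard narrows this relaxation to ORDER BY resolution. A hedged sketch of the query shape the docstring above describes, assuming an active Spark Connect session against Snowpark Connect (endpoint and data are illustrative only):

from pyspark.sql import SparkSession

# Hypothetical Spark Connect endpoint; Snowpark Connect serves this protocol.
spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

spark.createDataFrame([("Ann",), ("ann",), ("Bob",)], ["name"]) \
    .createOrReplaceTempView("people")

# upper(name) appears twice in SELECT and once in GROUP BY, so multiple plan
# columns map to the same Spark expression; the ORDER BY on it previously
# raised an ambiguity error and is now resolved to the first matching column.
spark.sql(
    """
    SELECT upper(name) AS u, upper(name) AS u2, count(*) AS cnt
    FROM people
    GROUP BY upper(name)
    ORDER BY upper(name)
    """
).show()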
@@ -390,16 +433,20 @@ class ColumnNameMap:
         )
         spark_names_len = len(spark_names)
         if spark_names_len > 1:
-
+            exception = AnalysisException(
                 f"Ambiguous snowpark column name {snowpark_column_name}, potential spark column names {spark_names}"
             )
+            attach_custom_error_code(exception, ErrorCodes.AMBIGUOUS_COLUMN_NAME)
+            raise exception
         elif spark_names_len == 0:
             if allow_non_exists:
                 return None
             else:
-
+                exception = AnalysisException(
                     f"Snowpark column name {snowpark_column_name} does not exist"
                 )
+                attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                raise exception
         return spark_names[0]
 
     def get_spark_column_name(self, idx: int) -> str:
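Every raise site in this file now follows the same three-step pattern: build the exception, tag it with a machine-readable code via attach_custom_error_code, then raise. A minimal sketch of that pattern, under the assumption that the helper simply stores the code on the exception (the real implementation lives in error/error_utils.py and may differ):

from enum import Enum

class ErrorCodes(Enum):  # illustrative subset of error/error_codes.py
    COLUMN_NOT_FOUND = "COLUMN_NOT_FOUND"
    AMBIGUOUS_COLUMN_NAME = "AMBIGUOUS_COLUMN_NAME"

def attach_custom_error_code(exc: Exception, code: ErrorCodes) -> None:
    # Assumed behavior: annotate the exception so callers and telemetry can
    # read a stable error code without parsing the message text.
    exc.custom_error_code = code

try:
    exception = ValueError("Spark column name foo does not exist")
    attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
    raise exception
except ValueError as e:
    print(e.custom_error_code)  # ErrorCodes.COLUMN_NOT_FOUND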
@@ -409,32 +456,30 @@ class ColumnNameMap:
         return [c.spark_name for c in self.columns]
 
     def get_spark_and_snowpark_columns_with_qualifier_for_qualifier(
-        self,
-    ) -> tuple[list[str], list[str], list[
+        self, target_qualifier: ColumnQualifier
+    ) -> tuple[list[str], list[str], list[set[ColumnQualifier]]]:
         """
-        Returns the Spark and Snowpark column names along with their qualifiers for the specified
-        If a column does not have a qualifier, it will be None.
+        Returns the Spark and Snowpark column names along with their qualifiers for the specified qualifier.
         """
-        spark_columns = []
-        snowpark_columns = []
-        qualifiers = []
+        spark_columns: list[str] = []
+        snowpark_columns: list[str] = []
+        qualifiers: list[set[ColumnQualifier]] = []
 
+        normalized_qualifier = target_qualifier
         if not self.is_case_sensitive():
-
+            normalized_qualifier = target_qualifier.to_upper()
 
-        for
-
-
+        for column in self.columns:
+            # Normalize all qualifiers for comparison
+            column_qualifiers: set[ColumnQualifier] = (
+                {q.to_upper() for q in iter(column.qualifiers)}
                 if not self.is_case_sensitive()
-                else
+                else column.qualifiers
             )
-            if
-
-
-
-                spark_columns.append(c.spark_name)
-                snowpark_columns.append(c.snowpark_name)
-                qualifiers.append(c.qualifiers)
+            if any([q.matches(normalized_qualifier) for q in column_qualifiers]):
+                spark_columns.append(column.spark_name)
+                snowpark_columns.append(column.snowpark_name)
+                qualifiers.append(column.qualifiers)
 
         return spark_columns, snowpark_columns, qualifiers
 
@@ -448,19 +493,17 @@ class ColumnNameMap:
             if self._quote_if_unquoted(c) not in cols_to_drop
         ]
 
-    def get_qualifiers(self) -> list[
+    def get_qualifiers(self) -> list[set[ColumnQualifier]]:
         """
         Returns the qualifiers for the columns.
-        If a column does not have a qualifier, it will be None.
         """
         return [c.qualifiers for c in self.columns]
 
     def get_qualifiers_for_columns_after_drop(
         self, cols_to_drop: list[str]
-    ) -> list[
+    ) -> list[set[ColumnQualifier]]:
        """
         Returns the qualifiers for the columns after dropping the specified columns.
-        If a column is dropped, its qualifier will be None.
         """
         return [
             c.qualifiers
@@ -471,10 +514,25 @@ class ColumnNameMap:
     def get_qualifier_for_spark_column(
         self,
         spark_column_name: str,
-    ) ->
+    ) -> ColumnQualifier:
+        """
+        Backward compatibility: returns the first qualifier for the given Spark column name.
+        Throws if more than one qualifier exists.
+        """
+        qualifiers = self.get_qualifiers_for_spark_column(spark_column_name)
+        if len(qualifiers) > 1:
+            raise ValueError(
+                "Shouldn't happen. Multiple qualifiers found; expected only one."
+            )
+        return next(iter(qualifiers))
+
+    def get_qualifiers_for_spark_column(
+        self,
+        spark_column_name: str,
+    ) -> set[ColumnQualifier]:
         """
         Returns the qualifier for the specified Spark column name.
-        If the column does not exist, returns
+        If the column does not exist, returns empty ColumnQualifier.
         """
         if not self.is_case_sensitive():
             name = spark_column_name.upper()
@@ -486,7 +544,7 @@ class ColumnNameMap:
         col = mapping.get(name)
 
         if col is None or len(col) == 0:
-            return
+            return {ColumnQualifier.no_qualifier()}
 
         return col[0].qualifiers
 
@@ -518,7 +576,7 @@ class ColumnNameMap:
 
     def with_columns(
         self, new_spark_columns: list[str], new_snowpark_columns: list[str]
-    ) -> tuple[list[str], list[str], list[
+    ) -> tuple[list[str], list[str], list[set[ColumnQualifier]]]:
         """
         Returns an ordered list of spark and snowpark column names after adding the new columns through a withColumns call.
         All replaced columns retain their ordering in the dataframe. The new columns are added to the end of the list.
@@ -547,7 +605,7 @@ class ColumnNameMap:
                 removed_index.add(index)
                 spark_columns.append(new_spark_columns[index])
                 snowpark_columns.append(new_snowpark_columns[index])
-                qualifiers.append(
+                qualifiers.append({ColumnQualifier.no_qualifier()})
             else:
                 spark_columns.append(c.spark_name)
                 snowpark_columns.append(c.snowpark_name)
@@ -557,7 +615,7 @@ class ColumnNameMap:
             if i not in removed_index:
                 spark_columns.append(new_spark_columns[i])
                 snowpark_columns.append(new_snowpark_columns[i])
-            qualifiers.append(
+            qualifiers.append({ColumnQualifier.no_qualifier()})
 
         return spark_columns, snowpark_columns, qualifiers
 
@@ -604,14 +662,18 @@ class JoinColumnNameMap(ColumnNameMap):
             if allow_non_exists:
                 return None
             else:
-
+                exception = AnalysisException(
                     f"Spark column name {spark_column_name} does not exist in either left or right DataFrame"
                 )
+                attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                raise exception
 
         if (snowpark_column_name_in_right is not None) and (
            snowpark_column_name_in_left is not None
        ):
-
+            exception = AnalysisException(f"Ambiguous column name {spark_column_name}")
+            attach_custom_error_code(exception, ErrorCodes.AMBIGUOUS_COLUMN_NAME)
+            raise exception
 
         snowpark_name = (
             snowpark_column_name_in_right
@@ -637,60 +699,94 @@ class JoinColumnNameMap(ColumnNameMap):
     def get_snowpark_column_names_from_spark_column_names(
         self, spark_column_names: list[str], return_first: bool = False
     ) -> list[str]:
-
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def get_spark_column_names_from_snowpark_column_names(
         self,
         snowpark_column_names: list[str],
     ) -> list[str]:
-
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def get_spark_column_name_from_snowpark_column_name(
-        self,
+        self,
+        snowpark_column_name: str,
+        allow_non_exists: bool = False,
     ) -> str:
-
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def get_spark_columns(self) -> list[str]:
-
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def get_snowpark_columns(self) -> list[str]:
-
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def get_snowpark_columns_after_drop(self, cols_to_drop: list[str]) -> list[str]:
-
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def get_renamed_nested_column_name(self, name) -> str | None:
-
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def has_spark_column(self, spark_column_name: str) -> bool:
-
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def snowpark_to_spark_map(self) -> dict[str, str]:
-
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def spark_to_snowpark_for_pattern(self, pattern: str) -> list[tuple[str, str]]:
-
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def with_columns(
         self, new_spark_columns: list[str], new_snowpark_columns: list[str]
-    ) -> tuple[list[str], list[str], list[
-
+    ) -> tuple[list[str], list[str], list[set[ColumnQualifier]]]:
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
-    def get_qualifiers(self) -> list[
-
+    def get_qualifiers(self) -> list[set[ColumnQualifier]]:
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def get_qualifiers_for_columns_after_drop(
         self, cols_to_drop: list[str]
-    ) -> list[
-
+    ) -> list[set[ColumnQualifier]]:
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def get_spark_and_snowpark_columns_with_qualifier_for_qualifier(
-        self,
-    ) -> tuple[list[str], list[str], list[
-
-
-
-
+        self, target_qualifier: list[str]
+    ) -> tuple[list[str], list[str], list[set[ColumnQualifier]]]:
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
+
+    def get_qualifiers_for_spark_column(
+        self, spark_column_name: str
+    ) -> set[ColumnQualifier]:
+        return {self.get_qualifier_for_spark_column(spark_column_name)}
+
+    def get_qualifier_for_spark_column(self, spark_column_name: str) -> ColumnQualifier:
         qualifier_left = self.left_column_mapping.get_qualifier_for_spark_column(
             spark_column_name
         )
@@ -698,7 +794,9 @@ class JoinColumnNameMap(ColumnNameMap):
             spark_column_name
         )
 
-        if (
-
+        if (not qualifier_left.is_empty) and (not qualifier_right.is_empty):
+            exception = AnalysisException(f"Ambiguous column name {spark_column_name}")
+            attach_custom_error_code(exception, ErrorCodes.AMBIGUOUS_COLUMN_NAME)
+            raise exception
 
-        return qualifier_right if
+        return qualifier_right if qualifier_left.is_empty else qualifier_left
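The join map resolves a column's qualifier by asking both sides: if both return a non-empty qualifier the name is ambiguous, otherwise whichever side actually owns the column wins (an empty qualifier here means "not found on this side"). A toy illustration of that selection rule, using a stand-in dataclass rather than the real mappings:

from dataclasses import dataclass

@dataclass(frozen=True)
class Qualifier:  # stand-in for ColumnQualifier
    parts: tuple

    @property
    def is_empty(self) -> bool:
        return len(self.parts) == 0

def resolve(left: Qualifier, right: Qualifier) -> Qualifier:
    # Mirrors the logic in the hunk above.
    if (not left.is_empty) and (not right.is_empty):
        raise ValueError("Ambiguous column name")
    return right if left.is_empty else left

print(resolve(Qualifier(()), Qualifier(("t2",))))  # right side owns the column
print(resolve(Qualifier(("t1",)), Qualifier(())))  # left side owns the column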
snowflake/snowpark_connect/column_qualifier.py (new file)
@@ -0,0 +1,47 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from snowflake.snowpark._internal.analyzer.analyzer_utils import (
+    quote_name_without_upper_casing,
+)
+
+
+@dataclass(frozen=True)
+class ColumnQualifier:
+    parts: tuple[str, ...]
+
+    def __post_init__(self) -> None:
+        if not all(isinstance(x, str) for x in self.parts):
+            raise TypeError("ColumnQualifier.parts must be strings")
+
+    @property
+    def is_empty(self) -> bool:
+        return len(self.parts) == 0
+
+    @classmethod
+    def no_qualifier(cls) -> ColumnQualifier:
+        return cls(())
+
+    def all_qualified_names(self, name: str) -> list[str]:
+        qualifier_parts = self.parts
+        qualifier_prefixes = [
+            ".".join(quote_name_without_upper_casing(x) for x in qualifier_parts[i:])
+            for i in range(len(qualifier_parts))
+        ]
+        return [f"{prefix}.{name}" for prefix in qualifier_prefixes]
+
+    def to_upper(self):
+        return ColumnQualifier(tuple(part.upper() for part in self.parts))
+
+    def matches(self, target: ColumnQualifier) -> bool:
+        if self.is_empty or target.is_empty:
+            return False
+        # If the column has fewer qualifiers than the target, it cannot match
+        if len(self.parts) < len(target.parts):
+            return False
+        return self.parts[-len(target.parts) :] == target.parts