snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of snowpark-connect has been flagged as potentially problematic.
Files changed (87)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/column_name_handler.py +200 -102
  3. snowflake/snowpark_connect/column_qualifier.py +47 -0
  4. snowflake/snowpark_connect/config.py +51 -16
  5. snowflake/snowpark_connect/dataframe_container.py +3 -2
  6. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  7. snowflake/snowpark_connect/error/error_codes.py +50 -0
  8. snowflake/snowpark_connect/error/error_utils.py +142 -22
  9. snowflake/snowpark_connect/error/exceptions.py +13 -4
  10. snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
  11. snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
  12. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  13. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  14. snowflake/snowpark_connect/expression/literal.py +7 -1
  15. snowflake/snowpark_connect/expression/map_cast.py +17 -5
  16. snowflake/snowpark_connect/expression/map_expression.py +53 -8
  17. snowflake/snowpark_connect/expression/map_extension.py +37 -11
  18. snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
  19. snowflake/snowpark_connect/expression/map_udf.py +10 -2
  20. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
  21. snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
  22. snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
  23. snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
  24. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  25. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  26. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
  27. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  28. snowflake/snowpark_connect/relation/io_utils.py +6 -1
  29. snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
  30. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  31. snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
  32. snowflake/snowpark_connect/relation/map_extension.py +38 -17
  33. snowflake/snowpark_connect/relation/map_join.py +26 -12
  34. snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
  35. snowflake/snowpark_connect/relation/map_relation.py +33 -7
  36. snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
  37. snowflake/snowpark_connect/relation/map_sql.py +124 -25
  38. snowflake/snowpark_connect/relation/map_stats.py +5 -1
  39. snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
  40. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  41. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
  42. snowflake/snowpark_connect/relation/read/map_read.py +15 -3
  43. snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
  44. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  45. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
  46. snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
  47. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  48. snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
  49. snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
  50. snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
  51. snowflake/snowpark_connect/relation/stage_locator.py +5 -1
  52. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  53. snowflake/snowpark_connect/relation/write/map_write.py +160 -48
  54. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  55. snowflake/snowpark_connect/resources_initializer.py +5 -1
  56. snowflake/snowpark_connect/server.py +73 -21
  57. snowflake/snowpark_connect/type_mapping.py +90 -20
  58. snowflake/snowpark_connect/typed_column.py +8 -6
  59. snowflake/snowpark_connect/utils/context.py +42 -1
  60. snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
  61. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  62. snowflake/snowpark_connect/utils/identifiers.py +11 -3
  63. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  64. snowflake/snowpark_connect/utils/profiling.py +25 -8
  65. snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
  66. snowflake/snowpark_connect/utils/session.py +24 -4
  67. snowflake/snowpark_connect/utils/telemetry.py +6 -0
  68. snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
  69. snowflake/snowpark_connect/utils/udf_cache.py +5 -3
  70. snowflake/snowpark_connect/utils/udf_helper.py +20 -6
  71. snowflake/snowpark_connect/utils/udf_utils.py +4 -4
  72. snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
  73. snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
  74. snowflake/snowpark_connect/version.py +1 -1
  75. snowflake/snowpark_decoder/dp_session.py +1 -1
  76. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
  77. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
  78. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
  79. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
  80. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
  81. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
  82. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
  83. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
  84. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
  85. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
  86. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
  87. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/__init__.py
@@ -10,6 +10,7 @@ sys.path.append(str(pathlib.Path(__file__).parent / "includes/python"))
 
 from .server import get_session  # noqa: E402, F401
 from .server import start_session  # noqa: E402, F401
+from .utils.session import skip_session_configuration  # noqa: E402, F401
 
 # Turn off catalog warning for Snowpark
 sp_logger = logging.getLogger("snowflake.snowpark")
snowflake/snowpark_connect/column_name_handler.py
@@ -13,14 +13,17 @@ from functools import cached_property
 from pyspark.errors.exceptions.base import AnalysisException
 
 from snowflake.snowpark import DataFrame
-from snowflake.snowpark._internal.analyzer.analyzer_utils import (
-    quote_name_without_upper_casing,
-    unquote_if_quoted,
-)
+from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
 from snowflake.snowpark._internal.utils import quote_name
 from snowflake.snowpark.types import StructType
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.config import global_config
-from snowflake.snowpark_connect.utils.context import get_current_operation_scope
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+from snowflake.snowpark_connect.utils.context import (
+    get_current_operation_scope,
+    get_is_processing_order_by,
+)
 from snowflake.snowpark_connect.utils.identifiers import (
     split_fully_qualified_spark_name,
 )
@@ -92,31 +95,15 @@ def make_column_names_snowpark_compatible(
 class ColumnNames:
     spark_name: str
     snowpark_name: str
-    qualifiers: list[str]
+    qualifiers: set[ColumnQualifier]
     catalog_info: str | None = None  # Catalog from fully qualified name
    database_info: str | None = None  # Database from fully qualified name
 
-
-def get_list_of_spark_names_for_column(column_names: ColumnNames) -> list[str]:
-    """
-    Returns a list of Spark names for a given ColumnNames object.
-    This is useful when a single Spark name maps to multiple names due to table alias.
-
-    For example, if the column name is 'id' and the qualifiers are ['db', 'table'],
-    then the possible Spark names are:
-    ['id', 'db.table.id', 'table.id']
-    """
-    spark_name = column_names.spark_name
-    qualifiers = column_names.qualifiers
-
-    qualifier_suffixes_list = [
-        ".".join(quote_name_without_upper_casing(x) for x in qualifiers[i:])
-        for i in range(len(qualifiers))
-    ]
-    return [spark_name] + [
-        f"{qualifier_suffix}.{spark_name}"
-        for qualifier_suffix in qualifier_suffixes_list
-    ]
+    def all_spark_names_including_qualified_names(self):
+        all_names = [self.spark_name]
+        for qualifier in self.qualifiers:
+            all_names.extend(qualifier.all_qualified_names(self.spark_name))
+        return all_names
 
 
 class ColumnNameMap:
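
Note (illustrative, not part of the diff): the removed module-level helper now lives on ColumnNames as all_spark_names_including_qualified_names, and qualifiers are ColumnQualifier objects instead of bare string lists. A minimal sketch of the expected expansion, reusing the example from the removed docstring and assuming the ColumnQualifier class added later in this release:

    # Hypothetical values; ColumnQualifier comes from the new column_qualifier.py shown below.
    c = ColumnNames(
        spark_name="id",
        snowpark_name='"ID"',
        qualifiers={ColumnQualifier(("db", "table"))},
    )
    # Expected result, per the old docstring's example (qualifier parts come back quoted):
    # ['id', '"db"."table".id', '"table".id']
    c.all_spark_names_including_qualified_names()
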
@@ -128,13 +115,13 @@ class ColumnNameMap:
             [], bool
         ] = lambda: global_config.spark_sql_caseSensitive,
         column_metadata: dict | None = None,
-        column_qualifiers: list[list[str]] | None = None,
+        column_qualifiers: list[set[ColumnQualifier]] = None,
         parent_column_name_map: ColumnNameMap | None = None,
     ) -> None:
         """
         spark_column_names: Original spark column names
         snowpark_column_names: Snowpark column names
-        column_metadata: This field is used to store metadata related to columns. Since Snowparks Struct type does not support metadata,
+        column_metadata: This field is used to store metadata related to columns. Since Snowpark's Struct type does not support metadata,
             we use this attribute to store any metadata related to the columns.
             The key is the original Spark column name, and the value is the metadata.
             example: Dict('age', {'foo': 'bar'})
@@ -142,7 +129,7 @@ class ColumnNameMap:
         parent_column_name_map: parent ColumnNameMap
         """
         self.columns: list[ColumnNames] = []
-        self.spark_to_col = defaultdict(list)
+        self.spark_to_col: defaultdict[str, list[ColumnNames]] = defaultdict(list)
         self.uppercase_spark_to_col = defaultdict(list)
         self.snowpark_to_col = defaultdict(list)
         self.is_case_sensitive = is_case_sensitive
@@ -181,21 +168,18 @@ class ColumnNameMap:
             c = ColumnNames(
                 spark_name=spark_name,
                 snowpark_name=snowpark_column_names[i],
-                qualifiers=column_qualifiers[i] if column_qualifiers else [],
+                qualifiers=column_qualifiers[i]
+                if column_qualifiers and column_qualifiers[i]
+                else {ColumnQualifier.no_qualifier()},
                 catalog_info=catalog_info,
                 database_info=database_info,
             )
             self.columns.append(c)
 
-            # we want to store all the spark names including qualifiers (these are generated from table alias or dataframe alias)
-            spark_names_including_qualifier = get_list_of_spark_names_for_column(c)
-
-            for spark_name_including_qualifier in spark_names_including_qualifier:
+            for spark_name in c.all_spark_names_including_qualified_names():
                 # the same spark name can map to multiple snowpark names
-                self.spark_to_col[spark_name_including_qualifier].append(c)
-                self.uppercase_spark_to_col[
-                    spark_name_including_qualifier.upper()
-                ].append(c)
+                self.spark_to_col[spark_name].append(c)
+                self.uppercase_spark_to_col[spark_name.upper()].append(c)
 
             # the same snowpark name can map to multiple spark column
             # e.g. df.select(date_format('dt', 'yyy'), date_format('dt', 'yyyy')) ->
@@ -353,18 +337,77 @@ class ColumnNameMap:
 
         snowpark_names_len = len(snowpark_names)
         if snowpark_names_len > 1:
-            raise AnalysisException(
-                f"Ambiguous spark column name {spark_column_name}, potential snowpark column names {snowpark_names}"
-            )
+            # Check if this is a case where we have identical expressions that can be safely resolved to the first one
+            # This commonly happens with GROUP BY expressions that also appear in SELECT clauses
+            if (
+                get_is_processing_order_by()
+                and self._can_resolve_ambiguous_identical_expressions(
+                    resolved_name, snowpark_names
+                )
+            ):
+                # All the ambiguous columns represent the same expression, so we can safely use the first one
+                return snowpark_names[0]
+            else:
+                exception = AnalysisException(
+                    f"Ambiguous spark column name {spark_column_name}, potential snowpark column names {snowpark_names}"
+                )
+                attach_custom_error_code(exception, ErrorCodes.AMBIGUOUS_COLUMN_NAME)
+                raise exception
         elif snowpark_names_len == 0:
             if allow_non_exists:
                 return None
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Spark column name {spark_column_name} does not exist"
                 )
+                attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                raise exception
         return snowpark_names[0]
 
+    def _can_resolve_ambiguous_identical_expressions(
+        self, spark_column_name: str, snowpark_names: list[str]
+    ) -> bool:
+        """
+        Determine if ambiguous columns represent identical expressions that can be safely resolved to the first one.
+
+        This handles the common case where the same expression (like a UDF call) appears multiple times
+        in a SELECT clause within a GROUP BY query. Since they're the same expression operating on the
+        same grouped data, they will have identical values, so we can safely resolve to any of them.
+
+        Args:
+            spark_column_name: The Spark column name that has multiple mappings; make sure to resolve this beforehand
+            snowpark_names: List of Snowpark column names that map to this Spark column name
+
+        Returns:
+            True if we can safely resolve to the first snowpark column, False otherwise
+        """
+        if spark_column_name not in self.spark_to_col:
+            return False
+
+        columns: list[ColumnNames] = self.spark_to_col[spark_column_name]
+
+        # If we don't have multiple columns, there's no ambiguity to resolve
+        if len(columns) <= 1:
+            return False
+
+        # Check if all the snowpark names correspond to columns that have identical underlying expressions
+        # We'll compare the actual column objects to see if they represent the same computation
+        first_column = columns[0]
+
+        for column in columns[1:]:
+            if first_column.qualifiers != column.qualifiers:
+                return False
+
+        # Additional safety check: ensure all snowpark names are actually in our mapping
+        for snowpark_name in snowpark_names:
+            if snowpark_name not in self.snowpark_to_col:
+                return False
+
+        # If we reach here, the columns appear to be identical expressions from the same context
+        # This commonly happens in GROUP BY scenarios where the same expression appears in both
+        # the grouping clause and the select clause
+        return True
+
     def get_spark_column_names_from_snowpark_column_names(
         self,
         snowpark_column_names: list[str],
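
Note (illustrative, not part of the diff): get_is_processing_order_by() gates the new fallback, so the relaxed resolution only applies while an ORDER BY is being mapped. A hedged sketch of the query shape the docstring describes; the table, column, and UDF names are hypothetical:

    # Previously this could raise "Ambiguous spark column name ..." because the grouping
    # expression also appears in the SELECT list; with this change the identical
    # expressions resolve to the first matching column while the ORDER BY is processed.
    spark.sql("""
        SELECT my_udf(category), COUNT(*)
        FROM sales
        GROUP BY my_udf(category)
        ORDER BY my_udf(category)
    """)
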
@@ -390,16 +433,20 @@ class ColumnNameMap:
         )
         spark_names_len = len(spark_names)
         if spark_names_len > 1:
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"Ambiguous snowpark column name {snowpark_column_name}, potential spark column names {spark_names}"
             )
+            attach_custom_error_code(exception, ErrorCodes.AMBIGUOUS_COLUMN_NAME)
+            raise exception
         elif spark_names_len == 0:
             if allow_non_exists:
                 return None
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Snowpark column name {snowpark_column_name} does not exist"
                 )
+                attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                raise exception
         return spark_names[0]
 
     def get_spark_column_name(self, idx: int) -> str:
@@ -409,32 +456,30 @@ class ColumnNameMap:
         return [c.spark_name for c in self.columns]
 
     def get_spark_and_snowpark_columns_with_qualifier_for_qualifier(
-        self, qualifiers_input: list[str]
-    ) -> tuple[list[str], list[str], list[list[str]]]:
+        self, target_qualifier: ColumnQualifier
+    ) -> tuple[list[str], list[str], list[set[ColumnQualifier]]]:
         """
-        Returns the Spark and Snowpark column names along with their qualifiers for the specified qualifiers.
-        If a column does not have a qualifier, it will be None.
+        Returns the Spark and Snowpark column names along with their qualifiers for the specified qualifier.
         """
-        spark_columns = []
-        snowpark_columns = []
-        qualifiers = []
+        spark_columns: list[str] = []
+        snowpark_columns: list[str] = []
+        qualifiers: list[set[ColumnQualifier]] = []
 
+        normalized_qualifier = target_qualifier
         if not self.is_case_sensitive():
-            qualifiers_input = [q.upper() for q in qualifiers_input]
+            normalized_qualifier = target_qualifier.to_upper()
 
-        for c in self.columns:
-            col_qualifiers = (
-                [q.upper() for q in c.qualifiers]
+        for column in self.columns:
+            # Normalize all qualifiers for comparison
+            column_qualifiers: set[ColumnQualifier] = (
+                {q.to_upper() for q in iter(column.qualifiers)}
                 if not self.is_case_sensitive()
-                else c.qualifiers
+                else column.qualifiers
             )
-            if len(col_qualifiers) < len(qualifiers_input):
-                # If the column has fewer qualifiers than the input, it cannot match
-                continue
-            if col_qualifiers[-len(qualifiers_input) :] == qualifiers_input:
-                spark_columns.append(c.spark_name)
-                snowpark_columns.append(c.snowpark_name)
-                qualifiers.append(c.qualifiers)
+            if any([q.matches(normalized_qualifier) for q in column_qualifiers]):
+                spark_columns.append(column.spark_name)
+                snowpark_columns.append(column.snowpark_name)
+                qualifiers.append(column.qualifiers)
 
         return spark_columns, snowpark_columns, qualifiers
 
@@ -448,19 +493,17 @@ class ColumnNameMap:
             if self._quote_if_unquoted(c) not in cols_to_drop
         ]
 
-    def get_qualifiers(self) -> list[list[str]]:
+    def get_qualifiers(self) -> list[set[ColumnQualifier]]:
         """
         Returns the qualifiers for the columns.
-        If a column does not have a qualifier, it will be None.
         """
         return [c.qualifiers for c in self.columns]
 
     def get_qualifiers_for_columns_after_drop(
         self, cols_to_drop: list[str]
-    ) -> list[list[str]]:
+    ) -> list[set[ColumnQualifier]]:
         """
         Returns the qualifiers for the columns after dropping the specified columns.
-        If a column is dropped, its qualifier will be None.
         """
         return [
             c.qualifiers
@@ -471,10 +514,25 @@ class ColumnNameMap:
     def get_qualifier_for_spark_column(
         self,
         spark_column_name: str,
-    ) -> list[str]:
+    ) -> ColumnQualifier:
+        """
+        Backward compatibility: returns the first qualifier for the given Spark column name.
+        Throws if more than one qualifier exists.
+        """
+        qualifiers = self.get_qualifiers_for_spark_column(spark_column_name)
+        if len(qualifiers) > 1:
+            raise ValueError(
+                "Shouldn't happen. Multiple qualifiers found; expected only one."
+            )
+        return next(iter(qualifiers))
+
+    def get_qualifiers_for_spark_column(
+        self,
+        spark_column_name: str,
+    ) -> set[ColumnQualifier]:
         """
         Returns the qualifier for the specified Spark column name.
-        If the column does not exist, returns None.
+        If the column does not exist, returns empty ColumnQualifier.
         """
         if not self.is_case_sensitive():
             name = spark_column_name.upper()
@@ -486,7 +544,7 @@ class ColumnNameMap:
         col = mapping.get(name)
 
         if col is None or len(col) == 0:
-            return []
+            return {ColumnQualifier.no_qualifier()}
 
         return col[0].qualifiers
 
@@ -518,7 +576,7 @@ class ColumnNameMap:
 
     def with_columns(
         self, new_spark_columns: list[str], new_snowpark_columns: list[str]
-    ) -> tuple[list[str], list[str], list[list[str]]]:
+    ) -> tuple[list[str], list[str], list[set[ColumnQualifier]]]:
         """
         Returns an ordered list of spark and snowpark column names after adding the new columns through a withColumns call.
         All replaced columns retain their ordering in the dataframe. The new columns are added to the end of the list.
@@ -547,7 +605,7 @@ class ColumnNameMap:
                 removed_index.add(index)
                 spark_columns.append(new_spark_columns[index])
                 snowpark_columns.append(new_snowpark_columns[index])
-                qualifiers.append([])
+                qualifiers.append({ColumnQualifier.no_qualifier()})
             else:
                 spark_columns.append(c.spark_name)
                 snowpark_columns.append(c.snowpark_name)
@@ -557,7 +615,7 @@ class ColumnNameMap:
            if i not in removed_index:
                spark_columns.append(new_spark_columns[i])
                snowpark_columns.append(new_snowpark_columns[i])
-                qualifiers.append([])
+                qualifiers.append({ColumnQualifier.no_qualifier()})
 
        return spark_columns, snowpark_columns, qualifiers
 
@@ -604,14 +662,18 @@ class JoinColumnNameMap(ColumnNameMap):
             if allow_non_exists:
                 return None
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Spark column name {spark_column_name} does not exist in either left or right DataFrame"
                 )
+                attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                raise exception
 
         if (snowpark_column_name_in_right is not None) and (
             snowpark_column_name_in_left is not None
         ):
-            raise AnalysisException(f"Ambiguous column name {spark_column_name}")
+            exception = AnalysisException(f"Ambiguous column name {spark_column_name}")
+            attach_custom_error_code(exception, ErrorCodes.AMBIGUOUS_COLUMN_NAME)
+            raise exception
 
         snowpark_name = (
             snowpark_column_name_in_right
@@ -637,60 +699,94 @@ class JoinColumnNameMap(ColumnNameMap):
     def get_snowpark_column_names_from_spark_column_names(
         self, spark_column_names: list[str], return_first: bool = False
     ) -> list[str]:
-        raise NotImplementedError("Method not implemented!")
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def get_spark_column_names_from_snowpark_column_names(
         self,
         snowpark_column_names: list[str],
     ) -> list[str]:
-        raise NotImplementedError("Method not implemented!")
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def get_spark_column_name_from_snowpark_column_name(
-        self, snowpark_column_name: str
+        self,
+        snowpark_column_name: str,
+        allow_non_exists: bool = False,
     ) -> str:
-        raise NotImplementedError("Method not implemented!")
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def get_spark_columns(self) -> list[str]:
-        raise NotImplementedError("Method not implemented!")
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def get_snowpark_columns(self) -> list[str]:
-        raise NotImplementedError("Method not implemented!")
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def get_snowpark_columns_after_drop(self, cols_to_drop: list[str]) -> list[str]:
-        raise NotImplementedError("Method not implemented!")
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def get_renamed_nested_column_name(self, name) -> str | None:
-        raise NotImplementedError("Method not implemented!")
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def has_spark_column(self, spark_column_name: str) -> bool:
-        raise NotImplementedError("Method not implemented!")
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def snowpark_to_spark_map(self) -> dict[str, str]:
-        raise NotImplementedError("Method not implemented!")
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def spark_to_snowpark_for_pattern(self, pattern: str) -> list[tuple[str, str]]:
-        raise NotImplementedError("Method not implemented!")
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def with_columns(
         self, new_spark_columns: list[str], new_snowpark_columns: list[str]
-    ) -> tuple[list[str], list[str], list[list[str]]]:
-        raise NotImplementedError("Method not implemented!")
+    ) -> tuple[list[str], list[str], list[set[ColumnQualifier]]]:
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
-    def get_qualifiers(self) -> list[list[str]]:
-        raise NotImplementedError("Method not implemented!")
+    def get_qualifiers(self) -> list[set[ColumnQualifier]]:
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def get_qualifiers_for_columns_after_drop(
         self, cols_to_drop: list[str]
-    ) -> list[list[str]]:
-        raise NotImplementedError("Method not implemented!")
+    ) -> list[set[ColumnQualifier]]:
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     def get_spark_and_snowpark_columns_with_qualifier_for_qualifier(
-        self, qualifiers_input: list[str]
-    ) -> tuple[list[str], list[str], list[list[str]]]:
-        raise NotImplementedError("Method not implemented!")
-
-    def get_qualifier_for_spark_column(self, spark_column_name: str) -> list[str]:
-
+        self, target_qualifier: list[str]
+    ) -> tuple[list[str], list[str], list[set[ColumnQualifier]]]:
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
+
+    def get_qualifiers_for_spark_column(
+        self, spark_column_name: str
+    ) -> set[ColumnQualifier]:
+        return {self.get_qualifier_for_spark_column(spark_column_name)}
+
+    def get_qualifier_for_spark_column(self, spark_column_name: str) -> ColumnQualifier:
         qualifier_left = self.left_column_mapping.get_qualifier_for_spark_column(
             spark_column_name
         )
@@ -698,7 +794,9 @@ class JoinColumnNameMap(ColumnNameMap):
             spark_column_name
         )
 
-        if (len(qualifier_left) > 0) and (len(qualifier_right) > 0):
-            raise AnalysisException(f"Ambiguous column name {spark_column_name}")
+        if (not qualifier_left.is_empty) and (not qualifier_right.is_empty):
+            exception = AnalysisException(f"Ambiguous column name {spark_column_name}")
+            attach_custom_error_code(exception, ErrorCodes.AMBIGUOUS_COLUMN_NAME)
+            raise exception
 
-        return qualifier_right if len(qualifier_left) == 0 else qualifier_left
+        return qualifier_right if qualifier_left.is_empty else qualifier_left
snowflake/snowpark_connect/column_qualifier.py (new file)
@@ -0,0 +1,47 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from snowflake.snowpark._internal.analyzer.analyzer_utils import (
+    quote_name_without_upper_casing,
+)
+
+
+@dataclass(frozen=True)
+class ColumnQualifier:
+    parts: tuple[str, ...]
+
+    def __post_init__(self) -> None:
+        if not all(isinstance(x, str) for x in self.parts):
+            raise TypeError("ColumnQualifier.parts must be strings")
+
+    @property
+    def is_empty(self) -> bool:
+        return len(self.parts) == 0
+
+    @classmethod
+    def no_qualifier(cls) -> ColumnQualifier:
+        return cls(())
+
+    def all_qualified_names(self, name: str) -> list[str]:
+        qualifier_parts = self.parts
+        qualifier_prefixes = [
+            ".".join(quote_name_without_upper_casing(x) for x in qualifier_parts[i:])
+            for i in range(len(qualifier_parts))
+        ]
+        return [f"{prefix}.{name}" for prefix in qualifier_prefixes]
+
+    def to_upper(self):
+        return ColumnQualifier(tuple(part.upper() for part in self.parts))
+
+    def matches(self, target: ColumnQualifier) -> bool:
+        if self.is_empty or target.is_empty:
+            return False
+        # If the column has fewer qualifiers than the target, it cannot match
+        if len(self.parts) < len(target.parts):
+            return False
+        return self.parts[-len(target.parts) :] == target.parts
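
Note (illustrative, not part of the diff): a minimal sketch of how the new ColumnQualifier behaves, derived from the implementation above; the qualifier and column names are made up:

    q = ColumnQualifier(("db", "table"))
    q.all_qualified_names("id")                     # ['"db"."table".id', '"table".id']
    q.matches(ColumnQualifier(("table",)))          # True: suffix match on qualifier parts
    q.matches(ColumnQualifier(("other", "table")))  # False: parts differ
    ColumnQualifier.no_qualifier().is_empty         # True: an empty qualifier never matches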