snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of snowpark-connect might be problematic.

Files changed (87)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/column_name_handler.py +200 -102
  3. snowflake/snowpark_connect/column_qualifier.py +47 -0
  4. snowflake/snowpark_connect/config.py +51 -16
  5. snowflake/snowpark_connect/dataframe_container.py +3 -2
  6. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  7. snowflake/snowpark_connect/error/error_codes.py +50 -0
  8. snowflake/snowpark_connect/error/error_utils.py +142 -22
  9. snowflake/snowpark_connect/error/exceptions.py +13 -4
  10. snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
  11. snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
  12. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  13. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  14. snowflake/snowpark_connect/expression/literal.py +7 -1
  15. snowflake/snowpark_connect/expression/map_cast.py +17 -5
  16. snowflake/snowpark_connect/expression/map_expression.py +53 -8
  17. snowflake/snowpark_connect/expression/map_extension.py +37 -11
  18. snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
  19. snowflake/snowpark_connect/expression/map_udf.py +10 -2
  20. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
  21. snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
  22. snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
  23. snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
  24. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  25. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  26. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
  27. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  28. snowflake/snowpark_connect/relation/io_utils.py +6 -1
  29. snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
  30. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  31. snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
  32. snowflake/snowpark_connect/relation/map_extension.py +38 -17
  33. snowflake/snowpark_connect/relation/map_join.py +26 -12
  34. snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
  35. snowflake/snowpark_connect/relation/map_relation.py +33 -7
  36. snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
  37. snowflake/snowpark_connect/relation/map_sql.py +124 -25
  38. snowflake/snowpark_connect/relation/map_stats.py +5 -1
  39. snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
  40. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  41. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
  42. snowflake/snowpark_connect/relation/read/map_read.py +15 -3
  43. snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
  44. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  45. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
  46. snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
  47. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  48. snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
  49. snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
  50. snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
  51. snowflake/snowpark_connect/relation/stage_locator.py +5 -1
  52. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  53. snowflake/snowpark_connect/relation/write/map_write.py +160 -48
  54. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  55. snowflake/snowpark_connect/resources_initializer.py +5 -1
  56. snowflake/snowpark_connect/server.py +73 -21
  57. snowflake/snowpark_connect/type_mapping.py +90 -20
  58. snowflake/snowpark_connect/typed_column.py +8 -6
  59. snowflake/snowpark_connect/utils/context.py +42 -1
  60. snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
  61. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  62. snowflake/snowpark_connect/utils/identifiers.py +11 -3
  63. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  64. snowflake/snowpark_connect/utils/profiling.py +25 -8
  65. snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
  66. snowflake/snowpark_connect/utils/session.py +24 -4
  67. snowflake/snowpark_connect/utils/telemetry.py +6 -0
  68. snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
  69. snowflake/snowpark_connect/utils/udf_cache.py +5 -3
  70. snowflake/snowpark_connect/utils/udf_helper.py +20 -6
  71. snowflake/snowpark_connect/utils/udf_utils.py +4 -4
  72. snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
  73. snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
  74. snowflake/snowpark_connect/version.py +1 -1
  75. snowflake/snowpark_decoder/dp_session.py +1 -1
  76. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
  77. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
  78. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
  79. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
  80. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
  81. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
  82. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
  83. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
  84. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
  85. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
  86. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
  87. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
@@ -8,6 +8,8 @@ import pandas
 import pyspark.sql.connect.proto.catalog_pb2 as catalog_proto

 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.catalogs import CATALOGS
 from snowflake.snowpark_connect.relation.catalogs.utils import (
     CURRENT_CATALOG_NAME,
@@ -148,4 +150,6 @@ def map_catalog(
             return get_current_catalog().uncacheTable(rel.uncache_table.table_name)
         case other:
             # TODO: list_function implementation is blocked on SNOW-1787268
-            raise SnowparkConnectNotImplementedError(f"Other Relation {other}")
+            exception = SnowparkConnectNotImplementedError(f"Other Relation {other}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
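
A recurring pattern in this release is that bare `raise` statements are replaced by constructing the exception, tagging it via `attach_custom_error_code`, and then re-raising it. The helper's internals are not part of this diff; the sketch below only illustrates one plausible shape (the attribute name and the enum members shown are assumptions, not the actual error_utils implementation):

# Illustrative sketch only: how an attach-and-raise helper could be shaped.
# The real attach_custom_error_code lives in snowflake/snowpark_connect/error/error_utils.py
# and is not shown in this diff; the attribute name below is an assumption.
from enum import Enum


class ErrorCodes(Enum):  # assumed shape of the real ErrorCodes enum
    UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION"
    INVALID_INPUT = "INVALID_INPUT"


def attach_custom_error_code(exception: BaseException, code: ErrorCodes) -> BaseException:
    # Store the code on the exception so downstream handlers or telemetry can read it
    # without changing the exception type or its message.
    exception.custom_error_code = code
    return exception


# Usage mirroring the pattern introduced throughout this release:
exception = NotImplementedError("Other Relation ...")
attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
# raise exception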
@@ -29,11 +29,16 @@ from snowflake.snowpark.column import Column
 from snowflake.snowpark.table_function import _ExplodeFunctionCall
 from snowflake.snowpark.types import DataType, StructField, StructType, _NumericType
 from snowflake.snowpark_connect.column_name_handler import (
+    ColumnQualifier,
     make_column_names_snowpark_compatible,
 )
 from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
-from snowflake.snowpark_connect.error.error_utils import SparkException
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import (
+    SparkException,
+    attach_custom_error_code,
+)
 from snowflake.snowpark_connect.expression.map_expression import (
     map_alias,
     map_expression,
@@ -369,56 +374,64 @@ def map_sort(
         for col in input_container.column_map.get_spark_columns()
     ]

-    for so in sort_order:
-        if so.child.HasField("literal"):
-            column_index = unwrap_literal(so.child)
-            try:
-                if column_index <= 0:
-                    raise IndexError
-                col = input_df[column_index - 1]
-            except IndexError:
-                raise AnalysisException(
-                    f"""[ORDER_BY_POS_OUT_OF_RANGE] ORDER BY position {column_index} is not in select list (valid range is [1, {len(input_df.columns)})])."""
+    # Process ORDER BY expressions with a context flag to enable column reuse optimization
+    from snowflake.snowpark_connect.utils.context import push_processing_order_by_scope
+
+    with push_processing_order_by_scope():
+        for so in sort_order:
+            if so.child.HasField("literal"):
+                column_index = unwrap_literal(so.child)
+                try:
+                    if column_index <= 0:
+                        exception = IndexError()
+                        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                        raise exception
+                    col = input_df[column_index - 1]
+                except IndexError:
+                    exception = AnalysisException(
+                        f"""[ORDER_BY_POS_OUT_OF_RANGE] ORDER BY position {column_index} is not in select list (valid range is [1, {len(input_df.columns)})])."""
+                    )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
+            else:
+                _, typed_column = map_single_column_expression(
+                    so.child, input_container.column_map, typer
                 )
-        else:
-            _, typed_column = map_single_column_expression(
-                so.child, input_container.column_map, typer
-            )
-            col = typed_column.col
+                col = typed_column.col

-        match (so.direction, so.null_ordering):
-            case (
-                expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING,
-                expressions_proto.Expression.SortOrder.SORT_NULLS_FIRST,
-            ):
-                col = col.asc_nulls_first()
-            case (
-                expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING,
-                expressions_proto.Expression.SortOrder.SORT_NULLS_LAST,
-            ):
-                col = col.asc_nulls_last()
-            case (
-                expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING,
-                expressions_proto.Expression.SortOrder.SORT_NULLS_FIRST,
-            ):
-                col = col.desc_nulls_first()
-            case (
-                expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING,
-                expressions_proto.Expression.SortOrder.SORT_NULLS_LAST,
-            ):
-                col = col.desc_nulls_last()
+            match (so.direction, so.null_ordering):
+                case (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING,
+                    expressions_proto.Expression.SortOrder.SORT_NULLS_FIRST,
+                ):
+                    col = col.asc_nulls_first()
+                case (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING,
+                    expressions_proto.Expression.SortOrder.SORT_NULLS_LAST,
+                ):
+                    col = col.asc_nulls_last()
+                case (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING,
+                    expressions_proto.Expression.SortOrder.SORT_NULLS_FIRST,
+                ):
+                    col = col.desc_nulls_first()
+                case (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING,
+                    expressions_proto.Expression.SortOrder.SORT_NULLS_LAST,
+                ):
+                    col = col.desc_nulls_last()

-        cols.append(col)
+            cols.append(col)

-        ascending.append(
-            so.direction
-            == expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING
-        )
-        if (
-            so.direction
-            != expressions_proto.Expression.SortOrder.SORT_DIRECTION_UNSPECIFIED
-        ):
-            order_specified = True
+            ascending.append(
+                so.direction
+                == expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING
+            )
+            if (
+                so.direction
+                != expressions_proto.Expression.SortOrder.SORT_DIRECTION_UNSPECIFIED
+            ):
+                order_specified = True

     # TODO: sort.isglobal.
     if not order_specified:
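
The new `with push_processing_order_by_scope():` wrapper suggests a scoped flag that expression mapping can consult while ORDER BY expressions are being translated (the comment in the hunk mentions a column reuse optimization). The helper's implementation in utils/context.py is not shown in this diff; the following is only a minimal sketch of how such a scope could be built, assuming a contextvars-based flag and a hypothetical reader function:

# Minimal sketch of a scoped "processing ORDER BY" flag, assuming contextvars.
# The real push_processing_order_by_scope in snowflake/snowpark_connect/utils/context.py
# is not shown in this diff; is_processing_order_by() is a hypothetical reader.
import contextvars
from contextlib import contextmanager

_processing_order_by = contextvars.ContextVar("processing_order_by", default=False)


@contextmanager
def push_processing_order_by_scope():
    token = _processing_order_by.set(True)  # mark that ORDER BY expressions are being mapped
    try:
        yield
    finally:
        _processing_order_by.reset(token)  # restore the previous value on exit


def is_processing_order_by() -> bool:
    return _processing_order_by.get()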
@@ -446,9 +459,11 @@ def map_to_df(
     new_column_names = list(rel.to_df.column_names)
     if len(new_column_names) != len(input_container.column_map.columns):
         # TODO: Check error type here
-        raise ValueError(
+        exception = ValueError(
             "Number of column names must match number of columns in DataFrame"
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
     snowpark_new_column_names = make_column_names_snowpark_compatible(
         new_column_names, rel.common.plan_id
     )
@@ -507,9 +522,11 @@ def map_to_schema(
     for field in rel.to_schema.schema.struct.fields:
         if field.name in already_existing_columns:
             if count_case_insensitive_column_names[field.name.lower()] > 1:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"[AMBIGUOUS_COLUMN_OR_FIELD] Column or field `{field.name}` is ambiguous and has {len(input_container.column_map.spark_to_col[field.name])} matches."
                 )
+                attach_custom_error_code(exception, ErrorCodes.AMBIGUOUS_COLUMN_NAME)
+                raise exception
             snowpark_name = None
             for name in input_container.column_map.spark_to_col:
                 if name.lower() == field.name.lower():
@@ -526,17 +543,23 @@ def map_to_schema(
                 and snowpark_field.nullable
                 and not isinstance(snowpark_field.datatype, StructType)
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"[NULLABLE_COLUMN_OR_FIELD] Column or field `{field.name}` is nullable while it's required to be non-nullable."
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_OPERATION
+                )
+                raise exception

             # Check type casting validation
             if not _can_cast_column_in_schema(
                 snowpark_field.datatype, proto_to_snowpark_type(field.data_type)
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"""[INVALID_COLUMN_OR_FIELD_DATA_TYPE] Column or field `{field.name}` is of type "{map_snowpark_to_pyspark_types(proto_to_snowpark_type(field.data_type))}" while it's required to be "{map_snowpark_to_pyspark_types(snowpark_field.datatype)}"."""
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
     if len(already_existing_columns) == len(new_column_names):
         # All columns already exist, we're doing a simple update.
         snowpark_new_column_names = []
@@ -761,9 +784,11 @@ def map_with_columns(
         name = names_list[0]
         name_normalized = input_container.column_map._normalized_spark_name(name)
         if name_normalized in seen_columns:
-            raise ValueError(
+            exception = ValueError(
                 f"[COLUMN_ALREADY_EXISTS] The column `{name}` already exists."
            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception
         seen_columns.add(name_normalized)
         # If the column name is already in the DataFrame, we replace it, so we use the
         # mapping to get the correct column name.
@@ -772,7 +797,9 @@ def map_with_columns(
             [name]
         )
         if len(all_instances_of_spark_column_name) == 0:
-            raise KeyError(f"Spark column name {name} does not exist")
+            exception = KeyError(f"Spark column name {name} does not exist")
+            attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+            raise exception
         with_columns_names.extend(all_instances_of_spark_column_name)
         with_columns_exprs.extend(
             [expr.col] * len(all_instances_of_spark_column_name)
@@ -852,7 +879,9 @@ def map_unpivot(
     # Spark API: df.unpivot([id_columns], [unpivot_columns], var_column, val_column)
     # Snowpark API: df.unpivot(val_column, var_column, [unpivot_columns])
     if rel.unpivot.HasField("values") and len(rel.unpivot.values.values) == 0:
-        raise SparkException.unpivot_requires_value_columns()
+        exception = SparkException.unpivot_requires_value_columns()
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception

     input_container = map_relation(rel.unpivot.input)
     input_df = input_container.dataframe
@@ -893,7 +922,7 @@ def map_unpivot(
         )
         if not get_lease_common_ancestor_classes(type_list):
             # TODO: match exactly how spark shows mismatched columns
-            raise SparkException.unpivot_value_data_type_mismatch(
+            exception = SparkException.unpivot_value_data_type_mismatch(
                 ", ".join(
                     [
                         f"{dtype} {column_name}"
@@ -901,6 +930,8 @@ def map_unpivot(
                     ]
                 )
            )
+            attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+            raise exception
         return not is_same_type and contains_numeric_type

     def get_column_names(
@@ -984,7 +1015,7 @@ def map_unpivot(
     column_project = []
     column_reverse_project = []
     snowpark_columns = []
-    qualifiers = []
+    qualifiers: list[set[ColumnQualifier]] = []
     for c in input_container.column_map.get_snowpark_columns():
         c_name = snowpark_functions_col(c, input_container.column_map).get_name()
         if c_name in unpivot_col_names:
@@ -1012,7 +1043,7 @@ def map_unpivot(
            )
            snowpark_columns.append(c)
            qualifiers.append(
-                input_container.column_map.get_qualifier_for_spark_column(c)
+                input_container.column_map.get_qualifiers_for_spark_column(c)
            )

     # Without the case when postprocessing, the result Spark dataframe is:
@@ -1057,7 +1088,7 @@ def map_unpivot(
        snowpark_functions_col(snowpark_value_column_name, input_container.column_map)
    )
    snowpark_columns.append(snowpark_value_column_name)
-    qualifiers.extend([[]] * 2)
+    qualifiers.extend([set() for _ in range(2)])

    result = (
        input_df.select(*column_project)
@@ -1097,7 +1128,9 @@ def map_group_map(
        snowpark_grouping_expressions.append(snowpark_column.col)
        group_name_list.append(new_name)
    if rel.group_map.func.python_udf is None:
-        raise ValueError("group_map relation without python udf is not supported")
+        exception = ValueError("group_map relation without python udf is not supported")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

    python_major, python_minor = rel.group_map.func.python_udf.python_ver.split(".")
    is_compatible_python = sys.version_info.major == int(
@@ -15,8 +15,11 @@ from snowflake.snowpark_connect.column_name_handler import (
     ColumnNameMap,
     make_column_names_snowpark_compatible,
 )
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.config import get_boolean_session_config_param
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.map_expression import map_expression
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.relation.map_relation import map_relation
@@ -84,11 +87,13 @@ def map_extension(
             input_df = result.dataframe
             snowpark_col_names = result.column_map.get_snowpark_columns()
             if len(subquery_aliases.aliases) != len(snowpark_col_names):
-                raise AnalysisException(
+                exception = AnalysisException(
                     "Number of column aliases does not match number of columns. "
                     f"Number of column aliases: {len(subquery_aliases.aliases)}; "
                     f"number of columns: {len(snowpark_col_names)}."
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                raise exception
             return DataFrameContainer.create_with_column_mapping(
                 dataframe=input_df,
                 spark_column_names=subquery_aliases.aliases,
@@ -108,18 +113,22 @@ def map_extension(

             left_queries = left_df.queries["queries"]
             if len(left_queries) != 1:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"Unexpected number of queries: {len(left_queries)}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
             left_query = left_queries[0]
             with push_outer_dataframe(left_result):
                 right_result = map_relation(lateral_join.right)
                 right_df = right_result.dataframe
                 right_queries = right_df.queries["queries"]
                 if len(right_queries) != 1:
-                    raise SnowparkConnectNotImplementedError(
+                    exception = SnowparkConnectNotImplementedError(
                         f"Unexpected number of queries: {len(right_queries)}"
                     )
+                    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                    raise exception
                 right_query = right_queries[0]
                 input_df_sql = f"WITH __left AS ({left_query}) SELECT * FROM __left INNER JOIN LATERAL ({right_query})"
                 session = snowpark.Session.get_active_session()
@@ -139,7 +148,11 @@ def map_extension(
         case "aggregate":
             return map_aggregate(extension.aggregate, rel.common.plan_id)
         case other:
-            raise SnowparkConnectNotImplementedError(f"Unexpected extension {other}")
+            exception = SnowparkConnectNotImplementedError(
+                f"Unexpected extension {other}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception


 def get_udtf_project(relation: relation_proto.Relation) -> bool:
@@ -166,7 +179,7 @@ def get_udtf_project(relation: relation_proto.Relation) -> bool:


 def handle_udtf_with_table_arguments(
     udtf_info: snowflake_proto.UDTFWithTableArguments,
-) -> snowpark.DataFrame:
+) -> DataFrameContainer:
     """
     Handle UDTF with one or more table arguments using Snowpark's join_table_function.
     For multiple table arguments, this creates a Cartesian product of all input tables.
@@ -174,7 +187,9 @@ def handle_udtf_with_table_arguments(
     session = snowpark.Session.get_active_session()
     udtf_name_lower = udtf_info.function_name.lower()
     if udtf_name_lower not in session._udtfs:
-        raise ValueError(f"UDTF '{udtf_info.function_name}' not found.")
+        exception = ValueError(f"UDTF '{udtf_info.function_name}' not found.")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
     _udtf_obj, udtf_spark_output_names = session._udtfs[udtf_name_lower]

     table_containers = []
@@ -188,10 +203,12 @@ def handle_udtf_with_table_arguments(

         if not get_boolean_session_config_param(
             "spark.sql.tvf.allowMultipleTableArguments.enabled"
         ):
-            raise AnalysisException(
+            exception = AnalysisException(
                 "[TABLE_VALUED_FUNCTION_TOO_MANY_TABLE_ARGUMENTS] Multiple table arguments are not enabled. "
                 "Please set `spark.sql.tvf.allowMultipleTableArguments.enabled` to `true`"
             )
+            attach_custom_error_code(exception, ErrorCodes.CONFIG_NOT_ENABLED)
+            raise exception

     base_df = table_containers[0][0].dataframe
     first_table_col_count = len(base_df.columns)
@@ -270,7 +287,7 @@ def handle_lateral_join_with_udtf(
     left_result: DataFrameContainer,
     udtf_relation: relation_proto.Relation,
     udtf_info: tuple[snowpark.udtf.UserDefinedTableFunction, list],
-) -> snowpark.DataFrame:
+) -> DataFrameContainer:
     """
     Handle lateral join with UDTF on the right side using join_table_function.
     """
@@ -303,7 +320,7 @@ def handle_lateral_join_with_udtf(

 def map_aggregate(
     aggregate: snowflake_proto.Aggregate, plan_id: int
-) -> snowpark.DataFrame:
+) -> DataFrameContainer:
     input_container = map_relation(aggregate.input)
     input_df: snowpark.DataFrame = input_container.dataframe

@@ -339,13 +356,15 @@ def map_aggregate(
             exp, input_container.column_map, typer
         )
         if len(new_names) != 1:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Multi-column aggregate expressions are not supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         return new_names[0], snowpark_column

     raw_groupings: list[tuple[str, TypedColumn]] = []
-    raw_aggregations: list[tuple[str, TypedColumn, list[str]]] = []
+    raw_aggregations: list[tuple[str, TypedColumn, set[ColumnQualifier]]] = []

     if not is_group_by_all:
         raw_groupings = [_map_column(exp) for exp in aggregate.grouping_expressions]
@@ -383,11 +402,11 @@ def map_aggregate(
         col = _map_column(exp)
         if exp.WhichOneof("expr_type") == "unresolved_attribute":
             spark_name = col[0]
-            qualifiers = input_container.column_map.get_qualifier_for_spark_column(
-                spark_name
-            )
+            qualifiers: set[
+                ColumnQualifier
+            ] = input_container.column_map.get_qualifiers_for_spark_column(spark_name)
         else:
-            qualifiers = []
+            qualifiers = set()

         raw_aggregations.append((col[0], col[1], qualifiers))

@@ -420,7 +439,7 @@ def map_aggregate(
     spark_columns: list[str] = []
     snowpark_columns: list[str] = []
     snowpark_column_types: list[snowpark_types.DataType] = []
-    all_qualifiers: list[list[str]] = []
+    all_qualifiers: list[set[ColumnQualifier]] = []

     # Use grouping columns directly without aliases
     groupings = [col.col for _, col in raw_groupings]
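
Several hunks in this release change column qualifiers from plain lists (`list[str]` / `list[list[str]]`) to `set[ColumnQualifier]`, backed by the new snowflake/snowpark_connect/column_qualifier.py module (+47 lines). The class itself is not shown in this diff; a frozen dataclass is one natural way to make qualifiers hashable so they can live in sets, as in this purely illustrative sketch:

# Illustrative sketch only: a hashable qualifier type that can be stored in sets.
# The actual ColumnQualifier in snowflake/snowpark_connect/column_qualifier.py is not
# shown in this diff; the field name "parts" is an assumption.
from dataclasses import dataclass


@dataclass(frozen=True)
class ColumnQualifier:
    parts: tuple[str, ...]  # e.g. ("schema", "table") or ("subquery_alias",)


qualifiers: set[ColumnQualifier] = set()
qualifiers.add(ColumnQualifier(("t1",)))
qualifiers.add(ColumnQualifier(("t1",)))  # duplicates collapse, unlike the old list-based tracking
assert len(qualifiers) == 1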
@@ -474,9 +493,11 @@ def map_aggregate(
                 snowpark.GroupingSets(*sets_mapped)
             )
         case other:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Unsupported GROUP BY type: {other}"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

     result = result.agg(*aggregations, exclude_grouping_columns=True)

@@ -5,14 +5,20 @@
 from functools import reduce

 import pyspark.sql.connect.proto.relations_pb2 as relation_proto
+from pyspark.errors import AnalysisException

 import snowflake.snowpark.functions as snowpark_fn
 from snowflake import snowpark
 from snowflake.snowpark_connect.column_name_handler import JoinColumnNameMap
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.constants import COLUMN_METADATA_COLLISION_KEY
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
-from snowflake.snowpark_connect.error.error_utils import SparkException
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import (
+    SparkException,
+    attach_custom_error_code,
+)
 from snowflake.snowpark_connect.expression.map_expression import (
     map_single_column_expression,
 )
@@ -62,7 +68,9 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
     match rel.join.join_type:
         case relation_proto.Join.JOIN_TYPE_UNSPECIFIED:
             # TODO: Understand what UNSPECIFIED Join type is
-            raise SnowparkConnectNotImplementedError("Unspecified Join Type")
+            exception = SnowparkConnectNotImplementedError("Unspecified Join Type")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         case relation_proto.Join.JOIN_TYPE_INNER:
             join_type = "inner"
         case relation_proto.Join.JOIN_TYPE_FULL_OUTER:
@@ -78,7 +86,9 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
         case relation_proto.Join.JOIN_TYPE_CROSS:
             join_type = "cross"
         case other:
-            raise SnowparkConnectNotImplementedError(f"Other Join Type: {other}")
+            exception = SnowparkConnectNotImplementedError(f"Other Join Type: {other}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

     # This handles case sensitivity for using_columns
     case_corrected_right_columns: list[str] = []
@@ -124,9 +134,7 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
             is None
             for c in using_columns
         ):
-            import pyspark
-
-            raise pyspark.errors.AnalysisException(
+            exception = AnalysisException(
                 USING_COLUMN_NOT_FOUND_ERROR.format(
                     next(
                         c
@@ -140,6 +148,8 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
                     left_container.column_map.get_spark_columns(),
                 )
             )
+            attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+            raise exception
         if any(
             right_container.column_map.get_snowpark_column_name_from_spark_column_name(
                 c, allow_non_exists=True, return_first=True
@@ -147,9 +157,7 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
             is None
             for c in using_columns
         ):
-            import pyspark
-
-            raise pyspark.errors.AnalysisException(
+            exception = AnalysisException(
                 USING_COLUMN_NOT_FOUND_ERROR.format(
                     next(
                         c
@@ -163,6 +171,8 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
                     right_container.column_map.get_spark_columns(),
                 )
             )
+            attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+            raise exception

     # Round trip the using columns through the column map to get the correct names
     # in order to support case sensitivity.
@@ -227,7 +237,9 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
         result = joined_df.drop(*(right for _, right in snowpark_using_columns))
     else:
         if join_type != "cross" and not global_config.spark_sql_crossJoin_enabled:
-            raise SparkException.implicit_cartesian_product("inner")
+            exception = SparkException.implicit_cartesian_product("inner")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         result: snowpark.DataFrame = left_input.join(
             right=right_input,
             how=join_type,
@@ -256,8 +268,10 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
            ]  # this is to make sure we only remove the column once
        ]

-        qualifiers = list(left_container.column_map.get_qualifiers()) + [
-            right_container.column_map.get_qualifier_for_spark_column(spark_col)
+        qualifiers: list[set[ColumnQualifier]] = list(
+            left_container.column_map.get_qualifiers()
+        ) + [
+            {right_container.column_map.get_qualifier_for_spark_column(spark_col)}
            for i, spark_col in enumerate(
                right_container.column_map.get_spark_columns()
            )
@@ -19,6 +19,8 @@ from snowflake.snowpark_connect.column_name_handler import (
     make_column_names_snowpark_compatible,
 )
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.type_mapping import (
     get_python_sql_utils_class,
     map_json_schema_to_snowpark,
@@ -327,9 +329,11 @@ def map_local_relation(
             column_metadata=column_metadata,
         )
     else:
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "LocalRelation without data & schema is not supported"
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception


 def map_range(