snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of snowpark-connect has been flagged as potentially problematic.
Files changed (87)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/column_name_handler.py +200 -102
  3. snowflake/snowpark_connect/column_qualifier.py +47 -0
  4. snowflake/snowpark_connect/config.py +51 -16
  5. snowflake/snowpark_connect/dataframe_container.py +3 -2
  6. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  7. snowflake/snowpark_connect/error/error_codes.py +50 -0
  8. snowflake/snowpark_connect/error/error_utils.py +142 -22
  9. snowflake/snowpark_connect/error/exceptions.py +13 -4
  10. snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
  11. snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
  12. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  13. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  14. snowflake/snowpark_connect/expression/literal.py +7 -1
  15. snowflake/snowpark_connect/expression/map_cast.py +17 -5
  16. snowflake/snowpark_connect/expression/map_expression.py +53 -8
  17. snowflake/snowpark_connect/expression/map_extension.py +37 -11
  18. snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
  19. snowflake/snowpark_connect/expression/map_udf.py +10 -2
  20. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
  21. snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
  22. snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
  23. snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
  24. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  25. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  26. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
  27. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  28. snowflake/snowpark_connect/relation/io_utils.py +6 -1
  29. snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
  30. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  31. snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
  32. snowflake/snowpark_connect/relation/map_extension.py +38 -17
  33. snowflake/snowpark_connect/relation/map_join.py +26 -12
  34. snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
  35. snowflake/snowpark_connect/relation/map_relation.py +33 -7
  36. snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
  37. snowflake/snowpark_connect/relation/map_sql.py +124 -25
  38. snowflake/snowpark_connect/relation/map_stats.py +5 -1
  39. snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
  40. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  41. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
  42. snowflake/snowpark_connect/relation/read/map_read.py +15 -3
  43. snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
  44. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  45. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
  46. snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
  47. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  48. snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
  49. snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
  50. snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
  51. snowflake/snowpark_connect/relation/stage_locator.py +5 -1
  52. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  53. snowflake/snowpark_connect/relation/write/map_write.py +160 -48
  54. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  55. snowflake/snowpark_connect/resources_initializer.py +5 -1
  56. snowflake/snowpark_connect/server.py +73 -21
  57. snowflake/snowpark_connect/type_mapping.py +90 -20
  58. snowflake/snowpark_connect/typed_column.py +8 -6
  59. snowflake/snowpark_connect/utils/context.py +42 -1
  60. snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
  61. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  62. snowflake/snowpark_connect/utils/identifiers.py +11 -3
  63. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  64. snowflake/snowpark_connect/utils/profiling.py +25 -8
  65. snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
  66. snowflake/snowpark_connect/utils/session.py +24 -4
  67. snowflake/snowpark_connect/utils/telemetry.py +6 -0
  68. snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
  69. snowflake/snowpark_connect/utils/udf_cache.py +5 -3
  70. snowflake/snowpark_connect/utils/udf_helper.py +20 -6
  71. snowflake/snowpark_connect/utils/udf_utils.py +4 -4
  72. snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
  73. snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
  74. snowflake/snowpark_connect/version.py +1 -1
  75. snowflake/snowpark_decoder/dp_session.py +1 -1
  76. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
  77. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
  78. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
  79. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
  80. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
  81. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
  82. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
  83. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
  84. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
  85. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
  86. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
  87. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
@@ -8,6 +8,8 @@ import pandas
  import pyspark.sql.connect.proto.relations_pb2 as relation_proto

  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.utils.cache import (
  df_cache_map_get,
  df_cache_map_put_if_absent,
@@ -103,7 +105,9 @@ def map_relation(
  else:
  # This happens when the relation is empty, usually because the incoming message
  # type was incorrectly routed here.
- raise SnowparkConnectNotImplementedError("No Relation Type")
+ exception = SnowparkConnectNotImplementedError("No Relation Type")
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  result: DataFrameContainer | pandas.DataFrame
  operation = rel.WhichOneof("rel_type")
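Note: the dominant pattern in the hunks below is the same three-line rewrite -- build the exception, attach a structured error code, then raise it. The helpers come from error/error_codes.py (added in this release, +50 lines) and the expanded error/error_utils.py, whose internals are not shown in this diff. A minimal sketch of how such a helper could behave, assuming it simply tags the exception object for downstream handling and telemetry:

    # Hypothetical sketch only; the real error_utils.py is not part of this diff.
    from enum import Enum

    class ErrorCodes(Enum):  # assumed shape of error/error_codes.py
        UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION"
        INTERNAL_ERROR = "INTERNAL_ERROR"

    def attach_custom_error_code(exception: BaseException, code: ErrorCodes) -> BaseException:
        # Tag the exception so upstream error handling can report a stable code.
        exception.custom_error_code = code
        return exception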
@@ -121,11 +125,19 @@ def map_relation(
  case relation_proto.Aggregate.GroupType.GROUP_TYPE_PIVOT:
  result = map_aggregate.map_pivot_aggregate(rel)
  case other:
- raise SnowparkConnectNotImplementedError(f"AGGREGATE {other}")
+ exception = SnowparkConnectNotImplementedError(
+ f"AGGREGATE {other}"
+ )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  case "approx_quantile":
  result = map_stats.map_approx_quantile(rel)
  case "as_of_join":
- raise SnowparkConnectNotImplementedError("AS_OF_JOIN")
+ exception = SnowparkConnectNotImplementedError("AS_OF_JOIN")
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  case "catalog": # TODO: order these alphabetically
  result = map_catalog.map_catalog(rel.catalog)
  case "collect_metrics":
@@ -179,9 +191,11 @@ def map_relation(
  (get_session_id(), rel.cached_local_relation.hash)
  )
  if cached_df is None:
- raise ValueError(
+ exception = ValueError(
  f"Local relation with hash {rel.cached_local_relation.hash} not found in cache."
  )
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+ raise exception
  return cached_df
  case "map_partitions":
  result = map_map_partitions.map_map_partitions(rel)
@@ -235,7 +249,13 @@ def map_relation(
  case relation_proto.SetOperation.SetOpType.SET_OP_TYPE_EXCEPT:
  result = map_row_ops.map_except(rel)
  case other:
- raise SnowparkConnectNotImplementedError(f"SET_OP {other}")
+ exception = SnowparkConnectNotImplementedError(
+ f"SET_OP {other}"
+ )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  case "show_string":
  result = map_show_string.map_show_string(rel)
  case "sort":
@@ -261,11 +281,17 @@ def map_relation(
  case "with_columns_renamed":
  result = map_column_ops.map_with_columns_renamed(rel)
  case "with_relations":
- raise SnowparkConnectNotImplementedError("WITH_RELATIONS")
+ exception = SnowparkConnectNotImplementedError("WITH_RELATIONS")
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  case "group_map":
  result = map_column_ops.map_group_map(rel)
  case other:
- raise SnowparkConnectNotImplementedError(f"Other Relation {other}")
+ exception = SnowparkConnectNotImplementedError(
+ f"Other Relation {other}"
+ )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  # Store container in plan cache
  if isinstance(result, DataFrameContainer):
@@ -29,6 +29,8 @@ from snowflake.snowpark_connect.column_name_handler import (
  )
  from snowflake.snowpark_connect.config import global_config
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
  from snowflake.snowpark_connect.expression.map_expression import (
  map_single_column_expression,
@@ -58,9 +60,11 @@ def map_deduplicate(
  rel.deduplicate.HasField("within_watermark")
  and rel.deduplicate.within_watermark
  ):
- raise AnalysisException(
+ exception = AnalysisException(
  "dropDuplicatesWithinWatermark is not supported with batch DataFrames/DataSets"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  if (
  rel.deduplicate.HasField("all_columns_as_keys")
@@ -223,7 +227,9 @@ def map_union(
  spark_sql_ansi_enabled = global_config.spark_sql_ansi_enabled
  if left_dtypes != right_dtypes and not rel.set_op.by_name:
  if len(left_dtypes) != len(right_dtypes):
- raise AnalysisException("UNION: the number of columns must match")
+ exception = AnalysisException("UNION: the number of columns must match")
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception
  target_left_dtypes, target_right_dtypes = [], []
  for left_type, right_type in zip(left_dtypes, right_dtypes):
  match (left_type, right_type):
@@ -259,9 +265,11 @@ def map_union(
  not spark_sql_ansi_enabled
  or snowpark.types.StringType() not in [left_type, right_type]
  ): # In ansi mode , string type union boolean type is acceptable
- raise AnalysisException(
+ exception = AnalysisException(
  f"""[INCOMPATIBLE_COLUMN_TYPE] UNION can only be performed on tables with compatible column types. "{str(left_type)}" type which is not compatible with "{str(right_type)}". """
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  target_left_dtypes.append(left_type)
  target_right_dtypes.append(right_type)
  case _:
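These union checks surface to the Spark client as ordinary AnalysisExceptions. A rough illustration of what they enforce, assuming `spark` is a Spark Connect session backed by this server (the error appears once the plan reaches the server, e.g. on collect):

    df1 = spark.createDataFrame([(1, "a")], ["id", "name"])
    df2 = spark.createDataFrame([(2,)], ["id"])
    df1.union(df2).collect()   # AnalysisException: UNION: the number of columns must match

    df3 = spark.createDataFrame([(True,)], ["flag"])
    df4 = spark.createDataFrame([("x",)], ["flag"])
    df3.union(df4).collect()   # [INCOMPATIBLE_COLUMN_TYPE] unless ANSI mode permits the string/boolean pairing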
@@ -787,7 +795,9 @@ def map_sample(

  frac = rel.sample.upper_bound - rel.sample.lower_bound
  if frac < 0 or frac > 1:
- raise IllegalArgumentException("Sample fraction must be between 0 and 1")
+ exception = IllegalArgumentException("Sample fraction must be between 0 and 1")
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception
  # The seed argument is not supported here. There are a number of reasons that implementing
  # this will be complicated in Snowflake. Here is a list of complications:
  #
@@ -802,9 +812,11 @@ def map_sample(
  # these issues.
  if rel.sample.with_replacement:
  # TODO: Use a random number generator with ROW_NUMBER and SELECT.
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Sample with replacement is not supported"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  else:
  result: snowpark.DataFrame = input_df.sample(frac=frac)
  return DataFrameContainer(
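For context, map_sample validates the fraction and rejects sampling with replacement (and, per the surrounding comments, does not honor a seed). On the client this corresponds roughly to the following, assuming a connected `spark` session:

    df = spark.range(100)
    df.sample(fraction=0.1).count()                          # supported: plain fraction sampling
    df.sample(withReplacement=True, fraction=0.1).count()    # raises: "Sample with replacement is not supported"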
@@ -912,9 +924,13 @@ def _union_by_name_optimized(
  set_schema_getter(result, lambda: StructType(result_fields))
  return result
  else:
- raise SnowparkClientExceptionMessages.DF_CANNOT_RESOLVE_COLUMN_NAME_AMONG(
- missing_left, missing_right
+ exception = (
+ SnowparkClientExceptionMessages.DF_CANNOT_RESOLVE_COLUMN_NAME_AMONG(
+ missing_left, missing_right
+ )
  )
+ attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+ raise exception

  result = left_df.unionAllByName(
  right_df, allow_missing_columns=allow_missing_columns
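This branch fires when unionByName cannot reconcile the two schemas and missing columns are not allowed. The client-side behavior it maps to is standard PySpark (illustrative, assuming a connected `spark` session):

    df1 = spark.createDataFrame([(1, 2)], ["a", "b"])
    df2 = spark.createDataFrame([(3, 4)], ["a", "c"])
    df1.unionByName(df2).collect()                              # error: columns "b"/"c" cannot be resolved
    df1.unionByName(df2, allowMissingColumns=True).collect()    # succeeds; absent columns are filled with NULL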
@@ -46,6 +46,8 @@ from snowflake.snowpark_connect.config import (
  unset_config_param,
  )
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.expression.map_expression import (
  ColumnNameMap,
  map_single_column_expression,
@@ -257,7 +259,8 @@ def _create_table_as_select(logical_plan, mode: str) -> None:


  def _spark_field_to_sql(field: jpype.JObject, is_column: bool) -> str:
- # Column names will be uppercased according to "snowpark.connect.sql.identifiers.auto-uppercase",
+ # Column names will be uppercased according to "snowpark.connect.sql.identifiers.auto-uppercase"
+ # if present, or to "spark.sql.caseSensitive".
  # and struct fields will be left as is. This should allow users to use the same names
  # in spark and Snowflake in most cases.
  if is_column:
@@ -377,14 +380,18 @@ def _get_assignments_from_action(
  or action.getClass().getSimpleName() == "UpdateStarAction"
  ):
  if len(column_mapping_source.columns) != len(column_mapping_target.columns):
- raise ValueError(
+ exception = ValueError(
  "source and target must have the same number of columns for InsertStarAction or UpdateStarAction"
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception
  for i, col in enumerate(column_mapping_target.columns):
  if assignments.get(col.snowpark_name) is not None:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "UpdateStarAction or InsertStarAction is not supported with duplicate columns."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  assignments[col.snowpark_name] = snowpark_fn.col(
  column_mapping_source.columns[i].snowpark_name
  )
@@ -489,9 +496,11 @@ def map_sql_to_pandas_df(
  snowflake_sql = f"ALTER TABLE {table_name} ALTER COLUMN {column_name} {alter_clause}"
  session.sql(snowflake_sql).collect()
  else:
- raise ValueError(
+ exception = ValueError(
  f"No alter operations found in AlterColumn logical plan for table {table_name}, column {column_name}"
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
+ raise exception
  case "CreateNamespace":
  name = get_relation_identifier_name(logical_plan.name(), True)
  previous_name = session.connection.schema
@@ -603,9 +612,11 @@ def map_sql_to_pandas_df(
  )
  temp_view = get_temp_view(snowflake_view_name)
  if temp_view is not None and not logical_plan.replace():
- raise AnalysisException(
+ exception = AnalysisException(
  f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{spark_view_name}` because it already exists."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception
  else:
  unregister_temp_view(
  spark_to_sf_single_id_with_unquoting(spark_view_name)
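The TEMP_TABLE_OR_VIEW_ALREADY_EXISTS branch mirrors Spark's own semantics for temporary views: only the non-replacing form fails when the name is taken. Illustrative client-side behavior, assuming a connected `spark` session:

    df = spark.range(3)
    df.createTempView("v1")
    df.createTempView("v1")             # AnalysisException: [TEMP_TABLE_OR_VIEW_ALREADY_EXISTS]
    df.createOrReplaceTempView("v1")    # succeeds; logical_plan.replace() is true on this path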
@@ -625,11 +636,13 @@ def map_sql_to_pandas_df(
  df_container = execute_logical_plan(logical_plan.query())
  df = df_container.dataframe
  if _accessing_temp_object.get():
- raise AnalysisException(
+ exception = AnalysisException(
  f"[INVALID_TEMP_OBJ_REFERENCE] Cannot create the persistent object `{CURRENT_CATALOG_NAME}`.`{current_schema}`.`{object_name}` "
  "of the type VIEW because it references to a temporary object of the type VIEW. Please "
  f"make the temporary object persistent, or make the persistent object `{CURRENT_CATALOG_NAME}`.`{current_schema}`.`{object_name}` temporary."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception

  name = get_relation_identifier_name(logical_plan.child())
  comment = logical_plan.comment()
@@ -661,6 +674,7 @@ def map_sql_to_pandas_df(
  snowpark_column_names=df_container.column_map.get_snowpark_columns(),
  parent_column_name_map=df_container.column_map,
  )
+
  is_global = isinstance(
  logical_plan.viewType(),
  jpype.JClass(
@@ -757,9 +771,11 @@ def map_sql_to_pandas_df(
  del session._udtfs[func_name]
  else:
  if not logical_plan.ifExists():
- raise ValueError(
+ exception = ValueError(
  f"Function {func_name} not found among registered UDFs or UDTFs."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception
  if snowpark_name != "":
  argument_string = f"({', '.join(convert_sp_to_sf_type(arg) for arg in input_types)})"
  session.sql(
@@ -832,17 +848,25 @@ def map_sql_to_pandas_df(
  rows = session.sql(final_sql).collect()
  else:
  # TODO: Support other logical plans
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  f"{logical_plan_name} is not supported yet with EXPLAIN."
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  case "InsertIntoStatement":
  df_container = execute_logical_plan(logical_plan.query())
  df = df_container.dataframe
  queries = df.queries["queries"]
  if len(queries) != 1:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  f"Unexpected number of queries: {len(queries)}"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception

  name = get_relation_identifier_name(logical_plan.table(), True)

@@ -996,9 +1020,13 @@ def map_sql_to_pandas_df(
  clauses.append(when_not_matched(condition).insert(assignments))

  if not as_java_list(logical_plan.notMatchedBySourceActions()).isEmpty():
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Snowflake does not support 'not matched by source' actions in MERGE statements."
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception

  target_table.merge(source_df, merge_condition_typed_col.col, clauses)
  case "DeleteFromTable":
@@ -1036,10 +1064,12 @@ def map_sql_to_pandas_df(
  case "UpdateTable":
  # Databricks/Delta-specific extension not supported by SAS.
  # Provide an actionable, clear error.
- raise UnsupportedOperationException(
+ exception = UnsupportedOperationException(
  "[UNSUPPORTED_SQL_EXTENSION] The UPDATE TABLE command failed.\n"
  + "Reason: This command is a platform-specific SQL extension and is not part of the standard Apache Spark specification that this interface uses."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  case "RenameColumn":
  full_table_identifier = get_relation_identifier_name(
  logical_plan.table(), True
@@ -1049,11 +1079,15 @@ def map_sql_to_pandas_df(
  if not check_table_supports_operation(
  full_table_identifier, "rename_column"
  ):
- raise AnalysisException(
+ exception = AnalysisException(
  f"ALTER TABLE RENAME COLUMN is not supported for table '{full_table_identifier}'. "
  f"This table was created as a v1 table with a data source that doesn't support column renaming. "
  f"To enable this operation, set 'snowpark.connect.enable_snowflake_extension_behavior' to 'true'."
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception

  column_obj = logical_plan.column()
  old_column_name = ".".join(
@@ -1094,6 +1128,7 @@ def map_sql_to_pandas_df(
  f"ALTER ICEBERG TABLE {name} RENAME TO {new_name}"
  ).collect()
  else:
+ attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
  raise e
  case "ReplaceTableAsSelect":
  _create_table_as_select(logical_plan, mode="overwrite")
@@ -1113,9 +1148,11 @@ def map_sql_to_pandas_df(
  name = _spark_to_snowflake(logical_plan.namespace())
  session.sql(f"USE SCHEMA {name}").collect()
  case "SetNamespaceLocation" | "SetNamespaceProperties":
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Altering databases is not currently supported."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  case "ShowCreateTable":
  # Handle SHOW CREATE TABLE command
  # Spark: SHOW CREATE TABLE table_name
@@ -1137,16 +1174,24 @@ def map_sql_to_pandas_df(
  case "ShowNamespaces":
  name = get_relation_identifier_name(logical_plan.namespace(), True)
  if name:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "'IN' clause is not supported while listing databases"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  if logical_plan.pattern().isDefined():
  # Snowflake SQL requires a "%" pattern.
  # Snowpark catalog requires a regex and does client-side filtering.
  # Spark, however, uses a regex-like pattern that treats '*' and '|' differently.
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "'LIKE' clause is not supported while listing databases"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  rows = session.sql("SHOW SCHEMAS").collect()
  if not rows:
  rows = None
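Because Snowflake's SHOW pattern syntax and Spark's namespace patterns do not line up, the IN and LIKE forms are rejected outright rather than mis-translated. Illustrative client-side behavior (assuming a connected `spark` session):

    spark.sql("SHOW SCHEMAS").show()                 # supported; backed by Snowflake's SHOW SCHEMAS
    spark.sql("SHOW SCHEMAS LIKE 'sales*'").show()   # raises: 'LIKE' clause is not supported while listing databases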
@@ -1247,9 +1292,13 @@ def map_sql_to_pandas_df(
  spark_to_sf_single_id(str(db_and_table_name[0])).casefold()
  != db_name.casefold()
  ):
- raise AnalysisException(
+ exception = AnalysisException(
  f"database name is not matching:{db_name} and {db_and_table_name[0]}"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_OPERATION
+ )
+ raise exception

  # Just table name
  snowflake_cmd = f"SHOW COLUMNS IN TABLE {table_name}"
@@ -1294,6 +1343,33 @@ def map_sql_to_pandas_df(
  )
  SNOWFLAKE_CATALOG.refreshTable(table_name_unquoted)

+ return pandas.DataFrame({"": [""]}), ""
+ case "RepairTable":
+ # No-Op. Snowflake doesn't have explicit partitions to repair.
+ table_relation = logical_plan.child()
+ db_and_table_name = as_java_list(table_relation.multipartIdentifier())
+ multi_part_len = len(db_and_table_name)
+
+ if multi_part_len == 1:
+ table_name = db_and_table_name[0]
+ db_name = None
+ full_table_name = table_name
+ else:
+ db_name = db_and_table_name[0]
+ table_name = db_and_table_name[1]
+ full_table_name = db_name + "." + table_name
+
+ df = SNOWFLAKE_CATALOG.tableExists(table_name, db_name)
+
+ table_exist = df.iloc[0, 0]
+
+ if not table_exist:
+ exception = AnalysisException(
+ f"[TABLE_OR_VIEW_NOT_FOUND] Table not found `{full_table_name}`."
+ )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception
+
  return pandas.DataFrame({"": [""]}), ""
  case _:
  execute_logical_plan(logical_plan)
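The new RepairTable branch makes MSCK REPAIR TABLE effectively a no-op (there are no Hive-style partitions to recover in Snowflake) while still verifying that the target table exists. Illustrative usage (table names here are made up):

    spark.sql("MSCK REPAIR TABLE sales")          # no-op if `sales` exists
    spark.sql("MSCK REPAIR TABLE no_such_table")  # AnalysisException: [TABLE_OR_VIEW_NOT_FOUND]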
@@ -1434,7 +1510,12 @@ def map_sql(
  snowpark_connect_sql_passthrough, sql_stmt = is_valid_passthrough_sql(rel.sql.query)

  if not snowpark_connect_sql_passthrough:
- logical_plan = sql_parser().parseQuery(sql_stmt)
+ # Changed from parseQuery to parsePlan as Spark parseQuery() call generating wrong logical plan for
+ # query like this: SELECT cast('3.4' as decimal(38, 18)) UNION SELECT 'foo'
+ # As such other place in this file we use parsePlan.
+ # Main difference between parsePlan() and parseQuery() is, parsePlan() can be called for any SQL statement, while
+ # parseQuery() can only be called for query statements.
+ logical_plan = sql_parser().parsePlan(sql_stmt)

  parsed_pos_args = parse_pos_args(logical_plan, rel.sql.pos_args)
  set_sql_args(rel.sql.args, parsed_pos_args)
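The parser switch matters for statements that combine a typed literal with a set operation, like the one cited in the new comment. Illustrative reproduction from the client side:

    # The statement called out in the comment; previously mis-handled via parseQuery.
    spark.sql("SELECT cast('3.4' as decimal(38, 18)) UNION SELECT 'foo'").show()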
@@ -1471,7 +1552,7 @@ def map_logical_plan_relation(

  # Extract aliases from the aggregate expressions (SELECT clause)
  alias_map = {}
- for agg_expr in as_java_list(rel.aggregateExpressions()):
+ for agg_expr in list(as_java_list(rel.aggregateExpressions())):
  if str(agg_expr.getClass().getSimpleName()) == "Alias":
  alias_map[str(agg_expr.name())] = agg_expr.child()

@@ -1534,9 +1615,13 @@ def map_logical_plan_relation(
  group_type = snowflake_proto.Aggregate.GROUP_TYPE_CUBE
  case "GroupingSets":
  if not exp.userGivenGroupByExprs().isEmpty():
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "User-defined group by expressions are not supported"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  group_type = (
  snowflake_proto.Aggregate.GROUP_TYPE_GROUPING_SETS
  )
@@ -1552,9 +1637,13 @@ def map_logical_plan_relation(

  if group_type != snowflake_proto.Aggregate.GROUP_TYPE_GROUPBY:
  if len(group_expression_list) != 1:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Multiple grouping expressions are not supported"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  if group_type == snowflake_proto.Aggregate.GROUP_TYPE_GROUPING_SETS:
  group_expression_list = [] # TODO: exp.userGivenGroupByExprs()?
  else:
@@ -1786,12 +1875,14 @@ def map_logical_plan_relation(

  # Check for multi-column UNPIVOT which Snowflake doesn't support
  if len(value_column_names) > 1:
- raise UnsupportedOperationException(
+ exception = UnsupportedOperationException(
  f"Multi-column UNPIVOT is not supported. Snowflake SQL does not support unpivoting "
  f"multiple value columns ({', '.join(value_column_names)}) in a single operation. "
  f"Workaround: Use separate UNPIVOT operations for each value column and join the results, "
  f"or restructure your query to unpivot columns individually."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  values = []
  values_groups = as_java_list(rel.values().get())
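The error text above points users toward single-value-column UNPIVOTs. For contrast, a sketch of the supported versus rejected SQL shapes (table and column names are made up for illustration):

    # Supported: one value column per UNPIVOT clause
    spark.sql("SELECT * FROM sales UNPIVOT (amount FOR quarter IN (q1, q2))").show()

    # Rejected on this backend: multiple value columns in a single UNPIVOT clause
    spark.sql("""
        SELECT * FROM sales
        UNPIVOT ((amt, units) FOR quarter IN ((q1_amt, q1_units), (q2_amt, q2_units)))
    """).show()   # UnsupportedOperationException: Multi-column UNPIVOT is not supported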
@@ -1799,11 +1890,13 @@ def map_logical_plan_relation(
  # Check if we have multi-column groups in the IN clause
  if values_groups and len(as_java_list(values_groups[0])) > 1:
  group_sizes = [len(as_java_list(group)) for group in values_groups]
- raise UnsupportedOperationException(
+ exception = UnsupportedOperationException(
  f"Multi-column UNPIVOT is not supported. Snowflake SQL does not support unpivoting "
  f"multiple columns together in groups. Found groups with {max(group_sizes)} columns. "
  f"Workaround: Unpivot each column separately and then join/union the results as needed."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  for e1 in values_groups:
  for e in as_java_list(e1):
@@ -1849,9 +1942,11 @@ def map_logical_plan_relation(
  # Store the having condition in context and process the child aggregate
  child_relation = rel.child()
  if str(child_relation.getClass().getSimpleName()) != "Aggregate":
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "UnresolvedHaving can only be applied to Aggregate relations"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  # Store having condition in a context variable for the Aggregate case to pick up
  having_condition = map_logical_plan_expression(rel.havingCondition())
@@ -2176,7 +2271,7 @@ def map_logical_plan_relation(
  function_name = rel.generator().name().toString()
  func_arguments = [
  map_logical_plan_expression(e)
- for e in as_java_list(rel.generator().children())
+ for e in list(as_java_list(rel.generator().children()))
  ]
  unresolved_fun_proto = expressions_proto.Expression.UnresolvedFunction(
  function_name=function_name, arguments=func_arguments
@@ -2242,7 +2337,11 @@ def map_logical_plan_relation(
  )
  proto = generator_dataframe_proto
  case other:
- raise SnowparkConnectNotImplementedError(f"Unimplemented relation: {other}")
+ exception = SnowparkConnectNotImplementedError(
+ f"Unimplemented relation: {other}"
+ )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  proto.common.plan_id = plan_id

@@ -15,6 +15,8 @@ from snowflake import snowpark
  from snowflake.snowpark.exceptions import SnowparkSQLException
  from snowflake.snowpark_connect.config import get_boolean_session_config_param
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.relation.map_relation import map_relation
  from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session

@@ -99,9 +101,11 @@ def map_approx_quantile(
  else ""
  )

- raise AnalysisException(
+ exception = AnalysisException(
  f"[UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `{col_name}` cannot be resolved.{suggestion_text}"
  )
+ attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+ raise exception

  cols = input_container.column_map.get_snowpark_column_names_from_spark_column_names(
  list(rel.approx_quantile.cols)
@@ -4,6 +4,7 @@

  import pyspark.sql.connect.proto.relations_pb2 as relation_proto

+ from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
  from snowflake.snowpark_connect.relation.map_relation import map_relation

@@ -18,7 +19,9 @@ def map_alias(
  # we set reuse_parsed_plan=False because we need new expr_id for the attributes (output columns) in aliased snowpark dataframe
  # reuse_parsed_plan will lead to ambiguous column name for operations like joining two dataframes that are aliased from the same dataframe
  input_container = map_relation(rel.subquery_alias.input, reuse_parsed_plan=False)
- qualifiers = [[alias]] * len(input_container.column_map.columns)
+ qualifiers = [
+ {ColumnQualifier((alias,))} for _ in input_container.column_map.columns
+ ]

  return DataFrameContainer.create_with_column_mapping(
  dataframe=input_container.dataframe,
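The replaced expression `[[alias]] * len(...)` creates N references to one shared inner list, so mutating the qualifier list for one column would have changed it for all of them; the new comprehension builds an independent set of ColumnQualifier objects per column. Whatever the motivation in this particular change, the shared-reference pitfall it sidesteps is a generic Python one:

    shared = [["alias"]] * 3
    shared[0].append("extra")
    print(shared)        # [['alias', 'extra'], ['alias', 'extra'], ['alias', 'extra']]

    independent = [{"alias"} for _ in range(3)]
    independent[0].add("extra")
    print(independent)   # only the first set gains the new element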
@@ -22,6 +22,8 @@ from snowflake.snowpark_connect.config import (
  global_config,
  )
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.expression.map_expression import (
  map_single_column_expression,
  )
@@ -163,17 +165,21 @@ def process_return_type(
  else:
  parsed_return = return_type
  except ValueError as e:
- raise PythonException(
+ exception = PythonException(
  f"[UDTF_ARROW_TYPE_CAST_ERROR] Error parsing UDTF return type DDL: {e}"
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  original_output_schema = proto_to_snowpark_type(parsed_return)
  output_schema = proto_to_snowpark_type(parsed_return)
  # Snowflake UDTF does not support MapType, so we convert it to VariantType.
  output_schema = convert_maptype_to_variant(output_schema)
  if not isinstance(output_schema, StructType):
- raise PySparkTypeError(
+ exception = PySparkTypeError(
  f"Invalid Python user-defined table function return type. Expect a struct type, but got {parsed_return}"
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception

  expected_types = None
  if is_arrow_enabled_in_udtf() or is_spark_compatible_udtf_mode_enabled():
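process_return_type expects the UDTF return type to parse into a struct. For reference, a minimal PySpark UDTF whose returnType takes the struct-style DDL form this code accepts (illustrative only):

    from pyspark.sql.functions import lit, udtf

    @udtf(returnType="word string, length int")   # struct-style DDL; parses into a StructType
    class WordLengths:
        def eval(self, text: str):
            for word in text.split():
                yield (word, len(word))

    WordLengths(lit("snowpark connect")).show()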
@@ -276,12 +282,16 @@ def map_common_inline_user_defined_table_function(
  if require_creating_udtf_in_sproc(udtf_proto):
  snowpark_udtf_or_error = create_udtf_in_sproc(**kwargs)
  if isinstance(snowpark_udtf_or_error, str):
- raise PythonException(snowpark_udtf_or_error)
+ exception = PythonException(snowpark_udtf_or_error)
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+ raise exception
  snowpark_udtf = snowpark_udtf_or_error
  else:
  udtf_or_error = create_udtf(**kwargs)
  if isinstance(udtf_or_error, str):
- raise PythonException(udtf_or_error)
+ exception = PythonException(udtf_or_error)
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+ raise exception
  udtf = udtf_or_error
  snowpark_udtf = SnowparkUDTF(
  name=udtf.name,