snowpark-connect 0.30.1__py3-none-any.whl → 0.31.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (78)
  1. snowflake/snowpark_connect/column_name_handler.py +150 -25
  2. snowflake/snowpark_connect/config.py +51 -16
  3. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  4. snowflake/snowpark_connect/error/error_codes.py +50 -0
  5. snowflake/snowpark_connect/error/error_utils.py +142 -22
  6. snowflake/snowpark_connect/error/exceptions.py +13 -4
  7. snowflake/snowpark_connect/execute_plan/map_execution_command.py +5 -1
  8. snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
  9. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  10. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  11. snowflake/snowpark_connect/expression/literal.py +7 -1
  12. snowflake/snowpark_connect/expression/map_cast.py +17 -5
  13. snowflake/snowpark_connect/expression/map_expression.py +48 -4
  14. snowflake/snowpark_connect/expression/map_extension.py +25 -5
  15. snowflake/snowpark_connect/expression/map_sql_expression.py +65 -30
  16. snowflake/snowpark_connect/expression/map_udf.py +10 -2
  17. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +33 -9
  18. snowflake/snowpark_connect/expression/map_unresolved_function.py +627 -205
  19. snowflake/snowpark_connect/expression/map_unresolved_star.py +5 -1
  20. snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
  21. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  22. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  23. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +34 -12
  24. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  25. snowflake/snowpark_connect/relation/io_utils.py +6 -1
  26. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  27. snowflake/snowpark_connect/relation/map_column_ops.py +88 -56
  28. snowflake/snowpark_connect/relation/map_extension.py +28 -8
  29. snowflake/snowpark_connect/relation/map_join.py +21 -10
  30. snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
  31. snowflake/snowpark_connect/relation/map_relation.py +33 -7
  32. snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
  33. snowflake/snowpark_connect/relation/map_sql.py +91 -24
  34. snowflake/snowpark_connect/relation/map_stats.py +5 -1
  35. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  36. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
  37. snowflake/snowpark_connect/relation/read/map_read.py +15 -3
  38. snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
  39. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  40. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
  41. snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
  42. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  43. snowflake/snowpark_connect/relation/read/map_read_table.py +15 -5
  44. snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
  45. snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
  46. snowflake/snowpark_connect/relation/stage_locator.py +5 -1
  47. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  48. snowflake/snowpark_connect/relation/write/map_write.py +131 -34
  49. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  50. snowflake/snowpark_connect/resources_initializer.py +5 -1
  51. snowflake/snowpark_connect/server.py +72 -19
  52. snowflake/snowpark_connect/type_mapping.py +54 -17
  53. snowflake/snowpark_connect/utils/context.py +42 -1
  54. snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
  55. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  56. snowflake/snowpark_connect/utils/identifiers.py +11 -3
  57. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  58. snowflake/snowpark_connect/utils/profiling.py +25 -8
  59. snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
  60. snowflake/snowpark_connect/utils/session.py +5 -1
  61. snowflake/snowpark_connect/utils/telemetry.py +6 -0
  62. snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
  63. snowflake/snowpark_connect/utils/udf_cache.py +5 -3
  64. snowflake/snowpark_connect/utils/udf_helper.py +20 -6
  65. snowflake/snowpark_connect/utils/udf_utils.py +4 -4
  66. snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
  67. snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
  68. snowflake/snowpark_connect/version.py +1 -1
  69. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/METADATA +3 -2
  70. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/RECORD +78 -77
  71. {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-connect +0 -0
  72. {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-session +0 -0
  73. {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-submit +0 -0
  74. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/WHEEL +0 -0
  75. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE-binary +0 -0
  76. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE.txt +0 -0
  77. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/NOTICE-binary +0 -0
  78. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/top_level.txt +0 -0
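
Note on the hunks that follow: the dominant change in 0.31.0 is a new error-handling pattern. Instead of raising exceptions directly, the code now builds the exception, attaches a custom error code, and then raises it, using two helpers imported across the changed files. A minimal sketch of the pattern, assembled from the hunks below (the helper's internals are an assumption; only the import paths, the call signature, and the ErrorCodes members appear in the diff, and the wrapper function here is illustrative):

# Sketch of the 0.31.0 error-handling pattern repeated throughout this diff.
# attach_custom_error_code is assumed to record the code on the exception;
# only its call signature and the module paths are taken from the hunks.
from snowflake.snowpark_connect.error.error_codes import ErrorCodes
from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code


def drop_function(func_name: str, if_exists: bool) -> None:  # illustrative wrapper
    if not if_exists:
        # 0.30.1: raise ValueError(...) directly.
        # 0.31.0: build the exception, tag it with an error code, then raise it.
        exception = ValueError(
            f"Function {func_name} not found among registered UDFs or UDTFs."
        )
        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
        raise exception
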
@@ -46,6 +46,8 @@ from snowflake.snowpark_connect.config import (
  unset_config_param,
  )
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.expression.map_expression import (
  ColumnNameMap,
  map_single_column_expression,
@@ -257,7 +259,8 @@ def _create_table_as_select(logical_plan, mode: str) -> None:


  def _spark_field_to_sql(field: jpype.JObject, is_column: bool) -> str:
- # Column names will be uppercased according to "snowpark.connect.sql.identifiers.auto-uppercase",
+ # Column names will be uppercased according to "snowpark.connect.sql.identifiers.auto-uppercase"
+ # if present, or to "spark.sql.caseSensitive".
  # and struct fields will be left as is. This should allow users to use the same names
  # in spark and Snowflake in most cases.
  if is_column:
@@ -377,14 +380,18 @@ def _get_assignments_from_action(
  or action.getClass().getSimpleName() == "UpdateStarAction"
  ):
  if len(column_mapping_source.columns) != len(column_mapping_target.columns):
- raise ValueError(
+ exception = ValueError(
  "source and target must have the same number of columns for InsertStarAction or UpdateStarAction"
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception
  for i, col in enumerate(column_mapping_target.columns):
  if assignments.get(col.snowpark_name) is not None:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "UpdateStarAction or InsertStarAction is not supported with duplicate columns."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  assignments[col.snowpark_name] = snowpark_fn.col(
  column_mapping_source.columns[i].snowpark_name
  )
@@ -489,9 +496,11 @@ def map_sql_to_pandas_df(
  snowflake_sql = f"ALTER TABLE {table_name} ALTER COLUMN {column_name} {alter_clause}"
  session.sql(snowflake_sql).collect()
  else:
- raise ValueError(
+ exception = ValueError(
  f"No alter operations found in AlterColumn logical plan for table {table_name}, column {column_name}"
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
+ raise exception
  case "CreateNamespace":
  name = get_relation_identifier_name(logical_plan.name(), True)
  previous_name = session.connection.schema
@@ -603,9 +612,11 @@ def map_sql_to_pandas_df(
  )
  temp_view = get_temp_view(snowflake_view_name)
  if temp_view is not None and not logical_plan.replace():
- raise AnalysisException(
+ exception = AnalysisException(
  f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{spark_view_name}` because it already exists."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception
  else:
  unregister_temp_view(
  spark_to_sf_single_id_with_unquoting(spark_view_name)
@@ -625,11 +636,13 @@ def map_sql_to_pandas_df(
  df_container = execute_logical_plan(logical_plan.query())
  df = df_container.dataframe
  if _accessing_temp_object.get():
- raise AnalysisException(
+ exception = AnalysisException(
  f"[INVALID_TEMP_OBJ_REFERENCE] Cannot create the persistent object `{CURRENT_CATALOG_NAME}`.`{current_schema}`.`{object_name}` "
  "of the type VIEW because it references to a temporary object of the type VIEW. Please "
  f"make the temporary object persistent, or make the persistent object `{CURRENT_CATALOG_NAME}`.`{current_schema}`.`{object_name}` temporary."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception

  name = get_relation_identifier_name(logical_plan.child())
  comment = logical_plan.comment()
@@ -661,6 +674,7 @@ def map_sql_to_pandas_df(
  snowpark_column_names=df_container.column_map.get_snowpark_columns(),
  parent_column_name_map=df_container.column_map,
  )
+
  is_global = isinstance(
  logical_plan.viewType(),
  jpype.JClass(
@@ -757,9 +771,11 @@ def map_sql_to_pandas_df(
  del session._udtfs[func_name]
  else:
  if not logical_plan.ifExists():
- raise ValueError(
+ exception = ValueError(
  f"Function {func_name} not found among registered UDFs or UDTFs."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception
  if snowpark_name != "":
  argument_string = f"({', '.join(convert_sp_to_sf_type(arg) for arg in input_types)})"
  session.sql(
@@ -832,17 +848,25 @@ def map_sql_to_pandas_df(
  rows = session.sql(final_sql).collect()
  else:
  # TODO: Support other logical plans
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  f"{logical_plan_name} is not supported yet with EXPLAIN."
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  case "InsertIntoStatement":
  df_container = execute_logical_plan(logical_plan.query())
  df = df_container.dataframe
  queries = df.queries["queries"]
  if len(queries) != 1:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  f"Unexpected number of queries: {len(queries)}"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception

  name = get_relation_identifier_name(logical_plan.table(), True)

@@ -996,9 +1020,13 @@ def map_sql_to_pandas_df(
  clauses.append(when_not_matched(condition).insert(assignments))

  if not as_java_list(logical_plan.notMatchedBySourceActions()).isEmpty():
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Snowflake does not support 'not matched by source' actions in MERGE statements."
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception

  target_table.merge(source_df, merge_condition_typed_col.col, clauses)
  case "DeleteFromTable":
@@ -1036,10 +1064,12 @@ def map_sql_to_pandas_df(
  case "UpdateTable":
  # Databricks/Delta-specific extension not supported by SAS.
  # Provide an actionable, clear error.
- raise UnsupportedOperationException(
+ exception = UnsupportedOperationException(
  "[UNSUPPORTED_SQL_EXTENSION] The UPDATE TABLE command failed.\n"
  + "Reason: This command is a platform-specific SQL extension and is not part of the standard Apache Spark specification that this interface uses."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  case "RenameColumn":
  full_table_identifier = get_relation_identifier_name(
  logical_plan.table(), True
@@ -1049,11 +1079,15 @@ def map_sql_to_pandas_df(
  if not check_table_supports_operation(
  full_table_identifier, "rename_column"
  ):
- raise AnalysisException(
+ exception = AnalysisException(
  f"ALTER TABLE RENAME COLUMN is not supported for table '{full_table_identifier}'. "
  f"This table was created as a v1 table with a data source that doesn't support column renaming. "
  f"To enable this operation, set 'snowpark.connect.enable_snowflake_extension_behavior' to 'true'."
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception

  column_obj = logical_plan.column()
  old_column_name = ".".join(
@@ -1094,6 +1128,7 @@ def map_sql_to_pandas_df(
  f"ALTER ICEBERG TABLE {name} RENAME TO {new_name}"
  ).collect()
  else:
+ attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
  raise e
  case "ReplaceTableAsSelect":
  _create_table_as_select(logical_plan, mode="overwrite")
@@ -1113,9 +1148,11 @@ def map_sql_to_pandas_df(
  name = _spark_to_snowflake(logical_plan.namespace())
  session.sql(f"USE SCHEMA {name}").collect()
  case "SetNamespaceLocation" | "SetNamespaceProperties":
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Altering databases is not currently supported."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  case "ShowCreateTable":
  # Handle SHOW CREATE TABLE command
  # Spark: SHOW CREATE TABLE table_name
@@ -1137,16 +1174,24 @@ def map_sql_to_pandas_df(
  case "ShowNamespaces":
  name = get_relation_identifier_name(logical_plan.namespace(), True)
  if name:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "'IN' clause is not supported while listing databases"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  if logical_plan.pattern().isDefined():
  # Snowflake SQL requires a "%" pattern.
  # Snowpark catalog requires a regex and does client-side filtering.
  # Spark, however, uses a regex-like pattern that treats '*' and '|' differently.
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "'LIKE' clause is not supported while listing databases"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  rows = session.sql("SHOW SCHEMAS").collect()
  if not rows:
  rows = None
@@ -1247,9 +1292,13 @@ def map_sql_to_pandas_df(
  spark_to_sf_single_id(str(db_and_table_name[0])).casefold()
  != db_name.casefold()
  ):
- raise AnalysisException(
+ exception = AnalysisException(
  f"database name is not matching:{db_name} and {db_and_table_name[0]}"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_OPERATION
+ )
+ raise exception

  # Just table name
  snowflake_cmd = f"SHOW COLUMNS IN TABLE {table_name}"
@@ -1471,7 +1520,7 @@ def map_logical_plan_relation(

  # Extract aliases from the aggregate expressions (SELECT clause)
  alias_map = {}
- for agg_expr in as_java_list(rel.aggregateExpressions()):
+ for agg_expr in list(as_java_list(rel.aggregateExpressions())):
  if str(agg_expr.getClass().getSimpleName()) == "Alias":
  alias_map[str(agg_expr.name())] = agg_expr.child()

@@ -1534,9 +1583,13 @@ def map_logical_plan_relation(
  group_type = snowflake_proto.Aggregate.GROUP_TYPE_CUBE
  case "GroupingSets":
  if not exp.userGivenGroupByExprs().isEmpty():
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "User-defined group by expressions are not supported"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  group_type = (
  snowflake_proto.Aggregate.GROUP_TYPE_GROUPING_SETS
  )
@@ -1552,9 +1605,13 @@ def map_logical_plan_relation(

  if group_type != snowflake_proto.Aggregate.GROUP_TYPE_GROUPBY:
  if len(group_expression_list) != 1:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Multiple grouping expressions are not supported"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  if group_type == snowflake_proto.Aggregate.GROUP_TYPE_GROUPING_SETS:
  group_expression_list = [] # TODO: exp.userGivenGroupByExprs()?
  else:
@@ -1786,12 +1843,14 @@ def map_logical_plan_relation(

  # Check for multi-column UNPIVOT which Snowflake doesn't support
  if len(value_column_names) > 1:
- raise UnsupportedOperationException(
+ exception = UnsupportedOperationException(
  f"Multi-column UNPIVOT is not supported. Snowflake SQL does not support unpivoting "
  f"multiple value columns ({', '.join(value_column_names)}) in a single operation. "
  f"Workaround: Use separate UNPIVOT operations for each value column and join the results, "
  f"or restructure your query to unpivot columns individually."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  values = []
  values_groups = as_java_list(rel.values().get())
@@ -1799,11 +1858,13 @@ def map_logical_plan_relation(
  # Check if we have multi-column groups in the IN clause
  if values_groups and len(as_java_list(values_groups[0])) > 1:
  group_sizes = [len(as_java_list(group)) for group in values_groups]
- raise UnsupportedOperationException(
+ exception = UnsupportedOperationException(
  f"Multi-column UNPIVOT is not supported. Snowflake SQL does not support unpivoting "
  f"multiple columns together in groups. Found groups with {max(group_sizes)} columns. "
  f"Workaround: Unpivot each column separately and then join/union the results as needed."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  for e1 in values_groups:
  for e in as_java_list(e1):
@@ -1849,9 +1910,11 @@ def map_logical_plan_relation(
  # Store the having condition in context and process the child aggregate
  child_relation = rel.child()
  if str(child_relation.getClass().getSimpleName()) != "Aggregate":
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "UnresolvedHaving can only be applied to Aggregate relations"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  # Store having condition in a context variable for the Aggregate case to pick up
  having_condition = map_logical_plan_expression(rel.havingCondition())
@@ -2176,7 +2239,7 @@ def map_logical_plan_relation(
  function_name = rel.generator().name().toString()
  func_arguments = [
  map_logical_plan_expression(e)
- for e in as_java_list(rel.generator().children())
+ for e in list(as_java_list(rel.generator().children()))
  ]
  unresolved_fun_proto = expressions_proto.Expression.UnresolvedFunction(
  function_name=function_name, arguments=func_arguments
@@ -2242,7 +2305,11 @@ def map_logical_plan_relation(
  )
  proto = generator_dataframe_proto
  case other:
- raise SnowparkConnectNotImplementedError(f"Unimplemented relation: {other}")
+ exception = SnowparkConnectNotImplementedError(
+ f"Unimplemented relation: {other}"
+ )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  proto.common.plan_id = plan_id

@@ -15,6 +15,8 @@ from snowflake import snowpark
  from snowflake.snowpark.exceptions import SnowparkSQLException
  from snowflake.snowpark_connect.config import get_boolean_session_config_param
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.relation.map_relation import map_relation
  from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session

@@ -99,9 +101,11 @@ def map_approx_quantile(
  else ""
  )

- raise AnalysisException(
+ exception = AnalysisException(
  f"[UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `{col_name}` cannot be resolved.{suggestion_text}"
  )
+ attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+ raise exception

  cols = input_container.column_map.get_snowpark_column_names_from_spark_column_names(
  list(rel.approx_quantile.cols)
@@ -22,6 +22,8 @@ from snowflake.snowpark_connect.config import (
  global_config,
  )
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.expression.map_expression import (
  map_single_column_expression,
  )
@@ -163,17 +165,21 @@ def process_return_type(
  else:
  parsed_return = return_type
  except ValueError as e:
- raise PythonException(
+ exception = PythonException(
  f"[UDTF_ARROW_TYPE_CAST_ERROR] Error parsing UDTF return type DDL: {e}"
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  original_output_schema = proto_to_snowpark_type(parsed_return)
  output_schema = proto_to_snowpark_type(parsed_return)
  # Snowflake UDTF does not support MapType, so we convert it to VariantType.
  output_schema = convert_maptype_to_variant(output_schema)
  if not isinstance(output_schema, StructType):
- raise PySparkTypeError(
+ exception = PySparkTypeError(
  f"Invalid Python user-defined table function return type. Expect a struct type, but got {parsed_return}"
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception

  expected_types = None
  if is_arrow_enabled_in_udtf() or is_spark_compatible_udtf_mode_enabled():
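
For context on the return-type validation above: the server requires the UDTF return type DDL to describe a struct. A client-side UDTF declared with such a DDL string, using the standard PySpark 3.5 decorator, might look like this sketch (the class and column names are made up for the example):

# Illustrative PySpark 3.5 UDTF whose returnType DDL parses to a struct,
# which is what process_return_type above expects; names are made up.
from pyspark.sql.functions import udtf


@udtf(returnType="word string, length int")
class SplitWords:
    def eval(self, text: str):
        # Yield one row per whitespace-separated token.
        for w in text.split():
            yield (w, len(w))
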
@@ -276,12 +282,16 @@ def map_common_inline_user_defined_table_function(
  if require_creating_udtf_in_sproc(udtf_proto):
  snowpark_udtf_or_error = create_udtf_in_sproc(**kwargs)
  if isinstance(snowpark_udtf_or_error, str):
- raise PythonException(snowpark_udtf_or_error)
+ exception = PythonException(snowpark_udtf_or_error)
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+ raise exception
  snowpark_udtf = snowpark_udtf_or_error
  else:
  udtf_or_error = create_udtf(**kwargs)
  if isinstance(udtf_or_error, str):
- raise PythonException(udtf_or_error)
+ exception = PythonException(udtf_or_error)
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+ raise exception
  udtf = udtf_or_error
  snowpark_udtf = SnowparkUDTF(
  name=udtf.name,
@@ -38,6 +38,8 @@ from snowflake.snowpark.types import (
  TimeType,
  _NumericType,
  )
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.relation.read.utils import (
  DATA_SOURCE_SQL_COMMENT,
  Connection,
@@ -147,9 +149,11 @@ class JdbcDataFrameReader(DataFrameReader):
  or upper_bound is not None
  or num_partitions is not None
  ):
- raise ValueError(
+ exception = ValueError(
  "when column is not specified, lower_bound, upper_bound, num_partitions are expected to be None"
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception
  if table is not None:
  partitioned_queries = []
  table_query = f"SELECT * FROM {table}"
@@ -160,24 +164,32 @@ class JdbcDataFrameReader(DataFrameReader):
  elif query is not None:
  partitioned_queries = [query]
  else:
- raise ValueError("table or query is not specified")
+ exception = ValueError("table or query is not specified")
+ attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+ raise exception
  else:
  if lower_bound is None or upper_bound is None or num_partitions is None:
- raise ValueError(
+ exception = ValueError(
  "when column is specified, lower_bound, upper_bound, num_partitions must be specified"
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception

  column_type = None
  for field in struct_schema.fields:
  if field.name.lower() == column.lower():
  column_type = field.datatype
  if column_type is None:
- raise ValueError("Column does not exist")
+ exception = ValueError("Column does not exist")
+ attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+ raise exception

  if not isinstance(column_type, _NumericType) and not isinstance(
  column_type, DateType
  ):
- raise ValueError(f"unsupported type {column_type}")
+ exception = ValueError(f"unsupported type {column_type}")
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+ raise exception
  spark_column_name = f'"{column}"'
  partitioned_queries = self._generate_partition(
  table,
@@ -240,7 +252,11 @@ class JdbcDataFrameReader(DataFrameReader):
  )
  query_thread_executor.shutdown(wait=False)
  upload_thread_executor.shutdown(wait=False)
- raise future.result()
+ exception = future.result()
+ attach_custom_error_code(
+ exception, ErrorCodes.INTERNAL_ERROR
+ )
+ raise exception
  else:
  path = future.result()
  if not path:
@@ -266,7 +282,11 @@ class JdbcDataFrameReader(DataFrameReader):
  )
  query_thread_executor.shutdown(wait=False)
  upload_thread_executor.shutdown(wait=False)
- raise f.result()
+ exception = f.result()
+ attach_custom_error_code(
+ exception, ErrorCodes.INTERNAL_ERROR
+ )
+ raise exception
  finally:
  close_connection(conn)

@@ -283,7 +303,9 @@ class JdbcDataFrameReader(DataFrameReader):
  elif query is not None:
  sql = f"SELECT * FROM ({query}) WHERE 1=0"
  else:
- raise ValueError("table or query is not specified")
+ exception = ValueError("table or query is not specified")
+ attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+ raise exception

  cursor = conn.cursor()
  cursor.execute(sql)
@@ -301,7 +323,11 @@ class JdbcDataFrameReader(DataFrameReader):
  dt = parser.parse(value)
  return int(dt.replace(tzinfo=pytz.UTC).timestamp())
  else:
- raise TypeError(f"unsupported column type for partition: {column_type}")
+ exception = TypeError(
+ f"unsupported column type for partition: {column_type}"
+ )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+ raise exception

  # this function is only used in data source API for SQL server
  def _to_external_value(self, value: Union[int, str, float], column_type: DataType):
@@ -311,7 +337,11 @@ class JdbcDataFrameReader(DataFrameReader):
  # TODO: SNOW-1909315: support timezone
  return datetime.datetime.fromtimestamp(value, tz=pytz.UTC)
  else:
- raise TypeError(f"unsupported column type for partition: {column_type}")
+ exception = TypeError(
+ f"unsupported column type for partition: {column_type}"
+ )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+ raise exception

  def _to_snowpark_type(self, schema: Tuple[tuple]) -> StructType:
  fields = []
@@ -339,7 +369,9 @@ class JdbcDataFrameReader(DataFrameReader):
  case jaydebeapi.BINARY:
  field = StructField(name, BinaryType(), is_nullable)
  case _:
- raise ValueError(f"unsupported type: {dbapi_type}")
+ exception = ValueError(f"unsupported type: {dbapi_type}")
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+ raise exception

  fields.append(field)
  return StructType(fields)
@@ -359,7 +391,9 @@ class JdbcDataFrameReader(DataFrameReader):
  processed_lower_bound = self._to_internal_value(lower_bound, column_type)
  processed_upper_bound = self._to_internal_value(upper_bound, column_type)
  if processed_lower_bound > processed_upper_bound:
- raise ValueError("lower_bound cannot be greater than upper_bound")
+ exception = ValueError("lower_bound cannot be greater than upper_bound")
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception

  if processed_lower_bound == processed_upper_bound or num_partitions <= 1:
  return [select_query]
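
The bound checks above correspond to Spark's standard JDBC partitioning options. For reference, a client would drive this code with the usual reader options (standard Spark API; the URL, table, and bounds are placeholders, and an existing SparkSession named spark is assumed):

# Standard Spark JDBC partitioned-read options that feed the validation above.
# Placeholders only; assumes a SparkSession bound to the name `spark`.
df = (
    spark.read.format("jdbc")
    .option("url", "jdbc:postgresql://host:5432/db")
    .option("dbtable", "public.orders")
    .option("partitionColumn", "order_id")  # must be numeric or date-typed per the check above
    .option("lowerBound", "1")
    .option("upperBound", "1000000")
    .option("numPartitions", "8")
    .load()
)
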
@@ -665,4 +699,6 @@ def get_jdbc_dialect(url: str) -> JdbcDialect:
  for jdbc_dialect in jdbc_dialects:
  if jdbc_dialect.can_handle(url):
  return jdbc_dialect
- raise ValueError(f"Unsupported JDBC datasource: {url}")
+ exception = ValueError(f"Unsupported JDBC datasource: {url}")
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
@@ -15,6 +15,8 @@ from snowflake import snowpark
  from snowflake.snowpark.types import StructType
  from snowflake.snowpark_connect.config import global_config
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.relation.io_utils import (
  convert_file_prefix_path,
  get_compression_for_source_and_options,
@@ -159,12 +161,20 @@ def map_read(
  options[DBTABLE_OPTION], session, rel.common.plan_id
  )
  case other:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  f"UNSUPPORTED FORMAT {other} WITH NO PATH"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  case other:
  # TODO: Empty data source
- raise SnowparkConnectNotImplementedError(f"Unsupported read type: {other}")
+ exception = SnowparkConnectNotImplementedError(
+ f"Unsupported read type: {other}"
+ )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  return df_cache_map_put_if_absent(
  (get_session_id(), rel.common.plan_id),
@@ -274,9 +284,11 @@ def _read_file(

  return map_read_text(rel, schema, session, paths)
  case _:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  f"Unsupported format: {read_format}"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception


  def _skip_upload(path: str, read_format: str):
@@ -12,6 +12,8 @@ from snowflake import snowpark
  from snowflake.snowpark.dataframe_reader import DataFrameReader
  from snowflake.snowpark.types import StringType, StructField, StructType
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.relation.read.map_read import CsvReaderConfig
  from snowflake.snowpark_connect.relation.read.metadata_utils import (
  add_filename_metadata_to_reader,
@@ -43,9 +45,11 @@ def map_read_csv(

  if rel.read.is_streaming is True:
  # TODO: Structured streaming implementation.
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Streaming is not supported for CSV files."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  else:
  snowpark_options = options.convert_to_snowpark_args()
  parse_header = snowpark_options.get("PARSE_HEADER", False)
@@ -188,14 +192,18 @@ def read_data(

  if schema is not None:
  if len(schema.fields) != len(non_metadata_fields):
- raise Exception(f"csv load from {filename} failed.")
+ exception = Exception(f"csv load from {filename} failed.")
+ attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+ raise exception
  if raw_options.get("enforceSchema", "True").lower() == "false":
  for i in range(len(schema.fields)):
  if (
  schema.fields[i].name != non_metadata_fields[i].name
  and f'"{schema.fields[i].name}"' != non_metadata_fields[i].name
  ):
- raise Exception("CSV header does not conform to the schema")
+ exception = Exception("CSV header does not conform to the schema")
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception
  return df

  headers = get_header_names(