snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.

Potentially problematic release.

This version of snowpark-connect might be problematic.

Files changed (87)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/column_name_handler.py +200 -102
  3. snowflake/snowpark_connect/column_qualifier.py +47 -0
  4. snowflake/snowpark_connect/config.py +51 -16
  5. snowflake/snowpark_connect/dataframe_container.py +3 -2
  6. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  7. snowflake/snowpark_connect/error/error_codes.py +50 -0
  8. snowflake/snowpark_connect/error/error_utils.py +142 -22
  9. snowflake/snowpark_connect/error/exceptions.py +13 -4
  10. snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
  11. snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
  12. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  13. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  14. snowflake/snowpark_connect/expression/literal.py +7 -1
  15. snowflake/snowpark_connect/expression/map_cast.py +17 -5
  16. snowflake/snowpark_connect/expression/map_expression.py +53 -8
  17. snowflake/snowpark_connect/expression/map_extension.py +37 -11
  18. snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
  19. snowflake/snowpark_connect/expression/map_udf.py +10 -2
  20. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
  21. snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
  22. snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
  23. snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
  24. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  25. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  26. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
  27. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  28. snowflake/snowpark_connect/relation/io_utils.py +6 -1
  29. snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
  30. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  31. snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
  32. snowflake/snowpark_connect/relation/map_extension.py +38 -17
  33. snowflake/snowpark_connect/relation/map_join.py +26 -12
  34. snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
  35. snowflake/snowpark_connect/relation/map_relation.py +33 -7
  36. snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
  37. snowflake/snowpark_connect/relation/map_sql.py +124 -25
  38. snowflake/snowpark_connect/relation/map_stats.py +5 -1
  39. snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
  40. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  41. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
  42. snowflake/snowpark_connect/relation/read/map_read.py +15 -3
  43. snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
  44. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  45. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
  46. snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
  47. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  48. snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
  49. snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
  50. snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
  51. snowflake/snowpark_connect/relation/stage_locator.py +5 -1
  52. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  53. snowflake/snowpark_connect/relation/write/map_write.py +160 -48
  54. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  55. snowflake/snowpark_connect/resources_initializer.py +5 -1
  56. snowflake/snowpark_connect/server.py +73 -21
  57. snowflake/snowpark_connect/type_mapping.py +90 -20
  58. snowflake/snowpark_connect/typed_column.py +8 -6
  59. snowflake/snowpark_connect/utils/context.py +42 -1
  60. snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
  61. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  62. snowflake/snowpark_connect/utils/identifiers.py +11 -3
  63. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  64. snowflake/snowpark_connect/utils/profiling.py +25 -8
  65. snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
  66. snowflake/snowpark_connect/utils/session.py +24 -4
  67. snowflake/snowpark_connect/utils/telemetry.py +6 -0
  68. snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
  69. snowflake/snowpark_connect/utils/udf_cache.py +5 -3
  70. snowflake/snowpark_connect/utils/udf_helper.py +20 -6
  71. snowflake/snowpark_connect/utils/udf_utils.py +4 -4
  72. snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
  73. snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
  74. snowflake/snowpark_connect/version.py +1 -1
  75. snowflake/snowpark_decoder/dp_session.py +1 -1
  76. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
  77. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
  78. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
  79. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
  80. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
  81. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
  82. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
  83. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
  84. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
  85. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
  86. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
  87. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
@@ -11,6 +11,9 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
  )
  from snowflake.snowpark.types import StructType
  from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+ from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.expression.typer import ExpressionTyper
  from snowflake.snowpark_connect.typed_column import TypedColumn
  from snowflake.snowpark_connect.utils.context import get_outer_dataframes
@@ -26,7 +29,7 @@ def check_struct_and_get_field_datatype(field_name, schema):
  else:
  return None
  else:
- None
+ return None


  def map_unresolved_star(
@@ -53,16 +56,17 @@ def map_unresolved_star(
  return spark_names, typed_column

  # scenario where it is expanding * to mulitple columns
- spark_names = []
- snowpark_names = []
- qualifiers = []
+ spark_names: list[str] = []
+ snowpark_names: list[str] = []
+ qualifiers: list[set[ColumnQualifier]] = []

+ target_qualifier = ColumnQualifier(tuple(name_parts[:-1]))
  (
  spark_names,
  snowpark_names,
  qualifiers,
  ) = column_mapping.get_spark_and_snowpark_columns_with_qualifier_for_qualifier(
- name_parts[:-1]
+ target_qualifier
  )

  if len(spark_names) == 0:
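
The recurring change in these hunks replaces plain list[str] qualifiers with sets of ColumnQualifier objects (the new column_qualifier.py module in the file list). A minimal sketch of the new shape, assuming only what the hunks themselves show — a tuple-of-name-parts constructor and a no_qualifier() helper, with instances hashable enough to sit in a set:

    # Illustrative sketch only, not code from the package.
    from snowflake.snowpark_connect.column_qualifier import ColumnQualifier

    name_parts = ["db", "tbl", "*"]  # e.g. the parts of "db.tbl.*"

    # Old representation: one list[str] of name parts per column.
    old_qualifiers = [["db", "tbl"], []]

    # New representation: one set[ColumnQualifier] per column.
    target_qualifier = ColumnQualifier(tuple(name_parts[:-1]))
    new_qualifiers: list[set[ColumnQualifier]] = [
        {target_qualifier},                # column qualified as db.tbl
        {ColumnQualifier.no_qualifier()},  # column with no qualifier
    ]
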
@@ -73,7 +77,7 @@ def map_unresolved_star(
  snowpark_names,
  qualifiers,
  ) = column_mapping_for_outer_df.get_spark_and_snowpark_columns_with_qualifier_for_qualifier(
- name_parts[:-1]
+ target_qualifier
  )
  if len(spark_names) > 0:
  break
@@ -139,7 +143,7 @@ def map_unresolved_star(
  final_sql_expr,
  lambda final_sql_expr=final_sql_expr: typer.type(final_sql_expr),
  )
- typed_column.set_multi_col_qualifiers([[] for _ in spark_names])
+ typed_column.set_multi_col_qualifiers([set() for _ in spark_names])
  return spark_names, typed_column
  else:
  result_exp = snowpark_fn.sql_expr(
@@ -152,9 +156,11 @@ def map_unresolved_star(
  typed_column.set_multi_col_qualifiers(column_mapping.get_qualifiers())
  return spark_names, typed_column

- raise AnalysisException(
+ exception = AnalysisException(
  f"[UNRESOLVED_STAR] The unresolved star expression {exp} is not supported."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception


  def map_unresolved_star_struct(
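
The other pattern repeated across nearly every file in this release is error-code attachment: instead of raising directly, the code builds the exception, tags it with an ErrorCodes member via attach_custom_error_code, and only then raises. A minimal sketch of that pattern, assuming only the call shape visible in these hunks (the helper and codes come from the new error_codes.py and the extended error_utils.py in the file list); fail_unsupported is a hypothetical wrapper, not a function in the package:

    # Sketch of the construct-attach-raise pattern; not copied from the package.
    from pyspark.errors.exceptions.base import AnalysisException

    from snowflake.snowpark_connect.error.error_codes import ErrorCodes
    from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code


    def fail_unsupported(feature: str) -> None:
        # Build the user-facing exception first ...
        exception = AnalysisException(f"{feature} is not supported.")
        # ... attach a machine-readable error code for diagnostics ...
        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
        # ... and only then raise it.
        raise exception
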
@@ -8,6 +8,8 @@ from pyspark.errors.exceptions.base import AnalysisException
  import snowflake.snowpark.functions as snowpark_fn
  from snowflake.snowpark.types import DataType, StringType, StructField, StructType
  from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.expression.typer import ExpressionTyper
  from snowflake.snowpark_connect.typed_column import TypedColumn
  from snowflake.snowpark_connect.utils.identifiers import (
@@ -39,9 +41,11 @@ def update_field_in_schema(
  field.name, updated_subschema, field.nullable, _is_column=False
  )
  else:
- raise AnalysisException(
+ exception = AnalysisException(
  message=f"[FIELD_NOT_FOUND] No such struct field `{field_str}` in `{field}`"
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception
  field_updated = True
  else:
  new_field = field # leave unchanged
@@ -59,9 +63,11 @@ def update_field_in_schema(
  # if the value type is None that means we want to drop the field and spark does not throw an error if the field does not exists
  # but if the value type is not None, it means we should add or update this field which has already been covered above
  # if we reach this code, it means the field should have existed
- raise AnalysisException(
+ exception = AnalysisException(
  message=f"[FIELD_NOT_FOUND] No such struct field `{field_str}`"
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception
  return StructType(new_fields)


@@ -99,9 +105,11 @@ def map_update_fields(
  )

  if not isinstance(struct_typed_column.typ, StructType):
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "update_fields({struct_name}, ...)" due to data type mismatch: Parameter 1 requires the "STRUCT" type'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception

  final_schema = struct_typed_column.typ
  value_column_list = []
@@ -137,9 +145,11 @@ def map_update_fields(
  final_name = f"update_fields({struct_name}, {update_operations_str})"

  if len(final_schema.fields) == 0:
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.CANNOT_DROP_ALL_FIELDS] Cannot resolve "{final_name}" due to data type mismatch: Cannot drop all fields in struct.'
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception

  @snowpark_fn.udf(
  input_types=input_types_to_the_udf,
@@ -6,7 +6,11 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto

  from snowflake import snowpark
  from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
- from snowflake.snowpark_connect.error.error_utils import SparkException
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import (
+ SparkException,
+ attach_custom_error_code,
+ )
  from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
  from snowflake.snowpark_connect.expression.typer import ExpressionTyper
  from snowflake.snowpark_connect.typed_column import TypedColumn
@@ -29,6 +33,8 @@ SPARK_RANKING_FUNCTIONS = frozenset(
  ]
  )

+ RANGE_BASED_WINDOW_FRAME_ONLY_SNOWFLAKE_FUNCTIONS = frozenset(["percent_rank"])
+
  CAPITAL_FUNCTION_NAMES = frozenset(["rank()", "dense_rank()", "percent_rank()"])


@@ -128,6 +134,11 @@ def map_window_function(
  case expressions_proto.Expression.Window.WindowFrame.FrameType.FRAME_TYPE_ROW:
  frame_name.append("ROWS BETWEEN")
  frame_type_func_string = "rows_between"
+ if proto_func_name in RANGE_BASED_WINDOW_FRAME_ONLY_SNOWFLAKE_FUNCTIONS:
+ # Seems like Snowflake and Spark have different understanding of some functions. For those,
+ # Spark only allows rows_between while Snowflake only allows range_between. To be compatible
+ # with Spark, we have to use range_between here.
+ frame_type_func_string = "range_between"
  lower_name, lower = parse_frame_boundary(
  exp.window.frame_spec.lower, is_upper=False
  )
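
The comment in this hunk is the key compatibility detail: for functions such as percent_rank, Spark expresses the frame as ROWS BETWEEN, while Snowflake only accepts a RANGE-based cumulative frame, so the translation swaps the frame type. A rough Snowpark-side illustration of the frame the mapping falls back to (assumes an already-configured Snowpark session; not taken from the package):

    # Rough illustration of the RANGE frame used for percent_rank().
    import snowflake.snowpark.functions as F
    from snowflake.snowpark import Session, Window

    session = Session.builder.getOrCreate()  # connection parameters must already be configured
    df = session.create_dataframe([(1, 10), (1, 20), (2, 30)], schema=["grp", "val"])

    window = (
        Window.partition_by("grp")
        .order_by("val")
        # Spark writes this frame as ROWS BETWEEN ...; Snowflake's PERCENT_RANK only
        # accepts the RANGE form, hence range_between in the hunk above.
        .range_between(Window.UNBOUNDED_PRECEDING, Window.CURRENT_ROW)
    )
    df.select("grp", "val", F.percent_rank().over(window).alias("pr")).show()
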
@@ -138,9 +149,11 @@ def map_window_function(
  lower != snowpark.Window.UNBOUNDED_PRECEDING
  or upper != snowpark.Window.CURRENT_ROW
  ):
- raise SparkException.invalid_ranking_function_window_frame(
+ exception = SparkException.invalid_ranking_function_window_frame(
  window_frame=f"specifiedwindowframe(RowFrame, {lower_name}, {upper_name})"
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception

  is_unbounded = (
  lower == snowpark.Window.UNBOUNDED_PRECEDING
@@ -165,9 +178,11 @@ def map_window_function(
  orders = orders[:1]

  if proto_func_name in SPARK_RANKING_FUNCTIONS:
- raise SparkException.invalid_ranking_function_window_frame(
+ exception = SparkException.invalid_ranking_function_window_frame(
  window_frame=f"specifiedwindowframe(RangeFrame, {lower_name}, {upper_name})"
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception

  is_unbounded = (
  lower == snowpark.Window.UNBOUNDED_PRECEDING
@@ -11,6 +11,8 @@ import pyspark.sql.connect.proto.types_pb2 as types_proto

  from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.error.exceptions import MissingDatabase, MissingSchema
  from snowflake.snowpark_connect.utils.identifiers import (
  split_fully_qualified_spark_name,
@@ -39,28 +41,40 @@ class AbstractSparkCatalog(ABC):
  description: str,
  **options: typing.Any,
  ) -> DataFrameContainer:
- raise SnowparkConnectNotImplementedError("createTable is not implemented")
+ exception = SnowparkConnectNotImplementedError("createTable is not implemented")
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  @abstractmethod
  def listDatabases(
  self,
  pattern: str | None = None,
  ) -> pandas.DataFrame:
- raise SnowparkConnectNotImplementedError("listDatabases is not implemented")
+ exception = SnowparkConnectNotImplementedError(
+ "listDatabases is not implemented"
+ )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  @abstractmethod
  def getDatabase(
  self,
  spark_dbName: str,
  ) -> pandas.DataFrame:
- raise SnowparkConnectNotImplementedError("getDatabase is not implemented")
+ exception = SnowparkConnectNotImplementedError("getDatabase is not implemented")
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  @abstractmethod
  def databaseExists(
  self,
  spark_dbName: str,
  ) -> pandas.DataFrame:
- raise SnowparkConnectNotImplementedError("databaseExists is not implemented")
+ exception = SnowparkConnectNotImplementedError(
+ "databaseExists is not implemented"
+ )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  @abstractmethod
  def listTables(
@@ -68,14 +82,18 @@ class AbstractSparkCatalog(ABC):
  spark_dbName: str | None = None,
  pattern: str | None = None,
  ) -> pandas.DataFrame:
- raise SnowparkConnectNotImplementedError("listTables is not implemented")
+ exception = SnowparkConnectNotImplementedError("listTables is not implemented")
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  @abstractmethod
  def getTable(
  self,
  spark_tableName: str,
  ) -> pandas.DataFrame:
- raise SnowparkConnectNotImplementedError("getTable is not implemented")
+ exception = SnowparkConnectNotImplementedError("getTable is not implemented")
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  @abstractmethod
  def tableExists(
@@ -83,7 +101,9 @@ class AbstractSparkCatalog(ABC):
  spark_tableName: str,
  spark_dbName: str | None,
  ) -> pandas.DataFrame:
- raise SnowparkConnectNotImplementedError("tableExists is not implemented")
+ exception = SnowparkConnectNotImplementedError("tableExists is not implemented")
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  @abstractmethod
  def listColumns(
@@ -91,36 +111,50 @@ class AbstractSparkCatalog(ABC):
  spark_tableName: str,
  spark_dbName: str | None = None,
  ) -> pandas.DataFrame:
- raise SnowparkConnectNotImplementedError("listColumns is not implemented")
+ exception = SnowparkConnectNotImplementedError("listColumns is not implemented")
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  @abstractmethod
  def currentDatabase(self) -> pandas.DataFrame:
- raise SnowparkConnectNotImplementedError("currentDatabase is not implemented")
+ exception = SnowparkConnectNotImplementedError(
+ "currentDatabase is not implemented"
+ )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  @abstractmethod
  def setCurrentDatabase(
  self,
  spark_dbName: str,
  ) -> pandas.DataFrame:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "setCurrentDatabase is not implemented"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  @abstractmethod
  def dropGlobalTempView(
  self,
  spark_view_name: str,
  ) -> DataFrameContainer:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "dropGlobalTempView is not implemented"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  @abstractmethod
  def dropTempView(
  self,
  spark_view_name: str,
  ) -> DataFrameContainer:
- raise SnowparkConnectNotImplementedError("dropTempView is not implemented")
+ exception = SnowparkConnectNotImplementedError(
+ "dropTempView is not implemented"
+ )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  def cacheTable(
  self,
@@ -135,9 +169,11 @@ class AbstractSparkCatalog(ABC):
  spark_tableName
  )
  if catalog is not None and self != catalog:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Calling into another catalog is not currently supported"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  if sf_database is None:
  sf_database = _get_current_snowflake_database()
  if sf_schema is None:
@@ -168,9 +204,11 @@ class AbstractSparkCatalog(ABC):
  spark_tableName
  )
  if catalog is not None and self != catalog:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Calling into another catalog is not currently supported"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  if sf_database is None:
  sf_database = _get_current_snowflake_database()
  if sf_schema is None:
@@ -194,9 +232,11 @@ class AbstractSparkCatalog(ABC):
  spark_tableName
  )
  if catalog is not None and self != catalog:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Calling into another catalog is not currently supported"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  if sf_database is None:
  sf_database = _get_current_snowflake_database()
  if sf_schema is None:
@@ -249,7 +289,11 @@ def _process_multi_layer_database(
  else:
  return None, c, d
  case _:
- raise ValueError(f"Unexpected database identifier format: {spark_mli}")
+ exception = ValueError(
+ f"Unexpected database identifier format: {spark_mli}"
+ )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception


  def _process_multi_layer_identifier(
@@ -283,5 +327,9 @@ def _process_multi_layer_identifier(
  else:
  snowflake_database, snowflake_schema, snowflake_obj = d, s, t
  case _:
- raise ValueError(f"Unexpected table/view identifier format: {spark_mli}")
+ exception = ValueError(
+ f"Unexpected table/view identifier format: {spark_mli}"
+ )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception
  return spark_catalog, snowflake_database, snowflake_schema, snowflake_obj
@@ -19,11 +19,14 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
  )
  from snowflake.snowpark.functions import lit
  from snowflake.snowpark.types import BooleanType, StringType
+ from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
  from snowflake.snowpark_connect.config import (
  auto_uppercase_non_column_identifiers,
  global_config,
  )
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.error.exceptions import MaxRetryExceeded
  from snowflake.snowpark_connect.relation.catalogs.abstract_spark_catalog import (
  AbstractSparkCatalog,
@@ -109,9 +112,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
  catalog, sf_database, sf_schema = _process_multi_layer_database(pattern)
  sf_schema = sf_schema.replace("*", ".*")
  if catalog is not None and self != catalog:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Calling into another catalog is not currently supported"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  sp_catalog = get_or_create_snowpark_session().catalog

  dbs: list[Schema] | None = None
@@ -131,7 +136,8 @@ class SnowflakeCatalog(AbstractSparkCatalog):
  )
  if dbs is None:
  raise MaxRetryExceeded(
- f"Failed to fetch databases {f'with pattern {pattern} ' if pattern is not None else ''}after all retry attempts"
+ f"Failed to fetch databases {f'with pattern {pattern} ' if pattern is not None else ''}after all retry attempts",
+ custom_error_code=ErrorCodes.INTERNAL_ERROR,
  )
  names: list[str] = list()
  catalogs: list[str] = list()
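
Alongside the attach-and-raise pattern, the retry paths now pass the error code straight into MaxRetryExceeded as a keyword argument (exceptions.py also changed in this release). A minimal sketch of that call shape, assuming only the custom_error_code keyword shown here; fetch_with_retries is a hypothetical illustration, not a function from the package:

    # Sketch only; assumes the MaxRetryExceeded(custom_error_code=...) keyword
    # visible in the hunks above.
    from snowflake.snowpark_connect.error.error_codes import ErrorCodes
    from snowflake.snowpark_connect.error.exceptions import MaxRetryExceeded


    def fetch_with_retries(fetch, attempts: int = 5):
        last_error = None
        for _ in range(attempts):
            try:
                return fetch()
            except Exception as e:  # illustrative only
                last_error = e
        raise MaxRetryExceeded(
            "Failed to fetch databases after all retry attempts",
            custom_error_code=ErrorCodes.INTERNAL_ERROR,
        ) from last_error
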
@@ -163,9 +169,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
  """Listing a single database that's accessible in Snowflake."""
  catalog, sf_database, sf_schema = _process_multi_layer_database(spark_dbName)
  if catalog is not None and self != catalog:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Calling into another catalog is not currently supported"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  sp_catalog = get_or_create_snowpark_session().catalog

  db: Schema | None = None
@@ -184,7 +192,8 @@ class SnowflakeCatalog(AbstractSparkCatalog):
  )
  if db is None:
  raise MaxRetryExceeded(
- f"Failed to fetch database {spark_dbName} after all retry attempts"
+ f"Failed to fetch database {spark_dbName} after all retry attempts",
+ custom_error_code=ErrorCodes.INTERNAL_ERROR,
  )

  name = unquote_if_quoted(db.name)
@@ -307,9 +316,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
  spark_dbName
  )
  if catalog is not None and self != catalog:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Calling into another catalog is not currently supported"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  else:
  catalog = sf_database = sf_schema = None

@@ -440,9 +451,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
  spark_tableName
  )
  if catalog is not None and self != catalog:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Calling into another catalog is not currently supported"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  table: Table | None = None
  for attempt in Retrying(
@@ -463,7 +476,8 @@ class SnowflakeCatalog(AbstractSparkCatalog):

  if table is None:
  raise MaxRetryExceeded(
- f"Failed to fetch table {spark_tableName} after all retry attempts"
+ f"Failed to fetch table {spark_tableName} after all retry attempts",
+ custom_error_code=ErrorCodes.INTERNAL_ERROR,
  )

  return pandas.DataFrame(
@@ -563,9 +577,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
  spark_tableName
  )
  if catalog is not None and self != catalog:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Calling into another catalog is not currently supported"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  for attempt in Retrying(
  max_retries=5,
  initial_backoff=100, # 100ms
@@ -598,7 +614,8 @@ class SnowflakeCatalog(AbstractSparkCatalog):
  )
  if columns is None:
  raise MaxRetryExceeded(
- f"Failed to fetch columns of {spark_tableName} after all retry attempts"
+ f"Failed to fetch columns of {spark_tableName} after all retry attempts",
+ custom_error_code=ErrorCodes.INTERNAL_ERROR,
  )
  names: list[str] = list()
  descriptions: list[str | None] = list()
@@ -702,26 +719,34 @@ class SnowflakeCatalog(AbstractSparkCatalog):
  if source == "":
  source = global_config.get("spark.sql.sources.default")
  if source not in ("csv", "json", "avro", "parquet", "orc", "xml"):
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  f"Source '{source}' is not currently supported by Catalog.createTable. "
  "Maybe default value through 'spark.sql.sources.default' should be set."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  if path != "":
  # External table creation is not supported currently.
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "External table creation is not supported currently."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  session = get_or_create_snowpark_session()
  # Managed table
  if schema.ByteSize() == 0:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  f"Unable to infer schema for {source.upper()}. It must be specified manually.",
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  sp_schema = proto_to_snowpark_type(schema)
  columns = [c.name for c in schema.struct.fields]
  table_name_parts = split_fully_qualified_spark_name(tableName)
- qualifiers = [table_name_parts for _ in columns]
+ qualifiers: list[set[ColumnQualifier]] = [
+ {ColumnQualifier(tuple(table_name_parts))} for _ in columns
+ ]
  column_types = [f.datatype for f in sp_schema.fields]
  return DataFrameContainer.create_with_column_mapping(
  dataframe=session.createDataFrame([], sp_schema),
@@ -5,6 +5,8 @@
  from collections import defaultdict

  from snowflake.connector.errors import ProgrammingError
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.relation.catalogs import CATALOGS, SNOWFLAKE_CATALOG
  from snowflake.snowpark_connect.relation.catalogs.abstract_spark_catalog import (
  AbstractSparkCatalog,
@@ -27,11 +29,15 @@ def set_current_catalog(catalog_name: str | None) -> AbstractSparkCatalog:

  # Validate input parameters to match PySpark behavior
  if catalog_name is None:
- raise ValueError("Catalog name cannot be None")
+ exception = ValueError("Catalog name cannot be None")
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception
  if catalog_name == "":
- raise ValueError(
+ exception = ValueError(
  "Catalog '' plugin class not found: spark.sql.catalog. is not defined"
  )
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+ raise exception

  CURRENT_CATALOG_NAME = catalog_name
  if catalog_name in CATALOGS:
@@ -42,9 +48,11 @@ def set_current_catalog(catalog_name: str | None) -> AbstractSparkCatalog:
  sf_catalog.setCurrentDatabase(catalog_name if catalog_name is not None else "")
  return get_current_catalog()
  except ProgrammingError as e:
- raise Exception(
+ exception = Exception(
  f"Catalog '{catalog_name}' plugin class not found: spark.sql.catalog.{catalog_name} is not defined"
- ) from e
+ )
+ attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+ raise exception from e


  def _get_current_temp_objects() -> set[tuple[str | None, str | None, str]]:
@@ -6,6 +6,9 @@ from urllib.parse import urlparse

  from pyspark.errors.exceptions.base import AnalysisException

+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+
  CLOUD_PREFIX_TO_CLOUD = {
  "abfss": "azure",
  "wasbs": "azure",
@@ -74,7 +77,7 @@ def get_compression_for_source_and_options(

  if not is_supported_compression(source, compression):
  supported_compressions = supported_compressions_for_format(source)
- raise AnalysisException(
+ exception = AnalysisException(
  f"Compression {compression} is not supported for {source} format. "
  + (
  f"Supported compressions: {sorted(supported_compressions)}"
@@ -82,6 +85,8 @@ def get_compression_for_source_and_options(
  else "None compression supported for this format."
  )
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  return compression

@@ -16,6 +16,7 @@ from snowflake.snowpark.types import DataType
  from snowflake.snowpark_connect.column_name_handler import (
  make_column_names_snowpark_compatible,
  )
+ from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
  from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
  from snowflake.snowpark_connect.expression.map_expression import (
@@ -200,7 +201,9 @@ def map_pivot_aggregate(
  dataframe=result.select(*column_selectors),
  spark_column_names=reordered_spark_names,
  snowpark_column_names=reordered_snowpark_names,
- column_qualifiers=[[]] * len(reordered_spark_names),
+ column_qualifiers=[
+ {ColumnQualifier.no_qualifier()} for _ in reordered_spark_names
+ ],
  parent_column_name_map=input_container.column_map,
  snowpark_column_types=reordered_types,
  )
@@ -349,7 +352,7 @@ class _ColumnMetadata:
  spark_name: str
  snowpark_name: str
  data_type: DataType
- qualifiers: list[str]
+ qualifiers: set[ColumnQualifier]


  @dataclass(frozen=True)
@@ -385,7 +388,7 @@ class _Columns:
  col.spark_name for col in self.grouping_columns + self.aggregation_columns
  ]

- def get_qualifiers(self) -> list[list[str]]:
+ def get_qualifiers(self) -> list[set[ColumnQualifier]]:
  return [
  col.qualifiers for col in self.grouping_columns + self.aggregation_columns
  ]
@@ -429,7 +432,7 @@ def map_aggregate_helper(
  new_name,
  None if skip_alias else alias,
  None if pivot else snowpark_column.typ,
- snowpark_column.get_qualifiers(),
+ qualifiers=snowpark_column.get_qualifiers(),
  )
  )

@@ -469,7 +472,7 @@ def map_aggregate_helper(
  new_name,
  None if skip_alias else alias,
  agg_col_typ,
- [],
+ qualifiers={ColumnQualifier.no_qualifier()},
  )
  )