snowpark-connect 0.30.0__py3-none-any.whl → 0.31.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (81)
  1. snowflake/snowpark_connect/column_name_handler.py +150 -25
  2. snowflake/snowpark_connect/config.py +54 -16
  3. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  4. snowflake/snowpark_connect/error/error_codes.py +50 -0
  5. snowflake/snowpark_connect/error/error_utils.py +142 -22
  6. snowflake/snowpark_connect/error/exceptions.py +13 -4
  7. snowflake/snowpark_connect/execute_plan/map_execution_command.py +5 -1
  8. snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
  9. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  10. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  11. snowflake/snowpark_connect/expression/literal.py +7 -1
  12. snowflake/snowpark_connect/expression/map_cast.py +17 -5
  13. snowflake/snowpark_connect/expression/map_expression.py +48 -4
  14. snowflake/snowpark_connect/expression/map_extension.py +25 -5
  15. snowflake/snowpark_connect/expression/map_sql_expression.py +65 -30
  16. snowflake/snowpark_connect/expression/map_udf.py +10 -2
  17. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +33 -9
  18. snowflake/snowpark_connect/expression/map_unresolved_function.py +627 -205
  19. snowflake/snowpark_connect/expression/map_unresolved_star.py +5 -1
  20. snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
  21. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  22. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
  23. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
  24. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  25. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +34 -12
  26. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  27. snowflake/snowpark_connect/relation/io_utils.py +66 -4
  28. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  29. snowflake/snowpark_connect/relation/map_column_ops.py +88 -56
  30. snowflake/snowpark_connect/relation/map_extension.py +28 -8
  31. snowflake/snowpark_connect/relation/map_join.py +21 -10
  32. snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
  33. snowflake/snowpark_connect/relation/map_relation.py +33 -7
  34. snowflake/snowpark_connect/relation/map_row_ops.py +36 -9
  35. snowflake/snowpark_connect/relation/map_sql.py +91 -24
  36. snowflake/snowpark_connect/relation/map_stats.py +25 -6
  37. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  38. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
  39. snowflake/snowpark_connect/relation/read/map_read.py +24 -3
  40. snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
  41. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  42. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
  43. snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
  44. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  45. snowflake/snowpark_connect/relation/read/map_read_table.py +15 -5
  46. snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
  47. snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
  48. snowflake/snowpark_connect/relation/stage_locator.py +5 -1
  49. snowflake/snowpark_connect/relation/utils.py +19 -2
  50. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  51. snowflake/snowpark_connect/relation/write/map_write.py +146 -63
  52. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  53. snowflake/snowpark_connect/resources_initializer.py +5 -1
  54. snowflake/snowpark_connect/server.py +72 -19
  55. snowflake/snowpark_connect/type_mapping.py +54 -17
  56. snowflake/snowpark_connect/utils/context.py +42 -1
  57. snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
  58. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  59. snowflake/snowpark_connect/utils/identifiers.py +11 -3
  60. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  61. snowflake/snowpark_connect/utils/profiling.py +25 -8
  62. snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
  63. snowflake/snowpark_connect/utils/session.py +5 -2
  64. snowflake/snowpark_connect/utils/telemetry.py +81 -18
  65. snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
  66. snowflake/snowpark_connect/utils/udf_cache.py +5 -3
  67. snowflake/snowpark_connect/utils/udf_helper.py +20 -6
  68. snowflake/snowpark_connect/utils/udf_utils.py +4 -4
  69. snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
  70. snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
  71. snowflake/snowpark_connect/version.py +1 -1
  72. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/METADATA +3 -2
  73. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/RECORD +81 -78
  74. {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-connect +0 -0
  75. {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-session +0 -0
  76. {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-submit +0 -0
  77. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/WHEEL +0 -0
  78. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE-binary +0 -0
  79. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE.txt +0 -0
  80. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/NOTICE-binary +0 -0
  81. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/top_level.txt +0 -0

snowflake/snowpark_connect/expression/map_unresolved_star.py

@@ -11,6 +11,8 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
 )
 from snowflake.snowpark.types import StructType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import get_outer_dataframes
@@ -152,9 +154,11 @@ def map_unresolved_star(
         typed_column.set_multi_col_qualifiers(column_mapping.get_qualifiers())
         return spark_names, typed_column

-    raise AnalysisException(
+    exception = AnalysisException(
         f"[UNRESOLVED_STAR] The unresolved star expression {exp} is not supported."
     )
+    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+    raise exception


 def map_unresolved_star_struct(
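
Most hunks in this release apply the same pattern: build the exception, tag it with a custom error code via attach_custom_error_code, then raise it. The sketch below shows that pattern in isolation; it is an assumption, not the shipped implementation — the real helpers live in error/error_codes.py and error/error_utils.py, and the ErrorCodes members and fail_unsupported helper here are placeholders for illustration only.

# Minimal sketch (assumed behavior) of the attach-then-raise pattern used throughout 0.31.0.
from enum import Enum


class ErrorCodes(Enum):
    UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION"  # hypothetical value
    INVALID_INPUT = "INVALID_INPUT"                  # hypothetical value


def attach_custom_error_code(exception: Exception, code: ErrorCodes) -> None:
    # Assumption: the helper records the code on the exception instance so that
    # telemetry / error reporting can read it when the exception surfaces.
    exception.custom_error_code = code


def fail_unsupported(feature: str) -> None:
    # Hypothetical caller mirroring the diff's shape: construct, attach, raise.
    exception = NotImplementedError(f"{feature} is not implemented")
    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
    raise exception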

snowflake/snowpark_connect/expression/map_update_fields.py

@@ -8,6 +8,8 @@ from pyspark.errors.exceptions.base import AnalysisException
 import snowflake.snowpark.functions as snowpark_fn
 from snowflake.snowpark.types import DataType, StringType, StructField, StructType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.identifiers import (
@@ -39,9 +41,11 @@ def update_field_in_schema(
                     field.name, updated_subschema, field.nullable, _is_column=False
                 )
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     message=f"[FIELD_NOT_FOUND] No such struct field `{field_str}` in `{field}`"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception
             field_updated = True
         else:
             new_field = field  # leave unchanged
@@ -59,9 +63,11 @@ def update_field_in_schema(
         # if the value type is None that means we want to drop the field and spark does not throw an error if the field does not exists
         # but if the value type is not None, it means we should add or update this field which has already been covered above
         # if we reach this code, it means the field should have existed
-        raise AnalysisException(
+        exception = AnalysisException(
            message=f"[FIELD_NOT_FOUND] No such struct field `{field_str}`"
        )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
     return StructType(new_fields)


@@ -99,9 +105,11 @@ def map_update_fields(
     )

     if not isinstance(struct_typed_column.typ, StructType):
-        raise AnalysisException(
+        exception = AnalysisException(
             f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "update_fields({struct_name}, ...)" due to data type mismatch: Parameter 1 requires the "STRUCT" type'
         )
+        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+        raise exception

     final_schema = struct_typed_column.typ
     value_column_list = []
@@ -137,9 +145,11 @@ def map_update_fields(
     final_name = f"update_fields({struct_name}, {update_operations_str})"

     if len(final_schema.fields) == 0:
-        raise AnalysisException(
+        exception = AnalysisException(
             f'[DATATYPE_MISMATCH.CANNOT_DROP_ALL_FIELDS] Cannot resolve "{final_name}" due to data type mismatch: Cannot drop all fields in struct.'
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception

     @snowpark_fn.udf(
         input_types=input_types_to_the_udf,

snowflake/snowpark_connect/expression/map_window_function.py

@@ -6,7 +6,11 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto

 from snowflake import snowpark
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
-from snowflake.snowpark_connect.error.error_utils import SparkException
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import (
+    SparkException,
+    attach_custom_error_code,
+)
 from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
@@ -29,6 +33,8 @@ SPARK_RANKING_FUNCTIONS = frozenset(
     ]
 )

+RANGE_BASED_WINDOW_FRAME_ONLY_SNOWFLAKE_FUNCTIONS = frozenset(["percent_rank"])
+
 CAPITAL_FUNCTION_NAMES = frozenset(["rank()", "dense_rank()", "percent_rank()"])


@@ -128,6 +134,11 @@ def map_window_function(
         case expressions_proto.Expression.Window.WindowFrame.FrameType.FRAME_TYPE_ROW:
             frame_name.append("ROWS BETWEEN")
             frame_type_func_string = "rows_between"
+            if proto_func_name in RANGE_BASED_WINDOW_FRAME_ONLY_SNOWFLAKE_FUNCTIONS:
+                # Seems like Snowflake and Spark have different understanding of some functions. For those,
+                # Spark only allows rows_between while Snowflake only allows range_between. To be compatible
+                # with Spark, we have to use range_between here.
+                frame_type_func_string = "range_between"
             lower_name, lower = parse_frame_boundary(
                 exp.window.frame_spec.lower, is_upper=False
             )
@@ -138,9 +149,11 @@ def map_window_function(
                 lower != snowpark.Window.UNBOUNDED_PRECEDING
                 or upper != snowpark.Window.CURRENT_ROW
             ):
-                raise SparkException.invalid_ranking_function_window_frame(
+                exception = SparkException.invalid_ranking_function_window_frame(
                     window_frame=f"specifiedwindowframe(RowFrame, {lower_name}, {upper_name})"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

             is_unbounded = (
                 lower == snowpark.Window.UNBOUNDED_PRECEDING
@@ -165,9 +178,11 @@ def map_window_function(
                 orders = orders[:1]

             if proto_func_name in SPARK_RANKING_FUNCTIONS:
-                raise SparkException.invalid_ranking_function_window_frame(
+                exception = SparkException.invalid_ranking_function_window_frame(
                     window_frame=f"specifiedwindowframe(RangeFrame, {lower_name}, {upper_name})"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

             is_unbounded = (
                 lower == snowpark.Window.UNBOUNDED_PRECEDING
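
The new RANGE_BASED_WINDOW_FRAME_ONLY_SNOWFLAKE_FUNCTIONS set covers functions (currently only percent_rank) that Spark describes with a ROWS frame but Snowflake accepts only with a RANGE frame, so the mapper swaps rows_between for range_between. Below is a hypothetical client-side illustration of the kind of query this affects; the Spark Connect endpoint and sample data are placeholders, not part of this package.

# percent_rank() over an ordered window; Spark frames this as ROWS, which
# snowpark-connect 0.31.0 now rewrites to Snowflake's RANGE frame.
from pyspark.sql import SparkSession, functions as F
from pyspark.sql.window import Window

spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()  # placeholder endpoint
df = spark.createDataFrame([("a", 1), ("a", 2), ("b", 3)], ["grp", "val"])

w = Window.partitionBy("grp").orderBy("val")
df.select("grp", "val", F.percent_rank().over(w).alias("pct")).show()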

snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py

@@ -0,0 +1,4 @@
+# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
+"""Client and server classes corresponding to protobuf-defined services."""
+import grpc
+

snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py

@@ -0,0 +1,4 @@
+# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
+"""Client and server classes corresponding to protobuf-defined services."""
+import grpc
+

snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py

@@ -11,6 +11,8 @@ import pyspark.sql.connect.proto.types_pb2 as types_proto

 from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.error.exceptions import MissingDatabase, MissingSchema
 from snowflake.snowpark_connect.utils.identifiers import (
     split_fully_qualified_spark_name,
@@ -39,28 +41,40 @@ class AbstractSparkCatalog(ABC):
         description: str,
         **options: typing.Any,
     ) -> DataFrameContainer:
-        raise SnowparkConnectNotImplementedError("createTable is not implemented")
+        exception = SnowparkConnectNotImplementedError("createTable is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def listDatabases(
         self,
         pattern: str | None = None,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("listDatabases is not implemented")
+        exception = SnowparkConnectNotImplementedError(
+            "listDatabases is not implemented"
+        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def getDatabase(
         self,
         spark_dbName: str,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("getDatabase is not implemented")
+        exception = SnowparkConnectNotImplementedError("getDatabase is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def databaseExists(
         self,
         spark_dbName: str,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("databaseExists is not implemented")
+        exception = SnowparkConnectNotImplementedError(
+            "databaseExists is not implemented"
+        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def listTables(
@@ -68,14 +82,18 @@ class AbstractSparkCatalog(ABC):
         spark_dbName: str | None = None,
         pattern: str | None = None,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("listTables is not implemented")
+        exception = SnowparkConnectNotImplementedError("listTables is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def getTable(
         self,
         spark_tableName: str,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("getTable is not implemented")
+        exception = SnowparkConnectNotImplementedError("getTable is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def tableExists(
@@ -83,7 +101,9 @@ class AbstractSparkCatalog(ABC):
         spark_tableName: str,
         spark_dbName: str | None,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("tableExists is not implemented")
+        exception = SnowparkConnectNotImplementedError("tableExists is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def listColumns(
@@ -91,36 +111,50 @@ class AbstractSparkCatalog(ABC):
         spark_tableName: str,
         spark_dbName: str | None = None,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("listColumns is not implemented")
+        exception = SnowparkConnectNotImplementedError("listColumns is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def currentDatabase(self) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("currentDatabase is not implemented")
+        exception = SnowparkConnectNotImplementedError(
+            "currentDatabase is not implemented"
+        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def setCurrentDatabase(
         self,
         spark_dbName: str,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "setCurrentDatabase is not implemented"
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def dropGlobalTempView(
         self,
         spark_view_name: str,
     ) -> DataFrameContainer:
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "dropGlobalTempView is not implemented"
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def dropTempView(
         self,
         spark_view_name: str,
     ) -> DataFrameContainer:
-        raise SnowparkConnectNotImplementedError("dropTempView is not implemented")
+        exception = SnowparkConnectNotImplementedError(
+            "dropTempView is not implemented"
+        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     def cacheTable(
         self,
@@ -135,9 +169,11 @@ class AbstractSparkCatalog(ABC):
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         if sf_database is None:
             sf_database = _get_current_snowflake_database()
         if sf_schema is None:
@@ -168,9 +204,11 @@ class AbstractSparkCatalog(ABC):
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         if sf_database is None:
             sf_database = _get_current_snowflake_database()
         if sf_schema is None:
@@ -194,9 +232,11 @@ class AbstractSparkCatalog(ABC):
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         if sf_database is None:
             sf_database = _get_current_snowflake_database()
         if sf_schema is None:
@@ -249,7 +289,11 @@ def _process_multi_layer_database(
             else:
                 return None, c, d
         case _:
-            raise ValueError(f"Unexpected database identifier format: {spark_mli}")
+            exception = ValueError(
+                f"Unexpected database identifier format: {spark_mli}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+            raise exception


 def _process_multi_layer_identifier(
@@ -283,5 +327,9 @@ def _process_multi_layer_identifier(
             else:
                 snowflake_database, snowflake_schema, snowflake_obj = d, s, t
         case _:
-            raise ValueError(f"Unexpected table/view identifier format: {spark_mli}")
+            exception = ValueError(
+                f"Unexpected table/view identifier format: {spark_mli}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+            raise exception
     return spark_catalog, snowflake_database, snowflake_schema, snowflake_obj

snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py

@@ -24,6 +24,8 @@ from snowflake.snowpark_connect.config import (
     global_config,
 )
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.error.exceptions import MaxRetryExceeded
 from snowflake.snowpark_connect.relation.catalogs.abstract_spark_catalog import (
     AbstractSparkCatalog,
@@ -109,9 +111,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         catalog, sf_database, sf_schema = _process_multi_layer_database(pattern)
         sf_schema = sf_schema.replace("*", ".*")
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         sp_catalog = get_or_create_snowpark_session().catalog

         dbs: list[Schema] | None = None
@@ -131,7 +135,8 @@ class SnowflakeCatalog(AbstractSparkCatalog):
             )
         if dbs is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch databases {f'with pattern {pattern} ' if pattern is not None else ''}after all retry attempts"
+                f"Failed to fetch databases {f'with pattern {pattern} ' if pattern is not None else ''}after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )
         names: list[str] = list()
         catalogs: list[str] = list()
@@ -163,9 +168,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         """Listing a single database that's accessible in Snowflake."""
         catalog, sf_database, sf_schema = _process_multi_layer_database(spark_dbName)
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         sp_catalog = get_or_create_snowpark_session().catalog

         db: Schema | None = None
@@ -184,7 +191,8 @@ class SnowflakeCatalog(AbstractSparkCatalog):
             )
         if db is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch database {spark_dbName} after all retry attempts"
+                f"Failed to fetch database {spark_dbName} after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )

         name = unquote_if_quoted(db.name)
@@ -307,9 +315,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
                 spark_dbName
             )
             if catalog is not None and self != catalog:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     "Calling into another catalog is not currently supported"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
         else:
             catalog = sf_database = sf_schema = None

@@ -440,9 +450,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

         table: Table | None = None
         for attempt in Retrying(
@@ -463,7 +475,8 @@ class SnowflakeCatalog(AbstractSparkCatalog):

         if table is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch table {spark_tableName} after all retry attempts"
+                f"Failed to fetch table {spark_tableName} after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )

         return pandas.DataFrame(
@@ -563,9 +576,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         for attempt in Retrying(
             max_retries=5,
             initial_backoff=100,  # 100ms
@@ -598,7 +613,8 @@ class SnowflakeCatalog(AbstractSparkCatalog):
             )
         if columns is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch columns of {spark_tableName} after all retry attempts"
+                f"Failed to fetch columns of {spark_tableName} after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )
         names: list[str] = list()
         descriptions: list[str | None] = list()
@@ -702,22 +718,28 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         if source == "":
             source = global_config.get("spark.sql.sources.default")
         if source not in ("csv", "json", "avro", "parquet", "orc", "xml"):
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Source '{source}' is not currently supported by Catalog.createTable. "
                 "Maybe default value through 'spark.sql.sources.default' should be set."
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         if path != "":
             # External table creation is not supported currently.
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "External table creation is not supported currently."
            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

         session = get_or_create_snowpark_session()
         # Managed table
         if schema.ByteSize() == 0:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Unable to infer schema for {source.upper()}. It must be specified manually.",
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         sp_schema = proto_to_snowpark_type(schema)
         columns = [c.name for c in schema.struct.fields]
         table_name_parts = split_fully_qualified_spark_name(tableName)

snowflake/snowpark_connect/relation/catalogs/utils.py

@@ -5,6 +5,8 @@
 from collections import defaultdict

 from snowflake.connector.errors import ProgrammingError
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.catalogs import CATALOGS, SNOWFLAKE_CATALOG
 from snowflake.snowpark_connect.relation.catalogs.abstract_spark_catalog import (
     AbstractSparkCatalog,
@@ -27,11 +29,15 @@ def set_current_catalog(catalog_name: str | None) -> AbstractSparkCatalog:

     # Validate input parameters to match PySpark behavior
     if catalog_name is None:
-        raise ValueError("Catalog name cannot be None")
+        exception = ValueError("Catalog name cannot be None")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
     if catalog_name == "":
-        raise ValueError(
+        exception = ValueError(
             "Catalog '' plugin class not found: spark.sql.catalog. is not defined"
         )
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception

     CURRENT_CATALOG_NAME = catalog_name
     if catalog_name in CATALOGS:
@@ -42,9 +48,11 @@ def set_current_catalog(catalog_name: str | None) -> AbstractSparkCatalog:
         sf_catalog.setCurrentDatabase(catalog_name if catalog_name is not None else "")
         return get_current_catalog()
     except ProgrammingError as e:
-        raise Exception(
+        exception = Exception(
             f"Catalog '{catalog_name}' plugin class not found: spark.sql.catalog.{catalog_name} is not defined"
-        ) from e
+        )
+        attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+        raise exception from e


 def _get_current_temp_objects() -> set[tuple[str | None, str | None, str]]:

snowflake/snowpark_connect/relation/io_utils.py

@@ -4,6 +4,11 @@

 from urllib.parse import urlparse

+from pyspark.errors.exceptions.base import AnalysisException
+
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+
 CLOUD_PREFIX_TO_CLOUD = {
     "abfss": "azure",
     "wasbs": "azure",
@@ -12,10 +17,28 @@ CLOUD_PREFIX_TO_CLOUD = {
 }

 SUPPORTED_COMPRESSION_PER_FORMAT = {
-    "csv": {"AUTO", "GZIP", "BZ2", "BROTLI", "ZSTD", "DEFLATE", "RAW_DEFLATE", "NONE"},
-    "json": {"AUTO", "GZIP", "BZ2", "BROTLI", "ZSTD", "DEFLATE", "RAW_DEFLATE", "NONE"},
-    "parquet": {"AUTO", "LZO", "SNAPPY", "NONE"},
-    "text": {"NONE"},
+    "csv": {
+        "GZIP",
+        "BZ2",
+        "BROTLI",
+        "ZSTD",
+        "DEFLATE",
+        "RAW_DEFLATE",
+        "NONE",
+        "UNCOMPRESSED",
+    },
+    "json": {
+        "GZIP",
+        "BZ2",
+        "BROTLI",
+        "ZSTD",
+        "DEFLATE",
+        "RAW_DEFLATE",
+        "NONE",
+        "UNCOMPRESSED",
+    },
+    "parquet": {"LZO", "SNAPPY", "NONE", "UNCOMPRESSED"},
+    "text": {"NONE", "UNCOMPRESSED"},
 }


@@ -29,6 +52,45 @@ def is_supported_compression(format: str, compression: str | None) -> bool:
     return compression in supported_compressions_for_format(format)


+def get_compression_for_source_and_options(
+    source: str, options: dict[str, str], from_read: bool = False
+) -> str | None:
+    """
+    Determines the compression type to use for a given data source and options.
+    Args:
+        source (str): The data source format (e.g., "csv", "json", "parquet", "text").
+        options (dict[str, str]): A dictionary of options that may include a "compression" key.
+    Returns:
+        str: The compression type to use (e.g., "GZIP", "SNAPPY", "NONE").
+    Raises:
+        AnalysisException: If the specified compression is not supported for the given source format.
+    """
+    # From read, we don't have a default compression
+    if from_read and "compression" not in options:
+        return None
+
+    # Get compression from options for proper filename generation
+    default_compression = "NONE" if source != "parquet" else "snappy"
+    compression = options.get("compression", default_compression).upper()
+    if compression == "UNCOMPRESSED":
+        compression = "NONE"
+
+    if not is_supported_compression(source, compression):
+        supported_compressions = supported_compressions_for_format(source)
+        exception = AnalysisException(
+            f"Compression {compression} is not supported for {source} format. "
+            + (
+                f"Supported compressions: {sorted(supported_compressions)}"
+                if supported_compressions
+                else "None compression supported for this format."
+            )
+        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
+
+    return compression
+
+
 def get_cloud_from_url(
     url: str,
 ):
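
The new get_compression_for_source_and_options helper validates the "compression" writer/reader option against the per-format sets above, defaulting to snappy for parquet and NONE otherwise, and normalizing "uncompressed" to NONE. A hypothetical client-side illustration follows; the Spark Connect endpoint and output paths are placeholders and not part of this package.

# Writer options that the server-side helper above would accept or reject.
from pyspark.sql import SparkSession

spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()  # placeholder endpoint
df = spark.range(10)

# GZIP is in the csv compression set, so this write is validated successfully.
df.write.option("compression", "gzip").csv("/tmp/out_csv")

# With no option, a parquet write falls back to the snappy default; an
# unsupported codec for parquet (e.g. "brotli") would raise AnalysisException
# per the helper above.
df.write.parquet("/tmp/out_parquet")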

snowflake/snowpark_connect/relation/map_catalog.py

@@ -8,6 +8,8 @@ import pandas
 import pyspark.sql.connect.proto.catalog_pb2 as catalog_proto

 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.catalogs import CATALOGS
 from snowflake.snowpark_connect.relation.catalogs.utils import (
     CURRENT_CATALOG_NAME,
@@ -148,4 +150,6 @@ def map_catalog(
             return get_current_catalog().uncacheTable(rel.uncache_table.table_name)
         case other:
             # TODO: list_function implementation is blocked on SNOW-1787268
-            raise SnowparkConnectNotImplementedError(f"Other Relation {other}")
+            exception = SnowparkConnectNotImplementedError(f"Other Relation {other}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception