snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

This version of snowpark-connect has been flagged as potentially problematic.

Files changed (87)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/column_name_handler.py +200 -102
  3. snowflake/snowpark_connect/column_qualifier.py +47 -0
  4. snowflake/snowpark_connect/config.py +51 -16
  5. snowflake/snowpark_connect/dataframe_container.py +3 -2
  6. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  7. snowflake/snowpark_connect/error/error_codes.py +50 -0
  8. snowflake/snowpark_connect/error/error_utils.py +142 -22
  9. snowflake/snowpark_connect/error/exceptions.py +13 -4
  10. snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
  11. snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
  12. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  13. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  14. snowflake/snowpark_connect/expression/literal.py +7 -1
  15. snowflake/snowpark_connect/expression/map_cast.py +17 -5
  16. snowflake/snowpark_connect/expression/map_expression.py +53 -8
  17. snowflake/snowpark_connect/expression/map_extension.py +37 -11
  18. snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
  19. snowflake/snowpark_connect/expression/map_udf.py +10 -2
  20. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
  21. snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
  22. snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
  23. snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
  24. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  25. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  26. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
  27. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  28. snowflake/snowpark_connect/relation/io_utils.py +6 -1
  29. snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
  30. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  31. snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
  32. snowflake/snowpark_connect/relation/map_extension.py +38 -17
  33. snowflake/snowpark_connect/relation/map_join.py +26 -12
  34. snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
  35. snowflake/snowpark_connect/relation/map_relation.py +33 -7
  36. snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
  37. snowflake/snowpark_connect/relation/map_sql.py +124 -25
  38. snowflake/snowpark_connect/relation/map_stats.py +5 -1
  39. snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
  40. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  41. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
  42. snowflake/snowpark_connect/relation/read/map_read.py +15 -3
  43. snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
  44. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  45. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
  46. snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
  47. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  48. snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
  49. snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
  50. snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
  51. snowflake/snowpark_connect/relation/stage_locator.py +5 -1
  52. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  53. snowflake/snowpark_connect/relation/write/map_write.py +160 -48
  54. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  55. snowflake/snowpark_connect/resources_initializer.py +5 -1
  56. snowflake/snowpark_connect/server.py +73 -21
  57. snowflake/snowpark_connect/type_mapping.py +90 -20
  58. snowflake/snowpark_connect/typed_column.py +8 -6
  59. snowflake/snowpark_connect/utils/context.py +42 -1
  60. snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
  61. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  62. snowflake/snowpark_connect/utils/identifiers.py +11 -3
  63. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  64. snowflake/snowpark_connect/utils/profiling.py +25 -8
  65. snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
  66. snowflake/snowpark_connect/utils/session.py +24 -4
  67. snowflake/snowpark_connect/utils/telemetry.py +6 -0
  68. snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
  69. snowflake/snowpark_connect/utils/udf_cache.py +5 -3
  70. snowflake/snowpark_connect/utils/udf_helper.py +20 -6
  71. snowflake/snowpark_connect/utils/udf_utils.py +4 -4
  72. snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
  73. snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
  74. snowflake/snowpark_connect/version.py +1 -1
  75. snowflake/snowpark_decoder/dp_session.py +1 -1
  76. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
  77. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
  78. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
  79. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
  80. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
  81. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
  82. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
  83. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
  84. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
  85. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
  86. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
  87. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py
@@ -38,6 +38,8 @@ from snowflake.snowpark.types import (
     TimeType,
     _NumericType,
 )
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read.utils import (
     DATA_SOURCE_SQL_COMMENT,
     Connection,
@@ -147,9 +149,11 @@ class JdbcDataFrameReader(DataFrameReader):
         or upper_bound is not None
         or num_partitions is not None
     ):
-        raise ValueError(
+        exception = ValueError(
             "when column is not specified, lower_bound, upper_bound, num_partitions are expected to be None"
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
     if table is not None:
         partitioned_queries = []
         table_query = f"SELECT * FROM {table}"
@@ -160,24 +164,32 @@ class JdbcDataFrameReader(DataFrameReader):
     elif query is not None:
         partitioned_queries = [query]
     else:
-        raise ValueError("table or query is not specified")
+        exception = ValueError("table or query is not specified")
+        attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+        raise exception
 else:
     if lower_bound is None or upper_bound is None or num_partitions is None:
-        raise ValueError(
+        exception = ValueError(
             "when column is specified, lower_bound, upper_bound, num_partitions must be specified"
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception

     column_type = None
     for field in struct_schema.fields:
         if field.name.lower() == column.lower():
             column_type = field.datatype
     if column_type is None:
-        raise ValueError("Column does not exist")
+        exception = ValueError("Column does not exist")
+        attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+        raise exception

     if not isinstance(column_type, _NumericType) and not isinstance(
         column_type, DateType
     ):
-        raise ValueError(f"unsupported type {column_type}")
+        exception = ValueError(f"unsupported type {column_type}")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+        raise exception
     spark_column_name = f'"{column}"'
     partitioned_queries = self._generate_partition(
         table,
@@ -240,7 +252,11 @@ class JdbcDataFrameReader(DataFrameReader):
             )
             query_thread_executor.shutdown(wait=False)
             upload_thread_executor.shutdown(wait=False)
-            raise future.result()
+            exception = future.result()
+            attach_custom_error_code(
+                exception, ErrorCodes.INTERNAL_ERROR
+            )
+            raise exception
         else:
             path = future.result()
             if not path:
@@ -266,7 +282,11 @@ class JdbcDataFrameReader(DataFrameReader):
             )
             query_thread_executor.shutdown(wait=False)
             upload_thread_executor.shutdown(wait=False)
-            raise f.result()
+            exception = f.result()
+            attach_custom_error_code(
+                exception, ErrorCodes.INTERNAL_ERROR
+            )
+            raise exception
 finally:
     close_connection(conn)

@@ -283,7 +303,9 @@ class JdbcDataFrameReader(DataFrameReader):
     elif query is not None:
         sql = f"SELECT * FROM ({query}) WHERE 1=0"
     else:
-        raise ValueError("table or query is not specified")
+        exception = ValueError("table or query is not specified")
+        attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+        raise exception

     cursor = conn.cursor()
     cursor.execute(sql)
@@ -301,7 +323,11 @@ class JdbcDataFrameReader(DataFrameReader):
         dt = parser.parse(value)
         return int(dt.replace(tzinfo=pytz.UTC).timestamp())
     else:
-        raise TypeError(f"unsupported column type for partition: {column_type}")
+        exception = TypeError(
+            f"unsupported column type for partition: {column_type}"
+        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+        raise exception

 # this function is only used in data source API for SQL server
 def _to_external_value(self, value: Union[int, str, float], column_type: DataType):
@@ -311,7 +337,11 @@ class JdbcDataFrameReader(DataFrameReader):
         # TODO: SNOW-1909315: support timezone
         return datetime.datetime.fromtimestamp(value, tz=pytz.UTC)
     else:
-        raise TypeError(f"unsupported column type for partition: {column_type}")
+        exception = TypeError(
+            f"unsupported column type for partition: {column_type}"
+        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+        raise exception

 def _to_snowpark_type(self, schema: Tuple[tuple]) -> StructType:
     fields = []
@@ -339,7 +369,9 @@ class JdbcDataFrameReader(DataFrameReader):
         case jaydebeapi.BINARY:
             field = StructField(name, BinaryType(), is_nullable)
         case _:
-            raise ValueError(f"unsupported type: {dbapi_type}")
+            exception = ValueError(f"unsupported type: {dbapi_type}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception

     fields.append(field)
     return StructType(fields)
@@ -359,7 +391,9 @@ class JdbcDataFrameReader(DataFrameReader):
     processed_lower_bound = self._to_internal_value(lower_bound, column_type)
     processed_upper_bound = self._to_internal_value(upper_bound, column_type)
     if processed_lower_bound > processed_upper_bound:
-        raise ValueError("lower_bound cannot be greater than upper_bound")
+        exception = ValueError("lower_bound cannot be greater than upper_bound")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception

     if processed_lower_bound == processed_upper_bound or num_partitions <= 1:
         return [select_query]
@@ -665,4 +699,6 @@ def get_jdbc_dialect(url: str) -> JdbcDialect:
     for jdbc_dialect in jdbc_dialects:
         if jdbc_dialect.can_handle(url):
             return jdbc_dialect
-    raise ValueError(f"Unsupported JDBC datasource: {url}")
+    exception = ValueError(f"Unsupported JDBC datasource: {url}")
+    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+    raise exception
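Nearly every hunk above applies the same refactor: an exception that used to be raised inline is first constructed, tagged with a structured code via attach_custom_error_code, and only then raised. The snippet below is an illustrative sketch of that pattern rather than code from the wheel; ErrorCodes and attach_custom_error_code are the real names introduced in this release, while the raise_with_code helper is invented here purely for brevity.

# Illustrative sketch only: the wrapper below is hypothetical and not part of the package.
from snowflake.snowpark_connect.error.error_codes import ErrorCodes
from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code


def raise_with_code(exc: Exception, code: ErrorCodes) -> None:
    """Attach a Snowpark Connect error code to an exception, then raise it."""
    attach_custom_error_code(exc, code)
    raise exc


# Equivalent to the repeated three-line pattern above, for example:
# raise_with_code(ValueError("Column does not exist"), ErrorCodes.COLUMN_NOT_FOUND)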
snowflake/snowpark_connect/relation/read/map_read.py
@@ -15,6 +15,8 @@ from snowflake import snowpark
 from snowflake.snowpark.types import StructType
 from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.io_utils import (
     convert_file_prefix_path,
     get_compression_for_source_and_options,
@@ -159,12 +161,20 @@ def map_read(
                     options[DBTABLE_OPTION], session, rel.common.plan_id
                 )
             case other:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"UNSUPPORTED FORMAT {other} WITH NO PATH"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
         case other:
             # TODO: Empty data source
-            raise SnowparkConnectNotImplementedError(f"Unsupported read type: {other}")
+            exception = SnowparkConnectNotImplementedError(
+                f"Unsupported read type: {other}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

     return df_cache_map_put_if_absent(
         (get_session_id(), rel.common.plan_id),
@@ -274,9 +284,11 @@ def _read_file(

             return map_read_text(rel, schema, session, paths)
         case _:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Unsupported format: {read_format}"
            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception


 def _skip_upload(path: str, read_format: str):
snowflake/snowpark_connect/relation/read/map_read_csv.py
@@ -12,6 +12,8 @@ from snowflake import snowpark
 from snowflake.snowpark.dataframe_reader import DataFrameReader
 from snowflake.snowpark.types import StringType, StructField, StructType
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read.map_read import CsvReaderConfig
 from snowflake.snowpark_connect.relation.read.metadata_utils import (
     add_filename_metadata_to_reader,
@@ -43,9 +45,11 @@ def map_read_csv(

     if rel.read.is_streaming is True:
         # TODO: Structured streaming implementation.
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "Streaming is not supported for CSV files."
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
     else:
         snowpark_options = options.convert_to_snowpark_args()
         parse_header = snowpark_options.get("PARSE_HEADER", False)
@@ -188,14 +192,18 @@ def read_data(

     if schema is not None:
         if len(schema.fields) != len(non_metadata_fields):
-            raise Exception(f"csv load from {filename} failed.")
+            exception = Exception(f"csv load from {filename} failed.")
+            attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+            raise exception
         if raw_options.get("enforceSchema", "True").lower() == "false":
             for i in range(len(schema.fields)):
                 if (
                     schema.fields[i].name != non_metadata_fields[i].name
                     and f'"{schema.fields[i].name}"' != non_metadata_fields[i].name
                 ):
-                    raise Exception("CSV header does not conform to the schema")
+                    exception = Exception("CSV header does not conform to the schema")
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                    raise exception
         return df

     headers = get_header_names(
snowflake/snowpark_connect/relation/read/map_read_jdbc.py
@@ -9,6 +9,8 @@ import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 from snowflake import snowpark
 from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read.jdbc_read_dbapi import JdbcDataFrameReader
 from snowflake.snowpark_connect.relation.read.utils import (
     Connection,
@@ -28,7 +30,9 @@ def create_connection(jdbc_options: dict[str, str]) -> Connection:
         return jaydebeapi.connect(driver, url, jdbc_options)
     except Exception as e:
         jpype.detachThreadFromJVM()
-        raise Exception(f"Error connecting JDBC datasource: {e}")
+        exception = Exception(f"Error connecting JDBC datasource: {e}")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception


 def close_connection(conn: Connection) -> None:
@@ -70,17 +74,23 @@ def map_read_jdbc(
         dbtable = None

     if not dbtable and not query:
-        raise ValueError("Include dbtable or query is required option")
+        exception = ValueError("Include dbtable or query is required option")
+        attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+        raise exception

     if query is not None and dbtable is not None:
-        raise ValueError(
+        exception = ValueError(
             "Not allowed to specify dbtable and query options at the same time"
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception

     if query is not None and partition_column is not None:
-        raise ValueError(
+        exception = ValueError(
             "Not allowed to specify partitionColumn and query options at the same time"
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception

     try:
         df = JdbcDataFrameReader(session, jdbc_options).jdbc_read_dbapi(
@@ -105,4 +115,6 @@ def map_read_jdbc(
             snowpark_column_types=[f.datatype for f in df.schema.fields],
         )
     except Exception as e:
-        raise Exception(f"Error accessing JDBC datasource for read: {e}")
+        exception = Exception(f"Error accessing JDBC datasource for read: {e}")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
snowflake/snowpark_connect/relation/read/map_read_json.py
@@ -28,6 +28,8 @@ from snowflake.snowpark.types import (
     TimestampType,
 )
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read.map_read import JsonReaderConfig
 from snowflake.snowpark_connect.relation.read.metadata_utils import (
     add_filename_metadata_to_reader,
@@ -64,9 +66,11 @@ def map_read_json(

     if rel.read.is_streaming is True:
         # TODO: Structured streaming implementation.
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "Streaming is not supported for JSON files."
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
     else:
         snowpark_options = options.convert_to_snowpark_args()
         raw_options = rel.read.data_source.options
@@ -363,9 +367,11 @@ def construct_row_by_schema(
                     content.get(col_name, None), sf.datatype, snowpark_options
                 )
             else:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"JSON construct {str(content)} to StructType failed"
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
         return result
     elif isinstance(schema, ArrayType):
         result = []
snowflake/snowpark_connect/relation/read/map_read_parquet.py
@@ -22,6 +22,8 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
 from snowflake.snowpark.column import METADATA_FILENAME
 from snowflake.snowpark.types import DataType, DoubleType, IntegerType, StringType
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read.metadata_utils import (
     add_filename_metadata_to_reader,
 )
@@ -44,9 +46,11 @@ def map_read_parquet(
     """Read a Parquet file into a Snowpark DataFrame."""

     if rel.read.is_streaming is True:
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "Streaming is not supported for Parquet files."
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     snowpark_options = options.convert_to_snowpark_args()
     raw_options = rel.read.data_source.options
@@ -155,10 +159,14 @@ def _discover_partition_columns(
             if i not in dir_level_to_column_name:
                 dir_level_to_column_name[i] = key
             elif dir_level_to_column_name[i] != key:
-                raise ValueError(
+                exception = ValueError(
                     f"Conflicting partition column names detected: '{dir_level_to_column_name[i]}' and '{key}' "
                     f"at the same directory level"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_OPERATION
+                )
+                raise exception

             partition_columns_values[key].add(value)

@@ -166,10 +174,12 @@ def _discover_partition_columns(
     for level in sorted(dir_level_to_column_name.keys()):
         col_name = dir_level_to_column_name[level]
         if col_name in seen_columns:
-            raise ValueError(
+            exception = ValueError(
                 f"Found partition column '{col_name}' at multiple directory levels. "
                 f"A partition column can only appear at a single level."
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception
         seen_columns.add(col_name)

     ordered_columns = [
snowflake/snowpark_connect/relation/read/map_read_socket.py
@@ -9,6 +9,8 @@ import pyspark.sql.connect.proto.relations_pb2 as relation_proto

 from snowflake import snowpark
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
 )
@@ -30,7 +32,9 @@ def map_read_socket(
         host = options.get("host", None)
         port = options.get("port", None)
         if not host or not port:
-            raise ValueError("Host and port must be provided in options.")
+            exception = ValueError("Host and port must be provided in options.")
+            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+            raise exception
         with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
             try:
                 s.connect((host, int(port)))
@@ -56,8 +60,12 @@ def map_read_socket(
                     snowpark_column_names=[snowpark_cname],
                 )
             except OSError as e:
-                raise Exception(f"Error connecting to {host}:{port} - {e}")
+                exception = Exception(f"Error connecting to {host}:{port} - {e}")
+                attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                raise exception
     else:
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "Socket reads are only supported in streaming mode."
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
snowflake/snowpark_connect/relation/read/map_read_table.py
@@ -16,8 +16,11 @@ from snowflake.snowpark_connect.column_name_handler import (
     ColumnNameMap,
     make_column_names_snowpark_compatible,
 )
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.config import auto_uppercase_non_column_identifiers
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read.utils import (
     rename_columns_as_snowflake_standard,
 )
@@ -56,7 +59,7 @@ def post_process_df(
         spark_column_names=true_names,
         snowpark_column_names=snowpark_column_names,
         snowpark_column_types=[f.datatype for f in df.schema.fields],
-        column_qualifiers=[name_parts] * len(true_names)
+        column_qualifiers=[{ColumnQualifier(tuple(name_parts))} for _ in true_names]
         if source_table_name
         else None,
     )
@@ -64,9 +67,11 @@ def post_process_df(
         # Check if this is a table/view not found error
         # Snowflake error codes: 002003 (42S02) - Object does not exist or not authorized
         if hasattr(e, "sql_error_code") and e.sql_error_code == 2003:
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"[TABLE_OR_VIEW_NOT_FOUND] The table or view cannot be found. {source_table_name}"
-            ) from None  # Suppress original exception to reduce message size
+            )
+            attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+            raise exception from None  # Suppress original exception to reduce message size
         # Re-raise if it's not a table not found error
         raise

@@ -90,8 +95,10 @@ def _get_temporary_view(
         spark_column_names=temp_view.column_map.get_spark_columns(),
         snowpark_column_names=snowpark_column_names,
         column_metadata=temp_view.column_map.column_metadata,
-        column_qualifiers=[split_fully_qualified_spark_name(table_name)]
-        * len(temp_view.column_map.get_spark_columns()),
+        column_qualifiers=[
+            {ColumnQualifier(tuple(split_fully_qualified_spark_name(table_name)))}
+            for _ in range(len(temp_view.column_map.get_spark_columns()))
+        ],
         parent_column_name_map=temp_view.column_map.get_parent_column_name_map(),
     )

@@ -118,9 +125,11 @@ def get_table_from_name(

     # Verify if recursive view read is not attempted
     if table_name in get_processed_views():
-        raise AnalysisException(
+        exception = AnalysisException(
             f"[RECURSIVE_VIEW] Recursive view `{table_name}` detected (cycle: `{table_name}` -> `{table_name}`)"
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception

     snowpark_name = ".".join(
         quote_name_without_upper_casing(part)
@@ -159,10 +168,14 @@ def map_read_table(
         and rel.read.data_source.format.lower() == "iceberg"
     ):
         if len(rel.read.data_source.paths) != 1:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Unexpected paths: {rel.read.data_source.paths}"
            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         table_identifier = rel.read.data_source.paths[0]
     else:
-        raise ValueError("The relation must have a table identifier.")
+        exception = ValueError("The relation must have a table identifier.")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
     return get_table_from_name(table_identifier, session, rel.common.plan_id)
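The map_read_table.py hunks also show the second theme of this release: column qualifiers are no longer plain lists of name parts but sets of ColumnQualifier objects (a class added in the new snowflake/snowpark_connect/column_qualifier.py). A rough sketch of the shape change, using invented sample values:

# Sketch of the qualifier shape change seen in post_process_df and
# _get_temporary_view above; the sample name_parts/true_names values are invented.
from snowflake.snowpark_connect.column_qualifier import ColumnQualifier

name_parts = ["db", "schema", "table"]  # hypothetical table name parts
true_names = ["ID", "NAME"]             # hypothetical column names

# 0.30.1 shape: the same list of raw name parts, repeated once per column
old_qualifiers = [name_parts] * len(true_names)

# 0.32.0 shape: one set per column, each holding a ColumnQualifier built from a tuple
new_qualifiers = [{ColumnQualifier(tuple(name_parts))} for _ in true_names]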
snowflake/snowpark_connect/relation/read/map_read_text.py
@@ -8,6 +8,8 @@ import pyspark.sql.connect.proto.relations_pb2 as relation_proto

 from snowflake import snowpark
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read.utils import (
     get_spark_column_names_from_snowpark_columns,
     rename_columns_as_snowflake_standard,
@@ -82,9 +84,11 @@ def map_read_text(
     """
     if rel.read.is_streaming is True:
         # TODO: Structured streaming implementation.
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "Streaming is not supported for CSV files."
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     df = read_text(paths[0], schema, session, rel.read.data_source.options)
     if len(paths) > 1:
snowflake/snowpark_connect/relation/read/metadata_utils.py
@@ -16,6 +16,8 @@ from snowflake.snowpark.column import METADATA_FILENAME
 from snowflake.snowpark.functions import col
 from snowflake.snowpark.types import StructField
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code

 # Constant for the metadata filename column name
 METADATA_FILENAME_COLUMN = "METADATA$FILENAME"
@@ -129,9 +131,11 @@ def filter_metadata_columns(
     if len(non_metadata_columns) == 0:
         # DataFrame contains only metadata columns (METADATA$FILENAME), no actual data columns remaining.
         # We don't have a way to return an empty dataframe.
-        raise AnalysisException(
+        exception = AnalysisException(
             "[DATAFRAME_MISSING_DATA_COLUMNS] Cannot perform operation on DataFrame that contains no data columns."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception

     filtered_df = result_df.select([col(name) for name in non_metadata_columns])

snowflake/snowpark_connect/relation/stage_locator.py
@@ -11,6 +11,8 @@ from s3fs.core import S3FileSystem
 from snowflake import snowpark
 from snowflake.snowpark.session import Session
 from snowflake.snowpark_connect.config import sessions_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.io_utils import (
     get_cloud_from_url,
     parse_azure_url,
@@ -42,9 +44,11 @@ def get_paths_from_stage(
             rewrite_paths.append(f"{stage_name}/{path}")
             paths = rewrite_paths
         case "gcp":
-            raise AnalysisException(
+            exception = AnalysisException(
                 "You must configure an integration for Google Cloud Storage to perform I/O operations rather than accessing the URL directly. Reference: https://docs.snowflake.com/en/user-guide/data-load-gcs-config"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         case _:
             filesystem, parsed_path = url_to_fs(paths[0])
             if isinstance(filesystem, S3FileSystem):  # aws
snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py
@@ -11,6 +11,8 @@ from snowflake import snowpark
 from snowflake.snowpark import DataFrameWriter
 from snowflake.snowpark.dataframe import DataFrame
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read import jdbc_read_dbapi
 from snowflake.snowpark_connect.relation.read.jdbc_read_dbapi import JdbcDialect
 from snowflake.snowpark_connect.relation.read.utils import Connection
@@ -65,9 +67,13 @@ class JdbcDataFrameWriter(DataFrameWriter):
                 self._create_table(conn, table, container, jdbc_dialect)
             case "errorifexists":
                 if table_exist:
-                    raise ValueError(
+                    exception = ValueError(
                         "table is already exist and write mode is ERROR_IF_EXISTS"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_OPERATION
+                    )
+                    raise exception
                 else:
                     self._create_table(conn, table, container, jdbc_dialect)
             case "overwrite":
@@ -82,7 +88,9 @@ class JdbcDataFrameWriter(DataFrameWriter):
                 else:
                     self._create_table(conn, table, container, jdbc_dialect)
             case _:
-                raise ValueError(f"Invalid write mode value{write_mode}")
+                exception = ValueError(f"Invalid write mode value{write_mode}")
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

         task_insert_into_data_source_with_retry(
             input_df,
@@ -141,6 +149,7 @@ class JdbcDataFrameWriter(DataFrameWriter):
             cursor.execute(sql)
         except Exception as e:
             logger.error(f"failed to drop table {table} from the data source {e}")
+            attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
             raise e

     def _create_table(
@@ -189,6 +198,7 @@ class JdbcDataFrameWriter(DataFrameWriter):
             cursor.execute(sql)
         except Exception as e:
             logger.error(f"failed to create a table {table} from the data source {e}")
+            attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
             raise e


@@ -218,6 +228,7 @@ def _task_insert_into_data_source(
     except Exception as e:
         logger.debug(f"failed to insert into data source {e}")
         conn.rollback()
+        attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
         raise e
     finally:
         cursor.close()
@@ -274,6 +285,7 @@ def task_insert_into_data_source_with_retry(
         )
     except Exception as e:
         logger.debug(f"failed to insert into data source {e}")
+        attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
         raise e
     finally:
         close_connection(conn)
@@ -339,4 +351,8 @@ def convert_sp_to_sql_type(
                 case _:
                     return "TIMESTAMP"
         case _:
-            raise TypeError(f"Unsupported data type: {datatype.__class__.__name__}")
+            exception = TypeError(
+                f"Unsupported data type: {datatype.__class__.__name__}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
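The write path shows the other variant of the error-code change: exceptions caught from the JDBC driver are tagged in place and re-raised, so the original exception object and traceback are preserved. A minimal sketch follows, with a hypothetical execute_with_error_code wrapper standing in for the real methods in jdbc_write_dbapi.py.

# Minimal sketch only; the wrapper below is hypothetical, while
# attach_custom_error_code and ErrorCodes are the names added in this release.
from snowflake.snowpark_connect.error.error_codes import ErrorCodes
from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code


def execute_with_error_code(cursor, sql: str) -> None:
    try:
        cursor.execute(sql)
    except Exception as e:
        # The caught exception object itself is tagged and re-raised,
        # so the original traceback and message are preserved.
        attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
        raise e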