snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of snowpark-connect might be problematic.

Files changed (87)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/column_name_handler.py +200 -102
  3. snowflake/snowpark_connect/column_qualifier.py +47 -0
  4. snowflake/snowpark_connect/config.py +51 -16
  5. snowflake/snowpark_connect/dataframe_container.py +3 -2
  6. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  7. snowflake/snowpark_connect/error/error_codes.py +50 -0
  8. snowflake/snowpark_connect/error/error_utils.py +142 -22
  9. snowflake/snowpark_connect/error/exceptions.py +13 -4
  10. snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
  11. snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
  12. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  13. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  14. snowflake/snowpark_connect/expression/literal.py +7 -1
  15. snowflake/snowpark_connect/expression/map_cast.py +17 -5
  16. snowflake/snowpark_connect/expression/map_expression.py +53 -8
  17. snowflake/snowpark_connect/expression/map_extension.py +37 -11
  18. snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
  19. snowflake/snowpark_connect/expression/map_udf.py +10 -2
  20. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
  21. snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
  22. snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
  23. snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
  24. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  25. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  26. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
  27. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  28. snowflake/snowpark_connect/relation/io_utils.py +6 -1
  29. snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
  30. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  31. snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
  32. snowflake/snowpark_connect/relation/map_extension.py +38 -17
  33. snowflake/snowpark_connect/relation/map_join.py +26 -12
  34. snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
  35. snowflake/snowpark_connect/relation/map_relation.py +33 -7
  36. snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
  37. snowflake/snowpark_connect/relation/map_sql.py +124 -25
  38. snowflake/snowpark_connect/relation/map_stats.py +5 -1
  39. snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
  40. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  41. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
  42. snowflake/snowpark_connect/relation/read/map_read.py +15 -3
  43. snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
  44. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  45. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
  46. snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
  47. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  48. snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
  49. snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
  50. snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
  51. snowflake/snowpark_connect/relation/stage_locator.py +5 -1
  52. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  53. snowflake/snowpark_connect/relation/write/map_write.py +160 -48
  54. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  55. snowflake/snowpark_connect/resources_initializer.py +5 -1
  56. snowflake/snowpark_connect/server.py +73 -21
  57. snowflake/snowpark_connect/type_mapping.py +90 -20
  58. snowflake/snowpark_connect/typed_column.py +8 -6
  59. snowflake/snowpark_connect/utils/context.py +42 -1
  60. snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
  61. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  62. snowflake/snowpark_connect/utils/identifiers.py +11 -3
  63. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  64. snowflake/snowpark_connect/utils/profiling.py +25 -8
  65. snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
  66. snowflake/snowpark_connect/utils/session.py +24 -4
  67. snowflake/snowpark_connect/utils/telemetry.py +6 -0
  68. snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
  69. snowflake/snowpark_connect/utils/udf_cache.py +5 -3
  70. snowflake/snowpark_connect/utils/udf_helper.py +20 -6
  71. snowflake/snowpark_connect/utils/udf_utils.py +4 -4
  72. snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
  73. snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
  74. snowflake/snowpark_connect/version.py +1 -1
  75. snowflake/snowpark_decoder/dp_session.py +1 -1
  76. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
  77. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
  78. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
  79. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
  80. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
  81. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
  82. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
  83. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
  84. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
  85. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
  86. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
  87. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
@@ -13,6 +13,8 @@ from snowflake.snowpark.types import (
 YearMonthIntervalType,
 )
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import (
@@ -78,9 +80,11 @@ def map_extension(
 elif value.HasField("unresolved_attribute"):
 name = "__" + key + "__" + exp_name[0]
 else:
-raise SnowparkConnectNotImplementedError(
+exception = SnowparkConnectNotImplementedError(
 "Named argument not supported yet for this input."
 )
+attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+raise exception
 return [name], typed_col

 case "interval_literal":
@@ -152,9 +156,11 @@ def map_extension(

 queries = df.queries["queries"]
 if len(queries) != 1:
-raise SnowparkConnectNotImplementedError(
+exception = SnowparkConnectNotImplementedError(
 f"Unexpected number of queries: {len(queries)}"
 )
+attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+raise exception
 query = f"({queries[0]})"

 match extension.subquery_expression.subquery_type:
@@ -168,7 +174,13 @@ def map_extension(
 result_type = BooleanType()
 case snowflake_proto.SubqueryExpression.SUBQUERY_TYPE_TABLE_ARG:
 # TODO: Currently, map_sql.py handles this, so we never end up here.
-raise SnowparkConnectNotImplementedError("Unexpected table arg")
+exception = SnowparkConnectNotImplementedError(
+"Unexpected table arg"
+)
+attach_custom_error_code(
+exception, ErrorCodes.UNSUPPORTED_OPERATION
+)
+raise exception
 case snowflake_proto.SubqueryExpression.SUBQUERY_TYPE_IN:
 cols = [
 map_expression(e, column_mapping, typer)
@@ -184,14 +196,22 @@ def map_extension(
 )
 result_type = BooleanType()
 case other:
-raise SnowparkConnectNotImplementedError(
+exception = SnowparkConnectNotImplementedError(
 f"Unexpected subquery type: {other}"
 )
+attach_custom_error_code(
+exception, ErrorCodes.UNSUPPORTED_OPERATION
+)
+raise exception

 return [name], TypedColumn(result_exp, lambda: [result_type])

 case other:
-raise SnowparkConnectNotImplementedError(f"Unexpected extension {other}")
+exception = SnowparkConnectNotImplementedError(
+f"Unexpected extension {other}"
+)
+attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+raise exception


 def _format_year_month_interval(
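
Note: the recurring pattern in this release is visible in the hunks above. Instead of raising directly, the code now builds the exception, calls attach_custom_error_code(exception, ErrorCodes.<CODE>), and then raises it. The snippet below is a self-contained sketch of that shape; the ErrorCodes members and the attach helper here are stand-ins, since the real definitions live in snowflake/snowpark_connect/error/error_codes.py and error_utils.py and are not shown in this diff.

from enum import Enum


class ErrorCodes(Enum):
    # Hypothetical subset; the real enum in error_codes.py defines many more members.
    UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION"
    INVALID_INPUT = "INVALID_INPUT"


def attach_custom_error_code(exception: Exception, code: ErrorCodes) -> Exception:
    # Assumed behavior: tag the exception object so callers and telemetry can
    # read the code later without parsing the message text.
    exception.custom_error_code = code
    return exception


def handle_named_argument(kind: str) -> str:
    if kind == "literal":
        return "ok"
    # Same shape as the diff: build the exception, tag it, then raise.
    exception = NotImplementedError("Named argument not supported yet for this input.")
    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
    raise exception
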
@@ -257,6 +277,9 @@ def _format_day_time_interval(
 if is_negative:
 days = -days

+# Calculate days string representation (handle -0 case)
+days_str = "-0" if (is_negative and days == 0) else str(days)
+
 # Format based on the specific start/end field context
 if (
 start_field == DayTimeIntervalType.DAY and end_field == DayTimeIntervalType.DAY
@@ -324,7 +347,10 @@ def _format_day_time_interval(
 start_field == DayTimeIntervalType.HOUR
 and end_field == DayTimeIntervalType.MINUTE
 ): # HOUR TO MINUTE
-str_value = f"INTERVAL '{_TWO_DIGIT_FORMAT.format(hours)}:{_TWO_DIGIT_FORMAT.format(minutes)}' HOUR TO MINUTE"
+if is_negative:
+str_value = f"INTERVAL '-{_TWO_DIGIT_FORMAT.format(hours)}:{_TWO_DIGIT_FORMAT.format(minutes)}' HOUR TO MINUTE"
+else:
+str_value = f"INTERVAL '{_TWO_DIGIT_FORMAT.format(hours)}:{_TWO_DIGIT_FORMAT.format(minutes)}' HOUR TO MINUTE"
 elif (
 start_field == DayTimeIntervalType.HOUR
 and end_field == DayTimeIntervalType.SECOND
@@ -348,21 +374,21 @@ def _format_day_time_interval(
 and end_field == DayTimeIntervalType.SECOND
 ): # DAY TO SECOND
 if seconds == int(seconds):
-str_value = f"INTERVAL '{days} {_format_time_component(hours)}:{_format_time_component(minutes)}:{_format_time_component(int(seconds))}' DAY TO SECOND"
+str_value = f"INTERVAL '{days_str} {_format_time_component(hours)}:{_format_time_component(minutes)}:{_format_time_component(int(seconds))}' DAY TO SECOND"
 else:
 seconds_str = _format_seconds_precise(seconds)
-str_value = f"INTERVAL '{days} {_format_time_component(hours)}:{_format_time_component(minutes)}:{seconds_str}' DAY TO SECOND"
+str_value = f"INTERVAL '{days_str} {_format_time_component(hours)}:{_format_time_component(minutes)}:{seconds_str}' DAY TO SECOND"
 else:
 # Fallback - use smart formatting like the original literal.py logic
-if days > 0:
+if days >= 0:
 if hours == 0 and minutes == 0 and seconds == 0:
 str_value = f"INTERVAL '{int(days)}' DAY"
 else:
 if seconds == int(seconds):
-str_value = f"INTERVAL '{days} {_format_time_component(hours)}:{_format_time_component(minutes)}:{_format_time_component(int(seconds))}' DAY TO SECOND"
+str_value = f"INTERVAL '{days_str} {_format_time_component(hours)}:{_format_time_component(minutes)}:{_format_time_component(int(seconds))}' DAY TO SECOND"
 else:
 seconds_str = _format_seconds_precise(seconds)
-str_value = f"INTERVAL '{days} {_format_time_component(hours)}:{_format_time_component(minutes)}:{seconds_str}' DAY TO SECOND"
+str_value = f"INTERVAL '{days_str} {_format_time_component(hours)}:{_format_time_component(minutes)}:{seconds_str}' DAY TO SECOND"
 elif hours > 0:
 if minutes == 0 and seconds == 0:
 str_value = f"INTERVAL '{_format_time_component(hours)}' HOUR"
@@ -18,8 +18,11 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quot
 from snowflake.snowpark.types import DayTimeIntervalType, YearMonthIntervalType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import (
+get_jpype_jclass_lock,
 get_sql_named_arg,
 get_sql_plan,
 get_sql_pos_arg,
@@ -73,17 +76,20 @@ def sql_parser():

 @cache
 def _get_sql_parser():
-return jpype.JClass("org.apache.spark.sql.execution.SparkSqlParser")()
+with get_jpype_jclass_lock():
+return jpype.JClass("org.apache.spark.sql.execution.SparkSqlParser")()


 @cache
 def _get_sql_conf():
-return jpype.JClass("org.apache.spark.sql.internal.SQLConf")
+with get_jpype_jclass_lock():
+return jpype.JClass("org.apache.spark.sql.internal.SQLConf")


 @cache
 def _as_java_list():
-return jpype.JClass("scala.collection.JavaConverters").seqAsJavaList
+with get_jpype_jclass_lock():
+return jpype.JClass("scala.collection.JavaConverters").seqAsJavaList


 def as_java_list(obj):
@@ -92,7 +98,8 @@ def as_java_list(obj):

 @cache
 def _as_java_map():
-return jpype.JClass("scala.collection.JavaConverters").mapAsJavaMap
+with get_jpype_jclass_lock():
+return jpype.JClass("scala.collection.JavaConverters").mapAsJavaMap


 def as_java_map(obj):
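
Note: the hunks above route every jpype.JClass lookup through get_jpype_jclass_lock(), newly imported from snowflake.snowpark_connect.utils.context, so that JVM class resolution is serialized across threads while @cache keeps the resolved handle. The sketch below shows the same lock-plus-cache shape with a stand-in loader; the plain module-level threading.Lock is only an assumption about how the real helper behaves.

import threading
from functools import cache

_jclass_lock = threading.Lock()  # stand-in for the lock kept in utils/context


def get_jpype_jclass_lock() -> threading.Lock:
    return _jclass_lock


def _load_class(name: str) -> str:
    # Stand-in for jpype.JClass(name); treat it as expensive and not thread-safe.
    return f"<class {name}>"


@cache
def _get_sql_parser_class() -> str:
    # First caller resolves the class under the lock; later callers hit the cache.
    with get_jpype_jclass_lock():
        return _load_class("org.apache.spark.sql.execution.SparkSqlParser")
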
@@ -253,12 +260,47 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
 class_name = str(exp.getClass().getSimpleName())
 match class_name:
 case "AggregateExpression":
-func_name = as_java_list(exp.children())[0].nodeName()
+aggregate_func = as_java_list(exp.children())[0]
+func_name = aggregate_func.nodeName()
 args = [
 map_logical_plan_expression(e)
-for e in as_java_list(as_java_list(exp.children())[0].children())
+for e in list(as_java_list(aggregate_func.children()))
 ]
-proto = apply_filter_clause(func_name, args, exp)
+
+# Special handling for percentile_cont and percentile_disc
+# These functions have a 'reverse' property that indicates sort order
+# Pass it as a 3rd argument (sort_order expression) without modifying children
+if func_name.lower() in ("percentile_cont", "percentiledisc"):
+# percentile_cont/disc should always have exactly 2 children: unresolved attribute and percentile value
+if len(args) != 2:
+exception = AssertionError(
+f"{func_name} expected 2 args but got {len(args)}"
+)
+attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+raise exception
+
+reverse = bool(aggregate_func.reverse())
+
+direction = (
+expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING
+if reverse
+else expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING
+)
+
+sort_order_expr = expressions_proto.Expression(
+sort_order=expressions_proto.Expression.SortOrder(
+child=args[0],
+direction=direction,
+)
+)
+args.append(sort_order_expr)
+proto = apply_filter_clause(func_name, [args[0]], exp)
+# second arg is a literal value and it doesn't make sense to apply filter on it.
+# also skips filtering on sort_order.
+proto.unresolved_function.arguments.append(args[1])
+proto.unresolved_function.arguments.append(sort_order_expr)
+else:
+proto = apply_filter_clause(func_name, args, exp)
 case "Alias":
 proto = expressions_proto.Expression(
 alias=expressions_proto.Expression.Alias(
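
Note on the AggregateExpression hunk above: for percentile_cont / percentile_disc, Catalyst exposes the ORDER BY direction as a reverse flag on the aggregate function rather than as a child expression, so the mapper now synthesizes a SortOrder expression, appends it as a third argument, and applies the FILTER clause only to the column argument (not to the percentage literal or the sort order). The sketch below mirrors that argument layout with plain dicts standing in for the Spark Connect protobuf messages:

def build_percentile_args(column: dict, percentage: dict, reverse: bool) -> list[dict]:
    # Dicts stand in for Expression protos.
    direction = "SORT_DIRECTION_DESCENDING" if reverse else "SORT_DIRECTION_ASCENDING"
    sort_order = {"sort_order": {"child": column, "direction": direction}}
    # Layout mirrored from the diff: column (filterable), percentage literal,
    # then the synthesized sort order.
    return [column, percentage, sort_order]


# Roughly PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x DESC)
args = build_percentile_args({"attr": "x"}, {"literal": 0.5}, reverse=True)
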
@@ -275,7 +317,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
 function_name="when",
 arguments=[
 map_logical_plan_expression(e)
-for e in as_java_list(exp.children())
+for e in list(as_java_list(exp.children()))
 ],
 )
 )
@@ -289,7 +331,8 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
 )
 case "Coalesce":
 arguments = [
-map_logical_plan_expression(e) for e in as_java_list(exp.children())
+map_logical_plan_expression(e)
+for e in list(as_java_list(exp.children()))
 ]

 proto = expressions_proto.Expression(
@@ -357,7 +400,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
 subquery_type=snowflake_proto.SubqueryExpression.SUBQUERY_TYPE_IN,
 in_subquery_values=[
 map_logical_plan_expression(value)
-for value in as_java_list(exp.values())
+for value in list(as_java_list(exp.values()))
 ],
 )
 )
@@ -366,7 +409,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
 case "LambdaFunction":
 arguments = [
 map_logical_plan_expression(arg).unresolved_named_lambda_variable
-for arg in as_java_list(exp.arguments())
+for arg in list(as_java_list(exp.arguments()))
 ]
 proto = expressions_proto.Expression(
 lambda_function=expressions_proto.Expression.LambdaFunction(
@@ -380,14 +423,15 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
 function_name=class_name.lower(),
 arguments=[
 map_logical_plan_expression(e)
-for e in as_java_list(exp.children())
+for e in list(as_java_list(exp.children()))
 ],
 )
 )
 case "LikeAny" | "NotLikeAny" | "LikeAll" | "NotLikeAll":
-patterns = as_java_list(exp.patterns())
+patterns = list(as_java_list(exp.patterns()))
 arguments = [
-map_logical_plan_expression(e) for e in as_java_list(exp.children())
+map_logical_plan_expression(e)
+for e in list(as_java_list(exp.children()))
 ]
 arguments += [map_logical_plan_expression(e) for e in patterns]
 proto = expressions_proto.Expression(
@@ -421,19 +465,25 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
 end_field = _YEAR_MONTH_FIELD_MAP.get(end_field_name)

 if start_field is None:
-raise AnalysisException(
+exception = AnalysisException(
 f"Invalid year-month interval start field: '{start_field_name}'. Expected 'year' or 'month'."
 )
+attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+raise exception
 if end_field is None:
-raise AnalysisException(
+exception = AnalysisException(
 f"Invalid year-month interval end field: '{end_field_name}'. Expected 'year' or 'month'."
 )
+attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+raise exception

 # Validate field ordering (start_field should be <= end_field)
 if start_field > end_field:
-raise AnalysisException(
+exception = AnalysisException(
 f"Invalid year-month interval: start field '{start_field_name}' must come before or equal to end field '{end_field_name}'."
 )
+attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+raise exception

 # Use extension for year-month intervals to preserve start/end field info
 literal = expressions_proto.Expression.Literal(
@@ -466,19 +516,25 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
 end_field = _DAY_TIME_FIELD_MAP.get(end_field_name)

 if start_field is None:
-raise AnalysisException(
+exception = AnalysisException(
 f"Invalid day-time interval start field: '{start_field_name}'. Expected 'day', 'hour', 'minute', or 'second'."
 )
+attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+raise exception
 if end_field is None:
-raise AnalysisException(
+exception = AnalysisException(
 f"Invalid day-time interval end field: '{end_field_name}'. Expected 'day', 'hour', 'minute', or 'second'."
 )
+attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+raise exception

 # Validate field ordering (start_field should be <= end_field)
 if start_field > end_field:
-raise AnalysisException(
+exception = AnalysisException(
 f"Invalid day-time interval: start field '{start_field_name}' must come before or equal to end field '{end_field_name}'."
 )
+attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+raise exception

 # Use extension for day-time intervals to preserve start/end field info
 literal = expressions_proto.Expression.Literal(
@@ -534,19 +590,27 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
 name = str(exp.name())
 value = get_sql_named_arg(name)
 if not value.HasField("literal_type"):
-raise AnalysisException(f"Found an unbound parameter {name!r}")
+exception = AnalysisException(f"Found an unbound parameter {name!r}")
+attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
+raise exception
 proto = expressions_proto.Expression(literal=value)
 case "NamePlaceholder$":
 # This is a placeholder for an expression name to be resolved later.
-raise SnowparkConnectNotImplementedError(
+exception = SnowparkConnectNotImplementedError(
 "NamePlaceholder is not supported in SQL expressions."
 )
+attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+raise exception
 case "PosParameter":
 pos = exp.pos()
 try:
 value = get_sql_pos_arg(pos)
 except KeyError:
-raise AnalysisException(f"Found an unbound parameter at position {pos}")
+exception = AnalysisException(
+f"Found an unbound parameter at position {pos}"
+)
+attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
+raise exception
 proto = expressions_proto.Expression(literal=value)
 case "ScalarSubquery":
 rel_proto = map_logical_plan_relation(exp.plan())
@@ -616,7 +680,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
 ]
 + [
 map_logical_plan_expression(e)
-for e in as_java_list(exp.children())
+for e in list(as_java_list(exp.children()))
 ],
 )
 )
@@ -659,18 +723,20 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
 )
 case "UnresolvedFunction":
 func_name = ".".join(
-str(part) for part in as_java_list(exp.nameParts())
+str(part) for part in list(as_java_list(exp.nameParts()))
 ).lower()
 args = [
 map_logical_plan_expression(arg)
-for arg in as_java_list(exp.arguments())
+for arg in list(as_java_list(exp.arguments()))
 ]

 proto = apply_filter_clause(func_name, args, exp, exp.isDistinct())
 case "UnresolvedNamedLambdaVariable":
 proto = expressions_proto.Expression(
 unresolved_named_lambda_variable=expressions_proto.Expression.UnresolvedNamedLambdaVariable(
-name_parts=[str(part) for part in as_java_list(exp.nameParts())],
+name_parts=[
+str(part) for part in list(as_java_list(exp.nameParts()))
+],
 )
 )
 case "UnresolvedStar":
@@ -691,9 +757,11 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
 # Build Window expression
 proto = get_window_expression_proto(window_spec, exp.child())
 else:
-raise AnalysisException(
+exception = AnalysisException(
 f"Window specification not found {window_spec_reference!r}"
 )
+attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+raise exception
 case "UTF8String":
 proto = expressions_proto.Expression(
 literal=expressions_proto.Expression.Literal(
@@ -723,13 +791,15 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
 function_name=proto_func,
 arguments=[
 map_logical_plan_expression(arg)
-for arg in as_java_list(exp.children())
+for arg in list(as_java_list(exp.children()))
 ],
 )
 )

 case other:
-raise SnowparkConnectNotImplementedError(f"Not implemented: {other}")
+exception = SnowparkConnectNotImplementedError(f"Not implemented: {other}")
+attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+raise exception

 return proto

@@ -752,11 +822,11 @@ def get_window_expression_proto(
 window_function=map_logical_plan_expression(window_function),
 partition_spec=[
 map_logical_plan_expression(e)
-for e in as_java_list(window_spec.partitionSpec())
+for e in list(as_java_list(window_spec.partitionSpec()))
 ],
 order_spec=[
 map_logical_plan_expression(e).sort_order
-for e in as_java_list(window_spec.orderSpec())
+for e in list(as_java_list(window_spec.orderSpec()))
 ],
 frame_spec=frame_spec_proto,
 )
@@ -10,6 +10,8 @@ from snowflake import snowpark
 from snowflake.snowpark.types import MapType, StructType, VariantType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.type_mapping import proto_to_snowpark_type
 from snowflake.snowpark_connect.typed_column import TypedColumn
@@ -54,7 +56,11 @@ def cache_external_udf_wrapper(from_register_udf: bool):
 case "python_udf":
 pass
 case _:
-raise ValueError(f"Unsupported UDF type: {function_type}")
+exception = ValueError(f"Unsupported UDF type: {function_type}")
+attach_custom_error_code(
+exception, ErrorCodes.UNSUPPORTED_OPERATION
+)
+raise exception

 return cached_udf

@@ -97,9 +103,11 @@ def register_udf(
 case "scalar_scala_udf":
 output_type = udf_proto.scalar_scala_udf.outputType
 case _:
-raise ValueError(
+exception = ValueError(
 f"Unsupported UDF type: {udf_proto.WhichOneof('function')}"
 )
+attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+raise exception
 processed_return_type, original_return_type = process_udf_return_type(output_type)
 session = get_or_create_snowpark_session()
 kwargs = {
@@ -15,6 +15,8 @@ from snowflake.snowpark.exceptions import SnowparkSQLException
 from snowflake.snowpark.types import ArrayType, LongType, MapType, StructType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import (
@@ -85,9 +87,11 @@ def map_unresolved_attribute(
 grouping_spark_columns = get_current_grouping_columns()
 if not grouping_spark_columns:
 # grouping__id can only be used with GROUP BY CUBE/ROLLUP/GROUPING SETS
-raise AnalysisException(
+exception = AnalysisException(
 "[MISSING_GROUP_BY] grouping__id can only be used with GROUP BY (CUBE | ROLLUP | GROUPING SETS)"
 )
+attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+raise exception
 # Convert to GROUPING_ID() function call with the grouping columns
 # Map Spark column names to Snowpark column names
 snowpark_cols = []
@@ -99,9 +103,11 @@ def map_unresolved_attribute(
 )
 )
 if not snowpark_name:
-raise AnalysisException(
+exception = AnalysisException(
 f"[INTERNAL_ERROR] Cannot find Snowpark column mapping for grouping column '{spark_col_name}'"
 )
+attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+raise exception
 snowpark_cols.append(snowpark_fn.col(snowpark_name))

 # Call GROUPING_ID with all grouping columns using Snowpark names
@@ -155,10 +161,12 @@ def map_unresolved_attribute(

 if is_catalog:
 # This looks like a catalog.database.column.field pattern
-raise AnalysisException(
+exception = AnalysisException(
 f"[UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `{original_attr_name}` cannot be resolved. "
 f"Cross-catalog column references are not supported in DataFrame API."
 )
+attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+raise exception

 attr_name = ".".join(name_parts)

@@ -205,18 +213,24 @@ def map_unresolved_attribute(
 if compiled_regex.fullmatch(col_name):
 matched_columns.append(col_name)
 except re.error as e:
-raise AnalysisException(f"Invalid regex pattern '{regex_pattern}': {e}")
+exception = AnalysisException(
+f"Invalid regex pattern '{regex_pattern}': {e}"
+)
+attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+raise exception

 if not matched_columns:
 # Keep the improved error message for SQL regex patterns
 # This is only hit for SQL queries like SELECT `(e|f)` FROM table
 # when spark.sql.parser.quotedRegexColumnNames is enabled
-raise AnalysisException(
+exception = AnalysisException(
 f"No columns match the regex pattern '{regex_pattern}'. "
 f"Snowflake SQL does not support SELECT statements with no columns. "
 f"Please ensure your regex pattern matches at least one column. "
 f"Available columns: {', '.join(available_columns[:10])}{'...' if len(available_columns) > 10 else ''}"
 )
+attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+raise exception

 # When multiple columns match, we need to signal that this should expand to multiple columns
 # Since map_unresolved_attribute can only return one column, we'll use a special marker
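
Note: this branch implements Spark's spark.sql.parser.quotedRegexColumnNames behaviour, where a backquoted identifier such as `(e|f)` is treated as a regular expression that must match a full column name; invalid patterns and zero-match patterns now raise with INVALID_FUNCTION_ARGUMENT and INVALID_INPUT codes attached. A standalone sketch of just the matching step (not the library code):

import re


def match_columns(regex_pattern: str, available_columns: list[str]) -> list[str]:
    try:
        compiled_regex = re.compile(regex_pattern)
    except re.error as e:
        raise ValueError(f"Invalid regex pattern '{regex_pattern}': {e}")
    # fullmatch: the pattern has to cover the whole column name, not a substring.
    return [c for c in available_columns if compiled_regex.fullmatch(c)]


assert match_columns("(e|f)", ["d", "e", "f", "ef"]) == ["e", "f"]
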
@@ -233,7 +247,7 @@ def map_unresolved_attribute(
 )
 )
 col = get_col(snowpark_name)
-qualifiers = column_mapping.get_qualifier_for_spark_column(quoted_col_name)
+qualifiers = column_mapping.get_qualifiers_for_spark_column(quoted_col_name)
 typed_col = TypedColumn(col, lambda: typer.type(col))
 typed_col.set_qualifiers(qualifiers)
 # Store matched columns info for later use
@@ -248,7 +262,7 @@ def map_unresolved_attribute(
 )
 )
 col = get_col(snowpark_name)
-qualifiers = column_mapping.get_qualifier_for_spark_column(quoted_col_name)
+qualifiers = column_mapping.get_qualifiers_for_spark_column(quoted_col_name)
 typed_col = TypedColumn(col, lambda: typer.type(col))
 typed_col.set_qualifiers(qualifiers)
 return (matched_columns[0], typed_col)
@@ -266,7 +280,7 @@ def map_unresolved_attribute(
 )
 if snowpark_name is not None:
 col = get_col(snowpark_name)
-qualifiers = column_mapping.get_qualifier_for_spark_column(quoted_attr_name)
+qualifiers = column_mapping.get_qualifiers_for_spark_column(quoted_attr_name)
 else:
 # this means it has to be a struct column with a field name
 snowpark_name: str | None = None
@@ -324,7 +338,7 @@ def map_unresolved_attribute(
 )
 if snowpark_name is not None:
 col = get_col(snowpark_name)
-qualifiers = column_mapping.get_qualifier_for_spark_column(
+qualifiers = column_mapping.get_qualifiers_for_spark_column(
 unqualified_name
 )
 typed_col = TypedColumn(col, lambda: typer.type(col))
@@ -346,16 +360,22 @@ def map_unresolved_attribute(
 )
 if outer_col_name:
 # This is an outer scope column being referenced inside a lambda
-raise AnalysisException(
+exception = AnalysisException(
 f"Reference to non-lambda variable '{attr_name}' within lambda function. "
 f"Lambda functions can only access their own parameters. "
 f"If '{attr_name}' is a table column, it must be passed as an explicit parameter to the enclosing function."
 )
+attach_custom_error_code(
+exception, ErrorCodes.UNSUPPORTED_OPERATION
+)
+raise exception

 if has_plan_id:
-raise AnalysisException(
+exception = AnalysisException(
 f'[RESOLVED_REFERENCE_COLUMN_NOT_FOUND] The column "{attr_name}" does not exist in the target dataframe.'
 )
+attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+raise exception
 else:
 # Column does not exist. Pass in dummy column name for lazy error throwing as it could be a built-in function
 snowpark_name = attr_name
@@ -365,9 +385,11 @@ def map_unresolved_attribute(
 col_type = typer.type(col)[0]
 except SnowparkSQLException as e:
 if e.raw_message is not None and "invalid identifier" in e.raw_message:
-raise AnalysisException(
+exception = AnalysisException(
 f'[COLUMN_NOT_FOUND] The column "{attr_name}" does not exist in the target dataframe.'
 )
+attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+raise exception
 else:
 raise
 is_struct = isinstance(col_type, StructType)
@@ -383,7 +405,7 @@ def map_unresolved_attribute(
 for field_name in path:
 col = col.getItem(field_name)

-qualifiers = []
+qualifiers = set()

 typed_col = TypedColumn(col, lambda: typer.type(col))
 typed_col.set_qualifiers(qualifiers)
@@ -416,7 +438,9 @@ def _match_path_to_struct(path: list[str], col_type: StructType) -> list[str]:
 typ = typ.value_type if isinstance(typ, MapType) else typ.element_type
 else:
 # If the type is not a struct, map, or array, we cannot access the field.
-raise AnalysisException(
+exception = AnalysisException(
 f"[INVALID_EXTRACT_BASE_FIELD_TYPE] Can't extract a value from \"{'.'.join(path[:i])}\". Need a complex type [STRUCT, ARRAY, MAP] but got \"{typ}\"."
 )
+attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+raise exception
 return adjusted_path