snowpark-connect 0.30.0__py3-none-any.whl → 0.31.0__py3-none-any.whl

This diff shows the content of publicly available package versions as released to their public registry. It is provided for informational purposes only and reflects the changes between the two versions as published.

Potentially problematic release.



Files changed (81)
  1. snowflake/snowpark_connect/column_name_handler.py +150 -25
  2. snowflake/snowpark_connect/config.py +54 -16
  3. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  4. snowflake/snowpark_connect/error/error_codes.py +50 -0
  5. snowflake/snowpark_connect/error/error_utils.py +142 -22
  6. snowflake/snowpark_connect/error/exceptions.py +13 -4
  7. snowflake/snowpark_connect/execute_plan/map_execution_command.py +5 -1
  8. snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
  9. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  10. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  11. snowflake/snowpark_connect/expression/literal.py +7 -1
  12. snowflake/snowpark_connect/expression/map_cast.py +17 -5
  13. snowflake/snowpark_connect/expression/map_expression.py +48 -4
  14. snowflake/snowpark_connect/expression/map_extension.py +25 -5
  15. snowflake/snowpark_connect/expression/map_sql_expression.py +65 -30
  16. snowflake/snowpark_connect/expression/map_udf.py +10 -2
  17. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +33 -9
  18. snowflake/snowpark_connect/expression/map_unresolved_function.py +627 -205
  19. snowflake/snowpark_connect/expression/map_unresolved_star.py +5 -1
  20. snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
  21. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  22. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
  23. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
  24. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  25. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +34 -12
  26. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  27. snowflake/snowpark_connect/relation/io_utils.py +66 -4
  28. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  29. snowflake/snowpark_connect/relation/map_column_ops.py +88 -56
  30. snowflake/snowpark_connect/relation/map_extension.py +28 -8
  31. snowflake/snowpark_connect/relation/map_join.py +21 -10
  32. snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
  33. snowflake/snowpark_connect/relation/map_relation.py +33 -7
  34. snowflake/snowpark_connect/relation/map_row_ops.py +36 -9
  35. snowflake/snowpark_connect/relation/map_sql.py +91 -24
  36. snowflake/snowpark_connect/relation/map_stats.py +25 -6
  37. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  38. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
  39. snowflake/snowpark_connect/relation/read/map_read.py +24 -3
  40. snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
  41. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  42. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
  43. snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
  44. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  45. snowflake/snowpark_connect/relation/read/map_read_table.py +15 -5
  46. snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
  47. snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
  48. snowflake/snowpark_connect/relation/stage_locator.py +5 -1
  49. snowflake/snowpark_connect/relation/utils.py +19 -2
  50. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  51. snowflake/snowpark_connect/relation/write/map_write.py +146 -63
  52. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  53. snowflake/snowpark_connect/resources_initializer.py +5 -1
  54. snowflake/snowpark_connect/server.py +72 -19
  55. snowflake/snowpark_connect/type_mapping.py +54 -17
  56. snowflake/snowpark_connect/utils/context.py +42 -1
  57. snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
  58. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  59. snowflake/snowpark_connect/utils/identifiers.py +11 -3
  60. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  61. snowflake/snowpark_connect/utils/profiling.py +25 -8
  62. snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
  63. snowflake/snowpark_connect/utils/session.py +5 -2
  64. snowflake/snowpark_connect/utils/telemetry.py +81 -18
  65. snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
  66. snowflake/snowpark_connect/utils/udf_cache.py +5 -3
  67. snowflake/snowpark_connect/utils/udf_helper.py +20 -6
  68. snowflake/snowpark_connect/utils/udf_utils.py +4 -4
  69. snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
  70. snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
  71. snowflake/snowpark_connect/version.py +1 -1
  72. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/METADATA +3 -2
  73. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/RECORD +81 -78
  74. {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-connect +0 -0
  75. {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-session +0 -0
  76. {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-submit +0 -0
  77. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/WHEEL +0 -0
  78. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE-binary +0 -0
  79. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE.txt +0 -0
  80. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/NOTICE-binary +0 -0
  81. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/expression/map_extension.py
@@ -13,6 +13,8 @@ from snowflake.snowpark.types import (
  YearMonthIntervalType,
  )
  from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.expression.typer import ExpressionTyper
  from snowflake.snowpark_connect.typed_column import TypedColumn
  from snowflake.snowpark_connect.utils.context import (
@@ -78,9 +80,11 @@ def map_extension(
  elif value.HasField("unresolved_attribute"):
  name = "__" + key + "__" + exp_name[0]
  else:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Named argument not supported yet for this input."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  return [name], typed_col

  case "interval_literal":
@@ -152,9 +156,11 @@ def map_extension(

  queries = df.queries["queries"]
  if len(queries) != 1:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  f"Unexpected number of queries: {len(queries)}"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  query = f"({queries[0]})"

  match extension.subquery_expression.subquery_type:
@@ -168,7 +174,13 @@ def map_extension(
  result_type = BooleanType()
  case snowflake_proto.SubqueryExpression.SUBQUERY_TYPE_TABLE_ARG:
  # TODO: Currently, map_sql.py handles this, so we never end up here.
- raise SnowparkConnectNotImplementedError("Unexpected table arg")
+ exception = SnowparkConnectNotImplementedError(
+ "Unexpected table arg"
+ )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  case snowflake_proto.SubqueryExpression.SUBQUERY_TYPE_IN:
  cols = [
  map_expression(e, column_mapping, typer)
@@ -184,14 +196,22 @@ def map_extension(
  )
  result_type = BooleanType()
  case other:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  f"Unexpected subquery type: {other}"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception

  return [name], TypedColumn(result_exp, lambda: [result_type])

  case other:
- raise SnowparkConnectNotImplementedError(f"Unexpected extension {other}")
+ exception = SnowparkConnectNotImplementedError(
+ f"Unexpected extension {other}"
+ )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception


  def _format_year_month_interval(
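The pattern above repeats throughout this release: instead of raising inline, the code now constructs the exception, tags it with a code from the new error/error_codes.py module (+50 lines, all new), and only then raises. The real helpers live in error/error_utils.py, which this diff does not show in full; below is a minimal sketch of how such a tagging pair could work. Only the names attach_custom_error_code and ErrorCodes come from the diff; everything else, including the enum members (inferred from call sites here), is illustrative.

from enum import Enum

class ErrorCodes(Enum):
    # Members inferred from the call sites in this diff; the real
    # enum in error/error_codes.py is likely larger.
    UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION"
    INVALID_INPUT = "INVALID_INPUT"
    INVALID_SQL_SYNTAX = "INVALID_SQL_SYNTAX"
    INSUFFICIENT_INPUT = "INSUFFICIENT_INPUT"
    INVALID_FUNCTION_ARGUMENT = "INVALID_FUNCTION_ARGUMENT"
    COLUMN_NOT_FOUND = "COLUMN_NOT_FOUND"
    TYPE_MISMATCH = "TYPE_MISMATCH"

def attach_custom_error_code(exception: BaseException, code: ErrorCodes) -> None:
    # Tagging the instance keeps the exception type unchanged while
    # giving telemetry a stable code to read back later.
    exception.error_code = code  # type: ignore[attr-defined]

try:
    exception = NotImplementedError("Named argument not supported yet.")
    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
    raise exception
except NotImplementedError as e:
    assert getattr(e, "error_code", None) is ErrorCodes.UNSUPPORTED_OPERATION

Building the exception first and raising it as the final statement keeps the traceback pointed at the original raise site while still letting the helper mutate the instance.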
snowflake/snowpark_connect/expression/map_sql_expression.py
@@ -18,8 +18,11 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
  from snowflake.snowpark.types import DayTimeIntervalType, YearMonthIntervalType
  from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
  from snowflake.snowpark_connect.config import global_config
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.typed_column import TypedColumn
  from snowflake.snowpark_connect.utils.context import (
+ get_jpype_jclass_lock,
  get_sql_named_arg,
  get_sql_plan,
  get_sql_pos_arg,
@@ -73,17 +76,20 @@ def sql_parser():

  @cache
  def _get_sql_parser():
- return jpype.JClass("org.apache.spark.sql.execution.SparkSqlParser")()
+ with get_jpype_jclass_lock():
+ return jpype.JClass("org.apache.spark.sql.execution.SparkSqlParser")()


  @cache
  def _get_sql_conf():
- return jpype.JClass("org.apache.spark.sql.internal.SQLConf")
+ with get_jpype_jclass_lock():
+ return jpype.JClass("org.apache.spark.sql.internal.SQLConf")


  @cache
  def _as_java_list():
- return jpype.JClass("scala.collection.JavaConverters").seqAsJavaList
+ with get_jpype_jclass_lock():
+ return jpype.JClass("scala.collection.JavaConverters").seqAsJavaList


  def as_java_list(obj):
@@ -92,7 +98,8 @@ def as_java_list(obj):

  @cache
  def _as_java_map():
- return jpype.JClass("scala.collection.JavaConverters").mapAsJavaMap
+ with get_jpype_jclass_lock():
+ return jpype.JClass("scala.collection.JavaConverters").mapAsJavaMap


  def as_java_map(obj):
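These cached factories now serialize jpype.JClass lookups behind a lock fetched from utils/context.py (whose diff adds 42 lines, not shown here). Resolving JVM classes from multiple gRPC handler threads at once is a plausible motivation, though the diff itself does not say. A minimal sketch of what the accessor could look like; only the name get_jpype_jclass_lock comes from the diff, the rest is assumed:

import threading

# Hypothetical module-level lock; the real accessor lives in
# snowflake/snowpark_connect/utils/context.py.
_JPYPE_JCLASS_LOCK = threading.RLock()

def get_jpype_jclass_lock():
    # Reentrant, so a JClass resolution that triggers another
    # resolution on the same thread cannot deadlock itself.
    return _JPYPE_JCLASS_LOCK

Because the wrappers are decorated with @cache, the lock is only contended on first resolution; later calls return the memoized class without entering the with block.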
@@ -256,7 +263,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Expression
  func_name = as_java_list(exp.children())[0].nodeName()
  args = [
  map_logical_plan_expression(e)
- for e in as_java_list(as_java_list(exp.children())[0].children())
+ for e in list(as_java_list(as_java_list(exp.children())[0].children()))
  ]
  proto = apply_filter_clause(func_name, args, exp)
  case "Alias":
@@ -275,7 +282,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Expression
  function_name="when",
  arguments=[
  map_logical_plan_expression(e)
- for e in as_java_list(exp.children())
+ for e in list(as_java_list(exp.children()))
  ],
  )
  )
@@ -289,7 +296,8 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Expression
  )
  case "Coalesce":
  arguments = [
- map_logical_plan_expression(e) for e in as_java_list(exp.children())
+ map_logical_plan_expression(e)
+ for e in list(as_java_list(exp.children()))
  ]

  proto = expressions_proto.Expression(
@@ -357,7 +365,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Expression
  subquery_type=snowflake_proto.SubqueryExpression.SUBQUERY_TYPE_IN,
  in_subquery_values=[
  map_logical_plan_expression(value)
- for value in as_java_list(exp.values())
+ for value in list(as_java_list(exp.values()))
  ],
  )
  )
@@ -366,7 +374,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Expression
  case "LambdaFunction":
  arguments = [
  map_logical_plan_expression(arg).unresolved_named_lambda_variable
- for arg in as_java_list(exp.arguments())
+ for arg in list(as_java_list(exp.arguments()))
  ]
  proto = expressions_proto.Expression(
  lambda_function=expressions_proto.Expression.LambdaFunction(
@@ -380,14 +388,15 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Expression
  function_name=class_name.lower(),
  arguments=[
  map_logical_plan_expression(e)
- for e in as_java_list(exp.children())
+ for e in list(as_java_list(exp.children()))
  ],
  )
  )
  case "LikeAny" | "NotLikeAny" | "LikeAll" | "NotLikeAll":
- patterns = as_java_list(exp.patterns())
+ patterns = list(as_java_list(exp.patterns()))
  arguments = [
- map_logical_plan_expression(e) for e in as_java_list(exp.children())
+ map_logical_plan_expression(e)
+ for e in list(as_java_list(exp.children()))
  ]
  arguments += [map_logical_plan_expression(e) for e in patterns]
  proto = expressions_proto.Expression(
@@ -421,19 +430,25 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Expression
  end_field = _YEAR_MONTH_FIELD_MAP.get(end_field_name)

  if start_field is None:
- raise AnalysisException(
+ exception = AnalysisException(
  f"Invalid year-month interval start field: '{start_field_name}'. Expected 'year' or 'month'."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception
  if end_field is None:
- raise AnalysisException(
+ exception = AnalysisException(
  f"Invalid year-month interval end field: '{end_field_name}'. Expected 'year' or 'month'."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception

  # Validate field ordering (start_field should be <= end_field)
  if start_field > end_field:
- raise AnalysisException(
+ exception = AnalysisException(
  f"Invalid year-month interval: start field '{start_field_name}' must come before or equal to end field '{end_field_name}'."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception

  # Use extension for year-month intervals to preserve start/end field info
  literal = expressions_proto.Expression.Literal(
@@ -466,19 +481,25 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Expression
  end_field = _DAY_TIME_FIELD_MAP.get(end_field_name)

  if start_field is None:
- raise AnalysisException(
+ exception = AnalysisException(
  f"Invalid day-time interval start field: '{start_field_name}'. Expected 'day', 'hour', 'minute', or 'second'."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception
  if end_field is None:
- raise AnalysisException(
+ exception = AnalysisException(
  f"Invalid day-time interval end field: '{end_field_name}'. Expected 'day', 'hour', 'minute', or 'second'."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception

  # Validate field ordering (start_field should be <= end_field)
  if start_field > end_field:
- raise AnalysisException(
+ exception = AnalysisException(
  f"Invalid day-time interval: start field '{start_field_name}' must come before or equal to end field '{end_field_name}'."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception

  # Use extension for day-time intervals to preserve start/end field info
  literal = expressions_proto.Expression.Literal(
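The ordering check works because Spark encodes interval fields as ordered integers (YearMonthIntervalType.YEAR = 0, MONTH = 1; DayTimeIntervalType.DAY = 0 through SECOND = 3). A sketch of the lookup maps these hunks consult; the values below are assumed from those Spark constants, and the real maps are defined elsewhere in map_sql_expression.py:

# Assumed contents, mirroring Spark's YearMonthIntervalType and
# DayTimeIntervalType field constants.
_YEAR_MONTH_FIELD_MAP = {"year": 0, "month": 1}
_DAY_TIME_FIELD_MAP = {"day": 0, "hour": 1, "minute": 2, "second": 3}

# "Start must come before or equal to end" is a plain comparison:
# INTERVAL ... HOUR TO DAY is rejected because hour (1) > day (0).
assert _DAY_TIME_FIELD_MAP["hour"] > _DAY_TIME_FIELD_MAP["day"]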
@@ -534,19 +555,27 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Expression
  name = str(exp.name())
  value = get_sql_named_arg(name)
  if not value.HasField("literal_type"):
- raise AnalysisException(f"Found an unbound parameter {name!r}")
+ exception = AnalysisException(f"Found an unbound parameter {name!r}")
+ attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
+ raise exception
  proto = expressions_proto.Expression(literal=value)
  case "NamePlaceholder$":
  # This is a placeholder for an expression name to be resolved later.
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "NamePlaceholder is not supported in SQL expressions."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  case "PosParameter":
  pos = exp.pos()
  try:
  value = get_sql_pos_arg(pos)
  except KeyError:
- raise AnalysisException(f"Found an unbound parameter at position {pos}")
+ exception = AnalysisException(
+ f"Found an unbound parameter at position {pos}"
+ )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
+ raise exception
  proto = expressions_proto.Expression(literal=value)
  case "ScalarSubquery":
  rel_proto = map_logical_plan_relation(exp.plan())
@@ -616,7 +645,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Expression
  ]
  + [
  map_logical_plan_expression(e)
- for e in as_java_list(exp.children())
+ for e in list(as_java_list(exp.children()))
  ],
  )
  )
@@ -659,18 +688,20 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Expression
  )
  case "UnresolvedFunction":
  func_name = ".".join(
- str(part) for part in as_java_list(exp.nameParts())
+ str(part) for part in list(as_java_list(exp.nameParts()))
  ).lower()
  args = [
  map_logical_plan_expression(arg)
- for arg in as_java_list(exp.arguments())
+ for arg in list(as_java_list(exp.arguments()))
  ]

  proto = apply_filter_clause(func_name, args, exp, exp.isDistinct())
  case "UnresolvedNamedLambdaVariable":
  proto = expressions_proto.Expression(
  unresolved_named_lambda_variable=expressions_proto.Expression.UnresolvedNamedLambdaVariable(
- name_parts=[str(part) for part in as_java_list(exp.nameParts())],
+ name_parts=[
+ str(part) for part in list(as_java_list(exp.nameParts()))
+ ],
  )
  )
  case "UnresolvedStar":
@@ -691,9 +722,11 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Expression
  # Build Window expression
  proto = get_window_expression_proto(window_spec, exp.child())
  else:
- raise AnalysisException(
+ exception = AnalysisException(
  f"Window specification not found {window_spec_reference!r}"
  )
+ attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+ raise exception
  case "UTF8String":
  proto = expressions_proto.Expression(
  literal=expressions_proto.Expression.Literal(
@@ -723,13 +756,15 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Expression
  function_name=proto_func,
  arguments=[
  map_logical_plan_expression(arg)
- for arg in as_java_list(exp.children())
+ for arg in list(as_java_list(exp.children()))
  ],
  )
  )

  case other:
- raise SnowparkConnectNotImplementedError(f"Not implemented: {other}")
+ exception = SnowparkConnectNotImplementedError(f"Not implemented: {other}")
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  return proto

@@ -752,11 +787,11 @@ def get_window_expression_proto(
  window_function=map_logical_plan_expression(window_function),
  partition_spec=[
  map_logical_plan_expression(e)
- for e in as_java_list(window_spec.partitionSpec())
+ for e in list(as_java_list(window_spec.partitionSpec()))
  ],
  order_spec=[
  map_logical_plan_expression(e).sort_order
- for e in as_java_list(window_spec.orderSpec())
+ for e in list(as_java_list(window_spec.orderSpec()))
  ],
  frame_spec=frame_spec_proto,
  )
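The second recurring edit in map_sql_expression.py wraps every seqAsJavaList(...) result in list(...) before a comprehension walks it. seqAsJavaList returns a live view over a JVM sequence rather than a Python list; snapshotting it up front means the (often recursive) calls to map_logical_plan_expression iterate plain Python objects instead of stepping through a JPype proxy on every element. The diff does not state the motivation; here is a toy illustration of why snapshotting a live view is the safer default, with a generator standing in for the proxy:

# A live view (here, a generator) can only be walked once; a JPype
# sequence proxy has the related cost that every step crosses into
# the JVM. list(...) copies the elements into plain Python objects.
def live_view():
    yield from ("a", "b")

view = live_view()
assert [s.upper() for s in view] == ["A", "B"]
assert [s.upper() for s in view] == []  # exhausted: second pass sees nothing

snapshot = list(live_view())
assert [s.upper() for s in snapshot] == ["A", "B"]
assert [s.upper() for s in snapshot] == ["A", "B"]  # safe to re-iterate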
snowflake/snowpark_connect/expression/map_udf.py
@@ -10,6 +10,8 @@ from snowflake import snowpark
  from snowflake.snowpark.types import MapType, StructType, VariantType
  from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
  from snowflake.snowpark_connect.config import global_config
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.expression.typer import ExpressionTyper
  from snowflake.snowpark_connect.type_mapping import proto_to_snowpark_type
  from snowflake.snowpark_connect.typed_column import TypedColumn
@@ -54,7 +56,11 @@ def cache_external_udf_wrapper(from_register_udf: bool):
  case "python_udf":
  pass
  case _:
- raise ValueError(f"Unsupported UDF type: {function_type}")
+ exception = ValueError(f"Unsupported UDF type: {function_type}")
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception

  return cached_udf

@@ -97,9 +103,11 @@ def register_udf(
  case "scalar_scala_udf":
  output_type = udf_proto.scalar_scala_udf.outputType
  case _:
- raise ValueError(
+ exception = ValueError(
  f"Unsupported UDF type: {udf_proto.WhichOneof('function')}"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  processed_return_type, original_return_type = process_udf_return_type(output_type)
  session = get_or_create_snowpark_session()
  kwargs = {
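register_udf dispatches on the protobuf oneof carrying the UDF payload (the Spark Connect CommonInlineUserDefinedFunction message); WhichOneof returns the name of the populated field, or None when nothing is set, which is why the fallback raises. A self-contained illustration of the same dispatch shape, with a stand-in object instead of a real protobuf message (all names in the sketch are hypothetical):

# Stand-in for a proto message's WhichOneof("function").
class FakeUdfProto:
    def __init__(self, populated_field):
        self._field = populated_field

    def WhichOneof(self, oneof_name):
        assert oneof_name == "function"
        return self._field

def output_type_source(udf_proto):
    match udf_proto.WhichOneof("function"):
        case "python_udf":
            return "python_udf.output_type"
        case "scalar_scala_udf":
            return "scalar_scala_udf.outputType"
        case other:
            # other is None for an empty oneof, hence "Unsupported UDF type: None"
            raise ValueError(f"Unsupported UDF type: {other}")

assert output_type_source(FakeUdfProto("python_udf")) == "python_udf.output_type"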
snowflake/snowpark_connect/expression/map_unresolved_attribute.py
@@ -15,6 +15,8 @@ from snowflake.snowpark.exceptions import SnowparkSQLException
  from snowflake.snowpark.types import ArrayType, LongType, MapType, StructType
  from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
  from snowflake.snowpark_connect.config import global_config
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.expression.typer import ExpressionTyper
  from snowflake.snowpark_connect.typed_column import TypedColumn
  from snowflake.snowpark_connect.utils.context import (
@@ -85,9 +87,11 @@ def map_unresolved_attribute(
  grouping_spark_columns = get_current_grouping_columns()
  if not grouping_spark_columns:
  # grouping__id can only be used with GROUP BY CUBE/ROLLUP/GROUPING SETS
- raise AnalysisException(
+ exception = AnalysisException(
  "[MISSING_GROUP_BY] grouping__id can only be used with GROUP BY (CUBE | ROLLUP | GROUPING SETS)"
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+ raise exception
  # Convert to GROUPING_ID() function call with the grouping columns
  # Map Spark column names to Snowpark column names
  snowpark_cols = []
@@ -99,9 +103,11 @@ def map_unresolved_attribute(
  )
  )
  if not snowpark_name:
- raise AnalysisException(
+ exception = AnalysisException(
  f"[INTERNAL_ERROR] Cannot find Snowpark column mapping for grouping column '{spark_col_name}'"
  )
+ attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+ raise exception
  snowpark_cols.append(snowpark_fn.col(snowpark_name))

  # Call GROUPING_ID with all grouping columns using Snowpark names
@@ -155,10 +161,12 @@ def map_unresolved_attribute(

  if is_catalog:
  # This looks like a catalog.database.column.field pattern
- raise AnalysisException(
+ exception = AnalysisException(
  f"[UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `{original_attr_name}` cannot be resolved. "
  f"Cross-catalog column references are not supported in DataFrame API."
  )
+ attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+ raise exception

  attr_name = ".".join(name_parts)

@@ -205,18 +213,24 @@ def map_unresolved_attribute(
  if compiled_regex.fullmatch(col_name):
  matched_columns.append(col_name)
  except re.error as e:
- raise AnalysisException(f"Invalid regex pattern '{regex_pattern}': {e}")
+ exception = AnalysisException(
+ f"Invalid regex pattern '{regex_pattern}': {e}"
+ )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+ raise exception

  if not matched_columns:
  # Keep the improved error message for SQL regex patterns
  # This is only hit for SQL queries like SELECT `(e|f)` FROM table
  # when spark.sql.parser.quotedRegexColumnNames is enabled
- raise AnalysisException(
+ exception = AnalysisException(
  f"No columns match the regex pattern '{regex_pattern}'. "
  f"Snowflake SQL does not support SELECT statements with no columns. "
  f"Please ensure your regex pattern matches at least one column. "
  f"Available columns: {', '.join(available_columns[:10])}{'...' if len(available_columns) > 10 else ''}"
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception

  # When multiple columns match, we need to signal that this should expand to multiple columns
  # Since map_unresolved_attribute can only return one column, we'll use a special marker
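For context, the code path above is only reachable when Spark's quoted-regex column feature is on; spark.sql.parser.quotedRegexColumnNames is the real Spark SQL config that makes backquoted identifiers act as column regexes. A short PySpark usage sketch (table and column names hypothetical):

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
spark.conf.set("spark.sql.parser.quotedRegexColumnNames", "true")
spark.createDataFrame([(1, 2)], ["e", "f"]).createOrReplaceTempView("t")
spark.sql("SELECT `(e|f)` FROM t").show()  # the regex expands to columns e and f
# spark.sql("SELECT `(x|y)` FROM t")       # matches nothing -> the error path above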
@@ -346,16 +360,22 @@ def map_unresolved_attribute(
  )
  if outer_col_name:
  # This is an outer scope column being referenced inside a lambda
- raise AnalysisException(
+ exception = AnalysisException(
  f"Reference to non-lambda variable '{attr_name}' within lambda function. "
  f"Lambda functions can only access their own parameters. "
  f"If '{attr_name}' is a table column, it must be passed as an explicit parameter to the enclosing function."
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception

  if has_plan_id:
- raise AnalysisException(
+ exception = AnalysisException(
  f'[RESOLVED_REFERENCE_COLUMN_NOT_FOUND] The column "{attr_name}" does not exist in the target dataframe.'
  )
+ attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+ raise exception
  else:
  # Column does not exist. Pass in dummy column name for lazy error throwing as it could be a built-in function
  snowpark_name = attr_name
@@ -365,9 +385,11 @@ def map_unresolved_attribute(
  col_type = typer.type(col)[0]
  except SnowparkSQLException as e:
  if e.raw_message is not None and "invalid identifier" in e.raw_message:
- raise AnalysisException(
+ exception = AnalysisException(
  f'[COLUMN_NOT_FOUND] The column "{attr_name}" does not exist in the target dataframe.'
  )
+ attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+ raise exception
  else:
  raise
  is_struct = isinstance(col_type, StructType)
@@ -416,7 +438,9 @@ def _match_path_to_struct(path: list[str], col_type: StructType) -> list[str]:
  typ = typ.value_type if isinstance(typ, MapType) else typ.element_type
  else:
  # If the type is not a struct, map, or array, we cannot access the field.
- raise AnalysisException(
+ exception = AnalysisException(
  f"[INVALID_EXTRACT_BASE_FIELD_TYPE] Can't extract a value from \"{'.'.join(path[:i])}\". Need a complex type [STRUCT, ARRAY, MAP] but got \"{typ}\"."
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  return adjusted_path
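_match_path_to_struct walks a dotted path (a.b.c) down a Snowpark type tree, descending through value_type for maps and element_type for arrays; anything else is not a container, hence the TYPE_MISMATCH tag above. A toy sketch of the descent rule, simplified from the hunk (the real function also adjusts field-name casing, which this sketch omits):

from snowflake.snowpark.types import (
    ArrayType, MapType, StringType, StructField, StructType,
)

def descend(typ, path):
    # Walk one path segment at a time down the type tree.
    for i, part in enumerate(path):
        if isinstance(typ, StructType):
            typ = next(f.datatype for f in typ.fields if f.name == part)
        elif isinstance(typ, (MapType, ArrayType)):
            # For maps the segment is a key; for arrays, an index.
            typ = typ.value_type if isinstance(typ, MapType) else typ.element_type
        else:
            raise TypeError(
                f"Can't extract a value from {'.'.join(path[:i])}: not a complex type"
            )
    return typ

nested = StructType(
    [StructField("a", MapType(StringType(), StructType([StructField("b", StringType())])))]
)
assert isinstance(descend(nested, ["a", "some_key", "b"]), StringType)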