snowpark-connect 0.30.0__py3-none-any.whl → 0.31.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic.

Files changed (81)
  1. snowflake/snowpark_connect/column_name_handler.py +150 -25
  2. snowflake/snowpark_connect/config.py +54 -16
  3. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  4. snowflake/snowpark_connect/error/error_codes.py +50 -0
  5. snowflake/snowpark_connect/error/error_utils.py +142 -22
  6. snowflake/snowpark_connect/error/exceptions.py +13 -4
  7. snowflake/snowpark_connect/execute_plan/map_execution_command.py +5 -1
  8. snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
  9. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  10. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  11. snowflake/snowpark_connect/expression/literal.py +7 -1
  12. snowflake/snowpark_connect/expression/map_cast.py +17 -5
  13. snowflake/snowpark_connect/expression/map_expression.py +48 -4
  14. snowflake/snowpark_connect/expression/map_extension.py +25 -5
  15. snowflake/snowpark_connect/expression/map_sql_expression.py +65 -30
  16. snowflake/snowpark_connect/expression/map_udf.py +10 -2
  17. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +33 -9
  18. snowflake/snowpark_connect/expression/map_unresolved_function.py +627 -205
  19. snowflake/snowpark_connect/expression/map_unresolved_star.py +5 -1
  20. snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
  21. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  22. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
  23. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
  24. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  25. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +34 -12
  26. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  27. snowflake/snowpark_connect/relation/io_utils.py +66 -4
  28. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  29. snowflake/snowpark_connect/relation/map_column_ops.py +88 -56
  30. snowflake/snowpark_connect/relation/map_extension.py +28 -8
  31. snowflake/snowpark_connect/relation/map_join.py +21 -10
  32. snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
  33. snowflake/snowpark_connect/relation/map_relation.py +33 -7
  34. snowflake/snowpark_connect/relation/map_row_ops.py +36 -9
  35. snowflake/snowpark_connect/relation/map_sql.py +91 -24
  36. snowflake/snowpark_connect/relation/map_stats.py +25 -6
  37. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  38. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
  39. snowflake/snowpark_connect/relation/read/map_read.py +24 -3
  40. snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
  41. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  42. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
  43. snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
  44. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  45. snowflake/snowpark_connect/relation/read/map_read_table.py +15 -5
  46. snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
  47. snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
  48. snowflake/snowpark_connect/relation/stage_locator.py +5 -1
  49. snowflake/snowpark_connect/relation/utils.py +19 -2
  50. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  51. snowflake/snowpark_connect/relation/write/map_write.py +146 -63
  52. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  53. snowflake/snowpark_connect/resources_initializer.py +5 -1
  54. snowflake/snowpark_connect/server.py +72 -19
  55. snowflake/snowpark_connect/type_mapping.py +54 -17
  56. snowflake/snowpark_connect/utils/context.py +42 -1
  57. snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
  58. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  59. snowflake/snowpark_connect/utils/identifiers.py +11 -3
  60. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  61. snowflake/snowpark_connect/utils/profiling.py +25 -8
  62. snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
  63. snowflake/snowpark_connect/utils/session.py +5 -2
  64. snowflake/snowpark_connect/utils/telemetry.py +81 -18
  65. snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
  66. snowflake/snowpark_connect/utils/udf_cache.py +5 -3
  67. snowflake/snowpark_connect/utils/udf_helper.py +20 -6
  68. snowflake/snowpark_connect/utils/udf_utils.py +4 -4
  69. snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
  70. snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
  71. snowflake/snowpark_connect/version.py +1 -1
  72. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/METADATA +3 -2
  73. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/RECORD +81 -78
  74. {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-connect +0 -0
  75. {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-session +0 -0
  76. {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-submit +0 -0
  77. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/WHEEL +0 -0
  78. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE-binary +0 -0
  79. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE.txt +0 -0
  80. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/NOTICE-binary +0 -0
  81. {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/top_level.txt +0 -0
@@ -13,6 +13,10 @@ import snowflake.snowpark.functions as snowpark_fn
 from snowflake import snowpark
 from snowflake.snowpark.types import IntegerType, PandasDataFrameType, StructType
 
+# Removed error imports to avoid UDF serialization issues
+# from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+# from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+
 
 def get_map_in_arrow_udtf(
     user_function: Callable,
@@ -60,14 +64,14 @@ def get_map_in_arrow_udtf(
                 result_iterator, "__iter__"
             ):
                 raise RuntimeError(
-                    f"snowpark_connect::UDF_RETURN_TYPE Return type of the user-defined function should be "
+                    f"[snowpark_connect::type_mismatch] Return type of the user-defined function should be "
                     f"iterator of pyarrow.RecordBatch, but is {type(result_iterator).__name__}"
                 )
 
             for batch in result_iterator:
                 if not isinstance(batch, pa.RecordBatch):
                     raise RuntimeError(
-                        f"snowpark_connect::UDF_RETURN_TYPE Return type of the user-defined function should "
+                        f"[snowpark_connect::type_mismatch] Return type of the user-defined function should "
                        f"be iterator of pyarrow.RecordBatch, but is iterator of {type(batch).__name__}"
                     )
                 if batch.num_rows > 0:
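The two checks above enforce the mapInArrow contract that the wrapped user function must yield pyarrow.RecordBatch objects; the change here only rewrites the error-message prefix into the new [snowpark_connect::type_mismatch] form. A minimal sketch of a conforming user function (column names, types, and the final mapInArrow call are illustrative, not taken from the package):

    import pyarrow as pa
    import pyarrow.compute as pc

    def double_amount(batches):
        # mapInArrow contract: Iterator[pa.RecordBatch] -> Iterator[pa.RecordBatch]
        for batch in batches:
            ident = batch.column(batch.schema.get_field_index("id"))
            amount = batch.column(batch.schema.get_field_index("amount"))
            yield pa.RecordBatch.from_arrays(
                [ident, pc.multiply(amount, 2)], names=["id", "amount"]
            )

    # applied on the client as, e.g.:
    # df.mapInArrow(double_amount, schema="id long, amount double")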
@@ -121,7 +125,7 @@ def create_pandas_udtf(
                 result_iterator, "__iter__"
             ):
                 raise RuntimeError(
-                    f"snowpark_connect::UDF_RETURN_TYPE Return type of the user-defined function should be "
+                    f"[snowpark_connect::type_mismatch] Return type of the user-defined function should be "
                     f"iterator of pandas.DataFrame, but is {type(result_iterator).__name__}"
                 )
 
@@ -140,7 +144,7 @@ def create_pandas_udtf(
                     if column not in self.output_column_original_names
                 ]
                 raise RuntimeError(
-                    f"[RESULT_COLUMNS_MISMATCH_FOR_PANDAS_UDF] Column names of the returned pandas.DataFrame do not match specified schema. Missing: {', '.join(sorted(missing_columns))}. Unexpected: {', '.join(sorted(unexpected_columns))}"
+                    f"[snowpark_connect::invalid_operation] [RESULT_COLUMNS_MISMATCH_FOR_PANDAS_UDF] Column names of the returned pandas.DataFrame do not match specified schema. Missing: {', '.join(sorted(missing_columns))}. Unexpected: {', '.join(sorted(unexpected_columns))}"
                     "."
                 )
                 reordered_df = output_df[self.output_column_original_names]
@@ -10,6 +10,7 @@
 
 import cProfile
 import functools
+import inspect
 import os
 from datetime import datetime
 from typing import Any, Callable
@@ -35,13 +36,29 @@ def profile_method(method: Callable) -> Callable:
         profile_filename = f"{PROFILE_OUTPUT_DIR}/{method_name}_{timestamp}.prof"
 
         profiler = cProfile.Profile()
-        profiler.enable()
-
-        try:
-            result = method(*args, **kwargs)
-            return result
-        finally:
-            profiler.disable()
-            profiler.dump_stats(profile_filename)
+
+        if inspect.isgeneratorfunction(method):
+
+            def profiled_generator():
+                profiler.enable()
+                try:
+                    generator = method(*args, **kwargs)
+                    for item in generator:
+                        profiler.disable()
+                        yield item
+                        profiler.enable()
+                finally:
+                    profiler.disable()
+                    profiler.dump_stats(profile_filename)
+
+            return profiled_generator()
+        else:
+            profiler.enable()
+            try:
+                result = method(*args, **kwargs)
+                return result
+            finally:
+                profiler.disable()
+                profiler.dump_stats(profile_filename)
 
     return wrapper
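The new branch keeps cProfile accounting honest for generators: the profiler is paused around each yield so time spent in the consumer between items is not attributed to the profiled method, and the .prof file is only written once the generator is exhausted or closed. A hedged usage sketch, assuming the decorator is importable from snowflake.snowpark_connect.utils.profiling and that profiling output is enabled for the process (the class and the consumer loop are illustrative, not part of the package):

    import time

    from snowflake.snowpark_connect.utils.profiling import profile_method

    class ResultStreamer:  # illustrative class for the sketch only
        @profile_method
        def stream_rows(self, n: int):
            for i in range(n):
                yield i  # profiler is disabled while the caller holds control

    streamer = ResultStreamer()
    for row in streamer.stream_rows(3):
        time.sleep(0.01)  # consumer-side work, now excluded from the dumped stats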
@@ -22,6 +22,8 @@ from typing import List, Union
 
 import snowflake.snowpark.types as snowpark_type
 import snowflake.snowpark_connect.includes.python.pyspark.sql.connect.proto.types_pb2 as types_proto
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.resources_initializer import RESOURCE_PATH
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.udf_utils import (
@@ -467,7 +469,9 @@ def map_type_to_scala_type(
         case snowpark_type.VariantType:
             return "Variant"
         case _:
-            raise ValueError(f"Unsupported Snowpark type: {t}")
+            exception = ValueError(f"Unsupported Snowpark type: {t}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
 
 
 def map_type_to_snowflake_type(
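This build-tag-raise rewrite recurs at almost every raise site in the release: instead of (or in addition to) encoding a code in the message text, the exception object itself is annotated with an ErrorCodes value that the telemetry summary later reads back. A minimal sketch of the assumed helper behaviour, using a stand-in ErrorCodes enum; the real error_utils.attach_custom_error_code implementation is not shown in this diff, only that telemetry reads e.custom_error_code (see the telemetry hunk further down):

    from enum import IntEnum

    class ErrorCodes(IntEnum):  # stand-in for error_codes.ErrorCodes; value is illustrative
        UNSUPPORTED_TYPE = 1

    def attach_custom_error_code(exception, error_code):
        # Assumed behaviour: tag the exception so telemetry can report the code
        # without parsing the message text.
        exception.custom_error_code = error_code

    try:
        exc = ValueError("Unsupported Snowpark type: VectorType")
        attach_custom_error_code(exc, ErrorCodes.UNSUPPORTED_TYPE)
        raise exc
    except ValueError as e:
        print(getattr(e, "custom_error_code", None))  # ErrorCodes.UNSUPPORTED_TYPE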
@@ -533,7 +537,9 @@ def map_type_to_snowflake_type(
         case snowpark_type.VariantType:
             return "VARIANT"
         case _:
-            raise ValueError(f"Unsupported Snowpark type: {t}")
+            exception = ValueError(f"Unsupported Snowpark type: {t}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
 
 
 def cast_scala_map_args_from_given_type(
@@ -573,7 +579,9 @@ def cast_scala_map_args_from_given_type(
         case snowpark_type.TimestampType | "timestamp" | "timestamp_ntz":
             return "java.sql.Timestamp.valueOf({arg_name})"
         case _:
-            raise ValueError(f"Unsupported Snowpark type: {t}")
+            exception = ValueError(f"Unsupported Snowpark type: {t}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
 
     if (is_snowpark_type and isinstance(input_type, snowpark_type.MapType)) or (
         not is_snowpark_type and input_type.WhichOneof("kind") == "map"
@@ -11,6 +11,8 @@ from snowflake import snowpark
 from snowflake.snowpark.exceptions import SnowparkClientException
 from snowflake.snowpark.session import _get_active_session
 from snowflake.snowpark_connect.constants import DEFAULT_CONNECTION_NAME
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.describe_query_cache import (
     instrument_session_for_describe_cache,
 )
@@ -120,7 +122,6 @@ def configure_snowpark_session(session: snowpark.Session):
        "PYTHON_SNOWPARK_USE_SCOPED_TEMP_OBJECTS": "false",  # this is required for creating udfs from sproc
        "ENABLE_STRUCTURED_TYPES_IN_SNOWPARK_CONNECT_RESPONSE": "true",
        "QUERY_TAG": f"'{query_tag}'",
-       "FEATURE_INTERVAL_TYPES": "enabled",
    }
 
    session.sql(
@@ -192,7 +193,9 @@ def set_query_tags(spark_tags: Sequence[str]) -> None:
    """Sets Snowpark session query_tag value to the tag from the Spark request."""
 
    if any("," in tag for tag in spark_tags):
-       raise ValueError("Tags cannot contain ','.")
+       exception = ValueError("Tags cannot contain ','.")
+       attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+       raise exception
 
    # TODO: Tags might not be set correctly in parallel workloads or multi-threaded code.
    snowpark_session = get_or_create_snowpark_session()
@@ -11,8 +11,8 @@ from abc import ABC, abstractmethod
 from collections import defaultdict
 from collections.abc import Iterable
 from contextvars import ContextVar
+from dataclasses import dataclass
 from enum import Enum, unique
-from typing import Dict
 
 import google.protobuf.message
 import pyspark.sql.connect.proto.base_pb2 as proto_base
@@ -27,6 +27,7 @@ from snowflake.connector.time_util import get_time_millis
 from snowflake.snowpark import Session
 from snowflake.snowpark._internal.utils import get_os_name, get_python_version
 from snowflake.snowpark.version import VERSION as snowpark_version
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.version import VERSION as sas_version
 
@@ -57,6 +58,7 @@ class TelemetryType(Enum):
 
 class EventType(Enum):
     SERVER_STARTED = "scos_server_started"
+    WARNING = "scos_warning"
 
 
 # global labels
@@ -106,7 +108,16 @@ REDACTED_PLAN_SUFFIXES = [
 ]
 
 
-def _basic_telemetry_data() -> Dict:
+@dataclass
+class TelemetryMessage:
+    """Container for telemetry messages in the processing queue."""
+
+    message: dict
+    timestamp: int
+    is_warning: bool
+
+
+def _basic_telemetry_data() -> dict:
     return {
         **STATIC_TELEMETRY_DATA,
         TelemetryField.KEY_EVENT_ID.value: str(uuid.uuid4()),
@@ -123,9 +134,11 @@ def safe(func):
     def wrap(*args, **kwargs):
         try:
             func(*args, **kwargs)
-        except Exception:
-            # We don't really care if telemetry fails, just want to be safe for the user
-            logger.warning(f"Telemetry operation failed: {func}", exc_info=True)
+        except Exception as e:
+            # report failed operation to telemetry
+            telemetry.send_warning_msg(
+                f"Telemetry operation {func} failed due to exception", e
+            )
 
     return wrap
 
@@ -334,6 +347,11 @@ class Telemetry:
            summary["error_message"] = str(e)
            summary["error_type"] = type(e).__name__
 
+           if not hasattr(e, "custom_error_code") or (e.custom_error_code is None):
+               summary["error_code"] = ErrorCodes.INTERNAL_ERROR
+           else:
+               summary["error_code"] = e.custom_error_code
+
            error_location = _error_location(e)
            if error_location:
                summary["error_location"] = error_location
@@ -528,8 +546,8 @@
     @safe
     def send_request_summary_telemetry(self):
         if self._not_in_request():
-            logger.warning(
-                "Truing to send request summary telemetry without initializing it"
+            self.send_warning_msg(
+                "Trying to send request summary telemetry without initializing it"
             )
             return
 
@@ -541,14 +559,56 @@
        }
        self._send(message)
 
-    def _send(self, msg: Dict) -> None:
+    def send_warning_msg(self, msg: str, e: Exception = None) -> None:
+        # using this within @safe decorator may result in recursive loop
+        try:
+            message = self._build_warning_message(msg, e)
+            if not message:
+                return
+
+            self._send(message, is_warning=True)
+        except Exception:
+            # if there's an exception here, there's nothing we can really do about it
+            pass
+
+    def _build_warning_message(self, warning_msg: str, e: Exception = None) -> dict:
+        try:
+            data = {"warning_message": warning_msg}
+            if e is not None:
+                data["exception"] = repr(e)
+
+            # add session and operation id if available
+            spark_session_id = self._request_summary.get().get("spark_session_id", None)
+            if spark_session_id is not None:
+                data["spark_session_id"] = spark_session_id
+
+            spark_operation_id = self._request_summary.get().get(
+                "spark_operation_id", None
+            )
+            if spark_operation_id is not None:
+                data["spark_operation_id"] = spark_operation_id
+
+            message = {
+                **_basic_telemetry_data(),
+                TelemetryField.KEY_TYPE.value: TelemetryType.TYPE_EVENT.value,
+                TelemetryType.EVENT_TYPE.value: EventType.WARNING.value,
+                TelemetryField.KEY_DATA.value: data,
+            }
+            return message
+        except Exception:
+            return {}
+
+    def _send(self, msg: dict, is_warning: bool = False) -> None:
        """Queue a telemetry message for asynchronous processing."""
        if not self._is_enabled:
            return
 
        timestamp = get_time_millis()
        try:
-           self._message_queue.put_nowait((msg, timestamp))
+           telemetry_msg = TelemetryMessage(
+               message=msg, timestamp=timestamp, is_warning=is_warning
+           )
+           self._message_queue.put_nowait(telemetry_msg)
        except queue.Full:
            # If queue is full, drop the message to avoid blocking
            logger.warning("Telemetry queue is full, dropping message")
@@ -566,13 +626,16 @@
         while True:
             try:
                 # block to allow the GIL to switch threads
-                message, timestamp = self._message_queue.get()
-                if timestamp is None and message is None:
-                    # shutdown
+                telemetry_msg = self._message_queue.get()
+                if telemetry_msg is None:
+                    # shutdown signal
                     break
-                self._sink.add_telemetry_data(message, timestamp)
-            except Exception:
-                logger.warning("Failed to add telemetry message to sink", exc_info=True)
+                self._sink.add_telemetry_data(
+                    telemetry_msg.message, telemetry_msg.timestamp
+                )
+            except Exception as e:
+                if not telemetry_msg.is_warning:
+                    self.send_warning_msg("Failed to add telemetry message to sink", e)
             finally:
                 self._message_queue.task_done()
 
@@ -585,7 +648,7 @@
            return
 
        try:
-           self._message_queue.put_nowait((None, None))
+           self._message_queue.put_nowait(None)
            # Wait for worker thread to finish
            self._worker_thread.join(timeout=3.0)
        except Exception:
@@ -594,7 +657,7 @@
            )
 
 
-def _error_location(e: Exception) -> Dict | None:
+def _error_location(e: Exception) -> dict | None:
    """
    Inspect the exception traceback and extract the file name, line number, and function name
    from the last frame (the one that raised the exception).
@@ -675,7 +738,7 @@ def _protobuf_to_json_with_redaction(
    """Recursively convert protobuf message to dict"""
 
    if not isinstance(msg, google.protobuf.message.Message):
-       logger.warning("Expected a protobuf message, got: %s", type(msg))
+       telemetry.send_warning_msg(f"Expected a protobuf message, got: {type(msg)}")
        return {}
 
    result = {}
@@ -7,6 +7,8 @@ from typing import Optional, Tuple
 from pyspark.errors import AnalysisException
 
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
 from snowflake.snowpark_connect.utils.context import get_session_id
 
@@ -22,9 +24,11 @@ def register_temp_view(name: str, df: DataFrameContainer, replace: bool) -> None
                 _temp_views.remove(key)
                 break
         else:
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{name}` because it already exists."
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception
 
     _temp_views[(name, current_session_id)] = df
 
@@ -118,7 +118,7 @@ def cached_udaf(
 
     if class_type is None:
         raise ValueError(
-            "Type must be provided for cached_udaf. UDAF contains multiple functions hence it has to be represented by a type. Functions are not supported."
+            "[snowpark_connect::internal_error] Type must be provided for cached_udaf. UDAF contains multiple functions hence it has to be represented by a type. Functions are not supported."
         )
     else:
         # return udaf
@@ -379,7 +379,9 @@
 
     with zipfile.ZipFile(zip_path, "r") as zip_ref:
         if jar_path_in_zip not in zip_ref.namelist():
-            raise FileNotFoundError(f"{jar_path_in_zip} not found")
+            raise FileNotFoundError(
+                f"[snowpark_connect::invalid_input] {jar_path_in_zip} not found"
+            )
         zip_ref.extract(jar_path_in_zip, temp_dir)
 
     jar_path = f"{temp_dir}/{jar_path_in_zip}"
@@ -388,7 +390,7 @@
 
     if upload_result[0].status != "UPLOADED":
         raise RuntimeError(
-            f"Failed to upload JAR with UDF definitions to stage: {upload_result[0].message}"
+            f"[snowpark_connect::internal_error] Failed to upload JAR with UDF definitions to stage: {upload_result[0].message}"
        )
 
     udf_is_cached = function_name in cache
@@ -18,6 +18,8 @@ from snowflake.snowpark import Column, Session
 from snowflake.snowpark.types import DataType, _parse_datatype_json_value
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.map_expression import (
     map_single_column_expression,
 )
@@ -233,19 +235,25 @@ def _check_supported_udf(
 
             session = get_or_create_snowpark_session()
             if udf_proto.java_udf.class_name not in session._cached_java_udfs:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Can not load class {udf_proto.java_udf.class_name}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                raise exception
             else:
-                raise ValueError(
+                exception = ValueError(
                     "Function type java_udf not supported for common inline user-defined function"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
         case "scalar_scala_udf":
             pass
         case _ as function_type:
-            raise ValueError(
+            exception = ValueError(
                 f"Function type {function_type} not supported for common inline user-defined function"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
 
 def _aggregate_function_check(
@@ -253,9 +261,11 @@
 ):
     name, is_aggregate_function = get_is_aggregate_function()
     if not udf_proto.deterministic and name != "default" and is_aggregate_function:
-        raise AnalysisException(
+        exception = AnalysisException(
             f"[AGGREGATE_FUNCTION_WITH_NONDETERMINISTIC_EXPRESSION] Non-deterministic expression {name}({udf_proto.function_name}) should not appear in the arguments of an aggregate function."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
 
 
 def _join_checks(snowpark_udf_arg_names: list[str]):
@@ -282,23 +292,27 @@ def _join_checks(snowpark_udf_arg_names: list[str]):
         and is_left_evaluable
         and is_right_evaluable
     ):
-        raise AnalysisException(
+        exception = AnalysisException(
             f"Detected implicit cartesian product for {is_evaluating_join_condition[0]} join between logical plans. \n"
             f"Join condition is missing or trivial. \n"
             f"Either: use the CROSS JOIN syntax to allow cartesian products between those relations, or; "
             f"enable implicit cartesian products by setting the configuration variable spark.sql.crossJoin.enabled=True."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
     if (
         is_evaluating_join_condition[0] != "INNER"
         and is_evaluating_join_condition[1]
        and is_left_evaluable
        and is_right_evaluable
    ):
-        raise AnalysisException(
+        exception = AnalysisException(
            f"[UNSUPPORTED_FEATURE.PYTHON_UDF_IN_ON_CLAUSE] The feature is not supported: "
            f"Python UDF in the ON clause of a {is_evaluating_join_condition[0]} JOIN. "
            f"In case of an INNNER JOIN consider rewriting to a CROSS JOIN with a WHERE clause."
        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
 
 def infer_snowpark_arguments(
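Both join checks reproduce Spark's own analysis errors (implicit cartesian products, and Python UDFs in the ON clause of non-inner joins); the release only attaches error codes to them. A hypothetical client-side snippet that would trip the second check, assuming an active session named spark (data and column names are illustrative):

    from pyspark.sql import functions as F

    left = spark.createDataFrame([(1, "a")], ["k", "v"])
    right = spark.createDataFrame([(1, "w")], ["k", "u"])

    # A Python UDF as the ON condition of an outer join is rejected with
    # [UNSUPPORTED_FEATURE.PYTHON_UDF_IN_ON_CLAUSE], now tagged ErrorCodes.UNSUPPORTED_OPERATION.
    same_key = F.udf(lambda a, b: a == b, "boolean")
    left.join(right, on=same_key(left["k"], right["k"]), how="left")  # raises AnalysisException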
@@ -103,7 +103,7 @@ class ProcessCommonInlineUserDefinedFunction:
                 )
             case _:
                 raise ValueError(
-                    f"Function type {self._function_type} not supported for common inline user-defined function"
+                    f"[snowpark_connect::unsupported_operation] Function type {self._function_type} not supported for common inline user-defined function"
                 )
 
     @property
@@ -112,7 +112,7 @@ class ProcessCommonInlineUserDefinedFunction:
             return self._snowpark_udf_args
         else:
             raise ValueError(
-                "Column mapping is not provided, cannot get snowpark udf args"
+                "[snowpark_connect::internal_error] Column mapping is not provided, cannot get snowpark udf args"
             )
 
     @property
@@ -121,7 +121,7 @@ class ProcessCommonInlineUserDefinedFunction:
             return self._snowpark_udf_arg_names
         else:
             raise ValueError(
-                "Column mapping is not provided, cannot get snowpark udf arg names"
+                "[snowpark_connect::internal_error] Column mapping is not provided, cannot get snowpark udf arg names"
             )
 
     def _create_python_udf(self):
@@ -291,5 +291,5 @@ class ProcessCommonInlineUserDefinedFunction:
                 return create_scala_udf(self)
             case _:
                 raise ValueError(
-                    f"Function type {self._function_type} not supported for common inline user-defined function"
+                    f"[snowpark_connect::unsupported_operation] Function type {self._function_type} not supported for common inline user-defined function"
                 )
@@ -16,6 +16,8 @@ import snowflake.snowpark_connect.tcm as tcm
 from snowflake import snowpark
 from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
 from snowflake.snowpark.types import DataType, StructType, _parse_datatype_json_value
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.type_mapping import proto_to_snowpark_type
 from snowflake.snowpark_connect.utils import pandas_udtf_utils, udtf_utils
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
@@ -37,7 +39,9 @@ def udtf_check(
     udtf_proto: relation_proto.CommonInlineUserDefinedTableFunction,
 ) -> None:
     if udtf_proto.WhichOneof("function") != "python_udtf":
-        raise ValueError(f"Not python udtf {udtf_proto.function}")
+        exception = ValueError(f"Not python udtf {udtf_proto.function}")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
 
 def require_creating_udtf_in_sproc(