snowpark-connect 0.20.2__py3-none-any.whl → 0.22.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (84)
  1. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +3 -2
  2. snowflake/snowpark_connect/column_name_handler.py +6 -65
  3. snowflake/snowpark_connect/config.py +47 -17
  4. snowflake/snowpark_connect/dataframe_container.py +242 -0
  5. snowflake/snowpark_connect/error/error_utils.py +25 -0
  6. snowflake/snowpark_connect/execute_plan/map_execution_command.py +13 -23
  7. snowflake/snowpark_connect/execute_plan/map_execution_root.py +9 -5
  8. snowflake/snowpark_connect/expression/map_extension.py +2 -1
  9. snowflake/snowpark_connect/expression/map_udf.py +4 -4
  10. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +8 -7
  11. snowflake/snowpark_connect/expression/map_unresolved_function.py +481 -170
  12. snowflake/snowpark_connect/expression/map_unresolved_star.py +8 -8
  13. snowflake/snowpark_connect/expression/map_update_fields.py +1 -1
  14. snowflake/snowpark_connect/expression/typer.py +6 -6
  15. snowflake/snowpark_connect/proto/control_pb2.py +17 -16
  16. snowflake/snowpark_connect/proto/control_pb2.pyi +17 -17
  17. snowflake/snowpark_connect/proto/control_pb2_grpc.py +12 -63
  18. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +15 -14
  19. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +19 -14
  20. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
  21. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +27 -26
  22. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +74 -68
  23. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
  24. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +5 -5
  25. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +25 -17
  26. snowflake/snowpark_connect/relation/map_aggregate.py +170 -61
  27. snowflake/snowpark_connect/relation/map_catalog.py +2 -2
  28. snowflake/snowpark_connect/relation/map_column_ops.py +227 -145
  29. snowflake/snowpark_connect/relation/map_crosstab.py +25 -6
  30. snowflake/snowpark_connect/relation/map_extension.py +81 -56
  31. snowflake/snowpark_connect/relation/map_join.py +72 -63
  32. snowflake/snowpark_connect/relation/map_local_relation.py +35 -20
  33. snowflake/snowpark_connect/relation/map_map_partitions.py +24 -17
  34. snowflake/snowpark_connect/relation/map_relation.py +22 -16
  35. snowflake/snowpark_connect/relation/map_row_ops.py +232 -146
  36. snowflake/snowpark_connect/relation/map_sample_by.py +15 -8
  37. snowflake/snowpark_connect/relation/map_show_string.py +42 -5
  38. snowflake/snowpark_connect/relation/map_sql.py +141 -237
  39. snowflake/snowpark_connect/relation/map_stats.py +88 -39
  40. snowflake/snowpark_connect/relation/map_subquery_alias.py +13 -14
  41. snowflake/snowpark_connect/relation/map_udtf.py +10 -13
  42. snowflake/snowpark_connect/relation/read/map_read.py +8 -3
  43. snowflake/snowpark_connect/relation/read/map_read_csv.py +7 -7
  44. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +7 -7
  45. snowflake/snowpark_connect/relation/read/map_read_json.py +19 -8
  46. snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -7
  47. snowflake/snowpark_connect/relation/read/map_read_socket.py +7 -3
  48. snowflake/snowpark_connect/relation/read/map_read_table.py +25 -16
  49. snowflake/snowpark_connect/relation/read/map_read_text.py +7 -7
  50. snowflake/snowpark_connect/relation/read/reader_config.py +1 -0
  51. snowflake/snowpark_connect/relation/utils.py +11 -5
  52. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +15 -12
  53. snowflake/snowpark_connect/relation/write/map_write.py +259 -56
  54. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +3 -2
  55. snowflake/snowpark_connect/server.py +43 -4
  56. snowflake/snowpark_connect/type_mapping.py +6 -23
  57. snowflake/snowpark_connect/utils/cache.py +27 -22
  58. snowflake/snowpark_connect/utils/context.py +33 -17
  59. snowflake/snowpark_connect/utils/describe_query_cache.py +2 -9
  60. snowflake/snowpark_connect/utils/{attribute_handling.py → identifiers.py} +47 -0
  61. snowflake/snowpark_connect/utils/session.py +41 -38
  62. snowflake/snowpark_connect/utils/telemetry.py +214 -63
  63. snowflake/snowpark_connect/utils/udxf_import_utils.py +14 -0
  64. snowflake/snowpark_connect/version.py +1 -1
  65. snowflake/snowpark_decoder/__init__.py +0 -0
  66. snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.py +36 -0
  67. snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.pyi +156 -0
  68. snowflake/snowpark_decoder/dp_session.py +111 -0
  69. snowflake/snowpark_decoder/spark_decoder.py +76 -0
  70. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/METADATA +6 -4
  71. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/RECORD +83 -69
  72. snowpark_connect-0.22.1.dist-info/licenses/LICENSE-binary +568 -0
  73. snowpark_connect-0.22.1.dist-info/licenses/NOTICE-binary +1533 -0
  74. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/top_level.txt +1 -0
  75. spark/__init__.py +0 -0
  76. spark/connect/__init__.py +0 -0
  77. spark/connect/envelope_pb2.py +31 -0
  78. spark/connect/envelope_pb2.pyi +46 -0
  79. snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
  80. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-connect +0 -0
  81. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-session +0 -0
  82. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-submit +0 -0
  83. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/WHEEL +0 -0
  84. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/licenses/LICENSE.txt +0 -0

snowflake/snowpark_connect/analyze_plan/map_tree_string.py

@@ -15,8 +15,9 @@ def map_tree_string(
 ) -> proto_base.AnalyzePlanResponse:
     # TODO: tracking the difference with pyspark in SNOW-1853347
     tree_string = request.tree_string
-    snowpark_df = map_relation(tree_string.plan.root)
-    column_map = snowpark_df._column_map
+    snowpark_df_container = map_relation(tree_string.plan.root)
+    snowpark_df = snowpark_df_container.dataframe
+    column_map = snowpark_df_container.column_map
 
     snowpark_tree_string = snowpark_df._format_schema(
         level=tree_string.level if tree_string.HasField("level") else None,
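
For context on this change: map_relation previously returned a Snowpark DataFrame carrying a private _column_map attribute; it now returns a DataFrameContainer (see the new dataframe_container.py below), and callers unpack the pieces explicitly. A minimal sketch of the new calling convention (illustrative only; the plan variable and names are assumed):

    container = map_relation(plan.root)     # DataFrameContainer
    snowpark_df = container.dataframe       # snowpark.DataFrame (still gets _column_map re-attached for legacy code paths)
    column_map = container.column_map       # ColumnNameMap tracking Spark <-> Snowpark column names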

snowflake/snowpark_connect/column_name_handler.py

@@ -12,14 +12,13 @@ from functools import cached_property
 
 from pyspark.errors.exceptions.base import AnalysisException
 
-from snowflake import snowpark
 from snowflake.snowpark import DataFrame
 from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     quote_name_without_upper_casing,
     unquote_if_quoted,
 )
 from snowflake.snowpark._internal.utils import quote_name
-from snowflake.snowpark.types import DataType, StructField, StructType
+from snowflake.snowpark.types import StructType
 from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.utils.context import get_current_operation_scope
 

@@ -41,64 +40,6 @@ def set_schema_getter(df: DataFrame, get_schema: Callable[[], StructType]) -> None:
     df.__class__ = PatchedDataFrame
 
 
-def with_column_map(
-    result_df: snowpark.DataFrame,
-    spark_column_names: list[str],
-    snowpark_column_names: list[str],
-    snowpark_column_types: list[DataType] = None,
-    column_metadata: dict | None = None,
-    column_qualifiers: list[list[str]] | None = None,
-    parent_column_name_map: ColumnNameMap | None = None,
-) -> snowpark.DataFrame:
-    """
-    Build a mapping from the DataFrame's column names to the Spark column names.
-
-    This is used to track the original column names and handle column naming differences
-    between Spark and Snowpark.
-
-    The elements in result_df.columns and the elements in spark_column_names must be a one-to-one mapping.
-
-    Args:
-        result_df (snowpark.DataFrame): The DataFrame to map.
-        spark_column_names (list[str]): The Spark column names.
-        snowpark_column_names (list[str]): The Snowpark column names.
-        snowpark_column_types (list[DataType], optional): The Snowpark column types. **if provided df.schema will be overridden with inferred schema**
-        column_metadata (dict, optional): Metadata for the columns.
-        column_qualifiers (list[list[str]], optional): Qualifiers for the columns, used to handle table aliases or DataFrame aliases.
-        parent_column_name_map (ColumnNameMap, optional): A ColumnNameMap, that came from the dataframe used to create result_df (parent df)
-
-    Returns:
-        snowpark.DataFrame: The mapped DataFrame.
-    """
-    assert len(snowpark_column_names) == len(
-        spark_column_names
-    ), "Number of Spark column names must match number of columns in DataFrame"
-    result_df._column_map = ColumnNameMap(
-        spark_column_names,
-        snowpark_column_names,
-        column_metadata=column_metadata,
-        column_qualifiers=column_qualifiers,
-        parent_column_name_map=parent_column_name_map,
-    )
-    result_df._table_name = None
-
-    if snowpark_column_types is not None:
-        assert len(snowpark_column_names) == len(
-            snowpark_column_types
-        ), "Number of Snowpark column names and types must match"
-
-        set_schema_getter(
-            result_df,
-            lambda: StructType(
-                [
-                    StructField(n, t, _is_column=False)
-                    for n, t in zip(snowpark_column_names, snowpark_column_types)
-                ]
-            ),
-        )
-    return result_df
-
-
 def make_column_names_snowpark_compatible(
     names: list[str], plan_id: int, offset: int = 0
 ) -> list[str]:

@@ -189,7 +130,7 @@ class ColumnNameMap:
             column_qualifiers: Optional qualifiers for the columns, used to handle table aliases or DataFrame aliases.
             parent_column_name_map: parent ColumnNameMap
         """
-        self.columns = []
+        self.columns: list[ColumnNames] = []
         self.spark_to_col = defaultdict(list)
         self.uppercase_spark_to_col = defaultdict(list)
         self.snowpark_to_col = defaultdict(list)

@@ -602,11 +543,11 @@ class ColumnNameMap:
 class JoinColumnNameMap(ColumnNameMap):
     def __init__(
         self,
-        left_input: snowpark.DataFrame,
-        right_input: snowpark.DataFrame,
+        left_colmap: ColumnNameMap,
+        right_colmap: ColumnNameMap,
     ) -> None:
-        self.left_column_mapping: ColumnNameMap = left_input._column_map
-        self.right_column_mapping: ColumnNameMap = right_input._column_map
+        self.left_column_mapping: ColumnNameMap = left_colmap
+        self.right_column_mapping: ColumnNameMap = right_colmap
 
     def get_snowpark_column_name_from_spark_column_name(
         self,
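
The JoinColumnNameMap change above follows the same container refactor: the join mapping is now built from the two sides' ColumnNameMap objects instead of reaching into DataFrame attributes. A hedged sketch of the new call site (left_container and right_container are assumed DataFrameContainer instances):

    join_map = JoinColumnNameMap(
        left_container.column_map,    # previously: left_df, a snowpark.DataFrame with _column_map
        right_container.column_map,   # previously: right_df
    )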

snowflake/snowpark_connect/config.py

@@ -9,7 +9,7 @@ import re
 import sys
 import time
 from collections import defaultdict
-from copy import copy
+from copy import copy, deepcopy
 from typing import Any
 
 import jpype

@@ -33,7 +33,7 @@ from snowflake.snowpark_connect.version import VERSION as sas_version
 
 
 def str_to_bool(boolean_str: str) -> bool:
-    assert boolean_str in [
+    assert boolean_str in (
         "True",
         "true",
         "False",

@@ -41,7 +41,7 @@ def str_to_bool(boolean_str: str) -> bool:
         "1",
         "0",
         "",  # This is the default value, equivalent to False.
-    ], f"Invalid boolean value: {boolean_str}"
+    ), f"Invalid boolean value: {boolean_str}"
     return boolean_str in ["True", "true", "1"]
 
 

@@ -131,6 +131,7 @@ class GlobalConfig:
         "spark.sql.caseSensitive": "false",
         "spark.sql.mapKeyDedupPolicy": "EXCEPTION",
         "spark.sql.ansi.enabled": "false",
+        "spark.sql.legacy.allowHashOnMapType": "false",
         "spark.sql.sources.default": "parquet",
         "spark.Catalog.databaseFilterInformationSchema": "false",
         "spark.sql.parser.quotedRegexColumnNames": "false",

@@ -145,6 +146,7 @@
         "spark.sql.crossJoin.enabled",
         "spark.sql.caseSensitive",
         "spark.sql.ansi.enabled",
+        "spark.sql.legacy.allowHashOnMapType",
         "spark.Catalog.databaseFilterInformationSchema",
         "spark.sql.parser.quotedRegexColumnNames",
     ]

@@ -166,6 +168,9 @@
         "snowpark.connect.udf.packages": lambda session, packages: session.add_packages(
             *packages.strip("[] ").split(",")
         ),
+        "snowpark.connect.udf.imports": lambda session, imports: parse_imports(
+            session, imports
+        ),
     }
 
     float_config_list = []
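
The new "snowpark.connect.udf.imports" handler mirrors the existing udf.packages handler: a bracketed, comma-separated string is split and each entry is registered on the session. An illustrative sketch (the file paths are made up; setting the key through the Spark conf interface is assumed, as with snowpark.connect.udf.packages):

    spark.conf.set("snowpark.connect.udf.imports", "[/tmp/helpers.py,/tmp/udf_deps.zip]")
    # which parse_imports resolves to roughly:
    #   session.add_import("/tmp/helpers.py")
    #   session.add_import("/tmp/udf_deps.zip")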

snowflake/snowpark_connect/config.py (continued)

@@ -250,10 +255,10 @@ SESSION_CONFIG_KEY_WHITELIST = {
     "spark.sql.tvf.allowMultipleTableArguments.enabled",
     "snowpark.connect.sql.passthrough",
     "snowpark.connect.iceberg.external_volume",
-    "snowpark.connect.auto-uppercase.ddl",
-    "snowpark.connect.auto-uppercase.dml",
+    "snowpark.connect.sql.identifiers.auto-uppercase",
     "snowpark.connect.udtf.compatibility_mode",
     "snowpark.connect.views.duplicate_column_names_handling_mode",
+    "enable_snowflake_extension_behavior",
 }
 AZURE_SAS_KEY = re.compile(
     r"^fs\.azure\.sas\.[^\.]+\.[^\.]+\.blob\.core\.windows\.net$"

@@ -271,17 +276,17 @@ class SessionConfig:
     """This class contains the session configuration for the Spark Server."""
 
     default_session_config = {
-        "snowpark.connect.auto-uppercase.ddl": "true",
-        "snowpark.connect.auto-uppercase.dml": "true",
+        "snowpark.connect.sql.identifiers.auto-uppercase": "all_except_columns",
         "snowpark.connect.sql.passthrough": "false",
         "snowpark.connect.udtf.compatibility_mode": "false",
         "snowpark.connect.views.duplicate_column_names_handling_mode": "rename",
         "spark.sql.execution.pythonUDTF.arrow.enabled": "false",
         "spark.sql.tvf.allowMultipleTableArguments.enabled": "true",
+        "enable_snowflake_extension_behavior": "false",
     }
 
     def __init__(self) -> None:
-        self.config = copy(self.default_session_config)
+        self.config = deepcopy(self.default_session_config)
 
     def __getitem__(self, item: str) -> str:
         return self.get(item)

@@ -304,7 +309,13 @@ CONFIG_ALLOWED_VALUES: dict[str, tuple] = {
         "rename",
         "fail",
         "drop",
-    )
+    ),
+    "snowpark.connect.sql.identifiers.auto-uppercase": (
+        "all_except_columns",
+        "only_columns",
+        "all",
+        "none",
+    ),
 }
 
 # Set some default configuration that are necessary for the driver.

@@ -324,7 +335,7 @@ def route_config_proto(
     match op_type:
         case "set":
             logger.info("SET")
-
+            telemetry.report_config_set(config.operation.set.pairs)
             for pair in config.operation.set.pairs:
                 # Check if the value field is present, not present when invalid fields are set in conf.
                 if not pair.HasField("value"):

@@ -334,7 +345,6 @@
                         f"Cannot set config '{pair.key}' to None"
                     )
 
-                telemetry.report_config_set(pair.key, pair.value)
                 set_config_param(
                     config.session_id, pair.key, pair.value, snowpark_session
                 )

@@ -342,14 +352,15 @@
             return proto_base.ConfigResponse(session_id=config.session_id)
         case "unset":
             logger.info("UNSET")
+            telemetry.report_config_unset(config.operation.unset.keys)
             for key in config.operation.unset.keys:
-                telemetry.report_config_unset(key)
                 unset_config_param(config.session_id, key, snowpark_session)
 
             return proto_base.ConfigResponse(session_id=config.session_id)
         case "get":
             logger.info("GET")
             res = proto_base.ConfigResponse(session_id=config.session_id)
+            telemetry.report_config_get(config.operation.get.keys)
             for key in config.operation.get.keys:
                 pair = res.pairs.add()
                 pair.key = key

@@ -359,6 +370,9 @@
             return res
         case "get_with_default":
             logger.info("GET_WITH_DEFAULT")
+            telemetry.report_config_get(
+                [pair.key for pair in config.operation.get_with_default.pairs]
+            )
             result_pairs = [
                 proto_base.KeyValue(
                     key=pair.key,

@@ -375,6 +389,7 @@
         case "get_option":
             logger.info("GET_OPTION")
             res = proto_base.ConfigResponse(session_id=config.session_id)
+            telemetry.report_config_get(config.operation.get_option.keys)
             for key in config.operation.get_option.keys:
                 pair = res.pairs.add()
                 pair.key = key

@@ -403,6 +418,7 @@
         case "is_modifiable":
             logger.info("IS_MODIFIABLE")
             res = proto_base.ConfigResponse(session_id=config.session_id)
+            telemetry.report_config_get(config.operation.is_modifiable.keys)
             for key in config.operation.is_modifiable.keys:
                 pair = res.pairs.add()
                 pair.key = key

@@ -533,7 +549,7 @@ def set_snowflake_parameters(
         value = global_config.default_static_global_config.get(key)
 
     snowpark_name = quote_name_without_upper_casing(value)
-    if auto_uppercase_ddl():
+    if auto_uppercase_non_column_identifiers():
         snowpark_name = snowpark_name.upper()
 
     # Create the schema on demand. Before creating it, however,

@@ -568,9 +584,23 @@ def get_boolean_session_config_param(name: str) -> bool:
     return str_to_bool(session_config[name])
 
 
-def auto_uppercase_dml() -> bool:
-    return get_boolean_session_config_param("snowpark.connect.auto-uppercase.dml")
+def auto_uppercase_column_identifiers() -> bool:
+    session_config = sessions_config[get_session_id()]
+    return session_config[
+        "snowpark.connect.sql.identifiers.auto-uppercase"
+    ].lower() in ("all", "only_columns")
+
+
+def auto_uppercase_non_column_identifiers() -> bool:
+    session_config = sessions_config[get_session_id()]
+    return session_config[
+        "snowpark.connect.sql.identifiers.auto-uppercase"
+    ].lower() in ("all", "all_except_columns")
+
 
+def parse_imports(session: snowpark.Session, imports: str | None) -> None:
+    if not imports:
+        return
 
-def auto_uppercase_ddl() -> bool:
-    return get_boolean_session_config_param("snowpark.connect.auto-uppercase.ddl")
+    for udf_import in imports.strip("[] ").split(","):
+        session.add_import(udf_import)
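
Taken together, the config changes replace the two boolean keys snowpark.connect.auto-uppercase.ddl / .dml with a single enum-valued key whose allowed values are listed in CONFIG_ALLOWED_VALUES above, and the two helper predicates split that value into column and non-column behavior. A hedged summary of how the value maps to the helpers (the spark handle is assumed):

    spark.conf.set("snowpark.connect.sql.identifiers.auto-uppercase", "all_except_columns")

    # value                  auto_uppercase_column_identifiers()   auto_uppercase_non_column_identifiers()
    # "all"                  True                                  True
    # "all_except_columns"   False                                 True   (new default)
    # "only_columns"         True                                  False
    # "none"                 False                                 False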

snowflake/snowpark_connect/dataframe_container.py (new file)

@@ -0,0 +1,242 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Callable
+
+from snowflake import snowpark
+from snowflake.snowpark.types import StructField, StructType
+
+if TYPE_CHECKING:
+    from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+
+
+class DataFrameContainer:
+    """
+    A container class that wraps a Snowpark DataFrame along with additional metadata.
+
+    This class provides a unified interface for managing Snowpark DataFrames along with
+    their column mappings, schema information, and metadata.
+    """
+
+    def __init__(
+        self,
+        dataframe: snowpark.DataFrame,
+        column_map: ColumnNameMap | None = None,
+        table_name: str | None = None,
+        alias: str | None = None,
+        cached_schema_getter: Callable[[], StructType] | None = None,
+    ) -> None:
+        """
+        Initialize a new DataFrameContainer.
+
+        Args:
+            dataframe: The underlying Snowpark DataFrame
+            column_map: Optional column name mapping
+            table_name: Optional table name for the DataFrame
+            alias: Optional alias for the DataFrame
+            cached_schema_getter: Optional function to get cached schema
+        """
+        self._dataframe = dataframe
+        self._column_map = self._create_default_column_map(column_map)
+        self._table_name = table_name
+        self._alias = alias
+
+        if cached_schema_getter is not None:
+            self._apply_cached_schema_getter(cached_schema_getter)
+
+    @classmethod
+    def create_with_column_mapping(
+        cls,
+        dataframe: snowpark.DataFrame,
+        spark_column_names: list[str],
+        snowpark_column_names: list[str],
+        snowpark_column_types: list | None = None,
+        column_metadata: dict | None = None,
+        column_qualifiers: list[list[str]] | None = None,
+        parent_column_name_map: ColumnNameMap | None = None,
+        table_name: str | None = None,
+        alias: str | None = None,
+        cached_schema_getter: Callable[[], StructType] | None = None,
+    ) -> DataFrameContainer:
+        """
+        Create a new container with complete column mapping configuration.
+
+        Args:
+            dataframe: The underlying Snowpark DataFrame
+            spark_column_names: List of Spark column names
+            snowpark_column_names: List of corresponding Snowpark column names
+            snowpark_column_types: Optional list of column types
+            column_metadata: Optional metadata dictionary
+            column_qualifiers: Optional column qualifiers
+            parent_column_name_map: Optional parent column name map
+            table_name: Optional table name
+            alias: Optional alias
+            cached_schema_getter: Optional function to get cached schema
+
+        Returns:
+            A new DataFrameContainer instance
+
+        Raises:
+            AssertionError: If column names and types don't match expected lengths
+        """
+        # Validate inputs
+        cls._validate_column_mapping_inputs(
+            spark_column_names, snowpark_column_names, snowpark_column_types
+        )
+
+        column_map = cls._create_column_map(
+            spark_column_names,
+            snowpark_column_names,
+            column_metadata,
+            column_qualifiers,
+            parent_column_name_map,
+        )
+
+        # Determine the schema getter to use
+        final_schema_getter = None
+
+        if cached_schema_getter is not None:
+            # Use the provided schema getter
+            final_schema_getter = cached_schema_getter
+        elif snowpark_column_types is not None:
+            # Create schema from types and wrap in function
+            schema = cls._create_schema_from_types(
+                snowpark_column_names, snowpark_column_types
+            )
+            if schema is not None:
+
+                def get_schema():
+                    return schema
+
+                final_schema_getter = get_schema
+
+        return cls(
+            dataframe=dataframe,
+            column_map=column_map,
+            table_name=table_name,
+            alias=alias,
+            cached_schema_getter=final_schema_getter,
+        )
+
+    @property
+    def dataframe(self) -> snowpark.DataFrame:
+        """Get the underlying Snowpark DataFrame."""
+        # Ensure the DataFrame has the _column_map attribute for backward compatibility
+        # Some of the snowpark code needs references to _column_map
+        self._dataframe._column_map = self._column_map
+        return self._dataframe
+
+    @property
+    def column_map(self) -> ColumnNameMap:
+        """Get the column name mapping."""
+        return self._column_map
+
+    @column_map.setter
+    def column_map(self, value: ColumnNameMap) -> None:
+        """Set the column name mapping."""
+        self._column_map = value
+
+    @property
+    def table_name(self) -> str | None:
+        """Get the table name."""
+        return self._table_name
+
+    @table_name.setter
+    def table_name(self, value: str | None) -> None:
+        """Set the table name."""
+        self._table_name = value
+
+    @property
+    def alias(self) -> str | None:
+        """Get the alias name."""
+        return self._alias
+
+    @alias.setter
+    def alias(self, value: str | None) -> None:
+        """Set the alias name."""
+        self._alias = value
+
+    def _create_default_column_map(
+        self, column_map: ColumnNameMap | None
+    ) -> ColumnNameMap:
+        """Create a default column map if none provided."""
+        if column_map is not None:
+            return column_map
+
+        from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+
+        return ColumnNameMap([], [])
+
+    def _apply_cached_schema_getter(
+        self, schema_getter: Callable[[], StructType]
+    ) -> None:
+        """Apply a cached schema getter to the dataframe."""
+        from snowflake.snowpark_connect.column_name_handler import set_schema_getter
+
+        set_schema_getter(self._dataframe, schema_getter)
+
+    @staticmethod
+    def _validate_column_mapping_inputs(
+        spark_column_names: list[str],
+        snowpark_column_names: list[str],
+        snowpark_column_types: list | None = None,
+    ) -> None:
+        """
+        Validate inputs for column mapping creation.
+
+        Raises:
+            AssertionError: If validation fails
+        """
+        assert len(snowpark_column_names) == len(
+            spark_column_names
+        ), "Number of Spark column names must match number of columns in DataFrame"
+
+        if snowpark_column_types is not None:
+            assert len(snowpark_column_names) == len(
+                snowpark_column_types
+            ), "Number of Snowpark column names and types must match"
+
+    @staticmethod
+    def _create_column_map(
+        spark_column_names: list[str],
+        snowpark_column_names: list[str],
+        column_metadata: dict | None = None,
+        column_qualifiers: list[list[str]] | None = None,
+        parent_column_name_map: ColumnNameMap | None = None,
+    ) -> ColumnNameMap:
+        """Create a ColumnNameMap with the provided configuration."""
+        from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+
+        return ColumnNameMap(
+            spark_column_names,
+            snowpark_column_names,
+            column_metadata=column_metadata,
+            column_qualifiers=column_qualifiers,
+            parent_column_name_map=parent_column_name_map,
+        )
+
+    @staticmethod
+    def _create_schema_from_types(
+        snowpark_column_names: list[str],
+        snowpark_column_types: list | None,
+    ) -> StructType | None:
+        """
+        Create a StructType schema from column names and types.
+
+        Returns:
+            StructType if types are provided, None otherwise
+        """
+        if snowpark_column_types is None:
+            return None
+
+        return StructType(
+            [
+                StructField(name, column_type, _is_column=False)
+                for name, column_type in zip(
+                    snowpark_column_names, snowpark_column_types
+                )
+            ]
+        )
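
A hedged usage sketch of the new container, which takes over from the with_column_map helper removed from column_name_handler.py above (column names and types below are invented for illustration; snowpark_df is an existing snowpark.DataFrame):

    from snowflake.snowpark.types import LongType, StringType
    from snowflake.snowpark_connect.dataframe_container import DataFrameContainer

    container = DataFrameContainer.create_with_column_mapping(
        dataframe=snowpark_df,
        spark_column_names=["id", "name"],
        snowpark_column_names=['"ID_1"', '"NAME_2"'],
        snowpark_column_types=[LongType(), StringType()],  # optional; installs a cached schema getter
    )
    container.dataframe    # Snowpark DataFrame, with _column_map re-attached for legacy callers
    container.column_map   # ColumnNameMap built from the two name lists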

snowflake/snowpark_connect/error/error_utils.py

@@ -28,7 +28,9 @@ from pyspark.errors.exceptions.base import (
     PySparkException,
     PythonException,
     SparkRuntimeException,
+    UnsupportedOperationException,
 )
+from pyspark.errors.exceptions.connect import SparkConnectGrpcException
 from snowflake.core.exceptions import NotFoundError
 
 from snowflake.connector.errors import ProgrammingError

@@ -49,7 +51,9 @@ SPARK_PYTHON_TO_JAVA_EXCEPTION = {
     ArrayIndexOutOfBoundsException: "java.lang.ArrayIndexOutOfBoundsException",
     NumberFormatException: "java.lang.NumberFormatException",
     SparkRuntimeException: "org.apache.spark.SparkRuntimeException",
+    SparkConnectGrpcException: "pyspark.errors.exceptions.connect.SparkConnectGrpcException",
     PythonException: "org.apache.spark.api.python.PythonException",
+    UnsupportedOperationException: "java.lang.UnsupportedOperationException",
 }
 
 WINDOW_FUNCTION_ANALYSIS_EXCEPTION_SQL_ERROR_CODE = {1005, 2303}

@@ -68,6 +72,9 @@ init_multi_args_exception_pattern = (
 terminate_multi_args_exception_pattern = (
     r"terminate\(\) missing \d+ required positional argument"
 )
+snowpark_connect_exception_pattern = re.compile(
+    r"\[snowpark-connect-exception(?::(\w+))?\]\s*(.+?)'\s*is not recognized"
+)
 
 
 def contains_udtf_select(sql_string):

@@ -100,6 +107,19 @@ def _get_converted_known_sql_or_custom_exception(
         return SparkRuntimeException(
             message="Unexpected value for start in function slice: SQL array indices start at 1."
         )
+    match = snowpark_connect_exception_pattern.search(
+        ex.message if hasattr(ex, "message") else str(ex)
+    )
+    if match:
+        class_name = match.group(1)
+        message = match.group(2)
+        exception_class = (
+            globals().get(class_name, SparkConnectGrpcException)
+            if class_name
+            else SparkConnectGrpcException
+        )
+        return exception_class(message=message)
+
     if "select with no columns" in msg and contains_udtf_select(query):
         # We try our best to detect if the SQL string contains a UDTF call and the output schema is empty.
         return PythonException(message=f"[UDTF_RETURN_SCHEMA_MISMATCH] {ex.message}")

@@ -131,6 +151,11 @@ def _get_converted_known_sql_or_custom_exception(
             message=f"[UDTF_EXEC_ERROR] User defined table function encountered an error in the terminate method: {ex.message}"
         )
 
+    if "failed to split string, provided pattern:" in msg:
+        return IllegalArgumentException(
+            message=f"Failed to split string using provided pattern. {ex.message}"
+        )
+
     if "100357" in msg and "wrong tuple size for returned value" in msg:
         return PythonException(
             message=f"[UDTF_RETURN_SCHEMA_MISMATCH] The number of columns in the result does not match the specified schema. {ex.message}"