snowpark-connect 0.20.2__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (67)
  1. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +3 -2
  2. snowflake/snowpark_connect/column_name_handler.py +6 -65
  3. snowflake/snowpark_connect/config.py +28 -14
  4. snowflake/snowpark_connect/dataframe_container.py +242 -0
  5. snowflake/snowpark_connect/execute_plan/map_execution_command.py +13 -23
  6. snowflake/snowpark_connect/execute_plan/map_execution_root.py +9 -5
  7. snowflake/snowpark_connect/expression/map_extension.py +2 -1
  8. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +8 -7
  9. snowflake/snowpark_connect/expression/map_unresolved_function.py +279 -43
  10. snowflake/snowpark_connect/expression/map_unresolved_star.py +8 -8
  11. snowflake/snowpark_connect/expression/map_update_fields.py +1 -1
  12. snowflake/snowpark_connect/expression/typer.py +6 -6
  13. snowflake/snowpark_connect/proto/control_pb2.py +17 -16
  14. snowflake/snowpark_connect/proto/control_pb2.pyi +17 -17
  15. snowflake/snowpark_connect/proto/control_pb2_grpc.py +12 -63
  16. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +15 -14
  17. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +19 -14
  18. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +27 -26
  19. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +74 -68
  20. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +5 -5
  21. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +25 -17
  22. snowflake/snowpark_connect/relation/map_aggregate.py +72 -47
  23. snowflake/snowpark_connect/relation/map_catalog.py +2 -2
  24. snowflake/snowpark_connect/relation/map_column_ops.py +207 -144
  25. snowflake/snowpark_connect/relation/map_crosstab.py +25 -6
  26. snowflake/snowpark_connect/relation/map_extension.py +81 -56
  27. snowflake/snowpark_connect/relation/map_join.py +72 -63
  28. snowflake/snowpark_connect/relation/map_local_relation.py +35 -20
  29. snowflake/snowpark_connect/relation/map_map_partitions.py +21 -16
  30. snowflake/snowpark_connect/relation/map_relation.py +22 -16
  31. snowflake/snowpark_connect/relation/map_row_ops.py +232 -146
  32. snowflake/snowpark_connect/relation/map_sample_by.py +15 -8
  33. snowflake/snowpark_connect/relation/map_show_string.py +42 -5
  34. snowflake/snowpark_connect/relation/map_sql.py +155 -78
  35. snowflake/snowpark_connect/relation/map_stats.py +88 -39
  36. snowflake/snowpark_connect/relation/map_subquery_alias.py +13 -14
  37. snowflake/snowpark_connect/relation/map_udtf.py +6 -9
  38. snowflake/snowpark_connect/relation/read/map_read.py +8 -3
  39. snowflake/snowpark_connect/relation/read/map_read_csv.py +7 -7
  40. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +7 -7
  41. snowflake/snowpark_connect/relation/read/map_read_json.py +7 -7
  42. snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -7
  43. snowflake/snowpark_connect/relation/read/map_read_socket.py +7 -3
  44. snowflake/snowpark_connect/relation/read/map_read_table.py +25 -16
  45. snowflake/snowpark_connect/relation/read/map_read_text.py +7 -7
  46. snowflake/snowpark_connect/relation/utils.py +11 -5
  47. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +15 -12
  48. snowflake/snowpark_connect/relation/write/map_write.py +199 -40
  49. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +3 -2
  50. snowflake/snowpark_connect/server.py +34 -4
  51. snowflake/snowpark_connect/type_mapping.py +2 -23
  52. snowflake/snowpark_connect/utils/cache.py +27 -22
  53. snowflake/snowpark_connect/utils/context.py +33 -17
  54. snowflake/snowpark_connect/utils/{attribute_handling.py → identifiers.py} +47 -0
  55. snowflake/snowpark_connect/utils/session.py +41 -34
  56. snowflake/snowpark_connect/utils/telemetry.py +1 -2
  57. snowflake/snowpark_connect/version.py +1 -1
  58. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/METADATA +5 -3
  59. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/RECORD +67 -64
  60. snowpark_connect-0.21.0.dist-info/licenses/LICENSE-binary +568 -0
  61. snowpark_connect-0.21.0.dist-info/licenses/NOTICE-binary +1533 -0
  62. {snowpark_connect-0.20.2.data → snowpark_connect-0.21.0.data}/scripts/snowpark-connect +0 -0
  63. {snowpark_connect-0.20.2.data → snowpark_connect-0.21.0.data}/scripts/snowpark-session +0 -0
  64. {snowpark_connect-0.20.2.data → snowpark_connect-0.21.0.data}/scripts/snowpark-submit +0 -0
  65. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/WHEEL +0 -0
  66. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/licenses/LICENSE.txt +0 -0
  67. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/top_level.txt +0 -0

snowflake/snowpark_connect/analyze_plan/map_tree_string.py
@@ -15,8 +15,9 @@ def map_tree_string(
 ) -> proto_base.AnalyzePlanResponse:
     # TODO: tracking the difference with pyspark in SNOW-1853347
     tree_string = request.tree_string
-    snowpark_df = map_relation(tree_string.plan.root)
-    column_map = snowpark_df._column_map
+    snowpark_df_container = map_relation(tree_string.plan.root)
+    snowpark_df = snowpark_df_container.dataframe
+    column_map = snowpark_df_container.column_map

     snowpark_tree_string = snowpark_df._format_schema(
         level=tree_string.level if tree_string.HasField("level") else None,

snowflake/snowpark_connect/column_name_handler.py
@@ -12,14 +12,13 @@ from functools import cached_property

 from pyspark.errors.exceptions.base import AnalysisException

-from snowflake import snowpark
 from snowflake.snowpark import DataFrame
 from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     quote_name_without_upper_casing,
     unquote_if_quoted,
 )
 from snowflake.snowpark._internal.utils import quote_name
-from snowflake.snowpark.types import DataType, StructField, StructType
+from snowflake.snowpark.types import StructType
 from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.utils.context import get_current_operation_scope

@@ -41,64 +40,6 @@ def set_schema_getter(df: DataFrame, get_schema: Callable[[], StructType]) -> None
     df.__class__ = PatchedDataFrame


-def with_column_map(
-    result_df: snowpark.DataFrame,
-    spark_column_names: list[str],
-    snowpark_column_names: list[str],
-    snowpark_column_types: list[DataType] = None,
-    column_metadata: dict | None = None,
-    column_qualifiers: list[list[str]] | None = None,
-    parent_column_name_map: ColumnNameMap | None = None,
-) -> snowpark.DataFrame:
-    """
-    Build a mapping from the DataFrame's column names to the Spark column names.
-
-    This is used to track the original column names and handle column naming differences
-    between Spark and Snowpark.
-
-    The elements in result_df.columns and the elements in spark_column_names must be a one-to-one mapping.
-
-    Args:
-        result_df (snowpark.DataFrame): The DataFrame to map.
-        spark_column_names (list[str]): The Spark column names.
-        snowpark_column_names (list[str]): The Snowpark column names.
-        snowpark_column_types (list[DataType], optional): The Snowpark column types. **if provided df.schema will be overridden with inferred schema**
-        column_metadata (dict, optional): Metadata for the columns.
-        column_qualifiers (list[list[str]], optional): Qualifiers for the columns, used to handle table aliases or DataFrame aliases.
-        parent_column_name_map (ColumnNameMap, optional): A ColumnNameMap, that came from the dataframe used to create result_df (parent df)
-
-    Returns:
-        snowpark.DataFrame: The mapped DataFrame.
-    """
-    assert len(snowpark_column_names) == len(
-        spark_column_names
-    ), "Number of Spark column names must match number of columns in DataFrame"
-    result_df._column_map = ColumnNameMap(
-        spark_column_names,
-        snowpark_column_names,
-        column_metadata=column_metadata,
-        column_qualifiers=column_qualifiers,
-        parent_column_name_map=parent_column_name_map,
-    )
-    result_df._table_name = None
-
-    if snowpark_column_types is not None:
-        assert len(snowpark_column_names) == len(
-            snowpark_column_types
-        ), "Number of Snowpark column names and types must match"
-
-        set_schema_getter(
-            result_df,
-            lambda: StructType(
-                [
-                    StructField(n, t, _is_column=False)
-                    for n, t in zip(snowpark_column_names, snowpark_column_types)
-                ]
-            ),
-        )
-    return result_df
-
-
 def make_column_names_snowpark_compatible(
     names: list[str], plan_id: int, offset: int = 0
 ) -> list[str]:
@@ -189,7 +130,7 @@ class ColumnNameMap:
             column_qualifiers: Optional qualifiers for the columns, used to handle table aliases or DataFrame aliases.
            parent_column_name_map: parent ColumnNameMap
        """
-        self.columns = []
+        self.columns: list[ColumnNames] = []
        self.spark_to_col = defaultdict(list)
        self.uppercase_spark_to_col = defaultdict(list)
        self.snowpark_to_col = defaultdict(list)
@@ -602,11 +543,11 @@ class ColumnNameMap:
 class JoinColumnNameMap(ColumnNameMap):
     def __init__(
         self,
-        left_input: snowpark.DataFrame,
-        right_input: snowpark.DataFrame,
+        left_colmap: ColumnNameMap,
+        right_colmap: ColumnNameMap,
     ) -> None:
-        self.left_column_mapping: ColumnNameMap = left_input._column_map
-        self.right_column_mapping: ColumnNameMap = right_input._column_map
+        self.left_column_mapping: ColumnNameMap = left_colmap
+        self.right_column_mapping: ColumnNameMap = right_colmap

     def get_snowpark_column_name_from_spark_column_name(
         self,

snowflake/snowpark_connect/config.py
@@ -9,7 +9,7 @@ import re
 import sys
 import time
 from collections import defaultdict
-from copy import copy
+from copy import copy, deepcopy
 from typing import Any

 import jpype
@@ -33,7 +33,7 @@ from snowflake.snowpark_connect.version import VERSION as sas_version


 def str_to_bool(boolean_str: str) -> bool:
-    assert boolean_str in [
+    assert boolean_str in (
         "True",
         "true",
         "False",
@@ -41,7 +41,7 @@ def str_to_bool(boolean_str: str) -> bool:
         "1",
         "0",
         "",  # This is the default value, equivalent to False.
-    ], f"Invalid boolean value: {boolean_str}"
+    ), f"Invalid boolean value: {boolean_str}"
     return boolean_str in ["True", "true", "1"]


@@ -131,6 +131,7 @@ class GlobalConfig:
         "spark.sql.caseSensitive": "false",
         "spark.sql.mapKeyDedupPolicy": "EXCEPTION",
         "spark.sql.ansi.enabled": "false",
+        "spark.sql.legacy.allowHashOnMapType": "false",
         "spark.sql.sources.default": "parquet",
         "spark.Catalog.databaseFilterInformationSchema": "false",
         "spark.sql.parser.quotedRegexColumnNames": "false",
@@ -145,6 +146,7 @@ class GlobalConfig:
         "spark.sql.crossJoin.enabled",
         "spark.sql.caseSensitive",
         "spark.sql.ansi.enabled",
+        "spark.sql.legacy.allowHashOnMapType",
         "spark.Catalog.databaseFilterInformationSchema",
         "spark.sql.parser.quotedRegexColumnNames",
     ]
@@ -250,10 +252,10 @@ SESSION_CONFIG_KEY_WHITELIST = {
     "spark.sql.tvf.allowMultipleTableArguments.enabled",
     "snowpark.connect.sql.passthrough",
     "snowpark.connect.iceberg.external_volume",
-    "snowpark.connect.auto-uppercase.ddl",
-    "snowpark.connect.auto-uppercase.dml",
+    "snowpark.connect.sql.identifiers.auto-uppercase",
     "snowpark.connect.udtf.compatibility_mode",
     "snowpark.connect.views.duplicate_column_names_handling_mode",
+    "enable_snowflake_extension_behavior",
 }
 AZURE_SAS_KEY = re.compile(
     r"^fs\.azure\.sas\.[^\.]+\.[^\.]+\.blob\.core\.windows\.net$"
@@ -271,17 +273,17 @@ class SessionConfig:
     """This class contains the session configuration for the Spark Server."""

     default_session_config = {
-        "snowpark.connect.auto-uppercase.ddl": "true",
-        "snowpark.connect.auto-uppercase.dml": "true",
+        "snowpark.connect.sql.identifiers.auto-uppercase": "all_except_columns",
         "snowpark.connect.sql.passthrough": "false",
         "snowpark.connect.udtf.compatibility_mode": "false",
         "snowpark.connect.views.duplicate_column_names_handling_mode": "rename",
         "spark.sql.execution.pythonUDTF.arrow.enabled": "false",
         "spark.sql.tvf.allowMultipleTableArguments.enabled": "true",
+        "enable_snowflake_extension_behavior": "false",
     }

     def __init__(self) -> None:
-        self.config = copy(self.default_session_config)
+        self.config = deepcopy(self.default_session_config)

     def __getitem__(self, item: str) -> str:
         return self.get(item)
@@ -304,7 +306,13 @@ CONFIG_ALLOWED_VALUES: dict[str, tuple] = {
         "rename",
         "fail",
         "drop",
-    )
+    ),
+    "snowpark.connect.sql.identifiers.auto-uppercase": (
+        "all_except_columns",
+        "only_columns",
+        "all",
+        "none",
+    ),
 }

 # Set some default configuration that are necessary for the driver.
@@ -533,7 +541,7 @@ def set_snowflake_parameters(
         value = global_config.default_static_global_config.get(key)

     snowpark_name = quote_name_without_upper_casing(value)
-    if auto_uppercase_ddl():
+    if auto_uppercase_non_column_identifiers():
         snowpark_name = snowpark_name.upper()

     # Create the schema on demand. Before creating it, however,
@@ -568,9 +576,15 @@ def get_boolean_session_config_param(name: str) -> bool:
     return str_to_bool(session_config[name])


-def auto_uppercase_dml() -> bool:
-    return get_boolean_session_config_param("snowpark.connect.auto-uppercase.dml")
+def auto_uppercase_column_identifiers() -> bool:
+    session_config = sessions_config[get_session_id()]
+    return session_config[
+        "snowpark.connect.sql.identifiers.auto-uppercase"
+    ].lower() in ("all", "only_columns")


-def auto_uppercase_ddl() -> bool:
-    return get_boolean_session_config_param("snowpark.connect.auto-uppercase.ddl")
+def auto_uppercase_non_column_identifiers() -> bool:
+    session_config = sessions_config[get_session_id()]
+    return session_config[
+        "snowpark.connect.sql.identifiers.auto-uppercase"
+    ].lower() in ("all", "all_except_columns")
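
Note on the config change above: the two session flags snowpark.connect.auto-uppercase.ddl and snowpark.connect.auto-uppercase.dml are replaced by a single key, snowpark.connect.sql.identifiers.auto-uppercase, with the allowed values listed in CONFIG_ALLOWED_VALUES. A minimal client-side sketch of opting out of identifier upper-casing; it assumes the key is forwarded through the Spark Connect session conf like the other whitelisted keys, and the endpoint URL is purely illustrative:

from pyspark.sql import SparkSession

# Illustrative Spark Connect endpoint; substitute your own.
spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

# New in 0.21.0: one knob replaces the old ddl/dml flags.
# Allowed values: "all_except_columns" (default), "only_columns", "all", "none".
spark.conf.set("snowpark.connect.sql.identifiers.auto-uppercase", "none")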

snowflake/snowpark_connect/dataframe_container.py
@@ -0,0 +1,242 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Callable
+
+from snowflake import snowpark
+from snowflake.snowpark.types import StructField, StructType
+
+if TYPE_CHECKING:
+    from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+
+
+class DataFrameContainer:
+    """
+    A container class that wraps a Snowpark DataFrame along with additional metadata.
+
+    This class provides a unified interface for managing Snowpark DataFrames along with
+    their column mappings, schema information, and metadata.
+    """
+
+    def __init__(
+        self,
+        dataframe: snowpark.DataFrame,
+        column_map: ColumnNameMap | None = None,
+        table_name: str | None = None,
+        alias: str | None = None,
+        cached_schema_getter: Callable[[], StructType] | None = None,
+    ) -> None:
+        """
+        Initialize a new DataFrameContainer.
+
+        Args:
+            dataframe: The underlying Snowpark DataFrame
+            column_map: Optional column name mapping
+            table_name: Optional table name for the DataFrame
+            alias: Optional alias for the DataFrame
+            cached_schema_getter: Optional function to get cached schema
+        """
+        self._dataframe = dataframe
+        self._column_map = self._create_default_column_map(column_map)
+        self._table_name = table_name
+        self._alias = alias
+
+        if cached_schema_getter is not None:
+            self._apply_cached_schema_getter(cached_schema_getter)
+
+    @classmethod
+    def create_with_column_mapping(
+        cls,
+        dataframe: snowpark.DataFrame,
+        spark_column_names: list[str],
+        snowpark_column_names: list[str],
+        snowpark_column_types: list | None = None,
+        column_metadata: dict | None = None,
+        column_qualifiers: list[list[str]] | None = None,
+        parent_column_name_map: ColumnNameMap | None = None,
+        table_name: str | None = None,
+        alias: str | None = None,
+        cached_schema_getter: Callable[[], StructType] | None = None,
+    ) -> DataFrameContainer:
+        """
+        Create a new container with complete column mapping configuration.
+
+        Args:
+            dataframe: The underlying Snowpark DataFrame
+            spark_column_names: List of Spark column names
+            snowpark_column_names: List of corresponding Snowpark column names
+            snowpark_column_types: Optional list of column types
+            column_metadata: Optional metadata dictionary
+            column_qualifiers: Optional column qualifiers
+            parent_column_name_map: Optional parent column name map
+            table_name: Optional table name
+            alias: Optional alias
+            cached_schema_getter: Optional function to get cached schema
+
+        Returns:
+            A new DataFrameContainer instance
+
+        Raises:
+            AssertionError: If column names and types don't match expected lengths
+        """
+        # Validate inputs
+        cls._validate_column_mapping_inputs(
+            spark_column_names, snowpark_column_names, snowpark_column_types
+        )
+
+        column_map = cls._create_column_map(
+            spark_column_names,
+            snowpark_column_names,
+            column_metadata,
+            column_qualifiers,
+            parent_column_name_map,
+        )
+
+        # Determine the schema getter to use
+        final_schema_getter = None
+
+        if cached_schema_getter is not None:
+            # Use the provided schema getter
+            final_schema_getter = cached_schema_getter
+        elif snowpark_column_types is not None:
+            # Create schema from types and wrap in function
+            schema = cls._create_schema_from_types(
+                snowpark_column_names, snowpark_column_types
+            )
+            if schema is not None:
+
+                def get_schema():
+                    return schema
+
+                final_schema_getter = get_schema
+
+        return cls(
+            dataframe=dataframe,
+            column_map=column_map,
+            table_name=table_name,
+            alias=alias,
+            cached_schema_getter=final_schema_getter,
+        )
+
+    @property
+    def dataframe(self) -> snowpark.DataFrame:
+        """Get the underlying Snowpark DataFrame."""
+        # Ensure the DataFrame has the _column_map attribute for backward compatibility
+        # Some of the snowpark code needs references to _column_map
+        self._dataframe._column_map = self._column_map
+        return self._dataframe
+
+    @property
+    def column_map(self) -> ColumnNameMap:
+        """Get the column name mapping."""
+        return self._column_map
+
+    @column_map.setter
+    def column_map(self, value: ColumnNameMap) -> None:
+        """Set the column name mapping."""
+        self._column_map = value
+
+    @property
+    def table_name(self) -> str | None:
+        """Get the table name."""
+        return self._table_name
+
+    @table_name.setter
+    def table_name(self, value: str | None) -> None:
+        """Set the table name."""
+        self._table_name = value
+
+    @property
+    def alias(self) -> str | None:
+        """Get the alias name."""
+        return self._alias
+
+    @alias.setter
+    def alias(self, value: str | None) -> None:
+        """Set the alias name."""
+        self._alias = value
+
+    def _create_default_column_map(
+        self, column_map: ColumnNameMap | None
+    ) -> ColumnNameMap:
+        """Create a default column map if none provided."""
+        if column_map is not None:
+            return column_map
+
+        from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+
+        return ColumnNameMap([], [])
+
+    def _apply_cached_schema_getter(
+        self, schema_getter: Callable[[], StructType]
+    ) -> None:
+        """Apply a cached schema getter to the dataframe."""
+        from snowflake.snowpark_connect.column_name_handler import set_schema_getter
+
+        set_schema_getter(self._dataframe, schema_getter)
+
+    @staticmethod
+    def _validate_column_mapping_inputs(
+        spark_column_names: list[str],
+        snowpark_column_names: list[str],
+        snowpark_column_types: list | None = None,
+    ) -> None:
+        """
+        Validate inputs for column mapping creation.
+
+        Raises:
+            AssertionError: If validation fails
+        """
+        assert len(snowpark_column_names) == len(
+            spark_column_names
+        ), "Number of Spark column names must match number of columns in DataFrame"
+
+        if snowpark_column_types is not None:
+            assert len(snowpark_column_names) == len(
+                snowpark_column_types
+            ), "Number of Snowpark column names and types must match"
+
+    @staticmethod
+    def _create_column_map(
+        spark_column_names: list[str],
+        snowpark_column_names: list[str],
+        column_metadata: dict | None = None,
+        column_qualifiers: list[list[str]] | None = None,
+        parent_column_name_map: ColumnNameMap | None = None,
+    ) -> ColumnNameMap:
+        """Create a ColumnNameMap with the provided configuration."""
+        from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+
+        return ColumnNameMap(
+            spark_column_names,
+            snowpark_column_names,
+            column_metadata=column_metadata,
+            column_qualifiers=column_qualifiers,
+            parent_column_name_map=parent_column_name_map,
+        )
+
+    @staticmethod
+    def _create_schema_from_types(
+        snowpark_column_names: list[str],
+        snowpark_column_types: list | None,
+    ) -> StructType | None:
+        """
+        Create a StructType schema from column names and types.
+
+        Returns:
+            StructType if types are provided, None otherwise
+        """
+        if snowpark_column_types is None:
+            return None
+
+        return StructType(
+            [
+                StructField(name, column_type, _is_column=False)
+                for name, column_type in zip(
+                    snowpark_column_names, snowpark_column_types
+                )
+            ]
+        )
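
The new DataFrameContainer replaces the with_column_map() helper removed from column_name_handler.py: column mappings, table names, and aliases now travel on the container instead of being monkey-patched onto the Snowpark DataFrame. A minimal usage sketch based only on the constructor shown above; the column names are illustrative and the Snowpark DataFrame is assumed to come from an existing session:

from snowflake import snowpark
from snowflake.snowpark_connect.dataframe_container import DataFrameContainer

def wrap_result(snowpark_df: snowpark.DataFrame) -> DataFrameContainer:
    # Attach the Spark-to-Snowpark column mapping to the container rather than
    # setting _column_map/_table_name directly on the DataFrame.
    return DataFrameContainer.create_with_column_mapping(
        dataframe=snowpark_df,
        spark_column_names=["id", "name"],          # illustrative Spark names
        snowpark_column_names=['"ID"', '"NAME"'],   # illustrative Snowpark names
    )

# container = wrap_result(df)                 # df: an existing snowpark.DataFrame
# container.dataframe                         # underlying Snowpark DataFrame
# container.column_map.get_spark_columns()    # Spark-facing column names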

snowflake/snowpark_connect/execute_plan/map_execution_command.py
@@ -1,21 +1,13 @@
 #
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #
-
 from collections import Counter

 import pyspark.sql.connect.proto.base_pb2 as proto_base
 import pyspark.sql.connect.proto.relations_pb2 as relation_proto

-from snowflake.snowpark._internal.analyzer.analyzer_utils import (
-    quote_name_without_upper_casing,
-)
 from snowflake.snowpark_connect.column_name_handler import ColumnNames
-from snowflake.snowpark_connect.config import (
-    auto_uppercase_ddl,
-    global_config,
-    sessions_config,
-)
+from snowflake.snowpark_connect.config import global_config, sessions_config
 from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
 from snowflake.snowpark_connect.execute_plan.utils import pandas_to_arrow_batches_bytes
 from snowflake.snowpark_connect.expression import map_udf
@@ -24,24 +16,23 @@ from snowflake.snowpark_connect.relation.map_relation import map_relation
 from snowflake.snowpark_connect.relation.map_sql import map_sql_to_pandas_df
 from snowflake.snowpark_connect.relation.write.map_write import map_write, map_write_v2
 from snowflake.snowpark_connect.utils.context import get_session_id
+from snowflake.snowpark_connect.utils.identifiers import (
+    spark_to_sf_single_id,
+    spark_to_sf_single_id_with_unquoting,
+)
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
 )


-def _spark_to_snowflake_single_id(name: str) -> str:
-    name = quote_name_without_upper_casing(name)
-    return name.upper() if auto_uppercase_ddl() else name
-
-
 def _create_column_rename_map(
     columns: list[ColumnNames], rename_duplicated: bool
 ) -> dict:
     if rename_duplicated is False:
         # if we are not renaming duplicated columns, we can just return the original names
         return {
-            col.snowpark_name: _spark_to_snowflake_single_id(col.spark_name)
+            col.snowpark_name: spark_to_sf_single_id(col.spark_name, is_column=True)
             for col in columns
         }

@@ -64,7 +55,7 @@ def _create_column_rename_map(

     if len(renamed_cols) == 0:
         return {
-            col.snowpark_name: _spark_to_snowflake_single_id(col.spark_name)
+            col.snowpark_name: spark_to_sf_single_id(col.spark_name, is_column=True)
             for col in not_renamed_cols
         }

@@ -95,12 +86,9 @@ def map_execution_command(
     match request.plan.command.WhichOneof("command_type"):
         case "create_dataframe_view":
             req = request.plan.command.create_dataframe_view
-            input_df = map_relation(req.input)
-            # Use real column names when writing to sf.
-            assert hasattr(
-                input_df, "_column_map"
-            ), "input_df does not have the _column_map attribute"
-            column_map = input_df._column_map
+            input_df_container = map_relation(req.input)
+            input_df = input_df_container.dataframe
+            column_map = input_df_container.column_map

             session_config = sessions_config[get_session_id()]
             duplicate_column_names_handling_mode = session_config[
@@ -133,7 +121,9 @@ def map_execution_command(
                 view_name = [global_config.spark_sql_globalTempDatabase, req.name]
             else:
                 view_name = [req.name]
-            view_name = [_spark_to_snowflake_single_id(part) for part in view_name]
+            view_name = [
+                spark_to_sf_single_id_with_unquoting(part) for part in view_name
+            ]

             if req.replace:
                 input_df.create_or_replace_temp_view(view_name)
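
The local helper _spark_to_snowflake_single_id moves into utils/identifiers.py (renamed from attribute_handling.py) as spark_to_sf_single_id. The new module is not shown in this diff; the sketch below is a plausible reconstruction pieced together from the removed helper and the new auto-uppercase config functions, not the actual implementation:

from snowflake.snowpark._internal.analyzer.analyzer_utils import (
    quote_name_without_upper_casing,
)
from snowflake.snowpark_connect.config import (
    auto_uppercase_column_identifiers,
    auto_uppercase_non_column_identifiers,
)

def spark_to_sf_single_id(name: str, is_column: bool = False) -> str:
    # Quote the Spark identifier for Snowflake, then upper-case it only when the
    # session-level auto-uppercase setting covers this kind of identifier.
    quoted = quote_name_without_upper_casing(name)
    auto_upper = (
        auto_uppercase_column_identifiers()
        if is_column
        else auto_uppercase_non_column_identifiers()
    )
    return quoted.upper() if auto_upper else quoted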

snowflake/snowpark_connect/execute_plan/map_execution_root.py
@@ -20,6 +20,7 @@ from snowflake.snowpark._internal.utils import (
     create_or_update_statement_params_with_query_tag,
 )
 from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.execute_plan.utils import (
     arrow_table_to_arrow_bytes,
     pandas_to_arrow_batches_bytes,
@@ -89,13 +90,16 @@ def to_arrow_batch_iter(result_df: snowpark.DataFrame) -> Iterator[Table]:
 def map_execution_root(
     request: proto_base.ExecutePlanRequest,
 ) -> Iterator[proto_base.ExecutePlanResponse | QueryResult]:
-    result_df: snowpark.DataFrame | pandas.DataFrame = map_relation(request.plan.root)
+    result: DataFrameContainer | pandas.DataFrame = map_relation(request.plan.root)
+    if isinstance(result, pandas.DataFrame):
+        result_df = result
+    else:
+        result_df = result.dataframe
+
     if isinstance(result_df, snowpark.DataFrame):
         snowpark_schema = result_df.schema
-        schema = snowpark_to_proto_type(
-            snowpark_schema, result_df._column_map, result_df
-        )
-        spark_columns = result_df._column_map.get_spark_columns()
+        schema = snowpark_to_proto_type(snowpark_schema, result.column_map, result_df)
+        spark_columns = result.column_map.get_spark_columns()
     if tcm.TCM_MODE:
         # TCM result handling:
         # - small result (only one batch): just return the executePlanResponse

snowflake/snowpark_connect/expression/map_extension.py
@@ -58,7 +58,8 @@ def map_extension(
     from snowflake.snowpark_connect.relation.map_relation import map_relation

     with push_evaluating_sql_scope():
-        df = map_relation(extension.subquery_expression.input)
+        df_container = map_relation(extension.subquery_expression.input)
+        df = df_container.dataframe

     queries = df.queries["queries"]
     if len(queries) != 1:

snowflake/snowpark_connect/expression/map_unresolved_attribute.py
@@ -17,15 +17,15 @@ from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
-from snowflake.snowpark_connect.utils.attribute_handling import (
-    split_fully_qualified_spark_name,
-)
 from snowflake.snowpark_connect.utils.context import (
     get_is_evaluating_sql,
     get_outer_dataframes,
     get_plan_id_map,
     resolve_lca_alias,
 )
+from snowflake.snowpark_connect.utils.identifiers import (
+    split_fully_qualified_spark_name,
+)

 SPARK_QUOTED = re.compile("^(`.*`)$", re.DOTALL)

@@ -46,11 +46,12 @@ def map_unresolved_attribute(

     if has_plan_id:
         plan_id = exp.unresolved_attribute.plan_id
-        target_df = get_plan_id_map(plan_id)
+        target_df_container = get_plan_id_map(plan_id)
+        target_df = target_df_container.dataframe
         assert (
             target_df is not None
         ), f"resolving an attribute of a unresolved dataframe {plan_id}"
-        column_mapping = target_df._column_map
+        column_mapping = target_df_container.column_map
         typer = ExpressionTyper(target_df)

         def get_col(snowpark_name):
@@ -146,8 +147,8 @@ def map_unresolved_attribute(
             name_parts[0], allow_non_exists=True
         )
         if snowpark_name is None:
-            for outer_df in get_outer_dataframes():
-                snowpark_name = outer_df._column_map.get_snowpark_column_name_from_spark_column_name(
+            for outer_df_container in get_outer_dataframes():
+                snowpark_name = outer_df_container.column_map.get_snowpark_column_name_from_spark_column_name(
                     name_parts[0], allow_non_exists=True
                 )
                 if snowpark_name is not None: