snowpark-connect 0.27.0__py3-none-any.whl → 0.28.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of snowpark-connect might be problematic.

Files changed (42)
  1. snowflake/snowpark_connect/column_name_handler.py +3 -93
  2. snowflake/snowpark_connect/config.py +99 -1
  3. snowflake/snowpark_connect/dataframe_container.py +0 -6
  4. snowflake/snowpark_connect/execute_plan/map_execution_command.py +31 -68
  5. snowflake/snowpark_connect/expression/map_expression.py +22 -7
  6. snowflake/snowpark_connect/expression/map_sql_expression.py +22 -18
  7. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +4 -26
  8. snowflake/snowpark_connect/expression/map_unresolved_function.py +12 -3
  9. snowflake/snowpark_connect/expression/map_unresolved_star.py +2 -3
  10. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  11. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +207 -20
  12. snowflake/snowpark_connect/relation/map_extension.py +14 -10
  13. snowflake/snowpark_connect/relation/map_join.py +62 -258
  14. snowflake/snowpark_connect/relation/map_relation.py +5 -1
  15. snowflake/snowpark_connect/relation/map_sql.py +464 -68
  16. snowflake/snowpark_connect/relation/read/map_read_table.py +58 -0
  17. snowflake/snowpark_connect/relation/write/map_write.py +228 -120
  18. snowflake/snowpark_connect/resources_initializer.py +20 -5
  19. snowflake/snowpark_connect/server.py +16 -17
  20. snowflake/snowpark_connect/utils/concurrent.py +4 -0
  21. snowflake/snowpark_connect/utils/context.py +21 -0
  22. snowflake/snowpark_connect/utils/describe_query_cache.py +57 -51
  23. snowflake/snowpark_connect/utils/identifiers.py +128 -2
  24. snowflake/snowpark_connect/utils/io_utils.py +21 -1
  25. snowflake/snowpark_connect/utils/scala_udf_utils.py +34 -43
  26. snowflake/snowpark_connect/utils/session.py +16 -26
  27. snowflake/snowpark_connect/utils/telemetry.py +53 -0
  28. snowflake/snowpark_connect/utils/temporary_view_cache.py +61 -0
  29. snowflake/snowpark_connect/utils/udf_utils.py +9 -8
  30. snowflake/snowpark_connect/utils/udtf_utils.py +3 -2
  31. snowflake/snowpark_connect/version.py +1 -1
  32. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/METADATA +2 -2
  33. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/RECORD +41 -41
  34. snowflake/snowpark_connect/hidden_column.py +0 -39
  35. {snowpark_connect-0.27.0.data → snowpark_connect-0.28.1.data}/scripts/snowpark-connect +0 -0
  36. {snowpark_connect-0.27.0.data → snowpark_connect-0.28.1.data}/scripts/snowpark-session +0 -0
  37. {snowpark_connect-0.27.0.data → snowpark_connect-0.28.1.data}/scripts/snowpark-submit +0 -0
  38. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/WHEEL +0 -0
  39. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/licenses/LICENSE-binary +0 -0
  40. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/licenses/LICENSE.txt +0 -0
  41. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/licenses/NOTICE-binary +0 -0
  42. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/column_name_handler.py

@@ -20,7 +20,6 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
  from snowflake.snowpark._internal.utils import quote_name
  from snowflake.snowpark.types import StructType
  from snowflake.snowpark_connect.config import global_config
- from snowflake.snowpark_connect.hidden_column import HiddenColumn
  from snowflake.snowpark_connect.utils.context import get_current_operation_scope
  from snowflake.snowpark_connect.utils.identifiers import (
  split_fully_qualified_spark_name,
@@ -124,7 +123,6 @@ class ColumnNameMap:
  ] = lambda: global_config.spark_sql_caseSensitive,
  column_metadata: dict | None = None,
  column_qualifiers: list[list[str]] | None = None,
- hidden_columns: set[HiddenColumn] | None = None,
  parent_column_name_map: ColumnNameMap | None = None,
  ) -> None:
  """
@@ -135,7 +133,6 @@ class ColumnNameMap:
  The key is the original Spark column name, and the value is the metadata.
  example: Dict('age', {'foo': 'bar'})
  column_qualifiers: Optional qualifiers for the columns, used to handle table aliases or DataFrame aliases.
- hidden_columns: Optional set of HiddenColumn objects.
  parent_column_name_map: parent ColumnNameMap
  """
  self.columns: list[ColumnNames] = []
@@ -144,7 +141,6 @@ class ColumnNameMap:
  self.snowpark_to_col = defaultdict(list)
  self.is_case_sensitive = is_case_sensitive
  self.column_metadata = column_metadata
- self.hidden_columns = hidden_columns

  # Rename chain dictionary to track column renaming history
  self.rename_chains: dict[str, str] = {} # old_name -> new_name mapping
@@ -338,8 +334,6 @@ class ColumnNameMap:
  *,
  allow_non_exists: bool = False,
  return_first: bool = False,
- is_qualified: bool = False,
- source_qualifiers: list[str] | None = None,
  ) -> str | None:
  assert isinstance(spark_column_name, str)
  resolved_name = (
@@ -347,37 +341,9 @@ class ColumnNameMap:
  if self.rename_chains
  else spark_column_name
  )
-
- # We need to check hidden columns first. We want to avoid the code path
- # within get_snowpark_column_names_from_spark_column_names that checks the parent ColumnNameMap.
- # This is because that will return the name of the using column that's been dropped from the result
- # dataframe. We want to fetch and resolve the hidden column to its visible using column name instead.
- # Even if this is an unqualified reference or one to the visible column, it will resolve correctly to
- # the visible name anyway.
- snowpark_names = []
- # Only check hidden columns for qualified references with source qualifiers
- if is_qualified and source_qualifiers is not None and self.hidden_columns:
- column_name = spark_column_name
-
- # Check each hidden column for column name AND qualifier match
- for hidden_col in self.hidden_columns:
- if (
- hidden_col.spark_name == column_name
- and hidden_col.qualifiers == source_qualifiers
- ):
- if not global_config.spark_sql_caseSensitive:
- if hidden_col.spark_name.upper() == column_name.upper() and [
- q.upper() for q in hidden_col.qualifiers
- ] == [q.upper() for q in source_qualifiers]:
- snowpark_names.append(hidden_col.visible_snowpark_name)
- else:
- snowpark_names.append(hidden_col.visible_snowpark_name)
-
- # If not found in hidden columns, proceed with normal lookup
- if not snowpark_names:
- snowpark_names = self.get_snowpark_column_names_from_spark_column_names(
- [resolved_name], return_first
- )
+ snowpark_names = self.get_snowpark_column_names_from_spark_column_names(
+ [resolved_name], return_first
+ )

  snowpark_names_len = len(snowpark_names)
  if snowpark_names_len > 1:
@@ -464,27 +430,6 @@ class ColumnNameMap:
  snowpark_columns.append(c.snowpark_name)
  qualifiers.append(c.qualifiers)

- # Note: The following code is commented out because there is a bug with handling duplicate columns in
- # qualified select *'s. This needs to be revisited once a solution for that is found.
- # TODO: https://snowflakecomputing.atlassian.net/browse/SNOW-2265240
-
- # # Handles fetching/resolving the hidden columns if they also match the qualifiers
- # # This method is only ever called for qualified references, so we need to check hidden columns as well.
- # if self.hidden_columns:
- # for hidden_col in self.hidden_columns:
- # col_qualifiers = (
- # [q.upper() for q in hidden_col.qualifiers]
- # if not self.is_case_sensitive()
- # else hidden_col.qualifiers
- # )
- # if len(col_qualifiers) < len(qualifiers_input):
- # continue
- # if col_qualifiers[-len(qualifiers_input) :] == qualifiers_input:
- # # This hidden column matches! Add it to the results
- # spark_columns.append(hidden_col.spark_name)
- # snowpark_columns.append(hidden_col.visible_snowpark_name)
- # qualifiers.append(hidden_col.qualifiers)
-
  return spark_columns, snowpark_columns, qualifiers

  def get_snowpark_columns(self) -> list[str]:
@@ -616,35 +561,6 @@ class ColumnNameMap:
  else:
  return spark_name.upper()

- def is_hidden_column_reference(
- self, spark_column_name: str, source_qualifiers: list[str] | None = None
- ) -> bool:
- """
- Check if a column reference would be resolved through hidden columns.
- """
- if not self.hidden_columns or source_qualifiers is None:
- return False
-
- # For qualified references with source_qualifiers
- column_name = (
- spark_column_name # When has_plan_id=True, this is just the column name
- )
-
- for hidden_col in self.hidden_columns:
- if (
- hidden_col.spark_name == column_name
- and hidden_col.qualifiers == source_qualifiers
- ):
- if not global_config.spark_sql_caseSensitive:
- if hidden_col.spark_name.upper() == column_name.upper() and [
- q.upper() for q in hidden_col.qualifiers
- ] == [q.upper() for q in source_qualifiers]:
- return True
- else:
- return True
-
- return False
-

  class JoinColumnNameMap(ColumnNameMap):
  def __init__(
@@ -654,9 +570,6 @@ class JoinColumnNameMap(ColumnNameMap):
  ) -> None:
  self.left_column_mapping: ColumnNameMap = left_colmap
  self.right_column_mapping: ColumnNameMap = right_colmap
- # Ensure attributes expected by base-class helpers exist to avoid AttributeError
- # when generic code paths (e.g., hidden column checks) touch them.
- self.hidden_columns: set[HiddenColumn] | None = None

  def get_snowpark_column_name_from_spark_column_name(
  self,
@@ -664,9 +577,6 @@ class JoinColumnNameMap(ColumnNameMap):
  *,
  allow_non_exists: bool = False,
  return_first: bool = False,
- # JoinColumnNameMap will never be called with using columns, so these parameters are not used.
- is_qualified: bool = False,
- source_qualifiers: list[str] | None = None,
  ) -> str | None:
  snowpark_column_name_in_left = (
  self.left_column_mapping.get_snowpark_column_name_from_spark_column_name(
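All of the hidden-column plumbing (tracking the USING-join column that was dropped from the visible result) is removed here, and qualified lookups now go straight through get_snowpark_column_names_from_spark_column_names. For context, a minimal sketch of roughly the client-side pattern that machinery existed to resolve, assuming a Spark Connect session (the endpoint is illustrative):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()  # endpoint is an assumption

    left = spark.createDataFrame([(1, "a")], ["id", "l_val"])
    right = spark.createDataFrame([(1, "b")], ["id", "r_val"])

    joined = left.join(right, on="id")            # the join key appears only once in the result
    # A reference qualified by the right-hand frame must still resolve to the
    # single visible join column.
    joined.select(right["id"], "l_val", "r_val").show()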
snowflake/snowpark_connect/config.py

@@ -8,7 +8,7 @@ import re
  import sys
  from collections import defaultdict
  from copy import copy, deepcopy
- from typing import Any
+ from typing import Any, Dict

  import jpype
  import pyspark.sql.connect.proto.base_pb2 as proto_base
@@ -17,6 +17,7 @@ from tzlocal import get_localzone_name
  from snowflake import snowpark
  from snowflake.snowpark._internal.analyzer.analyzer_utils import (
  quote_name_without_upper_casing,
+ unquote_if_quoted,
  )
  from snowflake.snowpark.exceptions import SnowparkSQLException
  from snowflake.snowpark.types import TimestampTimeZone, TimestampType
@@ -257,6 +258,7 @@ SESSION_CONFIG_KEY_WHITELIST = {
  "spark.sql.execution.pythonUDTF.arrow.enabled",
  "spark.sql.tvf.allowMultipleTableArguments.enabled",
  "snowpark.connect.sql.passthrough",
+ "snowpark.connect.cte.optimization_enabled",
  "snowpark.connect.iceberg.external_volume",
  "snowpark.connect.sql.identifiers.auto-uppercase",
  "snowpark.connect.udtf.compatibility_mode",
@@ -281,6 +283,7 @@ class SessionConfig:
  default_session_config = {
  "snowpark.connect.sql.identifiers.auto-uppercase": "all_except_columns",
  "snowpark.connect.sql.passthrough": "false",
+ "snowpark.connect.cte.optimization_enabled": "false",
  "snowpark.connect.udtf.compatibility_mode": "false",
  "snowpark.connect.views.duplicate_column_names_handling_mode": "rename",
  "spark.sql.execution.pythonUDTF.arrow.enabled": "false",
@@ -290,6 +293,7 @@ class SessionConfig:

  def __init__(self) -> None:
  self.config = deepcopy(self.default_session_config)
+ self.table_metadata: Dict[str, Dict[str, Any]] = {}

  def __getitem__(self, item: str) -> str:
  return self.get(item)
@@ -569,6 +573,12 @@ def set_snowflake_parameters(
  snowpark_session.use_database(db)
  case (prev, curr) if prev != curr:
  snowpark_session.use_schema(prev)
+ case "snowpark.connect.cte.optimization_enabled":
+ # Set CTE optimization on the snowpark session
+ cte_enabled = str_to_bool(value)
+ snowpark_session.cte_optimization_enabled = cte_enabled
+ logger.info(f"Updated snowpark session CTE optimization: {cte_enabled}")
+
  case _:
  pass

@@ -578,6 +588,16 @@ def get_boolean_session_config_param(name: str) -> bool:
  return str_to_bool(session_config[name])


+ def get_string_session_config_param(name: str) -> str:
+ session_config = sessions_config[get_session_id()]
+ return str(session_config[name])
+
+
+ def get_cte_optimization_enabled() -> bool:
+ """Get the CTE optimization configuration setting."""
+ return get_boolean_session_config_param("snowpark.connect.cte.optimization_enabled")
+
+
  def auto_uppercase_column_identifiers() -> bool:
  session_config = sessions_config[get_session_id()]
  return session_config[
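The new snowpark.connect.cte.optimization_enabled key is whitelisted, defaults to "false", and is wired through set_snowflake_parameters to the underlying Snowpark session, so it can presumably be toggled from the client like any other whitelisted session config. A minimal sketch, assuming an active Spark Connect session (the endpoint is illustrative):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()  # endpoint is an assumption

    # Key name comes from this diff; per the hunk above, the server maps the value
    # onto snowpark_session.cte_optimization_enabled via str_to_bool(value).
    spark.conf.set("snowpark.connect.cte.optimization_enabled", "true")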
@@ -613,3 +633,81 @@ def get_timestamp_type():
  # shouldn't happen since `spark.sql.timestampType` is always defined, and `spark.conf.unset` sets it to default (TIMESTAMP_LTZ)
  timestamp_type = TimestampType(TimestampTimeZone.LTZ)
  return timestamp_type
+
+
+ def record_table_metadata(
+ table_identifier: str,
+ table_type: str,
+ data_source: str,
+ supports_column_rename: bool = True,
+ ) -> None:
+ """
+ Record metadata about a table for Spark compatibility checks.
+
+ Args:
+ table_identifier: Full table identifier (catalog.database.table)
+ table_type: "v1" or "v2"
+ data_source: Source format (parquet, csv, iceberg, etc.)
+ supports_column_rename: Whether the table supports RENAME COLUMN
+ """
+ session_id = get_session_id()
+ session_config = sessions_config[session_id]
+
+ # Normalize table identifier for consistent lookup
+ # Use the full catalog.database.table identifier to avoid conflicts
+ normalized_identifier = table_identifier.upper().strip('"')
+
+ session_config.table_metadata[normalized_identifier] = {
+ "table_type": table_type,
+ "data_source": data_source,
+ "supports_column_rename": supports_column_rename,
+ }
+
+
+ def get_table_metadata(table_identifier: str) -> Dict[str, Any] | None:
+ """
+ Get stored metadata for a table.
+
+ Args:
+ table_identifier: Full table identifier (catalog.database.table)
+
+ Returns:
+ Table metadata dict or None if not found
+ """
+ session_id = get_session_id()
+ session_config = sessions_config[session_id]
+
+ normalized_identifier = unquote_if_quoted(table_identifier).upper()
+
+ return session_config.table_metadata.get(normalized_identifier)
+
+
+ def check_table_supports_operation(table_identifier: str, operation: str) -> bool:
+ """
+ Check if a table supports a given operation based on metadata and config.
+
+ Args:
+ table_identifier: Full table identifier (catalog.database.table)
+ operation: Operation to check (e.g., "rename_column")
+
+ Returns:
+ True if operation is supported, False if should be blocked
+ """
+ table_metadata = get_table_metadata(table_identifier)
+
+ if not table_metadata:
+ return True
+
+ session_id = get_session_id()
+ session_config = sessions_config[session_id]
+ enable_extensions = str_to_bool(
+ session_config.get("enable_snowflake_extension_behavior", "false")
+ )
+
+ if enable_extensions:
+ return True
+
+ if operation == "rename_column":
+ return table_metadata.get("supports_column_rename", True)
+
+ return True
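The three new helpers form a small per-session registry: record_table_metadata stores a normalized entry on SessionConfig.table_metadata, get_table_metadata looks it up, and check_table_supports_operation consults it (plus the enable_snowflake_extension_behavior escape hatch) before allowing operations such as "rename_column". A hedged sketch of how they compose on the server side, using an illustrative table identifier:

    # Inside the server, where a session context already exists.
    record_table_metadata(
        "MY_CATALOG.MY_DB.EVENTS",      # hypothetical identifier
        table_type="v1",
        data_source="parquet",
        supports_column_rename=False,
    )

    # e.g. while mapping ALTER TABLE ... RENAME COLUMN:
    if not check_table_supports_operation("MY_CATALOG.MY_DB.EVENTS", "rename_column"):
        # surface a Spark-style "RENAME COLUMN is not supported" error instead of
        # forwarding the statement to Snowflake
        ...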
snowflake/snowpark_connect/dataframe_container.py

@@ -8,7 +8,6 @@ from typing import TYPE_CHECKING, Callable

  from snowflake import snowpark
  from snowflake.snowpark.types import StructField, StructType
- from snowflake.snowpark_connect.hidden_column import HiddenColumn

  if TYPE_CHECKING:
  from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
@@ -61,7 +60,6 @@ class DataFrameContainer:
  column_metadata: dict | None = None,
  column_qualifiers: list[list[str]] | None = None,
  parent_column_name_map: ColumnNameMap | None = None,
- hidden_columns: set[HiddenColumn] | None = None,
  table_name: str | None = None,
  alias: str | None = None,
  cached_schema_getter: Callable[[], StructType] | None = None,
@@ -78,7 +76,6 @@ class DataFrameContainer:
  column_metadata: Optional metadata dictionary
  column_qualifiers: Optional column qualifiers
  parent_column_name_map: Optional parent column name map
- hidden_columns: Optional list of hidden column names
  table_name: Optional table name
  alias: Optional alias
  cached_schema_getter: Optional function to get cached schema
@@ -101,7 +98,6 @@ class DataFrameContainer:
  column_metadata,
  column_qualifiers,
  parent_column_name_map,
- hidden_columns,
  )

  # Determine the schema getter to use
@@ -226,7 +222,6 @@ class DataFrameContainer:
  column_metadata: dict | None = None,
  column_qualifiers: list[list[str]] | None = None,
  parent_column_name_map: ColumnNameMap | None = None,
- hidden_columns: set[HiddenColumn] | None = None,
  ) -> ColumnNameMap:
  """Create a ColumnNameMap with the provided configuration."""
  from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
@@ -237,7 +232,6 @@ class DataFrameContainer:
  column_metadata=column_metadata,
  column_qualifiers=column_qualifiers,
  parent_column_name_map=parent_column_name_map,
- hidden_columns=hidden_columns,
  )

  @staticmethod
snowflake/snowpark_connect/execute_plan/map_execution_command.py

@@ -1,18 +1,16 @@
  #
  # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
  #
- import re
- import uuid
  from collections import Counter

  import pyspark.sql.connect.proto.base_pb2 as proto_base
  import pyspark.sql.connect.proto.relations_pb2 as relation_proto

- from snowflake.snowpark import DataFrame, Session
- from snowflake.snowpark.exceptions import SnowparkSQLException
+ from snowflake.snowpark.types import StructField, StructType
  from snowflake.snowpark_connect.column_name_handler import ColumnNames
  from snowflake.snowpark_connect.config import global_config, sessions_config
  from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
+ from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
  from snowflake.snowpark_connect.execute_plan.utils import pandas_to_arrow_batches_bytes
  from snowflake.snowpark_connect.expression import map_udf
  from snowflake.snowpark_connect.relation import map_udtf
@@ -28,10 +26,7 @@ from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
  from snowflake.snowpark_connect.utils.telemetry import (
  SnowparkConnectNotImplementedError,
  )
-
- _INTERNAL_VIEW_PREFIX = "__SC_RENAMED_V_"
-
- _CREATE_VIEW_PATTERN = re.compile(r"create\s+or\s+replace\s+view", re.IGNORECASE)
+ from snowflake.snowpark_connect.utils.temporary_view_cache import register_temp_view


  def _create_column_rename_map(
@@ -98,32 +93,35 @@ def map_execution_command(
  input_df = input_df_container.dataframe
  column_map = input_df_container.column_map

+ # TODO: Remove code handling deduplication. When view are not materialized we don't have to care about it.
  session_config = sessions_config[get_session_id()]
  duplicate_column_names_handling_mode = session_config[
  "snowpark.connect.views.duplicate_column_names_handling_mode"
  ]

+ spark_columns = input_df_container.column_map.get_spark_columns()
  # rename columns to match spark names
  if duplicate_column_names_handling_mode == "rename":
  # deduplicate column names by appending _DEDUP_1, _DEDUP_2, etc.
- input_df = input_df.rename(
- _create_column_rename_map(column_map.columns, True)
- )
+ rename_map = _create_column_rename_map(column_map.columns, True)
+ snowpark_columns = list(rename_map.values())
+ input_df = input_df.rename(rename_map)
  elif duplicate_column_names_handling_mode == "drop":
  # Drop duplicate column names by removing all but the first occurrence.
  duplicated_columns, remaining_columns = _find_duplicated_columns(
  column_map.columns
  )
+ rename_map = _create_column_rename_map(remaining_columns, False)
+ snowpark_columns = list(rename_map.values())
+ spark_columns = list(dict.fromkeys(spark_columns))
  if len(duplicated_columns) > 0:
  input_df = input_df.drop(*duplicated_columns)
- input_df = input_df.rename(
- _create_column_rename_map(remaining_columns, False)
- )
+ input_df = input_df.rename(rename_map)
  else:
  # rename columns without deduplication
- input_df = input_df.rename(
- _create_column_rename_map(column_map.columns, False)
- )
+ rename_map = _create_column_rename_map(column_map.columns, True)
+ snowpark_columns = list(rename_map.values())
+ input_df = input_df.rename(rename_map)

  if req.is_global:
  view_name = [global_config.spark_sql_globalTempDatabase, req.name]
@@ -132,18 +130,23 @@ def map_execution_command(
  view_name = [
  spark_to_sf_single_id_with_unquoting(part) for part in view_name
  ]
+ joined_view_name = ".".join(view_name)

- if req.replace:
- try:
- input_df.create_or_replace_temp_view(view_name)
- except SnowparkSQLException as exc:
- if _is_error_caused_by_view_referencing_itself(exc):
- # This error is caused by statement with self reference like `CREATE VIEW A AS SELECT X FROM A`.
- _create_chained_view(input_df, view_name)
- else:
- raise
- else:
- input_df.create_temp_view(view_name)
+ schema = StructType(
+ [
+ StructField(field.name, field.datatype)
+ for field in input_df.schema.fields
+ ]
+ )
+ input_df_container = DataFrameContainer.create_with_column_mapping(
+ dataframe=input_df,
+ spark_column_names=spark_columns,
+ snowpark_column_names=snowpark_columns,
+ parent_column_name_map=input_df_container.column_map,
+ cached_schema_getter=lambda: schema,
+ )
+
+ register_temp_view(joined_view_name, input_df_container, req.replace)
  case "write_stream_operation_start":
  match request.plan.command.write_stream_operation_start.format:
  case "console":
@@ -207,43 +210,3 @@ def map_execution_command(
  raise SnowparkConnectNotImplementedError(
  f"Command type {other} not implemented"
  )
-
-
- def _generate_random_builtin_view_name() -> str:
- return _INTERNAL_VIEW_PREFIX + str(uuid.uuid4()).replace("-", "")
-
-
- def _is_error_caused_by_view_referencing_itself(exc: Exception) -> bool:
- return "view definition refers to view being defined" in str(exc).lower()
-
-
- def _create_chained_view(input_df: DataFrame, view_name: str) -> None:
- """
- In order to create a view, which references itself, Spark would here take the previous
- definition of A and paste it in place of `FROM A`. Snowflake would fail in such case, so
- as a workaround, we create a chain of internal views instead. This function:
- 1. Renames previous definition of A to some internal name (instead of deleting).
- 2. Adjusts the DDL of a new statement to reference the name of a renmaed internal view, instead of itself.
- """
-
- session = Session.get_active_session()
-
- view_name = ".".join(view_name)
-
- tmp_name = _generate_random_builtin_view_name()
- old_name_replacement = _generate_random_builtin_view_name()
-
- input_df.create_or_replace_temp_view(tmp_name)
-
- session.sql(f"ALTER VIEW {view_name} RENAME TO {old_name_replacement}").collect()
-
- ddl: str = session.sql(f"SELECT GET_DDL('VIEW', '{tmp_name}')").collect()[0][0]
-
- ddl = ddl.replace(view_name, old_name_replacement)
-
- # GET_DDL result doesn't contain `TEMPORARY`, it's likely a bug.
- ddl = _CREATE_VIEW_PATTERN.sub("create or replace temp view", ddl)
-
- session.sql(ddl).collect()
-
- session.sql(f"ALTER VIEW {tmp_name} RENAME TO {view_name}").collect()
snowflake/snowpark_connect/expression/map_expression.py

@@ -38,6 +38,7 @@ from snowflake.snowpark_connect.utils.context import (
  get_current_lambda_params,
  is_function_argument_being_resolved,
  is_lambda_being_resolved,
+ not_resolving_fun_args,
  )
  from snowflake.snowpark_connect.utils.telemetry import (
  SnowparkConnectNotImplementedError,
@@ -136,7 +137,10 @@ def map_expression(
  case "expression_string":
  return map_sql_expr(exp, column_mapping, typer)
  case "extension":
- return map_extension.map_extension(exp, column_mapping, typer)
+ # Extensions can be passed as function args, and we need to reset the context here.
+ # Matters only for resolving alias expressions in the extensions rel.
+ with not_resolving_fun_args():
+ return map_extension.map_extension(exp, column_mapping, typer)
  case "lambda_function":
  lambda_name, lambda_body = map_single_column_expression(
  exp.lambda_function.function, column_mapping, typer
@@ -278,13 +282,24 @@ def map_expression(
  current_params = get_current_lambda_params()

  if current_params and var_name not in current_params:
- raise AnalysisException(
- f"Reference to non-lambda variable '{var_name}' within lambda function. "
- f"Lambda functions can only access their own parameters. "
- f"Available lambda parameters are: {current_params}. "
- f"If '{var_name}' is an outer scope lambda variable from a nested lambda, "
- f"that is an unsupported feature in Snowflake SQL."
+ outer_col_name = (
+ column_mapping.get_snowpark_column_name_from_spark_column_name(
+ var_name, allow_non_exists=True
+ )
  )
+ if outer_col_name:
+ col = snowpark_fn.col(outer_col_name)
+ return ["namedlambdavariable()"], TypedColumn(
+ col, lambda: typer.type(col)
+ )
+ else:
+ raise AnalysisException(
+ f"Cannot resolve variable '{var_name}' within lambda function. "
+ f"Lambda functions can access their own parameters and parent dataframe columns. "
+ f"Current lambda parameters: {current_params}. "
+ f"If '{var_name}' is an outer scope lambda variable from a nested lambda, "
+ f"that is an unsupported feature in Snowflake SQL."
+ )

  col = snowpark_fn.Column(
  UnresolvedAttribute(exp.unresolved_named_lambda_variable.name_parts[0])
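The replaced branch used to reject any lambda variable that was not one of the lambda's own parameters; it now first tries to resolve the name against the enclosing DataFrame's column map and only raises if that fails (outer-scope variables of nested lambdas remain unsupported). A hedged example of the query shape this relaxation appears to target, assuming a Spark Connect session (endpoint illustrative):

    from pyspark.sql import SparkSession
    from pyspark.sql import functions as F

    spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()  # endpoint is an assumption

    df = spark.createDataFrame([(1, [1, 2, 3])], ["base", "xs"])
    # The lambda body refers to "base", a column of df rather than a lambda parameter.
    df.select(F.transform("xs", lambda x: x + F.col("base")).alias("shifted")).show()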
snowflake/snowpark_connect/expression/map_sql_expression.py

@@ -11,9 +11,10 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
  import pyspark.sql.connect.proto.types_pb2 as types_proto
  from google.protobuf.any_pb2 import Any
  from pyspark.errors.exceptions.base import AnalysisException
+ from pyspark.sql.connect import functions as pyspark_functions

  import snowflake.snowpark_connect.proto.snowflake_expression_ext_pb2 as snowflake_proto
- from snowflake import snowpark
+ from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
  from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
  from snowflake.snowpark_connect.config import global_config
  from snowflake.snowpark_connect.typed_column import TypedColumn
@@ -89,6 +90,11 @@ def as_scala_seq(input):
  )


+ @cache
+ def _scala_some():
+ return jpype.JClass("scala.Some")
+
+
  def map_sql_expr(
  exp: expressions_proto.Expression,
  column_mapping: ColumnNameMap,
@@ -223,9 +229,6 @@ def apply_filter_clause(


  def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Expression:
- from snowflake.snowpark_connect.expression.map_expression import (
- map_single_column_expression,
- )
  from snowflake.snowpark_connect.relation.map_sql import map_logical_plan_relation

  class_name = str(exp.getClass().getSimpleName())
@@ -308,22 +311,23 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
  )
  proto = expressions_proto.Expression(extension=any_proto)
  case "ExpressionWithUnresolvedIdentifier":
- plan_id = None
- identifierExpr = map_logical_plan_expression(exp.identifierExpr())
- session = snowpark.Session.get_active_session()
- m = ColumnNameMap([], [], None)
- expr = map_single_column_expression(
- identifierExpr, m, ExpressionTyper.dummy_typer(session)
+ from snowflake.snowpark_connect.relation.map_sql import (
+ get_relation_identifier_name,
  )
- value = session.range(1).select(expr[1].col).collect()[0][0]

- proto = expressions_proto.Expression(
- unresolved_attribute=expressions_proto.Expression.UnresolvedAttribute(
- unparsed_identifier=str(value),
- plan_id=plan_id,
- ),
- )
- # TODO: support identifier referencing unresolved function
+ value = unquote_if_quoted(get_relation_identifier_name(exp))
+ if getattr(pyspark_functions, value.lower(), None) is not None:
+ unresolved_function = exp.exprBuilder().apply(
+ _scala_some()(value).toList()
+ )
+ proto = map_logical_plan_expression(unresolved_function)
+ else:
+ proto = expressions_proto.Expression(
+ unresolved_attribute=expressions_proto.Expression.UnresolvedAttribute(
+ unparsed_identifier=str(value),
+ plan_id=None,
+ ),
+ )
  case "InSubquery":
  rel_proto = map_logical_plan_relation(exp.query().plan())
  any_proto = Any()
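The rewritten ExpressionWithUnresolvedIdentifier branch maps Spark's IDENTIFIER(...) clause without round-tripping through a dummy query: the identifier is resolved to a plain name, and if that name matches an entry in pyspark.sql.connect.functions it is rebuilt as an unresolved function call, otherwise it becomes an unresolved attribute. Hedged examples of the SQL shapes involved, assuming a Spark Connect session (view and column names are illustrative):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()  # endpoint is an assumption

    spark.range(3).createOrReplaceTempView("numbers")
    spark.sql("SELECT IDENTIFIER('id') FROM numbers").show()   # plain column reference
    spark.sql("SELECT IDENTIFIER('abs')(-5)").show()           # name matches a built-in, treated as a function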