snowpark-connect 0.26.0-py3-none-any.whl → 0.28.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (42)
  1. snowflake/snowpark_connect/column_name_handler.py +3 -93
  2. snowflake/snowpark_connect/config.py +99 -4
  3. snowflake/snowpark_connect/dataframe_container.py +0 -6
  4. snowflake/snowpark_connect/expression/map_expression.py +31 -1
  5. snowflake/snowpark_connect/expression/map_sql_expression.py +22 -18
  6. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +22 -26
  7. snowflake/snowpark_connect/expression/map_unresolved_function.py +28 -10
  8. snowflake/snowpark_connect/expression/map_unresolved_star.py +2 -3
  9. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  10. snowflake/snowpark_connect/relation/map_extension.py +7 -1
  11. snowflake/snowpark_connect/relation/map_join.py +62 -258
  12. snowflake/snowpark_connect/relation/map_map_partitions.py +36 -77
  13. snowflake/snowpark_connect/relation/map_relation.py +8 -2
  14. snowflake/snowpark_connect/relation/map_show_string.py +2 -0
  15. snowflake/snowpark_connect/relation/map_sql.py +413 -15
  16. snowflake/snowpark_connect/relation/write/map_write.py +195 -114
  17. snowflake/snowpark_connect/resources_initializer.py +20 -5
  18. snowflake/snowpark_connect/server.py +20 -18
  19. snowflake/snowpark_connect/utils/artifacts.py +4 -5
  20. snowflake/snowpark_connect/utils/concurrent.py +4 -0
  21. snowflake/snowpark_connect/utils/context.py +41 -1
  22. snowflake/snowpark_connect/utils/describe_query_cache.py +57 -51
  23. snowflake/snowpark_connect/utils/identifiers.py +120 -0
  24. snowflake/snowpark_connect/utils/io_utils.py +21 -1
  25. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +86 -2
  26. snowflake/snowpark_connect/utils/scala_udf_utils.py +34 -43
  27. snowflake/snowpark_connect/utils/session.py +16 -26
  28. snowflake/snowpark_connect/utils/telemetry.py +53 -0
  29. snowflake/snowpark_connect/utils/udf_utils.py +66 -103
  30. snowflake/snowpark_connect/utils/udtf_helper.py +17 -7
  31. snowflake/snowpark_connect/version.py +2 -3
  32. {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/METADATA +2 -2
  33. {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/RECORD +41 -42
  34. snowflake/snowpark_connect/hidden_column.py +0 -39
  35. {snowpark_connect-0.26.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-connect +0 -0
  36. {snowpark_connect-0.26.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-session +0 -0
  37. {snowpark_connect-0.26.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-submit +0 -0
  38. {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/WHEEL +0 -0
  39. {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/LICENSE-binary +0 -0
  40. {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/LICENSE.txt +0 -0
  41. {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/NOTICE-binary +0 -0
  42. {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/relation/map_show_string.py

@@ -12,6 +12,7 @@ from snowflake.snowpark._internal.analyzer import analyzer_utils
 from snowflake.snowpark.functions import col
 from snowflake.snowpark.types import DateType, StringType, StructField, StructType
 from snowflake.snowpark_connect.column_name_handler import set_schema_getter
+from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.map_relation import map_relation
 
@@ -33,6 +34,7 @@ def map_show_string(rel: relation_proto.Relation) -> pandas.DataFrame:
         truncate=rel.show_string.truncate,
         vertical=rel.show_string.vertical,
         _spark_column_names=input_df_container.column_map.get_spark_columns(),
+        _spark_session_tz=global_config.spark_sql_session_timeZone,
     )
     return pandas.DataFrame({"show_string": [show_string]})
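The new `_spark_session_tz` argument indicates that `show()` rendering now honors the Spark session time zone setting. A minimal client-side sketch, assuming a generic Spark Connect session pointed at a snowpark-connect server; the endpoint URL and query below are illustrative, not taken from this diff:

```python
from pyspark.sql import SparkSession

# Hypothetical endpoint; substitute your own snowpark-connect server address.
spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

# show() should now format timestamps using this session-level time zone.
spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
spark.sql("SELECT TIMESTAMP'2024-01-01 00:00:00+00:00' AS ts").show()
```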
 
snowflake/snowpark_connect/relation/map_sql.py

@@ -4,7 +4,7 @@
 
 import re
 from collections.abc import MutableMapping, MutableSequence
-from contextlib import contextmanager
+from contextlib import contextmanager, suppress
 from contextvars import ContextVar
 from functools import reduce
 
@@ -30,10 +30,13 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
 )
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 from snowflake.snowpark._internal.utils import is_sql_select_statement, quote_name
+from snowflake.snowpark.functions import when_matched, when_not_matched
 from snowflake.snowpark_connect.config import (
     auto_uppercase_non_column_identifiers,
+    check_table_supports_operation,
     get_boolean_session_config_param,
     global_config,
+    record_table_metadata,
     set_config_param,
     unset_config_param,
 )
@@ -56,8 +59,10 @@ from snowflake.snowpark_connect.utils.context import (
     _accessing_temp_object,
     gen_sql_plan_id,
     get_session_id,
+    get_sql_plan,
     push_evaluating_sql_scope,
     push_sql_scope,
+    set_plan_id_map,
     set_sql_args,
     set_sql_plan_name,
 )
@@ -67,6 +72,7 @@ from snowflake.snowpark_connect.utils.telemetry import (
     telemetry,
 )
 
+from .. import column_name_handler
 from ..expression.map_sql_expression import (
     _window_specs,
     as_java_list,
@@ -202,6 +208,9 @@ def _rename_columns(
 def _create_table_as_select(logical_plan, mode: str) -> None:
     # TODO: for as select create tables we'd map multi layer identifier here
     name = get_relation_identifier_name(logical_plan.name())
+    full_table_identifier = get_relation_identifier_name(
+        logical_plan.name(), is_multi_part=True
+    )
     comment = logical_plan.tableSpec().comment()
 
     container = execute_logical_plan(logical_plan.query())
@@ -222,6 +231,15 @@ def _create_table_as_select(logical_plan, mode: str) -> None:
         mode=mode,
     )
 
+    # Record table metadata for CREATE TABLE AS SELECT
+    # These are typically considered v2 tables and support RENAME COLUMN
+    record_table_metadata(
+        table_identifier=full_table_identifier,
+        table_type="v2",
+        data_source="default",
+        supports_column_rename=True,
+    )
+
 
 def _spark_field_to_sql(field: jpype.JObject, is_column: bool) -> str:
     # Column names will be uppercased according to "snowpark.connect.sql.identifiers.auto-uppercase",
@@ -299,6 +317,65 @@ def _remove_column_data_type(node):
     return node
 
 
+def _get_condition_from_action(action, column_mapping, typer):
+    condition = None
+    if action.condition().isDefined():
+        (_, condition_typed_col,) = map_single_column_expression(
+            map_logical_plan_expression(action.condition().get()),
+            column_mapping,
+            typer,
+        )
+        condition = condition_typed_col.col
+    return condition
+
+
+def _get_assignments_from_action(
+    action,
+    column_mapping_source,
+    column_mapping_target,
+    typer_source,
+    typer_target,
+):
+    assignments = dict()
+    if (
+        action.getClass().getSimpleName() == "InsertAction"
+        or action.getClass().getSimpleName() == "UpdateAction"
+    ):
+        incoming_assignments = as_java_list(action.assignments())
+        for assignment in incoming_assignments:
+            (_, key_typ_col) = map_single_column_expression(
+                map_logical_plan_expression(assignment.key()),
+                column_mapping=column_mapping_target,
+                typer=typer_target,
+            )
+            key_name = typer_target.df.select(key_typ_col.col).columns[0]
+
+            (_, val_typ_col) = map_single_column_expression(
+                map_logical_plan_expression(assignment.value()),
+                column_mapping=column_mapping_source,
+                typer=typer_source,
+            )
+
+            assignments[key_name] = val_typ_col.col
+    elif (
+        action.getClass().getSimpleName() == "InsertStarAction"
+        or action.getClass().getSimpleName() == "UpdateStarAction"
+    ):
+        if len(column_mapping_source.columns) != len(column_mapping_target.columns):
+            raise ValueError(
+                "source and target must have the same number of columns for InsertStarAction or UpdateStarAction"
+            )
+        for i, col in enumerate(column_mapping_target.columns):
+            if assignments.get(col.snowpark_name) is not None:
+                raise SnowparkConnectNotImplementedError(
+                    "UpdateStarAction or InsertStarAction is not supported with duplicate columns."
+                )
+            assignments[col.snowpark_name] = snowpark_fn.col(
+                column_mapping_source.columns[i].snowpark_name
+            )
+    return assignments
+
+
 def map_sql_to_pandas_df(
     sql_string: str,
     named_args: MutableMapping[str, expressions_proto.Expression.Literal],
@@ -420,6 +497,9 @@ def map_sql_to_pandas_df(
                 )
 
                 name = get_relation_identifier_name(logical_plan.name())
+                full_table_identifier = get_relation_identifier_name(
+                    logical_plan.name(), is_multi_part=True
+                )
                 columns = ", ".join(
                     _spark_field_to_sql(f, True)
                     for f in logical_plan.tableSchema().fields()
@@ -430,10 +510,48 @@ def map_sql_to_pandas_df(
                     if comment_opt.isDefined()
                     else ""
                 )
+
+                # Extract data source for metadata tracking
+                data_source = "default"
+
+                with suppress(Exception):
+                    # Get data source from tableSpec.provider() (for USING clause)
+                    if hasattr(logical_plan, "tableSpec"):
+                        table_spec = logical_plan.tableSpec()
+                        if hasattr(table_spec, "provider"):
+                            provider_opt = table_spec.provider()
+                            if provider_opt.isDefined():
+                                data_source = str(provider_opt.get()).lower()
+                            else:
+                                # Fall back to checking properties for FORMAT
+                                table_properties = table_spec.properties()
+                                if not table_properties.isEmpty():
+                                    for prop in table_properties.get():
+                                        if str(prop.key()) == "FORMAT":
+                                            data_source = str(prop.value()).lower()
+                                            break
+
                 # NOTE: We are intentionally ignoring any FORMAT=... parameters here.
                 session.sql(
                     f"CREATE {replace_table} TABLE {if_not_exists}{name} ({columns}) {comment}"
                 ).collect()
+
+                # Record table metadata for Spark compatibility
+                # Tables created with explicit schema are considered v1 tables
+                # v1 tables with certain data sources don't support RENAME COLUMN in OSS Spark
+                supports_rename = data_source not in (
+                    "parquet",
+                    "csv",
+                    "json",
+                    "orc",
+                    "avro",
+                )
+                record_table_metadata(
+                    table_identifier=full_table_identifier,
+                    table_type="v1",
+                    data_source=data_source,
+                    supports_column_rename=supports_rename,
+                )
             case "CreateTableAsSelect":
                 mode = "ignore" if logical_plan.ignoreIfExists() else "errorifexists"
                 _create_table_as_select(logical_plan, mode=mode)
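The provider detection above feeds the new table-metadata tracking: the USING clause (or a FORMAT table property) determines the recorded data source, while CTAS tables go through `_create_table_as_select` and are recorded as v2. A hedged sketch of the SQL shapes involved, with made-up table names and the same `spark` session as the earlier example:

```python
# Illustrative only; table and column names are not from this diff.
spark.sql("CREATE TABLE sales_v1 (id INT, amount DOUBLE) USING parquet")
# -> provider "parquet" recorded for sales_v1 as a v1 table

spark.sql("CREATE TABLE sales_ctas AS SELECT * FROM sales_v1")
# -> handled by _create_table_as_select and recorded as a v2 table
```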
@@ -727,15 +845,147 @@ def map_sql_to_pandas_df(
                     f"INSERT {overwrite_str} INTO {name} {cols_str} {final_query}",
                 ).collect()
             case "MergeIntoTable":
-                raise UnsupportedOperationException(
-                    "[UNSUPPORTED_SQL_EXTENSION] The MERGE INTO command failed.\n"
-                    + "Reason: This command is a platform-specific SQL extension and is not part of the standard Apache Spark specification that this interface uses."
+                source_df_container = map_relation(
+                    map_logical_plan_relation(logical_plan.sourceTable())
                 )
+                source_df = source_df_container.dataframe
+                plan_id = gen_sql_plan_id()
+                target_df_container = map_relation(
+                    map_logical_plan_relation(logical_plan.targetTable(), plan_id)
+                )
+                target_df = target_df_container.dataframe
+
+                if (
+                    logical_plan.targetTable().getClass().getSimpleName()
+                    == "UnresolvedRelation"
+                ):
+                    target_table_name = _spark_to_snowflake(
+                        logical_plan.targetTable().multipartIdentifier()
+                    )
+                else:
+                    target_table_name = _spark_to_snowflake(
+                        logical_plan.targetTable().child().multipartIdentifier()
+                    )
+
+                target_table = session.table(target_table_name)
+                target_table_columns = target_table.columns
+                target_df_spark_names = []
+                for target_table_col, target_df_col in zip(
+                    target_table_columns, target_df_container.column_map.columns
+                ):
+                    target_df = target_df.with_column_renamed(
+                        target_df_col.snowpark_name,
+                        target_table_col,
+                    )
+                    target_df_spark_names.append(target_df_col.spark_name)
+                target_df_container = DataFrameContainer.create_with_column_mapping(
+                    dataframe=target_df,
+                    spark_column_names=target_df_spark_names,
+                    snowpark_column_names=target_table_columns,
+                )
+
+                set_plan_id_map(plan_id, target_df_container)
+
+                joined_df_before_condition: snowpark.DataFrame = source_df.join(
+                    target_df
+                )
+
+                column_mapping_for_conditions = column_name_handler.JoinColumnNameMap(
+                    source_df_container.column_map,
+                    target_df_container.column_map,
+                )
+                typer_for_expressions = ExpressionTyper(joined_df_before_condition)
+
+                (_, merge_condition_typed_col,) = map_single_column_expression(
+                    map_logical_plan_expression(logical_plan.mergeCondition()),
+                    column_mapping=column_mapping_for_conditions,
+                    typer=typer_for_expressions,
+                )
+
+                clauses = []
+
+                for matched_action in as_java_list(logical_plan.matchedActions()):
+                    condition = _get_condition_from_action(
+                        matched_action,
+                        column_mapping_for_conditions,
+                        typer_for_expressions,
+                    )
+                    if matched_action.getClass().getSimpleName() == "DeleteAction":
+                        clauses.append(when_matched(condition).delete())
+                    elif (
+                        matched_action.getClass().getSimpleName() == "UpdateAction"
+                        or matched_action.getClass().getSimpleName()
+                        == "UpdateStarAction"
+                    ):
+                        assignments = _get_assignments_from_action(
+                            matched_action,
+                            source_df_container.column_map,
+                            target_df_container.column_map,
+                            ExpressionTyper(source_df),
+                            ExpressionTyper(target_df),
+                        )
+                        clauses.append(when_matched(condition).update(assignments))
+
+                for not_matched_action in as_java_list(
+                    logical_plan.notMatchedActions()
+                ):
+                    condition = _get_condition_from_action(
+                        not_matched_action,
+                        column_mapping_for_conditions,
+                        typer_for_expressions,
+                    )
+                    if (
+                        not_matched_action.getClass().getSimpleName() == "InsertAction"
+                        or not_matched_action.getClass().getSimpleName()
+                        == "InsertStarAction"
+                    ):
+                        assignments = _get_assignments_from_action(
+                            not_matched_action,
+                            source_df_container.column_map,
+                            target_df_container.column_map,
+                            ExpressionTyper(source_df),
+                            ExpressionTyper(target_df),
+                        )
+                        clauses.append(when_not_matched(condition).insert(assignments))
+
+                if not as_java_list(logical_plan.notMatchedBySourceActions()).isEmpty():
+                    raise SnowparkConnectNotImplementedError(
+                        "Snowflake does not support 'not matched by source' actions in MERGE statements."
+                    )
+
+                target_table.merge(source_df, merge_condition_typed_col.col, clauses)
             case "DeleteFromTable":
-                raise UnsupportedOperationException(
-                    "[UNSUPPORTED_SQL_EXTENSION] The DELETE FROM command failed.\n"
-                    + "Reason: This command is a platform-specific SQL extension and is not part of the standard Apache Spark specification that this interface uses."
+                df_container = map_relation(
+                    map_logical_plan_relation(logical_plan.table())
+                )
+                name = get_relation_identifier_name(logical_plan.table(), True)
+                table = session.table(name)
+                table_columns = table.columns
+                df = df_container.dataframe
+                spark_names = []
+                for table_col, df_col in zip(
+                    table_columns, df_container.column_map.columns
+                ):
+                    df = df.with_column_renamed(
+                        df_col.snowpark_name,
+                        table_col,
+                    )
+                    spark_names.append(df_col.spark_name)
+                df_container = DataFrameContainer.create_with_column_mapping(
+                    dataframe=df,
+                    spark_column_names=spark_names,
+                    snowpark_column_names=table_columns,
+                )
+                df = df_container.dataframe
+                (
+                    condition_column_name,
+                    condition_typed_col,
+                ) = map_single_column_expression(
+                    map_logical_plan_expression(logical_plan.condition()),
+                    df_container.column_map,
+                    ExpressionTyper(df),
                 )
+                table.delete(condition_typed_col.col)
             case "UpdateTable":
                 # Databricks/Delta-specific extension not supported by SAS.
                 # Provide an actionable, clear error.
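With these changes, MERGE INTO and DELETE FROM statements are translated into Snowpark merge/delete calls instead of being rejected as unsupported extensions; note that "not matched by source" clauses still raise an error. A hedged usage sketch with hypothetical table and column names, using the same `spark` session:

```python
# Illustrative only; tables and columns are made up.
spark.sql("""
    MERGE INTO target t
    USING updates s
    ON t.id = s.id
    WHEN MATCHED AND s.op = 'delete' THEN DELETE
    WHEN MATCHED THEN UPDATE SET t.amount = s.amount
    WHEN NOT MATCHED THEN INSERT (id, amount) VALUES (s.id, s.amount)
""")

spark.sql("DELETE FROM target WHERE amount < 0")
```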
@@ -744,7 +994,20 @@ def map_sql_to_pandas_df(
                     + "Reason: This command is a platform-specific SQL extension and is not part of the standard Apache Spark specification that this interface uses."
                 )
             case "RenameColumn":
-                table_name = get_relation_identifier_name(logical_plan.table(), True)
+                full_table_identifier = get_relation_identifier_name(
+                    logical_plan.table(), True
+                )
+
+                # Check Spark compatibility for RENAME COLUMN operation
+                if not check_table_supports_operation(
+                    full_table_identifier, "rename_column"
+                ):
+                    raise AnalysisException(
+                        f"ALTER TABLE RENAME COLUMN is not supported for table '{full_table_identifier}'. "
+                        f"This table was created as a v1 table with a data source that doesn't support column renaming. "
+                        f"To enable this operation, set 'enable_snowflake_extension_behavior' to 'true'."
+                    )
+
                 column_obj = logical_plan.column()
                 old_column_name = ".".join(
                     spark_to_sf_single_id(str(part), is_column=True)
@@ -754,7 +1017,7 @@ def map_sql_to_pandas_df(
                 case_insensitive_name = next(
                     (
                         f.name
-                        for f in session.table(table_name).schema.fields
+                        for f in session.table(full_table_identifier).schema.fields
                         if f.name.lower() == old_column_name.lower()
                     ),
                     None,
@@ -766,7 +1029,7 @@ def map_sql_to_pandas_df(
                 )
 
                 # Pass through to Snowflake
-                snowflake_sql = f"ALTER TABLE {table_name} RENAME COLUMN {old_column_name} TO {new_column_name}"
+                snowflake_sql = f"ALTER TABLE {full_table_identifier} RENAME COLUMN {old_column_name} TO {new_column_name}"
                 session.sql(snowflake_sql).collect()
             case "RenameTable":
                 name = get_relation_identifier_name(logical_plan.child(), True)
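The new `check_table_supports_operation` gate mirrors OSS Spark behavior, where v1 file-source tables reject column renames. A hedged sketch of the two paths, reusing the hypothetical tables from the earlier CREATE TABLE sketch:

```python
# Illustrative only; expected outcomes follow the recorded table metadata.
spark.sql("ALTER TABLE sales_v1 RENAME COLUMN amount TO amount_usd")
# -> expected to raise AnalysisException (v1 table backed by parquet)

spark.sql("ALTER TABLE sales_ctas RENAME COLUMN amount TO amount_usd")
# -> expected to succeed (CTAS tables are recorded as v2)
```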
@@ -997,6 +1260,90 @@ def get_sql_passthrough() -> bool:
     return get_boolean_session_config_param("snowpark.connect.sql.passthrough")
 
 
+def change_default_to_public(name: str) -> str:
+    """
+    Change the namespace to PUBLIC when given name is DEFAULT
+    :param name: Given namespace
+    :return: if name is DEFAULT return PUBLIC otherwise name
+    """
+    if name.startswith('"'):
+        if name.upper() == '"DEFAULT"':
+            return name.replace("DEFAULT", "PUBLIC")
+    elif name.upper() == "DEFAULT":
+        return "PUBLIC"
+    return name
+
+
+def _preprocess_identifier_calls(sql_query: str) -> str:
+    """
+    Pre-process SQL query to resolve IDENTIFIER() calls before Spark parsing.
+
+    Transforms: IDENTIFIER('abs')(c2) -> abs(c2)
+    Transforms: IDENTIFIER('COAL' || 'ESCE')(NULL, 1) -> COALESCE(NULL, 1)
+
+    This preserves all function arguments in their original positions, eliminating
+    the need to reconstruct them at the expression level.
+    """
+    import re
+
+    # Pattern to match IDENTIFIER(...) followed by optional function call arguments
+    # This captures both the identifier expression and any trailing arguments
+    # Note: We need to be careful about whitespace preservation
+    identifier_pattern = r"IDENTIFIER\s*\(\s*([^)]+)\s*\)(\s*)(\([^)]*\))?"
+
+    def resolve_identifier_match(match):
+        identifier_expr_str = match.group(1).strip()
+        whitespace = match.group(2) if match.group(2) else ""
+        function_args = match.group(3) if match.group(3) else ""
+
+        try:
+            # Handle string concatenation FIRST: IDENTIFIER('COAL' || 'ESCE')
+            # (Must check this before simple strings since it also starts/ends with quotes)
+            if "||" in identifier_expr_str:
+                # Parse basic string concatenation with proper quote handling
+                parts = []
+                split_parts = identifier_expr_str.split("||")
+                for part in split_parts:
+                    part = part.strip()
+                    if part.startswith("'") and part.endswith("'"):
+                        unquoted = part[1:-1]  # Remove quotes from each part
+                        parts.append(unquoted)
+                    else:
+                        # Non-string parts - return original for safety
+                        return match.group(0)
+                resolved_name = "".join(parts)  # Concatenate the unquoted parts
+
+            # Handle simple string literals: IDENTIFIER('abs')
+            elif identifier_expr_str.startswith("'") and identifier_expr_str.endswith(
+                "'"
+            ):
+                resolved_name = identifier_expr_str[1:-1]  # Remove quotes
+
+            else:
+                # Complex expressions not supported yet - return original
+                return match.group(0)
+
+            # Return resolved function call with preserved arguments and whitespace
+            if function_args:
+                # Function call case: IDENTIFIER('abs')(c1) -> abs(c1)
+                result = f"{resolved_name}{function_args}"
+            else:
+                # Column reference case: IDENTIFIER('c1') FROM -> c1 FROM (preserve whitespace)
+                result = f"{resolved_name}{whitespace}"
+            return result
+
+        except Exception:
+            # Return original to avoid breaking the query
+            return match.group(0)
+
+    # Apply the transformation
+    processed_query = re.sub(
+        identifier_pattern, resolve_identifier_match, sql_query, flags=re.IGNORECASE
+    )
+
+    return processed_query
+
+
 def map_sql(
     rel: relation_proto.Relation,
 ) -> DataFrameContainer:
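Per its docstring, the pre-processing step rewrites IDENTIFIER() calls textually before the query reaches the Spark parser. The expected behavior, restated from the docstring examples (inputs here are illustrative):

```python
# Simple string literal: resolved in place, arguments preserved.
_preprocess_identifier_calls("SELECT IDENTIFIER('abs')(c2) FROM t")
# -> "SELECT abs(c2) FROM t"

# String concatenation: parts are unquoted and joined.
_preprocess_identifier_calls("SELECT IDENTIFIER('COAL' || 'ESCE')(NULL, 1)")
# -> "SELECT COALESCE(NULL, 1)"

# Anything more complex is left unchanged to avoid breaking the query.
_preprocess_identifier_calls("SELECT IDENTIFIER(:tbl_param) FROM t")
# -> unchanged
```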
@@ -1008,7 +1355,6 @@ def map_sql(
     In passthough mode as True, SAS calls session.sql() and not calling Spark Parser.
     This is to mitigate any issue not covered by spark logical plan to protobuf conversion.
     """
-
     snowpark_connect_sql_passthrough = get_sql_passthrough()
 
     if not snowpark_connect_sql_passthrough:
@@ -1353,6 +1699,7 @@ def map_logical_plan_relation(
                 left_input=map_logical_plan_relation(children[0]),
                 right_input=map_logical_plan_relation(children[1]),
                 set_op_type=relation_proto.SetOperation.SET_OP_TYPE_UNION,
+                is_all=True,
                 by_name=rel.byName(),
                 allow_missing_columns=rel.allowMissingCol(),
             )
@@ -1701,7 +2048,50 @@ def map_logical_plan_relation(
             _window_specs.get()[key] = window_spec
             proto = map_logical_plan_relation(rel.child())
         case "Generate":
-            input_relation = map_logical_plan_relation(rel.child())
+            # Generate creates a nested Project relation (see lines 1785-1790) without
+            # setting its plan_id field. When this Project is later processed by map_project
+            # (map_column_ops.py), it uses rel.common.plan_id which defaults to 0 for unset
+            # protobuf fields. This means all columns from the Generate operation (both exploded
+            # columns and passthrough columns) will have plan_id=0 in their names.
+            #
+            # If Generate's child is a SubqueryAlias whose inner relation was processed
+            # with a non-zero plan_id, there will be a mismatch between:
+            # - The columns referenced in the Project (expecting plan_id from SubqueryAlias's child)
+            # - The actual column names created by Generate's Project (using plan_id=0)
+
+            # Therefore, when Generate has a SubqueryAlias child, we explicitly process the inner
+            # relation with plan_id=0 to match what Generate's Project will use. This only applies when
+            # the immediate child of Generate is a SubqueryAlias and preserves existing registrations (like CTEs),
+            # so it won't affect other patterns.
+
+            child_class = str(rel.child().getClass().getSimpleName())
+
+            if child_class == "SubqueryAlias":
+                alias = str(rel.child().alias())
+
+                # Check if this alias was already registered during initial SQL parsing
+                existing_plan_id = get_sql_plan(alias)
+
+                if existing_plan_id is not None:
+                    # Use the existing plan_id to maintain consistency with prior registration
+                    used_plan_id = existing_plan_id
+                else:
+                    # Use plan_id=0 to match what the nested Project will use (protobuf default)
+                    used_plan_id = 0
+                    set_sql_plan_name(alias, used_plan_id)
+
+                # Process the inner child with the determined plan_id
+                inner_child = map_logical_plan_relation(
+                    rel.child().child(), plan_id=used_plan_id
+                )
+                input_relation = relation_proto.Relation(
+                    subquery_alias=relation_proto.SubqueryAlias(
+                        input=inner_child,
+                        alias=alias,
+                    )
+                )
+            else:
+                input_relation = map_logical_plan_relation(rel.child())
             generator_output_list = as_java_list(rel.generatorOutput())
             generator_output_list_expressions = [
                 map_logical_plan_expression(e) for e in generator_output_list
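This plan_id reconciliation matters for queries that explode columns out of an aliased subquery. A hedged illustration of the query shape that exercises the Generate-over-SubqueryAlias path (table and column names are hypothetical):

```python
# Illustrative only; assumes the same `spark` session as earlier examples.
spark.sql("""
    SELECT t.id, item
    FROM (SELECT id, items FROM orders) AS t
    LATERAL VIEW explode(t.items) AS item
""").show()
```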
@@ -1784,8 +2174,11 @@ def map_logical_plan_relation(
 
 
 def get_relation_identifier_name(name_obj, is_multi_part: bool = False) -> str:
-    if name_obj.getClass().getSimpleName() == "PlanWithUnresolvedIdentifier":
-        # IDENTIFIER(<table_name>)
+    if name_obj.getClass().getSimpleName() in (
+        "PlanWithUnresolvedIdentifier",
+        "ExpressionWithUnresolvedIdentifier",
+    ):
+        # IDENTIFIER(<table_name>), or IDENTIFIER(<method name>)
         expr_proto = map_logical_plan_expression(name_obj.identifierExpr())
         session = snowpark.Session.get_active_session()
         m = ColumnNameMap([], [], None)
@@ -1797,7 +2190,12 @@ def get_relation_identifier_name(name_obj, is_multi_part: bool = False) -> str:
         )
     else:
         if is_multi_part:
-            name = _spark_to_snowflake(name_obj.multipartIdentifier())
+            try:
+                # Try multipartIdentifier first for full catalog.database.table
+                name = _spark_to_snowflake(name_obj.multipartIdentifier())
+            except AttributeError:
+                # Fallback to nameParts if multipartIdentifier not available
+                name = _spark_to_snowflake(name_obj.nameParts())
         else:
             name = _spark_to_snowflake(name_obj.nameParts())