snowpark-connect 0.20.2__py3-none-any.whl → 0.22.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of snowpark-connect might be problematic.

Files changed (84)
  1. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +3 -2
  2. snowflake/snowpark_connect/column_name_handler.py +6 -65
  3. snowflake/snowpark_connect/config.py +47 -17
  4. snowflake/snowpark_connect/dataframe_container.py +242 -0
  5. snowflake/snowpark_connect/error/error_utils.py +25 -0
  6. snowflake/snowpark_connect/execute_plan/map_execution_command.py +13 -23
  7. snowflake/snowpark_connect/execute_plan/map_execution_root.py +9 -5
  8. snowflake/snowpark_connect/expression/map_extension.py +2 -1
  9. snowflake/snowpark_connect/expression/map_udf.py +4 -4
  10. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +8 -7
  11. snowflake/snowpark_connect/expression/map_unresolved_function.py +481 -170
  12. snowflake/snowpark_connect/expression/map_unresolved_star.py +8 -8
  13. snowflake/snowpark_connect/expression/map_update_fields.py +1 -1
  14. snowflake/snowpark_connect/expression/typer.py +6 -6
  15. snowflake/snowpark_connect/proto/control_pb2.py +17 -16
  16. snowflake/snowpark_connect/proto/control_pb2.pyi +17 -17
  17. snowflake/snowpark_connect/proto/control_pb2_grpc.py +12 -63
  18. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +15 -14
  19. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +19 -14
  20. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
  21. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +27 -26
  22. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +74 -68
  23. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
  24. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +5 -5
  25. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +25 -17
  26. snowflake/snowpark_connect/relation/map_aggregate.py +170 -61
  27. snowflake/snowpark_connect/relation/map_catalog.py +2 -2
  28. snowflake/snowpark_connect/relation/map_column_ops.py +227 -145
  29. snowflake/snowpark_connect/relation/map_crosstab.py +25 -6
  30. snowflake/snowpark_connect/relation/map_extension.py +81 -56
  31. snowflake/snowpark_connect/relation/map_join.py +72 -63
  32. snowflake/snowpark_connect/relation/map_local_relation.py +35 -20
  33. snowflake/snowpark_connect/relation/map_map_partitions.py +24 -17
  34. snowflake/snowpark_connect/relation/map_relation.py +22 -16
  35. snowflake/snowpark_connect/relation/map_row_ops.py +232 -146
  36. snowflake/snowpark_connect/relation/map_sample_by.py +15 -8
  37. snowflake/snowpark_connect/relation/map_show_string.py +42 -5
  38. snowflake/snowpark_connect/relation/map_sql.py +141 -237
  39. snowflake/snowpark_connect/relation/map_stats.py +88 -39
  40. snowflake/snowpark_connect/relation/map_subquery_alias.py +13 -14
  41. snowflake/snowpark_connect/relation/map_udtf.py +10 -13
  42. snowflake/snowpark_connect/relation/read/map_read.py +8 -3
  43. snowflake/snowpark_connect/relation/read/map_read_csv.py +7 -7
  44. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +7 -7
  45. snowflake/snowpark_connect/relation/read/map_read_json.py +19 -8
  46. snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -7
  47. snowflake/snowpark_connect/relation/read/map_read_socket.py +7 -3
  48. snowflake/snowpark_connect/relation/read/map_read_table.py +25 -16
  49. snowflake/snowpark_connect/relation/read/map_read_text.py +7 -7
  50. snowflake/snowpark_connect/relation/read/reader_config.py +1 -0
  51. snowflake/snowpark_connect/relation/utils.py +11 -5
  52. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +15 -12
  53. snowflake/snowpark_connect/relation/write/map_write.py +259 -56
  54. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +3 -2
  55. snowflake/snowpark_connect/server.py +43 -4
  56. snowflake/snowpark_connect/type_mapping.py +6 -23
  57. snowflake/snowpark_connect/utils/cache.py +27 -22
  58. snowflake/snowpark_connect/utils/context.py +33 -17
  59. snowflake/snowpark_connect/utils/describe_query_cache.py +2 -9
  60. snowflake/snowpark_connect/utils/{attribute_handling.py → identifiers.py} +47 -0
  61. snowflake/snowpark_connect/utils/session.py +41 -38
  62. snowflake/snowpark_connect/utils/telemetry.py +214 -63
  63. snowflake/snowpark_connect/utils/udxf_import_utils.py +14 -0
  64. snowflake/snowpark_connect/version.py +1 -1
  65. snowflake/snowpark_decoder/__init__.py +0 -0
  66. snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.py +36 -0
  67. snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.pyi +156 -0
  68. snowflake/snowpark_decoder/dp_session.py +111 -0
  69. snowflake/snowpark_decoder/spark_decoder.py +76 -0
  70. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/METADATA +6 -4
  71. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/RECORD +83 -69
  72. snowpark_connect-0.22.1.dist-info/licenses/LICENSE-binary +568 -0
  73. snowpark_connect-0.22.1.dist-info/licenses/NOTICE-binary +1533 -0
  74. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/top_level.txt +1 -0
  75. spark/__init__.py +0 -0
  76. spark/connect/__init__.py +0 -0
  77. spark/connect/envelope_pb2.py +31 -0
  78. spark/connect/envelope_pb2.pyi +46 -0
  79. snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
  80. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-connect +0 -0
  81. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-session +0 -0
  82. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-submit +0 -0
  83. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/WHEEL +0 -0
  84. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/licenses/LICENSE.txt +0 -0
@@ -14,7 +14,10 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
 import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 import sqlglot
 from google.protobuf.any_pb2 import Any
-from pyspark.errors.exceptions.base import AnalysisException
+from pyspark.errors.exceptions.base import (
+    AnalysisException,
+    UnsupportedOperationException,
+)
 from sqlglot.expressions import ColumnDef, DataType, FileFormatProperty, Identifier

 import snowflake.snowpark.functions as snowpark_fn
@@ -26,15 +29,15 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     unquote_if_quoted,
 )
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.snowpark._internal.utils import is_sql_select_statement
-from snowflake.snowpark.functions import when_matched, when_not_matched
+from snowflake.snowpark._internal.utils import is_sql_select_statement, quote_name
 from snowflake.snowpark_connect.config import (
-    auto_uppercase_ddl,
+    auto_uppercase_non_column_identifiers,
     get_boolean_session_config_param,
     global_config,
     set_config_param,
     unset_config_param,
 )
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.expression.map_expression import (
     ColumnNameMap,
     map_single_column_expression,
@@ -55,16 +58,15 @@ from snowflake.snowpark_connect.utils.context import (
     get_session_id,
     push_evaluating_sql_scope,
     push_sql_scope,
-    set_plan_id_map,
     set_sql_args,
     set_sql_plan_name,
 )
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
+    telemetry,
 )

-from .. import column_name_handler
 from ..expression.map_sql_expression import (
     _window_specs,
     as_java_list,
@@ -72,6 +74,7 @@ from ..expression.map_sql_expression import (
     map_logical_plan_expression,
     sql_parser,
 )
+from ..utils.identifiers import spark_to_sf_single_id

 _ctes = ContextVar[dict[str, relation_proto.Relation]]("_ctes", default={})

@@ -159,38 +162,33 @@ def parse_pos_args(
     return dict(zip(sorted(positions), pos_args))


-def execute_logical_plan(logical_plan) -> snowpark.DataFrame:
+def execute_logical_plan(logical_plan) -> DataFrameContainer:
     proto = map_logical_plan_relation(logical_plan)
+    telemetry.report_parsed_sql_plan(proto)
     with push_evaluating_sql_scope():
         return map_relation(proto)


-def _spark_to_snowflake_single_id(name: str) -> str:
-    name = quote_name_without_upper_casing(name)
-    return name.upper() if auto_uppercase_ddl() else name
-
-
 def _spark_to_snowflake(multipart_id: jpype.JObject) -> str:
     return ".".join(
-        _spark_to_snowflake_single_id(str(part)) for part in as_java_list(multipart_id)
+        spark_to_sf_single_id(str(part)) for part in as_java_list(multipart_id)
     )


 def _rename_columns(
-    df: snowpark.DataFrame, user_specified_columns
+    df: snowpark.DataFrame, user_specified_columns, column_map: ColumnNameMap
 ) -> snowpark.DataFrame:
     user_columns = [str(col._1()) for col in as_java_list(user_specified_columns)]

     if user_columns:
         columns = zip(df.columns, user_columns)
     else:
-        columns = df._column_map.snowpark_to_spark_map().items()
+        columns = column_map.snowpark_to_spark_map().items()

     for orig_column, user_column in columns:
         df = df.with_column_renamed(
-            orig_column, _spark_to_snowflake_single_id(user_column)
+            orig_column, spark_to_sf_single_id(user_column, is_column=True)
         )
-
     return df

@@ -199,11 +197,12 @@ def _create_table_as_select(logical_plan, mode: str) -> None:
     name = get_relation_identifier_name(logical_plan.name())
     comment = logical_plan.tableSpec().comment()

-    df = execute_logical_plan(logical_plan.query())
-    columns = df._column_map.snowpark_to_spark_map().items()
+    container = execute_logical_plan(logical_plan.query())
+    df = container.dataframe
+    columns = container.column_map.snowpark_to_spark_map().items()
     for orig_column, user_column in columns:
         df = df.with_column_renamed(
-            orig_column, _spark_to_snowflake_single_id(user_column)
+            orig_column, spark_to_sf_single_id(user_column, is_column=True)
         )

     # TODO escaping should be handled by snowpark. remove when SNOW-2210271 is done
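
Note: the call sites above (in what appears to be snowflake/snowpark_connect/relation/map_sql.py) now receive a `DataFrameContainer` from `execute_logical_plan`/`map_relation` instead of a Snowpark DataFrame carrying a private `_column_map`. The container itself is defined in the new `snowflake/snowpark_connect/dataframe_container.py` (not expanded in this diff); the sketch below only illustrates the shape implied by the calls in this file (`.dataframe`, `.column_map`, `create_with_column_mapping`) and is not the actual implementation.

```python
# Illustrative sketch only; the real class lives in
# snowflake/snowpark_connect/dataframe_container.py and wraps the package's
# ColumnNameMap. A plain dict stands in for the column map here.
from dataclasses import dataclass
from typing import Mapping

import snowflake.snowpark as snowpark


@dataclass(frozen=True)
class DataFrameContainer:
    dataframe: snowpark.DataFrame   # underlying Snowpark DataFrame
    column_map: Mapping[str, str]   # Spark name -> Snowpark name (stand-in type)

    @classmethod
    def create_with_column_mapping(
        cls, dataframe, spark_column_names, snowpark_column_names
    ):
        # Keyword signature mirrors the calls in this diff; the mapping type
        # is simplified for illustration.
        return cls(dataframe, dict(zip(spark_column_names, snowpark_column_names)))
```
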
@@ -218,11 +217,11 @@ def _create_table_as_select(logical_plan, mode: str) -> None:


 def _spark_field_to_sql(field: jpype.JObject, is_column: bool) -> str:
-    # Column names will be uppercased according to "snowpark.connect.auto-uppercase.ddl",
+    # Column names will be uppercased according to "snowpark.connect.sql.identifiers.auto-uppercase",
     # and struct fields will be left as is. This should allow users to use the same names
     # in spark and Snowflake in most cases.
     if is_column:
-        name = _spark_to_snowflake_single_id(str(field.name()))
+        name = spark_to_sf_single_id(str(field.name()), is_column=True)
     else:
         name = quote_name_without_upper_casing(str(field.name()))
     data_type_str = _spark_datatype_to_sql(field.dataType())
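
The comment now references the `snowpark.connect.sql.identifiers.auto-uppercase` session parameter (replacing the old `snowpark.connect.auto-uppercase.ddl` name). A hypothetical client-side snippet, assuming a Spark Connect session `spark` backed by snowpark-connect, for keeping Spark-style case-preserving identifiers:

```python
# Hypothetical usage; the parameter name comes from the comment above, and the
# docstring below says uppercasing can be disabled by setting it to False.
spark.conf.set("snowpark.connect.sql.identifiers.auto-uppercase", "false")
spark.sql("CREATE TABLE my_table (id INT)")  # identifier casing is preserved
```
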
@@ -257,10 +256,12 @@ def _normalize_identifiers(node):
     The identifiers need to be uppercased to match Snowflake's behaviour. Users can disable this by setting
     the `snowpark.connect.auto_uppercase_ddl` config to False.
     """
-    if isinstance(node, Identifier):
-        fixed = node.this.upper() if auto_uppercase_ddl() else node.this
-        return Identifier(this=fixed, quoted=True)
-    return node
+    if not isinstance(node, Identifier):
+        return node
+    elif auto_uppercase_non_column_identifiers():
+        return Identifier(this=node.this.upper(), quoted=True)
+    else:
+        return Identifier(this=node.this, quoted=True)


 def _remove_file_format_property(node):
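
`_normalize_identifiers` is a sqlglot transform callback: it now always re-emits identifiers as quoted, and only uppercases them when `auto_uppercase_non_column_identifiers()` is enabled. A minimal standalone sketch of the same sqlglot pattern, with the uppercasing hard-coded instead of read from the config:

```python
import sqlglot
from sqlglot.expressions import Identifier


def normalize(node):
    # Re-emit every identifier quoted; unconditional uppercasing stands in for
    # the auto_uppercase_non_column_identifiers() check used above.
    if isinstance(node, Identifier):
        return Identifier(this=node.this.upper(), quoted=True)
    return node


tree = sqlglot.parse_one("CREATE TABLE my_db.events (id INT)")
print(tree.transform(normalize).sql(dialect="snowflake"))
# roughly: CREATE TABLE "MY_DB"."EVENTS" ("ID" INT)
```
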
@@ -328,7 +329,10 @@ def map_sql_to_pandas_df(
             # Build Snowflake SQL from logical plan attributes
             for col in as_java_list(columns_to_add):
                 # Follow the same pattern as AlterColumn for column name extraction
-                col_name = ".".join(str(part) for part in as_java_list(col.name()))
+                col_name = ".".join(
+                    spark_to_sf_single_id(part, is_column=True)
+                    for part in as_java_list(col.name())
+                )
                 col_type = _spark_datatype_to_sql(col.dataType())
                 snowflake_sql = (
                     f"ALTER TABLE {table_name} ADD COLUMN {col_name} {col_type}"
@@ -341,9 +345,22 @@ def map_sql_to_pandas_df(

             # Extract actual column name
             column_name = ".".join(
-                str(part) for part in as_java_list(column_obj.name())
+                spark_to_sf_single_id(part, is_column=True)
+                for part in as_java_list(column_obj.name())
             )

+            if not global_config.spark_sql_caseSensitive:
+                case_insensitive_name = next(
+                    (
+                        f.name
+                        for f in session.table(table_name).schema.fields
+                        if f.name.lower() == column_name.lower()
+                    ),
+                    None,
+                )
+                if case_insensitive_name:
+                    column_name = case_insensitive_name
+
             # Build ALTER COLUMN command from logical plan attributes
             alter_parts = []

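
When `spark.sql.caseSensitive` is off, the new code resolves the user-supplied column name against the table's actual field names before emitting DDL (the same pattern reappears in the `RenameColumn` branch further down). The lookup reduces to the following, shown here as a standalone hypothetical helper:

```python
def resolve_field_name(requested: str, field_names: list[str]) -> str:
    # First schema field whose lowercased name matches wins; otherwise keep
    # the requested spelling unchanged.
    return next(
        (name for name in field_names if name.lower() == requested.lower()),
        requested,
    )


assert resolve_field_name("ID", ["id", "payload"]) == "id"
assert resolve_field_name("missing", ["id", "payload"]) == "missing"
```
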
@@ -381,9 +398,7 @@ def map_sql_to_pandas_df(
             if_not_exists = "IF NOT EXISTS " if logical_plan.ifNotExists() else ""
             session.sql(f"CREATE SCHEMA {if_not_exists}{name}").collect()
             if previous_name is not None:
-                session.sql(
-                    f"USE SCHEMA {_spark_to_snowflake_single_id(previous_name)}"
-                ).collect()
+                session.sql(f"USE SCHEMA {quote_name(previous_name)}").collect()
             else:
                 # TODO: Unset the schema
                 pass
@@ -443,7 +458,8 @@ def map_sql_to_pandas_df(
             else:
                 object_name: str = as_java_list(logical_plan.child().nameParts())[0]
             _accessing_temp_object.set(False)
-            df = execute_logical_plan(logical_plan.query())
+            df_container = execute_logical_plan(logical_plan.query())
+            df = df_container.dataframe
             if _accessing_temp_object.get():
                 raise AnalysisException(
                     f"[INVALID_TEMP_OBJ_REFERENCE] Cannot create the persistent object `{CURRENT_CATALOG_NAME}`.`{current_schema}`.`{object_name}` "
@@ -454,7 +470,9 @@ def map_sql_to_pandas_df(
             name = get_relation_identifier_name(logical_plan.child())
             comment = logical_plan.comment()

-            df = _rename_columns(df, logical_plan.userSpecifiedColumns())
+            df = _rename_columns(
+                df, logical_plan.userSpecifiedColumns(), df_container.column_map
+            )

             # TODO: Support logical_plan.replace() == False
             df.create_or_replace_view(
@@ -464,7 +482,8 @@ def map_sql_to_pandas_df(
                 else None,
             )
         case "CreateViewCommand":
-            df = execute_logical_plan(logical_plan.plan())
+            df_container = execute_logical_plan(logical_plan.plan())
+            df = df_container.dataframe
             tmp_views = _get_current_temp_objects()
             tmp_views.add(
                 (
@@ -475,7 +494,7 @@ def map_sql_to_pandas_df(
                 )
             )

             name = str(logical_plan.name().identifier())
-            name = _spark_to_snowflake_single_id(name)
+            name = spark_to_sf_single_id(name)
             if isinstance(
                 logical_plan.viewType(),
                 jpype.JClass(
@@ -490,7 +509,9 @@ def map_sql_to_pandas_df(
                 else None
             )

-            df = _rename_columns(df, logical_plan.userSpecifiedColumns())
+            df = _rename_columns(
+                df, logical_plan.userSpecifiedColumns(), df_container.column_map
+            )

             if logical_plan.replace():
                 df.create_or_replace_temp_view(
@@ -504,6 +525,7 @@ def map_sql_to_pandas_df(
                 )
         case "DescribeColumn":
             name = get_relation_identifier_name(logical_plan.column())
+            # todo double check if this is correct
             rows = session.sql(f"DESCRIBE TABLE {name}").collect()
         case "DescribeNamespace":
             name = get_relation_identifier_name(logical_plan.namespace(), True)
@@ -521,11 +543,12 @@ def map_sql_to_pandas_df(
             # This gets the schema without executing the query (similar to Spark's DESCRIBE QUERY)
             # Get the inner query plan and convert it to SQL
             inner_query_plan = logical_plan.plan()
-            df = execute_logical_plan(inner_query_plan)
+            df_container = execute_logical_plan(inner_query_plan)
+            df = df_container.dataframe
             schema = df.schema

             # Get original Spark column names using the column map from the original DataFrame
-            spark_columns = df._column_map.get_spark_columns()
+            spark_columns = df_container.column_map.get_spark_columns()
             data = []
             for i, field in enumerate(schema.fields):
                 # Use original Spark column name from column map
@@ -615,9 +638,9 @@ def map_sql_to_pandas_df(
                 "UnresolvedHaving",
                 "Distinct",
             ):
-                expr = execute_logical_plan(logical_plan.logicalPlan()).queries[
-                    "queries"
-                ][0]
+                expr = execute_logical_plan(
+                    logical_plan.logicalPlan()
+                ).dataframe.queries["queries"][0]
                 final_sql = f"EXPLAIN USING TEXT {expr}"
                 rows = session.sql(final_sql).collect()
             elif (
@@ -626,7 +649,7 @@ def map_sql_to_pandas_df(
             ):
                 expr = execute_logical_plan(
                     logical_plan.logicalPlan().query()
-                ).queries["queries"][0]
+                ).dataframe.queries["queries"][0]
                 final_sql = f"EXPLAIN USING TEXT {expr}"
                 rows = session.sql(final_sql).collect()
             else:
@@ -635,7 +658,8 @@ def map_sql_to_pandas_df(
                     f"{logical_plan_name} is not supported yet with EXPLAIN."
                 )
         case "InsertIntoStatement":
-            df = execute_logical_plan(logical_plan.query())
+            df_container = execute_logical_plan(logical_plan.query())
+            df = df_container.dataframe
             queries = df.queries["queries"]
             if len(queries) != 1:
                 raise SnowparkConnectNotImplementedError(
@@ -645,7 +669,7 @@ def map_sql_to_pandas_df(
             name = get_relation_identifier_name(logical_plan.table(), True)

             user_columns = [
-                _spark_to_snowflake_single_id(str(col))
+                spark_to_sf_single_id(str(col), is_column=True)
                 for col in as_java_list(logical_plan.userSpecifiedCols())
             ]
             overwrite_str = "OVERWRITE" if logical_plan.overwrite() else ""
@@ -690,192 +714,43 @@ def map_sql_to_pandas_df(
                 f"INSERT {overwrite_str} INTO {name} {cols_str} {final_query}",
             ).collect()
         case "MergeIntoTable":
-
-            def _get_condition_from_action(action, column_mapping, typer):
-                condition = None
-                if action.condition().isDefined():
-                    (_, condition_typed_col,) = map_single_column_expression(
-                        map_logical_plan_expression(
-                            matched_action.condition().get()
-                        ),
-                        column_mapping,
-                        typer,
-                    )
-                    condition = condition_typed_col.col
-                return condition
-
-            def _get_assignments_from_action(
-                action,
-                column_mapping_source,
-                column_mapping_target,
-                typer_source,
-                typer_target,
-            ):
-                assignments = dict()
-                if (
-                    action.getClass().getSimpleName() == "InsertAction"
-                    or action.getClass().getSimpleName() == "UpdateAction"
-                ):
-                    incoming_assignments = as_java_list(action.assignments())
-                    for assignment in incoming_assignments:
-                        (key_name, _) = map_single_column_expression(
-                            map_logical_plan_expression(assignment.key()),
-                            column_mapping=column_mapping_target,
-                            typer=typer_source,
-                        )
-
-                        (_, val_typ_col) = map_single_column_expression(
-                            map_logical_plan_expression(assignment.value()),
-                            column_mapping=column_mapping_source,
-                            typer=typer_target,
-                        )
-
-                        assignments[key_name] = val_typ_col.col
-                elif (
-                    action.getClass().getSimpleName() == "InsertStarAction"
-                    or action.getClass().getSimpleName() == "UpdateStarAction"
-                ):
-                    if len(column_mapping_source.columns) != len(
-                        column_mapping_target.columns
-                    ):
-                        raise ValueError(
-                            "source and target must have the same number of columns for InsertStarAction or UpdateStarAction"
-                        )
-                    for i, col in enumerate(column_mapping_target.columns):
-                        if assignments.get(col.snowpark_name) is not None:
-                            raise SnowparkConnectNotImplementedError(
-                                "UpdateStarAction or InsertStarAction is not supported with duplicate columns."
-                            )
-                        assignments[col.snowpark_name] = snowpark_fn.col(
-                            column_mapping_source.columns[i].snowpark_name
-                        )
-                return assignments
-
-            source_df = map_relation(
-                map_logical_plan_relation(logical_plan.sourceTable())
-            )
-
-            plan_id = gen_sql_plan_id()
-
-            target_df = map_relation(
-                map_logical_plan_relation(logical_plan.targetTable(), plan_id)
-            )
-            for col in target_df._column_map.columns:
-                target_df = target_df.with_column_renamed(
-                    col.snowpark_name, _spark_to_snowflake_single_id(col.spark_name)
-                )
-            target_df = column_name_handler.with_column_map(
-                target_df, target_df.columns, target_df.columns
-            )
-
-            set_plan_id_map(plan_id, target_df)
-
-            joined_df_before_condition: snowpark.DataFrame = source_df.join(
-                target_df
-            )
-
-            column_mapping_for_conditions = column_name_handler.JoinColumnNameMap(
-                source_df,
-                target_df,
-            )
-            typer_for_expressions = ExpressionTyper(joined_df_before_condition)
-
-            (_, merge_condition_typed_col,) = map_single_column_expression(
-                map_logical_plan_expression(logical_plan.mergeCondition()),
-                column_mapping=column_mapping_for_conditions,
-                typer=typer_for_expressions,
-            )
-
-            clauses = []
-
-            for matched_action in as_java_list(logical_plan.matchedActions()):
-                condition = _get_condition_from_action(
-                    matched_action,
-                    column_mapping_for_conditions,
-                    typer_for_expressions,
-                )
-                if matched_action.getClass().getSimpleName() == "DeleteAction":
-                    clauses.append(when_matched(condition).delete())
-                elif (
-                    matched_action.getClass().getSimpleName() == "UpdateAction"
-                    or matched_action.getClass().getSimpleName()
-                    == "UpdateStarAction"
-                ):
-                    assignments = _get_assignments_from_action(
-                        matched_action,
-                        source_df._column_map,
-                        target_df._column_map,
-                        ExpressionTyper(source_df),
-                        ExpressionTyper(target_df),
-                    )
-                    clauses.append(when_matched(condition).update(assignments))
-
-            for not_matched_action in as_java_list(
-                logical_plan.notMatchedActions()
-            ):
-                condition = _get_condition_from_action(
-                    not_matched_action,
-                    column_mapping_for_conditions,
-                    typer_for_expressions,
-                )
-                if (
-                    not_matched_action.getClass().getSimpleName() == "InsertAction"
-                    or not_matched_action.getClass().getSimpleName()
-                    == "InsertStarAction"
-                ):
-                    assignments = _get_assignments_from_action(
-                        not_matched_action,
-                        source_df._column_map,
-                        target_df._column_map,
-                        ExpressionTyper(source_df),
-                        ExpressionTyper(target_df),
-                    )
-                    clauses.append(when_not_matched(condition).insert(assignments))
-
-            if not as_java_list(logical_plan.notMatchedBySourceActions()).isEmpty():
-                raise SnowparkConnectNotImplementedError(
-                    "Snowflake does not support 'not matched by source' actions in MERGE statements."
-                )
-
-            if (
-                logical_plan.targetTable().getClass().getSimpleName()
-                == "UnresolvedRelation"
-            ):
-                target_table_name = _spark_to_snowflake(
-                    logical_plan.targetTable().multipartIdentifier()
-                )
-            else:
-                target_table_name = _spark_to_snowflake(
-                    logical_plan.targetTable().child().multipartIdentifier()
-                )
-            session.table(target_table_name).merge(
-                source_df, merge_condition_typed_col.col, clauses
+            raise UnsupportedOperationException(
+                "[UNSUPPORTED_SQL_EXTENSION] The MERGE INTO command failed.\n"
+                + "Reason: This command is a platform-specific SQL extension and is not part of the standard Apache Spark specification that this interface uses."
             )
         case "DeleteFromTable":
-            df = map_relation(map_logical_plan_relation(logical_plan.table()))
-            for col in df._column_map.columns:
-                df = df.with_column_renamed(
-                    col.snowpark_name, _spark_to_snowflake_single_id(col.spark_name)
-                )
-            df = column_name_handler.with_column_map(df, df.columns, df.columns)
-
-            name = get_relation_identifier_name(logical_plan.table(), True)
-            (
-                condition_column_name,
-                condition_typed_col,
-            ) = map_single_column_expression(
-                map_logical_plan_expression(logical_plan.condition()),
-                df._column_map,
-                ExpressionTyper(df),
+            raise UnsupportedOperationException(
+                "[UNSUPPORTED_SQL_EXTENSION] The DELETE FROM command failed.\n"
+                + "Reason: This command is a platform-specific SQL extension and is not part of the standard Apache Spark specification that this interface uses."
+            )
+        case "UpdateTable":
+            # Databricks/Delta-specific extension not supported by SAS.
+            # Provide an actionable, clear error.
+            raise UnsupportedOperationException(
+                "[UNSUPPORTED_SQL_EXTENSION] The UPDATE TABLE command failed.\n"
+                + "Reason: This command is a platform-specific SQL extension and is not part of the standard Apache Spark specification that this interface uses."
             )
-            session.table(name).delete(condition_typed_col.col)
         case "RenameColumn":
             table_name = get_relation_identifier_name(logical_plan.table(), True)
             column_obj = logical_plan.column()
             old_column_name = ".".join(
-                str(part) for part in as_java_list(column_obj.name())
+                spark_to_sf_single_id(str(part), is_column=True)
+                for part in as_java_list(column_obj.name())
+            )
+            if not global_config.spark_sql_caseSensitive:
+                case_insensitive_name = next(
+                    (
+                        f.name
+                        for f in session.table(table_name).schema.fields
+                        if f.name.lower() == old_column_name.lower()
+                    ),
+                    None,
+                )
+                if case_insensitive_name:
+                    old_column_name = case_insensitive_name
+            new_column_name = spark_to_sf_single_id(
+                str(logical_plan.newName()), is_column=True
             )
-            new_column_name = str(logical_plan.newName())

             # Pass through to Snowflake
             snowflake_sql = f"ALTER TABLE {table_name} RENAME COLUMN {old_column_name} TO {new_column_name}"
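
The Snowpark-based implementations of `MERGE INTO` and `DELETE FROM` are removed, and `UPDATE TABLE` is rejected explicitly: all three now raise `UnsupportedOperationException` with an `[UNSUPPORTED_SQL_EXTENSION]` message, on the grounds that these commands are platform-specific extensions rather than part of the standard Apache Spark SQL surface. A hypothetical client-side snippet (assuming a Spark Connect session `spark` backed by snowpark-connect 0.22.x) would surface the failure roughly like this:

```python
# Hypothetical client usage; the import mirrors the one added at the top of
# this file, and the error text comes from the raise statements above.
from pyspark.errors.exceptions.base import UnsupportedOperationException

try:
    spark.sql(
        "MERGE INTO target USING source ON target.id = source.id "
        "WHEN MATCHED THEN UPDATE SET *"
    ).collect()
except UnsupportedOperationException as exc:
    # 0.20.x attempted to translate this into a Snowpark merge; 0.22.x fails fast.
    print(exc)  # [UNSUPPORTED_SQL_EXTENSION] The MERGE INTO command failed. ...
```
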
@@ -971,6 +846,32 @@ def map_sql_to_pandas_df(
             else:
                 rows = session.sql("SHOW TABLES").collect()

+            # Return empty DataFrame with proper schema if no results
+            if not rows:
+                if class_name == "ShowTableExtended":
+                    return (
+                        pandas.DataFrame(
+                            {
+                                "namespace": [""],
+                                "tableName": [""],
+                                "isTemporary": [""],
+                                "information": [""],
+                            }
+                        ),
+                        "",
+                    )
+                else:
+                    return (
+                        pandas.DataFrame(
+                            {
+                                "namespace": [""],
+                                "tableName": [""],
+                                "isTemporary": [""],
+                            }
+                        ),
+                        "",
+                    )
+
             # Apply pattern filtering if pattern is provided
             # This is workaround to filter using Python regex.
             if pattern and rows:
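
When `SHOW TABLES` returns no rows, the handler now builds a single-row pandas frame of empty strings so the result still carries the expected column names (the extended variant adds an `information` column). A quick illustration of the returned shape, plain pandas only:

```python
import pandas

# Shape of the non-extended fallback frame built above.
empty = pandas.DataFrame(
    {"namespace": [""], "tableName": [""], "isTemporary": [""]}
)
print(list(empty.columns))  # ['namespace', 'tableName', 'isTemporary']
print(len(empty))           # 1
```
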
@@ -1020,9 +921,7 @@ def map_sql_to_pandas_df(
                 if db_name and multi_part_len == 2:
                     # Check db_name is same as in the full table name
                     if (
-                        _spark_to_snowflake_single_id(
-                            str(db_and_table_name[0])
-                        ).casefold()
+                        spark_to_sf_single_id(str(db_and_table_name[0])).casefold()
                         != db_name.casefold()
                     ):
                         raise AnalysisException(
@@ -1075,11 +974,7 @@ def map_sql_to_pandas_df(
     if _is_sql_select_statement_helper(sql_string):
         return None, None
     session = snowpark.Session.get_active_session()
-    sql_df = session.sql(sql_string)
-    columns = sql_df.columns
-    column_name_handler.with_column_map(sql_df, columns, columns)
-    rows = sql_df.collect()
-
+    rows = session.sql(sql_string).collect()
     if rows:
         return pandas.DataFrame(rows), ""
     return pandas.DataFrame({"": [""]}), ""
@@ -1089,7 +984,9 @@ def get_sql_passthrough() -> bool:
     return get_boolean_session_config_param("snowpark.connect.sql.passthrough")


-def map_sql(rel: relation_proto.Relation) -> snowpark.DataFrame:
+def map_sql(
+    rel: relation_proto.Relation,
+) -> DataFrameContainer:
     """
     Map a SQL string to a DataFrame.

@@ -1112,7 +1009,11 @@ def map_sql(rel: relation_proto.Relation) -> snowpark.DataFrame:
         session = snowpark.Session.get_active_session()
         sql_df = session.sql(rel.sql.query)
         columns = sql_df.columns
-        return column_name_handler.with_column_map(sql_df, columns, columns)
+        return DataFrameContainer.create_with_column_mapping(
+            dataframe=sql_df,
+            spark_column_names=columns,
+            snowpark_column_names=columns,
+        )


 def map_logical_plan_relation(
@@ -1453,8 +1354,9 @@ def map_logical_plan_relation(

         # Need to find ids which are not part of values and remaining cols of df
         input_rel = map_logical_plan_relation(rel.child())
-        input_df: snowpark.DataFrame = map_relation(input_rel)
-        column_map = input_df._column_map
+        result = map_relation(input_rel)
+        input_df: snowpark.DataFrame = result.dataframe
+        column_map = result.column_map
         typer = ExpressionTyper(input_df)
         unpivot_spark_names = []
         for v in values:
@@ -1744,8 +1646,8 @@ def map_logical_plan_relation(
         # )

         # This is a workaround to fix the bug in snowpark where if we select posexplode with *, it would return wrong columns
-        input_df = map_relation(input_relation)
-        spark_columns = input_df._column_map.get_spark_columns()
+        input_container = map_relation(input_relation)
+        spark_columns = input_container.column_map.get_spark_columns()
         column_expressions = [
             expressions_proto.Expression(
                 unresolved_attribute=expressions_proto.Expression.UnresolvedAttribute(
@@ -1796,7 +1698,9 @@ def get_relation_identifier_name(name_obj, is_multi_part: bool = False) -> str:
         expr = map_single_column_expression(
             expr_proto, m, ExpressionTyper.dummy_typer(session)
         )
-        name = session.range(1).select(expr[1].col).collect()[0][0]
+        name = spark_to_sf_single_id(
+            session.range(1).select(expr[1].col).collect()[0][0]
+        )
     else:
         if is_multi_part:
             name = _spark_to_snowflake(name_obj.multipartIdentifier())