snowpark-connect 0.20.2-py3-none-any.whl → 0.21.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (67)
  1. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +3 -2
  2. snowflake/snowpark_connect/column_name_handler.py +6 -65
  3. snowflake/snowpark_connect/config.py +28 -14
  4. snowflake/snowpark_connect/dataframe_container.py +242 -0
  5. snowflake/snowpark_connect/execute_plan/map_execution_command.py +13 -23
  6. snowflake/snowpark_connect/execute_plan/map_execution_root.py +9 -5
  7. snowflake/snowpark_connect/expression/map_extension.py +2 -1
  8. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +8 -7
  9. snowflake/snowpark_connect/expression/map_unresolved_function.py +279 -43
  10. snowflake/snowpark_connect/expression/map_unresolved_star.py +8 -8
  11. snowflake/snowpark_connect/expression/map_update_fields.py +1 -1
  12. snowflake/snowpark_connect/expression/typer.py +6 -6
  13. snowflake/snowpark_connect/proto/control_pb2.py +17 -16
  14. snowflake/snowpark_connect/proto/control_pb2.pyi +17 -17
  15. snowflake/snowpark_connect/proto/control_pb2_grpc.py +12 -63
  16. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +15 -14
  17. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +19 -14
  18. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +27 -26
  19. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +74 -68
  20. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +5 -5
  21. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +25 -17
  22. snowflake/snowpark_connect/relation/map_aggregate.py +72 -47
  23. snowflake/snowpark_connect/relation/map_catalog.py +2 -2
  24. snowflake/snowpark_connect/relation/map_column_ops.py +207 -144
  25. snowflake/snowpark_connect/relation/map_crosstab.py +25 -6
  26. snowflake/snowpark_connect/relation/map_extension.py +81 -56
  27. snowflake/snowpark_connect/relation/map_join.py +72 -63
  28. snowflake/snowpark_connect/relation/map_local_relation.py +35 -20
  29. snowflake/snowpark_connect/relation/map_map_partitions.py +21 -16
  30. snowflake/snowpark_connect/relation/map_relation.py +22 -16
  31. snowflake/snowpark_connect/relation/map_row_ops.py +232 -146
  32. snowflake/snowpark_connect/relation/map_sample_by.py +15 -8
  33. snowflake/snowpark_connect/relation/map_show_string.py +42 -5
  34. snowflake/snowpark_connect/relation/map_sql.py +155 -78
  35. snowflake/snowpark_connect/relation/map_stats.py +88 -39
  36. snowflake/snowpark_connect/relation/map_subquery_alias.py +13 -14
  37. snowflake/snowpark_connect/relation/map_udtf.py +6 -9
  38. snowflake/snowpark_connect/relation/read/map_read.py +8 -3
  39. snowflake/snowpark_connect/relation/read/map_read_csv.py +7 -7
  40. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +7 -7
  41. snowflake/snowpark_connect/relation/read/map_read_json.py +7 -7
  42. snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -7
  43. snowflake/snowpark_connect/relation/read/map_read_socket.py +7 -3
  44. snowflake/snowpark_connect/relation/read/map_read_table.py +25 -16
  45. snowflake/snowpark_connect/relation/read/map_read_text.py +7 -7
  46. snowflake/snowpark_connect/relation/utils.py +11 -5
  47. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +15 -12
  48. snowflake/snowpark_connect/relation/write/map_write.py +199 -40
  49. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +3 -2
  50. snowflake/snowpark_connect/server.py +34 -4
  51. snowflake/snowpark_connect/type_mapping.py +2 -23
  52. snowflake/snowpark_connect/utils/cache.py +27 -22
  53. snowflake/snowpark_connect/utils/context.py +33 -17
  54. snowflake/snowpark_connect/utils/{attribute_handling.py → identifiers.py} +47 -0
  55. snowflake/snowpark_connect/utils/session.py +41 -34
  56. snowflake/snowpark_connect/utils/telemetry.py +1 -2
  57. snowflake/snowpark_connect/version.py +1 -1
  58. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/METADATA +5 -3
  59. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/RECORD +67 -64
  60. snowpark_connect-0.21.0.dist-info/licenses/LICENSE-binary +568 -0
  61. snowpark_connect-0.21.0.dist-info/licenses/NOTICE-binary +1533 -0
  62. {snowpark_connect-0.20.2.data → snowpark_connect-0.21.0.data}/scripts/snowpark-connect +0 -0
  63. {snowpark_connect-0.20.2.data → snowpark_connect-0.21.0.data}/scripts/snowpark-session +0 -0
  64. {snowpark_connect-0.20.2.data → snowpark_connect-0.21.0.data}/scripts/snowpark-submit +0 -0
  65. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/WHEEL +0 -0
  66. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/licenses/LICENSE.txt +0 -0
  67. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/top_level.txt +0 -0
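
The most visible change in this diff is the new DataFrameContainer: relation mappers no longer pin ad-hoc attributes such as _column_map and _table_name onto snowpark.DataFrame objects, they return a wrapper that carries the DataFrame together with its column mapping. The class itself (dataframe_container.py, file 4 above, +242 lines) is not reproduced in this diff; the sketch below is inferred from the call sites in the hunks that follow, so the field names and the create_with_column_mapping signature come from those call sites while everything else is a guess.

    # Illustrative sketch only -- not copied from snowflake/snowpark_connect/dataframe_container.py.
    from dataclasses import dataclass
    from typing import Any, Callable, Optional

    @dataclass
    class DataFrameContainerSketch:
        dataframe: Any                      # the wrapped snowpark.DataFrame
        column_map: Any = None              # Spark <-> Snowpark column-name mapping
        table_name: Optional[str] = None    # set when the relation is a plain table scan
        alias: Optional[str] = None         # subquery / table alias, if any
        cached_schema_getter: Optional[Callable[[], Any]] = None  # lazy schema access

        @classmethod
        def create_with_column_mapping(cls, dataframe, spark_column_names, snowpark_column_names):
            # Presumably builds the column map from the two parallel name lists.
            ...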
snowflake/snowpark_connect/relation/map_sample_by.py

@@ -6,7 +6,7 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
 import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 
 from snowflake import snowpark
-from snowflake.snowpark_connect.column_name_handler import set_schema_getter
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
 from snowflake.snowpark_connect.expression.map_expression import (
     map_single_column_expression,
@@ -15,21 +15,28 @@ from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.relation.map_relation import map_relation
 
 
-def map_sample_by(rel: relation_proto.Relation) -> snowpark.DataFrame:
+def map_sample_by(
+    rel: relation_proto.Relation,
+) -> DataFrameContainer:
     """
     Sample by an expression on the input DataFrame.
     """
-    input_df: snowpark.DataFrame = map_relation(rel.sample_by.input)
+    input_container = map_relation(rel.sample_by.input)
+    input_df = input_container.dataframe
+
     exp: expressions_proto.Expression = rel.sample_by.col
     _, col_expr = map_single_column_expression(
-        exp, input_df._column_map, ExpressionTyper(input_df)
+        exp, input_container.column_map, ExpressionTyper(input_df)
     )
     fractions = {
         get_literal_field_and_name(frac.stratum)[0]: frac.fraction
         for frac in rel.sample_by.fractions
     }
     result: snowpark.DataFrame = input_df.sampleBy(col_expr.col, fractions)
-    result._column_map = input_df._column_map
-    result._table_name = input_df._table_name
-    set_schema_getter(result, lambda: input_df.schema)
-    return result
+    return DataFrameContainer(
+        result,
+        column_map=input_container.column_map,
+        table_name=input_container.table_name,
+        alias=input_container.alias,
+        cached_schema_getter=lambda: input_df.schema,
+    )
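
For context, the mapper above serves the standard PySpark sampleBy API; a minimal client-side call over Spark Connect looks like this (the endpoint and data are illustrative):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()  # illustrative endpoint
    df = spark.createDataFrame([("a", 1), ("a", 2), ("b", 3)], ["category", "value"])
    # sampleBy is what ultimately arrives at map_sample_by on the server side.
    sampled = df.sampleBy("category", fractions={"a": 0.5, "b": 1.0}, seed=42)
    sampled.show()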
snowflake/snowpark_connect/relation/map_show_string.py

@@ -2,11 +2,17 @@
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #
 
+import copy
+
 import pandas
 import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 
 from snowflake import snowpark
 from snowflake.snowpark._internal.analyzer import analyzer_utils
+from snowflake.snowpark.functions import col
+from snowflake.snowpark.types import DateType, StringType, StructField, StructType
+from snowflake.snowpark_connect.column_name_handler import set_schema_getter
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.map_relation import map_relation
 
 
@@ -18,12 +24,15 @@ def map_show_string(rel: relation_proto.Relation) -> pandas.DataFrame:
     message creates a string. The client expects this string to be packed into an Arrow
     Buffer object as a single cell.
     """
-    input_df: snowpark.DataFrame = map_relation(rel.show_string.input)
+    input_df_container: DataFrameContainer = map_relation(rel.show_string.input)
+    raw_input_df = input_df_container.dataframe
+    input_df = _handle_datetype_columns(raw_input_df)
+
     show_string = input_df._show_string_spark(
         num_rows=rel.show_string.num_rows,
         truncate=rel.show_string.truncate,
         vertical=rel.show_string.vertical,
-        _spark_column_names=input_df._column_map.get_spark_columns(),
+        _spark_column_names=input_df_container.column_map.get_spark_columns(),
     )
     return pandas.DataFrame({"show_string": [show_string]})
 
@@ -32,13 +41,15 @@ def map_repr_html(rel: relation_proto.Relation) -> pandas.DataFrame:
     """
     Generate the html string representation of the input dataframe.
     """
-    input_df: snowpark.DataFrame = map_relation(rel.html_string.input)
+    input_df_container: DataFrameContainer = map_relation(rel.html_string.input)
+    input_df = input_df_container.dataframe
+
     input_panda = input_df.toPandas()
    input_panda.rename(
        columns={
            analyzer_utils.unquote_if_quoted(
-                input_df._column_map.get_snowpark_columns()[i]
-            ): input_df._column_map.get_spark_columns()[i]
+                input_df_container.column_map.get_snowpark_columns()[i]
+            ): input_df_container.column_map.get_spark_columns()[i]
            for i in range(len(input_panda.columns))
        },
        inplace=True,
@@ -48,3 +59,29 @@ def map_repr_html(rel: relation_proto.Relation) -> pandas.DataFrame:
         max_rows=rel.html_string.num_rows,
     )
     return pandas.DataFrame({"html_string": [html_string]})
+
+
+def _handle_datetype_columns(input_df: snowpark.DataFrame) -> snowpark.DataFrame:
+    """
+    Map DateType columns to strings so that dates which are out of range for
+    datetime.datetime can still be shown.
+    """
+    new_column_mapping = []
+    new_fields = []
+    transformation_required = False
+    for field in input_df.schema:
+        if isinstance(field.datatype, DateType):
+            transformation_required = True
+            new_column_mapping.append(col(field.name).cast(StringType()))
+            new_fields.append(StructField(field.name, StringType()))
+        else:
+            new_column_mapping.append(col(field.name))
+            new_fields.append(field)
+
+    if not transformation_required:
+        return input_df
+
+    transformed_df = input_df.select(new_column_mapping)
+    set_schema_getter(transformed_df, lambda: StructType(new_fields))
+    transformed_df._column_map = copy.deepcopy(input_df._column_map)
+
+    return transformed_df
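
The new _handle_datetype_columns pass only changes how results are rendered for show(): DATE columns are cast to strings before the rows are materialized, so date values that cannot round-trip through Python datetime objects no longer break the output. A hypothetical client-side call that exercises it, reusing the Spark Connect session from the earlier sketch:

    # DATE literal near the upper end of the supported range; with the cast-to-string
    # handling above it is rendered as text instead of being converted to a datetime.
    spark.sql("SELECT DATE '9999-12-31' AS wide_date").show()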
snowflake/snowpark_connect/relation/map_sql.py

@@ -26,15 +26,16 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     unquote_if_quoted,
 )
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.snowpark._internal.utils import is_sql_select_statement
+from snowflake.snowpark._internal.utils import is_sql_select_statement, quote_name
 from snowflake.snowpark.functions import when_matched, when_not_matched
 from snowflake.snowpark_connect.config import (
-    auto_uppercase_ddl,
+    auto_uppercase_non_column_identifiers,
     get_boolean_session_config_param,
     global_config,
     set_config_param,
     unset_config_param,
 )
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.expression.map_expression import (
     ColumnNameMap,
     map_single_column_expression,
@@ -72,6 +73,7 @@ from ..expression.map_sql_expression import (
     map_logical_plan_expression,
     sql_parser,
 )
+from ..utils.identifiers import spark_to_sf_single_id
 
 _ctes = ContextVar[dict[str, relation_proto.Relation]]("_ctes", default={})
 
@@ -159,38 +161,32 @@ def parse_pos_args(
     return dict(zip(sorted(positions), pos_args))
 
 
-def execute_logical_plan(logical_plan) -> snowpark.DataFrame:
+def execute_logical_plan(logical_plan) -> DataFrameContainer:
     proto = map_logical_plan_relation(logical_plan)
     with push_evaluating_sql_scope():
         return map_relation(proto)
 
 
-def _spark_to_snowflake_single_id(name: str) -> str:
-    name = quote_name_without_upper_casing(name)
-    return name.upper() if auto_uppercase_ddl() else name
-
-
 def _spark_to_snowflake(multipart_id: jpype.JObject) -> str:
     return ".".join(
-        _spark_to_snowflake_single_id(str(part)) for part in as_java_list(multipart_id)
+        spark_to_sf_single_id(str(part)) for part in as_java_list(multipart_id)
     )
 
 
 def _rename_columns(
-    df: snowpark.DataFrame, user_specified_columns
+    df: snowpark.DataFrame, user_specified_columns, column_map: ColumnNameMap
 ) -> snowpark.DataFrame:
     user_columns = [str(col._1()) for col in as_java_list(user_specified_columns)]
 
     if user_columns:
         columns = zip(df.columns, user_columns)
     else:
-        columns = df._column_map.snowpark_to_spark_map().items()
+        columns = column_map.snowpark_to_spark_map().items()
 
     for orig_column, user_column in columns:
         df = df.with_column_renamed(
-            orig_column, _spark_to_snowflake_single_id(user_column)
+            orig_column, spark_to_sf_single_id(user_column, is_column=True)
         )
-
     return df
 
 
@@ -199,11 +195,12 @@ def _create_table_as_select(logical_plan, mode: str) -> None:
     name = get_relation_identifier_name(logical_plan.name())
     comment = logical_plan.tableSpec().comment()
 
-    df = execute_logical_plan(logical_plan.query())
-    columns = df._column_map.snowpark_to_spark_map().items()
+    container = execute_logical_plan(logical_plan.query())
+    df = container.dataframe
+    columns = container.column_map.snowpark_to_spark_map().items()
     for orig_column, user_column in columns:
         df = df.with_column_renamed(
-            orig_column, _spark_to_snowflake_single_id(user_column)
+            orig_column, spark_to_sf_single_id(user_column, is_column=True)
         )
 
     # TODO escaping should be handled by snowpark. remove when SNOW-2210271 is done
@@ -218,11 +215,11 @@
 
 
 def _spark_field_to_sql(field: jpype.JObject, is_column: bool) -> str:
-    # Column names will be uppercased according to "snowpark.connect.auto-uppercase.ddl",
+    # Column names will be uppercased according to "snowpark.connect.sql.identifiers.auto-uppercase",
     # and struct fields will be left as is. This should allow users to use the same names
     # in spark and Snowflake in most cases.
     if is_column:
-        name = _spark_to_snowflake_single_id(str(field.name()))
+        name = spark_to_sf_single_id(str(field.name()), is_column=True)
     else:
         name = quote_name_without_upper_casing(str(field.name()))
     data_type_str = _spark_datatype_to_sql(field.dataType())
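
The spark_to_sf_single_id helper used throughout these hunks lives in the renamed utils/identifiers.py (file 54 in the list above) and is not itself shown in this diff. Judging from the removed _spark_to_snowflake_single_id and the new is_column flag, it plausibly behaves like the sketch below; the split between column and non-column uppercasing is an assumption based on the auto_uppercase_non_column_identifiers rename.

    # Sketch only; the real code is in snowflake/snowpark_connect/utils/identifiers.py and may differ.

    def _quote(name: str) -> str:
        # Stand-in for Snowpark's quote_name_without_upper_casing.
        return name if name.startswith('"') else f'"{name}"'

    def _auto_uppercase(is_column: bool) -> bool:
        # Hypothetical config lookup; the diff suggests columns and other identifiers
        # are now governed by separate auto-uppercase settings.
        return not is_column

    def spark_to_sf_single_id_sketch(name: str, is_column: bool = False) -> str:
        quoted = _quote(name)
        return quoted.upper() if _auto_uppercase(is_column) else quoted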
@@ -257,10 +254,12 @@ def _normalize_identifiers(node):
     The identifiers need to be uppercased to match Snowflake's behaviour. Users can disable this by setting
     the `snowpark.connect.auto_uppercase_ddl` config to False.
     """
-    if isinstance(node, Identifier):
-        fixed = node.this.upper() if auto_uppercase_ddl() else node.this
-        return Identifier(this=fixed, quoted=True)
-    return node
+    if not isinstance(node, Identifier):
+        return node
+    elif auto_uppercase_non_column_identifiers():
+        return Identifier(this=node.this.upper(), quoted=True)
+    else:
+        return Identifier(this=node.this, quoted=True)
 
 
 def _remove_file_format_property(node):
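
_normalize_identifiers now keys off auto_uppercase_non_column_identifiers(), and the comment in _spark_field_to_sql above gives the new setting name, snowpark.connect.sql.identifiers.auto-uppercase (replacing snowpark.connect.auto-uppercase.ddl). Assuming it is read like the other boolean session parameters in this package, a client would toggle it roughly like this (the value format is an assumption):

    # Presumed client-side toggle; the key name comes from the comment in
    # _spark_field_to_sql above, the value format is an assumption.
    spark.conf.set("snowpark.connect.sql.identifiers.auto-uppercase", "false")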
@@ -328,7 +327,10 @@ def map_sql_to_pandas_df(
             # Build Snowflake SQL from logical plan attributes
             for col in as_java_list(columns_to_add):
                 # Follow the same pattern as AlterColumn for column name extraction
-                col_name = ".".join(str(part) for part in as_java_list(col.name()))
+                col_name = ".".join(
+                    spark_to_sf_single_id(part, is_column=True)
+                    for part in as_java_list(col.name())
+                )
                 col_type = _spark_datatype_to_sql(col.dataType())
                 snowflake_sql = (
                     f"ALTER TABLE {table_name} ADD COLUMN {col_name} {col_type}"
@@ -341,9 +343,22 @@ def map_sql_to_pandas_df(
 
             # Extract actual column name
             column_name = ".".join(
-                str(part) for part in as_java_list(column_obj.name())
+                spark_to_sf_single_id(part, is_column=True)
+                for part in as_java_list(column_obj.name())
             )
 
+            if not global_config.spark_sql_caseSensitive:
+                case_insensitive_name = next(
+                    (
+                        f.name
+                        for f in session.table(table_name).schema.fields
+                        if f.name.lower() == column_name.lower()
+                    ),
+                    None,
+                )
+                if case_insensitive_name:
+                    column_name = case_insensitive_name
+
             # Build ALTER COLUMN command from logical plan attributes
             alter_parts = []
 
@@ -381,9 +396,7 @@ def map_sql_to_pandas_df(
             if_not_exists = "IF NOT EXISTS " if logical_plan.ifNotExists() else ""
             session.sql(f"CREATE SCHEMA {if_not_exists}{name}").collect()
             if previous_name is not None:
-                session.sql(
-                    f"USE SCHEMA {_spark_to_snowflake_single_id(previous_name)}"
-                ).collect()
+                session.sql(f"USE SCHEMA {quote_name(previous_name)}").collect()
             else:
                 # TODO: Unset the schema
                 pass
@@ -443,7 +456,8 @@ def map_sql_to_pandas_df(
             else:
                 object_name: str = as_java_list(logical_plan.child().nameParts())[0]
             _accessing_temp_object.set(False)
-            df = execute_logical_plan(logical_plan.query())
+            df_container = execute_logical_plan(logical_plan.query())
+            df = df_container.dataframe
             if _accessing_temp_object.get():
                 raise AnalysisException(
                     f"[INVALID_TEMP_OBJ_REFERENCE] Cannot create the persistent object `{CURRENT_CATALOG_NAME}`.`{current_schema}`.`{object_name}` "
@@ -454,7 +468,9 @@ def map_sql_to_pandas_df(
             name = get_relation_identifier_name(logical_plan.child())
             comment = logical_plan.comment()
 
-            df = _rename_columns(df, logical_plan.userSpecifiedColumns())
+            df = _rename_columns(
+                df, logical_plan.userSpecifiedColumns(), df_container.column_map
+            )
 
             # TODO: Support logical_plan.replace() == False
             df.create_or_replace_view(
@@ -464,7 +480,8 @@ def map_sql_to_pandas_df(
                 else None,
             )
         case "CreateViewCommand":
-            df = execute_logical_plan(logical_plan.plan())
+            df_container = execute_logical_plan(logical_plan.plan())
+            df = df_container.dataframe
             tmp_views = _get_current_temp_objects()
             tmp_views.add(
                 (
@@ -475,7 +492,7 @@ def map_sql_to_pandas_df(
                 )
             )
             name = str(logical_plan.name().identifier())
-            name = _spark_to_snowflake_single_id(name)
+            name = spark_to_sf_single_id(name)
             if isinstance(
                 logical_plan.viewType(),
                 jpype.JClass(
@@ -490,7 +507,9 @@ def map_sql_to_pandas_df(
                 else None
             )
 
-            df = _rename_columns(df, logical_plan.userSpecifiedColumns())
+            df = _rename_columns(
+                df, logical_plan.userSpecifiedColumns(), df_container.column_map
+            )
 
             if logical_plan.replace():
                 df.create_or_replace_temp_view(
@@ -504,6 +523,7 @@ def map_sql_to_pandas_df(
             )
         case "DescribeColumn":
             name = get_relation_identifier_name(logical_plan.column())
+            # todo double check if this is correct
             rows = session.sql(f"DESCRIBE TABLE {name}").collect()
         case "DescribeNamespace":
             name = get_relation_identifier_name(logical_plan.namespace(), True)
@@ -521,11 +541,12 @@ def map_sql_to_pandas_df(
             # This gets the schema without executing the query (similar to Spark's DESCRIBE QUERY)
             # Get the inner query plan and convert it to SQL
             inner_query_plan = logical_plan.plan()
-            df = execute_logical_plan(inner_query_plan)
+            df_container = execute_logical_plan(inner_query_plan)
+            df = df_container.dataframe
             schema = df.schema
 
             # Get original Spark column names using the column map from the original DataFrame
-            spark_columns = df._column_map.get_spark_columns()
+            spark_columns = df_container.column_map.get_spark_columns()
             data = []
             for i, field in enumerate(schema.fields):
                 # Use original Spark column name from column map
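
These branches back the ordinary SQL entry points for views and describe-query; assuming a connected Spark Connect session named spark, the statements involved look like this (table and view names are made up):

    # Illustrative statements presumably routed through the view and describe-query branches above.
    spark.sql("CREATE OR REPLACE TEMPORARY VIEW recent_orders AS SELECT * FROM orders WHERE amount > 0")
    spark.sql("DESCRIBE QUERY SELECT order_id, amount FROM recent_orders").show()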
@@ -615,9 +636,9 @@ def map_sql_to_pandas_df(
                 "UnresolvedHaving",
                 "Distinct",
             ):
-                expr = execute_logical_plan(logical_plan.logicalPlan()).queries[
-                    "queries"
-                ][0]
+                expr = execute_logical_plan(
+                    logical_plan.logicalPlan()
+                ).dataframe.queries["queries"][0]
                 final_sql = f"EXPLAIN USING TEXT {expr}"
                 rows = session.sql(final_sql).collect()
             elif (
@@ -626,7 +647,7 @@ def map_sql_to_pandas_df(
             ):
                 expr = execute_logical_plan(
                     logical_plan.logicalPlan().query()
-                ).queries["queries"][0]
+                ).dataframe.queries["queries"][0]
                 final_sql = f"EXPLAIN USING TEXT {expr}"
                 rows = session.sql(final_sql).collect()
             else:
@@ -635,7 +656,8 @@ def map_sql_to_pandas_df(
                     f"{logical_plan_name} is not supported yet with EXPLAIN."
                 )
         case "InsertIntoStatement":
-            df = execute_logical_plan(logical_plan.query())
+            df_container = execute_logical_plan(logical_plan.query())
+            df = df_container.dataframe
             queries = df.queries["queries"]
             if len(queries) != 1:
                 raise SnowparkConnectNotImplementedError(
@@ -645,7 +667,7 @@ def map_sql_to_pandas_df(
 
             name = get_relation_identifier_name(logical_plan.table(), True)
 
             user_columns = [
-                _spark_to_snowflake_single_id(str(col))
+                spark_to_sf_single_id(str(col), is_column=True)
                 for col in as_java_list(logical_plan.userSpecifiedCols())
             ]
             overwrite_str = "OVERWRITE" if logical_plan.overwrite() else ""
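
The EXPLAIN branches now reach into the container's .dataframe before reading .queries, and InsertIntoStatement unpacks the container the same way. Client-side, the statements involved are the ordinary ones (table names are illustrative):

    # Illustrative statements handled by the EXPLAIN and InsertIntoStatement branches above.
    spark.sql("EXPLAIN SELECT category, COUNT(*) FROM orders GROUP BY category").show(truncate=False)
    spark.sql("INSERT INTO orders_archive SELECT * FROM orders WHERE amount = 0")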
@@ -751,32 +773,36 @@ def map_sql_to_pandas_df(
                 )
                 return assignments
 
-            source_df = map_relation(
+            source_df_container = map_relation(
                 map_logical_plan_relation(logical_plan.sourceTable())
             )
-
+            source_df = source_df_container.dataframe
             plan_id = gen_sql_plan_id()
-
-            target_df = map_relation(
+            target_df_container = map_relation(
                 map_logical_plan_relation(logical_plan.targetTable(), plan_id)
             )
-            for col in target_df._column_map.columns:
+            target_df = target_df_container.dataframe
+
+            for col in target_df_container.column_map.columns:
                 target_df = target_df.with_column_renamed(
-                    col.snowpark_name, _spark_to_snowflake_single_id(col.spark_name)
+                    col.snowpark_name,
+                    spark_to_sf_single_id(col.spark_name, is_column=True),
                 )
-            target_df = column_name_handler.with_column_map(
-                target_df, target_df.columns, target_df.columns
+            target_df_container = DataFrameContainer.create_with_column_mapping(
+                dataframe=target_df,
+                spark_column_names=target_df.columns,
+                snowpark_column_names=target_df.columns,
             )
 
-            set_plan_id_map(plan_id, target_df)
+            set_plan_id_map(plan_id, target_df_container)
 
             joined_df_before_condition: snowpark.DataFrame = source_df.join(
                 target_df
             )
 
             column_mapping_for_conditions = column_name_handler.JoinColumnNameMap(
-                source_df,
-                target_df,
+                source_df_container.column_map,
+                target_df_container.column_map,
             )
             typer_for_expressions = ExpressionTyper(joined_df_before_condition)
@@ -803,8 +829,8 @@ def map_sql_to_pandas_df(
             ):
                 assignments = _get_assignments_from_action(
                     matched_action,
-                    source_df._column_map,
-                    target_df._column_map,
+                    source_df_container.column_map,
+                    target_df_container.column_map,
                     ExpressionTyper(source_df),
                     ExpressionTyper(target_df),
                 )
@@ -825,8 +851,8 @@ def map_sql_to_pandas_df(
             ):
                 assignments = _get_assignments_from_action(
                     not_matched_action,
-                    source_df._column_map,
-                    target_df._column_map,
+                    source_df_container.column_map,
+                    target_df_container.column_map,
                     ExpressionTyper(source_df),
                     ExpressionTyper(target_df),
                 )
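
The merge handling above (source/target containers plus matched and not-matched actions) corresponds to Spark's MERGE INTO statement; an illustrative client-side call, with made-up table and column names:

    # Illustrative MERGE INTO statement; this is the path that builds the source/target
    # containers and the matched / not-matched clauses above.
    spark.sql("""
        MERGE INTO orders AS t
        USING orders_staging AS s
        ON t.order_id = s.order_id
        WHEN MATCHED THEN UPDATE SET t.amount = s.amount
        WHEN NOT MATCHED THEN INSERT (order_id, amount) VALUES (s.order_id, s.amount)
    """)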
@@ -852,20 +878,28 @@ def map_sql_to_pandas_df(
                 source_df, merge_condition_typed_col.col, clauses
             )
         case "DeleteFromTable":
-            df = map_relation(map_logical_plan_relation(logical_plan.table()))
-            for col in df._column_map.columns:
+            df_container = map_relation(
+                map_logical_plan_relation(logical_plan.table())
+            )
+            df = df_container.dataframe
+            for col in df_container.column_map.columns:
                 df = df.with_column_renamed(
-                    col.snowpark_name, _spark_to_snowflake_single_id(col.spark_name)
+                    col.snowpark_name,
+                    spark_to_sf_single_id(col.spark_name, is_column=True),
                 )
-            df = column_name_handler.with_column_map(df, df.columns, df.columns)
-
+            df_container = column_name_handler.create_with_column_mapping(
+                dataframe=df,
+                spark_column_names=df.columns,
+                snowpark_column_names=df.columns,
+            )
+            df = df_container.dataframe
             name = get_relation_identifier_name(logical_plan.table(), True)
             (
                 condition_column_name,
                 condition_typed_col,
             ) = map_single_column_expression(
                 map_logical_plan_expression(logical_plan.condition()),
-                df._column_map,
+                df_container.column_map,
                 ExpressionTyper(df),
             )
             session.table(name).delete(condition_typed_col.col)
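
For completeness, the DeleteFromTable branch above ends in session.table(name).delete(condition); the client-side statement that reaches it is plain SQL (table name illustrative):

    # Illustrative DELETE statement handled by the DeleteFromTable branch above.
    spark.sql("DELETE FROM orders WHERE amount < 0")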
@@ -873,9 +907,23 @@ def map_sql_to_pandas_df(
             table_name = get_relation_identifier_name(logical_plan.table(), True)
             column_obj = logical_plan.column()
             old_column_name = ".".join(
-                str(part) for part in as_java_list(column_obj.name())
+                spark_to_sf_single_id(str(part), is_column=True)
+                for part in as_java_list(column_obj.name())
+            )
+            if not global_config.spark_sql_caseSensitive:
+                case_insensitive_name = next(
+                    (
+                        f.name
+                        for f in session.table(table_name).schema.fields
+                        if f.name.lower() == old_column_name.lower()
+                    ),
+                    None,
+                )
+                if case_insensitive_name:
+                    old_column_name = case_insensitive_name
+            new_column_name = spark_to_sf_single_id(
+                str(logical_plan.newName()), is_column=True
             )
-            new_column_name = str(logical_plan.newName())
 
             # Pass through to Snowflake
             snowflake_sql = f"ALTER TABLE {table_name} RENAME COLUMN {old_column_name} TO {new_column_name}"
@@ -971,6 +1019,32 @@ def map_sql_to_pandas_df(
             else:
                 rows = session.sql("SHOW TABLES").collect()
 
+            # Return empty DataFrame with proper schema if no results
+            if not rows:
+                if class_name == "ShowTableExtended":
+                    return (
+                        pandas.DataFrame(
+                            {
+                                "namespace": [""],
+                                "tableName": [""],
+                                "isTemporary": [""],
+                                "information": [""],
+                            }
+                        ),
+                        "",
+                    )
+                else:
+                    return (
+                        pandas.DataFrame(
+                            {
+                                "namespace": [""],
+                                "tableName": [""],
+                                "isTemporary": [""],
+                            }
+                        ),
+                        "",
+                    )
+
             # Apply pattern filtering if pattern is provided
             # This is workaround to filter using Python regex.
             if pattern and rows:
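
When SHOW TABLES or SHOW TABLE EXTENDED matches nothing, the branch above now returns a placeholder frame that keeps the expected column layout instead of a schemaless empty result. The shape is easy to see in isolation:

    import pandas

    # Same construction as the new fallback above: one row of empty strings, preserving
    # the namespace / tableName / isTemporary columns that the Spark client expects.
    placeholder = pandas.DataFrame({"namespace": [""], "tableName": [""], "isTemporary": [""]})
    print(list(placeholder.columns))  # ['namespace', 'tableName', 'isTemporary']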
@@ -1020,9 +1094,7 @@ def map_sql_to_pandas_df(
                 if db_name and multi_part_len == 2:
                     # Check db_name is same as in the full table name
                     if (
-                        _spark_to_snowflake_single_id(
-                            str(db_and_table_name[0])
-                        ).casefold()
+                        spark_to_sf_single_id(str(db_and_table_name[0])).casefold()
                         != db_name.casefold()
                     ):
                         raise AnalysisException(
@@ -1075,11 +1147,7 @@
     if _is_sql_select_statement_helper(sql_string):
         return None, None
     session = snowpark.Session.get_active_session()
-    sql_df = session.sql(sql_string)
-    columns = sql_df.columns
-    column_name_handler.with_column_map(sql_df, columns, columns)
-    rows = sql_df.collect()
-
+    rows = session.sql(sql_string).collect()
     if rows:
         return pandas.DataFrame(rows), ""
     return pandas.DataFrame({"": [""]}), ""
@@ -1089,7 +1157,9 @@ def get_sql_passthrough() -> bool:
     return get_boolean_session_config_param("snowpark.connect.sql.passthrough")
 
 
-def map_sql(rel: relation_proto.Relation) -> snowpark.DataFrame:
+def map_sql(
+    rel: relation_proto.Relation,
+) -> DataFrameContainer:
     """
     Map a SQL string to a DataFrame.
 
@@ -1112,7 +1182,11 @@ def map_sql(rel: relation_proto.Relation) -> snowpark.DataFrame:
     session = snowpark.Session.get_active_session()
     sql_df = session.sql(rel.sql.query)
     columns = sql_df.columns
-    return column_name_handler.with_column_map(sql_df, columns, columns)
+    return DataFrameContainer.create_with_column_mapping(
+        dataframe=sql_df,
+        spark_column_names=columns,
+        snowpark_column_names=columns,
+    )
 
 
 def map_logical_plan_relation(
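
The get_sql_passthrough helper a few hunks above reads the snowpark.connect.sql.passthrough boolean; when enabled, SQL strings are presumably handed to Snowflake as-is rather than translated through the logical-plan mapping shown here. Assuming it is exposed like other runtime confs, the client-side toggle would look like this (the value format is an assumption):

    # Presumed client-side toggle; the key name appears in get_sql_passthrough above,
    # the accepted value format is an assumption.
    spark.conf.set("snowpark.connect.sql.passthrough", "true")
    spark.sql("SELECT CURRENT_WAREHOUSE()").show()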
@@ -1453,8 +1527,9 @@
 
             # Need to find ids which are not part of values and remaining cols of df
             input_rel = map_logical_plan_relation(rel.child())
-            input_df: snowpark.DataFrame = map_relation(input_rel)
-            column_map = input_df._column_map
+            result = map_relation(input_rel)
+            input_df: snowpark.DataFrame = result.dataframe
+            column_map = result.column_map
             typer = ExpressionTyper(input_df)
             unpivot_spark_names = []
             for v in values:
@@ -1744,8 +1819,8 @@
             # )
 
             # This is a workaround to fix the bug in snowpark where if we select posexplode with *, it would return wrong columns
-            input_df = map_relation(input_relation)
-            spark_columns = input_df._column_map.get_spark_columns()
+            input_container = map_relation(input_relation)
+            spark_columns = input_container.column_map.get_spark_columns()
             column_expressions = [
                 expressions_proto.Expression(
                     unresolved_attribute=expressions_proto.Expression.UnresolvedAttribute(
@@ -1796,7 +1871,9 @@ def get_relation_identifier_name(name_obj, is_multi_part: bool = False) -> str:
         expr = map_single_column_expression(
             expr_proto, m, ExpressionTyper.dummy_typer(session)
         )
-        name = session.range(1).select(expr[1].col).collect()[0][0]
+        name = spark_to_sf_single_id(
+            session.range(1).select(expr[1].col).collect()[0][0]
+        )
     else:
         if is_multi_part:
             name = _spark_to_snowflake(name_obj.multipartIdentifier())