snowpark-connect 0.20.2__py3-none-any.whl → 0.22.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of snowpark-connect has been flagged as possibly problematic in its registry.

Files changed (84)
  1. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +3 -2
  2. snowflake/snowpark_connect/column_name_handler.py +6 -65
  3. snowflake/snowpark_connect/config.py +47 -17
  4. snowflake/snowpark_connect/dataframe_container.py +242 -0
  5. snowflake/snowpark_connect/error/error_utils.py +25 -0
  6. snowflake/snowpark_connect/execute_plan/map_execution_command.py +13 -23
  7. snowflake/snowpark_connect/execute_plan/map_execution_root.py +9 -5
  8. snowflake/snowpark_connect/expression/map_extension.py +2 -1
  9. snowflake/snowpark_connect/expression/map_udf.py +4 -4
  10. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +8 -7
  11. snowflake/snowpark_connect/expression/map_unresolved_function.py +481 -170
  12. snowflake/snowpark_connect/expression/map_unresolved_star.py +8 -8
  13. snowflake/snowpark_connect/expression/map_update_fields.py +1 -1
  14. snowflake/snowpark_connect/expression/typer.py +6 -6
  15. snowflake/snowpark_connect/proto/control_pb2.py +17 -16
  16. snowflake/snowpark_connect/proto/control_pb2.pyi +17 -17
  17. snowflake/snowpark_connect/proto/control_pb2_grpc.py +12 -63
  18. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +15 -14
  19. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +19 -14
  20. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
  21. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +27 -26
  22. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +74 -68
  23. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
  24. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +5 -5
  25. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +25 -17
  26. snowflake/snowpark_connect/relation/map_aggregate.py +170 -61
  27. snowflake/snowpark_connect/relation/map_catalog.py +2 -2
  28. snowflake/snowpark_connect/relation/map_column_ops.py +227 -145
  29. snowflake/snowpark_connect/relation/map_crosstab.py +25 -6
  30. snowflake/snowpark_connect/relation/map_extension.py +81 -56
  31. snowflake/snowpark_connect/relation/map_join.py +72 -63
  32. snowflake/snowpark_connect/relation/map_local_relation.py +35 -20
  33. snowflake/snowpark_connect/relation/map_map_partitions.py +24 -17
  34. snowflake/snowpark_connect/relation/map_relation.py +22 -16
  35. snowflake/snowpark_connect/relation/map_row_ops.py +232 -146
  36. snowflake/snowpark_connect/relation/map_sample_by.py +15 -8
  37. snowflake/snowpark_connect/relation/map_show_string.py +42 -5
  38. snowflake/snowpark_connect/relation/map_sql.py +141 -237
  39. snowflake/snowpark_connect/relation/map_stats.py +88 -39
  40. snowflake/snowpark_connect/relation/map_subquery_alias.py +13 -14
  41. snowflake/snowpark_connect/relation/map_udtf.py +10 -13
  42. snowflake/snowpark_connect/relation/read/map_read.py +8 -3
  43. snowflake/snowpark_connect/relation/read/map_read_csv.py +7 -7
  44. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +7 -7
  45. snowflake/snowpark_connect/relation/read/map_read_json.py +19 -8
  46. snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -7
  47. snowflake/snowpark_connect/relation/read/map_read_socket.py +7 -3
  48. snowflake/snowpark_connect/relation/read/map_read_table.py +25 -16
  49. snowflake/snowpark_connect/relation/read/map_read_text.py +7 -7
  50. snowflake/snowpark_connect/relation/read/reader_config.py +1 -0
  51. snowflake/snowpark_connect/relation/utils.py +11 -5
  52. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +15 -12
  53. snowflake/snowpark_connect/relation/write/map_write.py +259 -56
  54. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +3 -2
  55. snowflake/snowpark_connect/server.py +43 -4
  56. snowflake/snowpark_connect/type_mapping.py +6 -23
  57. snowflake/snowpark_connect/utils/cache.py +27 -22
  58. snowflake/snowpark_connect/utils/context.py +33 -17
  59. snowflake/snowpark_connect/utils/describe_query_cache.py +2 -9
  60. snowflake/snowpark_connect/utils/{attribute_handling.py → identifiers.py} +47 -0
  61. snowflake/snowpark_connect/utils/session.py +41 -38
  62. snowflake/snowpark_connect/utils/telemetry.py +214 -63
  63. snowflake/snowpark_connect/utils/udxf_import_utils.py +14 -0
  64. snowflake/snowpark_connect/version.py +1 -1
  65. snowflake/snowpark_decoder/__init__.py +0 -0
  66. snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.py +36 -0
  67. snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.pyi +156 -0
  68. snowflake/snowpark_decoder/dp_session.py +111 -0
  69. snowflake/snowpark_decoder/spark_decoder.py +76 -0
  70. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/METADATA +6 -4
  71. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/RECORD +83 -69
  72. snowpark_connect-0.22.1.dist-info/licenses/LICENSE-binary +568 -0
  73. snowpark_connect-0.22.1.dist-info/licenses/NOTICE-binary +1533 -0
  74. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/top_level.txt +1 -0
  75. spark/__init__.py +0 -0
  76. spark/connect/__init__.py +0 -0
  77. spark/connect/envelope_pb2.py +31 -0
  78. spark/connect/envelope_pb2.pyi +46 -0
  79. snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
  80. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-connect +0 -0
  81. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-session +0 -0
  82. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-submit +0 -0
  83. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/WHEEL +0 -0
  84. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/licenses/LICENSE.txt +0 -0
@@ -9,20 +9,30 @@ from pathlib import Path
 import pyspark.sql.connect.proto.base_pb2 as proto_base
 import pyspark.sql.connect.proto.commands_pb2 as commands_proto
 from pyspark.errors.exceptions.base import AnalysisException
-from pyspark.sql.connect.types import StructType
 
 from snowflake import snowpark
 from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     quote_name_without_upper_casing,
     unquote_if_quoted,
 )
+from snowflake.snowpark.exceptions import SnowparkSQLException
 from snowflake.snowpark.functions import col, lit, object_construct
+from snowflake.snowpark.types import (
+    ArrayType,
+    DataType,
+    DateType,
+    MapType,
+    StringType,
+    StructType,
+    TimestampType,
+    _NumericType,
+)
 from snowflake.snowpark_connect.config import (
-    auto_uppercase_ddl,
     global_config,
     sessions_config,
     str_to_bool,
 )
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.io_utils import (
     convert_file_prefix_path,
     is_cloud_path,
@@ -32,16 +42,19 @@ from snowflake.snowpark_connect.relation.read.reader_config import CsvWriterConf
 from snowflake.snowpark_connect.relation.stage_locator import get_paths_from_stage
 from snowflake.snowpark_connect.relation.utils import random_string
 from snowflake.snowpark_connect.type_mapping import snowpark_to_iceberg_type
-from snowflake.snowpark_connect.utils.attribute_handling import (
+from snowflake.snowpark_connect.utils.context import get_session_id
+from snowflake.snowpark_connect.utils.identifiers import (
+    spark_to_sf_single_id,
     split_fully_qualified_spark_name,
 )
-from snowflake.snowpark_connect.utils.context import get_session_id
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
     telemetry,
 )
 
+_column_order_for_write = "name"
+
 
 # TODO: We will revise/refactor this after changes for all formats are finalized.
 def clean_params(params):
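
The new module-level `_column_order_for_write = "name"` constant is passed as `column_order` to the `saveAsTable` calls below, so Snowpark matches dataframe columns to the target table by name rather than by position. A minimal sketch of the difference, assuming a Snowpark session `session` and an existing table `T(A INT, B STRING)` (both hypothetical):

    # Hypothetical session/table; illustrates column_order semantics only.
    df = session.create_dataframe([("x", 1)], schema=["B", "A"])
    df.write.save_as_table("T", mode="append", column_order="index")  # positional: df's B lands in column A
    df.write.save_as_table("T", mode="append", column_order="name")   # matched by name: A -> A, B -> B
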
@@ -85,14 +98,9 @@ def get_param_from_options(params, options, source):
         params["format_type_options"]["NULL_IF"] = options["nullValue"]
 
 
-def _spark_to_snowflake_single_id(name: str) -> str:
-    name = quote_name_without_upper_casing(name)
-    return name.upper() if auto_uppercase_ddl() else name
-
-
 def _spark_to_snowflake(multipart_id: str) -> str:
     return ".".join(
-        _spark_to_snowflake_single_id(part)
+        spark_to_sf_single_id(part)
         for part in split_fully_qualified_spark_name(multipart_id)
     )
 
@@ -115,9 +123,8 @@ def map_write(request: proto_base.ExecutePlanRequest):
         case commands_proto.WriteOperation.SaveMode.SAVE_MODE_IGNORE:
             write_mode = "ignore"
 
-    input_df: snowpark.DataFrame = handle_column_names(
-        map_relation(write_op.input), write_op.source
-    )
+    result = map_relation(write_op.input)
+    input_df: snowpark.DataFrame = handle_column_names(result, write_op.source)
     session: snowpark.Session = get_or_create_snowpark_session()
 
     # Snowflake saveAsTable doesn't support format
@@ -198,7 +205,7 @@ def map_write(request: proto_base.ExecutePlanRequest):
             options = dict(write_op.options)
             if write_mode is None:
                 write_mode = "errorifexists"
-            map_write_jdbc(input_df, session, options, write_mode)
+            map_write_jdbc(result, session, options, write_mode)
         case "iceberg":
             table_name = (
                 write_op.path
@@ -207,20 +214,71 @@ def map_write(request: proto_base.ExecutePlanRequest):
             )
             snowpark_table_name = _spark_to_snowflake(table_name)
 
-            if write_mode == "overwrite":
-                if check_snowflake_table_existance(snowpark_table_name, session):
-                    session.sql(f"DELETE FROM {snowpark_table_name}").collect()
-                write_mode = "append"
-
-            if write_mode in (None, "", "overwrite"):
-                create_iceberg_table(
-                    snowpark_table_name=snowpark_table_name,
-                    location=write_op.options.get("location", None),
-                    schema=input_df.schema,
-                    snowpark_session=session,
-                )
-                write_mode = "append"
-            input_df.write.saveAsTable(table_name=snowpark_table_name, mode=write_mode)
+            match write_mode:
+                case None | "error" | "errorifexists":
+                    if check_snowflake_table_existence(snowpark_table_name, session):
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} already exists"
+                        )
+                    create_iceberg_table(
+                        snowpark_table_name=snowpark_table_name,
+                        location=write_op.options.get("location", None),
+                        schema=input_df.schema,
+                        snowpark_session=session,
+                    )
+                    _validate_schema_and_get_writer(
+                        input_df, "append", snowpark_table_name
+                    ).saveAsTable(
+                        table_name=snowpark_table_name,
+                        mode="append",
+                        column_order=_column_order_for_write,
+                    )
+                case "append":
+                    _validate_schema_and_get_writer(
+                        input_df, "append", snowpark_table_name
+                    ).saveAsTable(
+                        table_name=snowpark_table_name,
+                        mode="append",
+                        column_order=_column_order_for_write,
+                    )
+                case "ignore":
+                    if not check_snowflake_table_existence(
+                        snowpark_table_name, session
+                    ):
+                        create_iceberg_table(
+                            snowpark_table_name=snowpark_table_name,
+                            location=write_op.options.get("location", None),
+                            schema=input_df.schema,
+                            snowpark_session=session,
+                        )
+                        _validate_schema_and_get_writer(
+                            input_df, "append", snowpark_table_name
+                        ).saveAsTable(
+                            table_name=snowpark_table_name,
+                            mode="append",
+                            column_order=_column_order_for_write,
+                        )
+                case "overwrite":
+                    if check_snowflake_table_existence(snowpark_table_name, session):
+                        session.sql(f"DELETE FROM {snowpark_table_name}").collect()
+                    else:
+                        create_iceberg_table(
+                            snowpark_table_name=snowpark_table_name,
+                            location=write_op.options.get("location", None),
+                            schema=input_df.schema,
+                            snowpark_session=session,
+                        )
+                    _validate_schema_and_get_writer(
+                        input_df, "append", snowpark_table_name
+                    ).saveAsTable(
+                        table_name=snowpark_table_name,
+                        mode="append",
+                        column_order=_column_order_for_write,
+                    )
+                case _:
+                    raise SnowparkConnectNotImplementedError(
+                        f"Write mode {write_mode} is not supported"
+                    )
         case _:
             snowpark_table_name = _spark_to_snowflake(write_op.table.table_name)
 
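For context, the branches above correspond to the standard PySpark save modes as issued from the client. A hedged illustration (dataframe and table names are hypothetical, not taken from this diff):

    # Client-side calls that reach the "iceberg" branch above.
    df.write.format("iceberg").mode("errorifexists").saveAsTable("db.new_tbl")  # create, fail if it exists
    df.write.format("iceberg").mode("append").saveAsTable("db.tbl")             # append to an existing table
    df.write.format("iceberg").mode("ignore").saveAsTable("db.tbl")             # create only if missing
    df.write.format("iceberg").mode("overwrite").saveAsTable("db.tbl")          # truncate (or create), then append
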
@@ -228,17 +286,23 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 write_op.table.save_method
                 == commands_proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_SAVE_AS_TABLE
             ):
-                input_df.write.saveAsTable(
+                _validate_schema_and_get_writer(
+                    input_df, write_mode, snowpark_table_name
+                ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode=write_mode,
+                    column_order=_column_order_for_write,
                 )
             elif (
                 write_op.table.save_method
                 == commands_proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_INSERT_INTO
             ):
-                input_df.write.saveAsTable(
+                _validate_schema_and_get_writer(
+                    input_df, write_mode, snowpark_table_name
+                ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode=write_mode or "append",
+                    column_order=_column_order_for_write,
                 )
             else:
                 raise SnowparkConnectNotImplementedError(
@@ -265,10 +329,8 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
     )
 
     snowpark_table_name = _spark_to_snowflake(write_op.table_name)
-
-    input_df: snowpark.DataFrame = handle_column_names(
-        map_relation(write_op.input), "table"
-    )
+    result = map_relation(write_op.input)
+    input_df: snowpark.DataFrame = handle_column_names(result, "table")
     session: snowpark.Session = get_or_create_snowpark_session()
 
     if write_op.table_name is None or write_op.table_name == "":
@@ -281,14 +343,14 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
         commands_proto.WriteOperationV2.MODE_OVERWRITE,
         commands_proto.WriteOperationV2.MODE_APPEND,
     ):
-        if not check_snowflake_table_existance(snowpark_table_name, session):
+        if not check_snowflake_table_existence(snowpark_table_name, session):
             raise AnalysisException(
                 f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found. "
                 f"Verify the spelling and correctness of the schema and catalog.\n"
             )
 
     if write_op.provider.lower() == "iceberg":
-        if write_mode == "overwrite" and check_snowflake_table_existance(
+        if write_mode == "overwrite" and check_snowflake_table_existence(
             snowpark_table_name, session
         ):
             session.sql(f"DELETE FROM {snowpark_table_name}").collect()
@@ -304,16 +366,161 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                 schema=input_df.schema,
                 snowpark_session=session,
             )
-
-        input_df.write.saveAsTable(
+        _validate_schema_and_get_writer(
+            input_df, write_mode, snowpark_table_name
+        ).saveAsTable(
             table_name=snowpark_table_name,
             mode="append",
+            column_order=_column_order_for_write,
         )
     else:
-        input_df.write.saveAsTable(
+        _validate_schema_and_get_writer(
+            input_df, write_mode, snowpark_table_name
+        ).saveAsTable(
             table_name=snowpark_table_name,
             mode=write_mode,
+            column_order=_column_order_for_write,
+        )
+
+
+def _validate_schema_and_get_writer(
+    input_df: snowpark.DataFrame, write_mode: str, snowpark_table_name: str
+) -> snowpark.DataFrameWriter:
+    if write_mode == "overwrite":
+        return input_df.write
+
+    table_schema = None
+    try:
+        table_schema = (
+            get_or_create_snowpark_session().table(snowpark_table_name).schema
         )
+    except SnowparkSQLException as e:
+        msg = e.message
+        if "SQL compilation error" in msg and "does not exist" in msg:
+            pass
+        else:
+            raise e
+
+    if table_schema is None:
+        # If table does not exist, we can skip the schema validation
+        return input_df.write
+
+    _validate_schema_for_append(table_schema, input_df.schema, snowpark_table_name)
+
+    # if table exists and case sensitivity is not enabled, we need to rename the columns to match existing table schema
+    if not global_config.spark_sql_caseSensitive:
+
+        for field in input_df.schema.fields:
+            # Find the matching field in the table schema (case-insensitive)
+            col_name = field.name
+            renamed = col_name
+            matching_field = next(
+                (f for f in table_schema.fields if f.name.lower() == col_name.lower()),
+                None,
+            )
+            if matching_field is not None and matching_field != col_name:
+                renamed = matching_field.name
+                input_df = input_df.withColumnRenamed(col_name, renamed)
+            # Cast column if type does not match
+
+            if field.datatype != matching_field.datatype:
+                if isinstance(matching_field.datatype, StructType):
+                    input_df = input_df.withColumn(
+                        renamed,
+                        col(renamed).cast(matching_field.datatype, rename_fields=True),
+                    )
+                else:
+                    input_df = input_df.withColumn(
+                        renamed, col(renamed).cast(matching_field.datatype)
+                    )
+    return input_df.write
+
+
+def _validate_schema_for_append(
+    table_schema: DataType, data_schema: DataType, snowpark_table_name: str
+):
+    match (table_schema, data_schema):
+        case (_, _) if table_schema == data_schema:
+            return
+
+        case (StructType() as table_struct, StructType() as data_struct):
+
+            def _comparable_col_name(col: str) -> str:
+                return col if global_config.spark_sql_caseSensitive else col.lower()
+
+            def invalid_struct_schema():
+                raise AnalysisException(
+                    f"Cannot resolve columns for the existing table {snowpark_table_name} ({table_schema.simple_string()}) with the data schema ({data_schema.simple_string()})."
+                )
+
+            if len(table_struct.fields) != len(data_struct.fields):
+                raise AnalysisException(
+                    f"The column number of the existing table {snowpark_table_name} ({table_schema.simple_string()}) doesn't match the data schema ({data_schema.simple_string()}).)"
+                )
+
+            table_field_names = {
+                _comparable_col_name(field.name) for field in table_struct.fields
+            }
+            data_field_names = {
+                _comparable_col_name(field.name) for field in data_struct.fields
+            }
+
+            if table_field_names != data_field_names:
+                invalid_struct_schema()
+
+            for data_field in data_struct.fields:
+                matching_table_field = next(
+                    (
+                        f
+                        for f in table_struct.fields
+                        if _comparable_col_name(f.name)
+                        == _comparable_col_name(data_field.name)
+                    ),
+                    None,
+                )
+
+                if matching_table_field is None:
+                    invalid_struct_schema()
+                else:
+                    _validate_schema_for_append(
+                        matching_table_field.datatype,
+                        data_field.datatype,
+                        snowpark_table_name,
+                    )
+
+            return
+
+        case (StringType(), _) if not isinstance(
+            data_schema, (StructType, ArrayType, MapType, TimestampType, DateType)
+        ):
+            return
+
+        case (_, _) if isinstance(table_schema, _NumericType) and isinstance(
+            data_schema, _NumericType
+        ):
+            return
+
+        case (ArrayType() as table_array, ArrayType() as data_array):
+            _validate_schema_for_append(
+                table_array.element_type, data_array.element_type, snowpark_table_name
+            )
+
+        case (MapType() as table_map, MapType() as data_map):
+            _validate_schema_for_append(
+                table_map.key_type, data_map.key_type, snowpark_table_name
+            )
+            _validate_schema_for_append(
+                table_map.value_type, data_map.value_type, snowpark_table_name
+            )
+
+        case (TimestampType(), _) if isinstance(data_schema, (DateType, TimestampType)):
+            return
+        case (DateType(), _) if isinstance(data_schema, (DateType, TimestampType)):
+            return
+        case (_, _):
+            raise AnalysisException(
+                f"[INCOMPATIBLE_DATA_FOR_TABLE.CANNOT_SAFELY_CAST] Cannot write incompatible data for the table {snowpark_table_name}: Cannot safely cast {data_schema.simple_string()} to {table_schema.simple_string()}"
+            )
 
 
 def create_iceberg_table(
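The recursive `_validate_schema_for_append` accepts exact matches, any numeric-to-numeric pair, date/timestamp interchange, string targets for scalar sources, and recurses into struct, array, and map types; anything else raises `INCOMPATIBLE_DATA_FOR_TABLE.CANNOT_SAFELY_CAST`. A standalone sketch of those leaf rules, using Snowpark types (this is not the package's function, and the field names are hypothetical):

    from snowflake.snowpark.types import (
        DateType,
        DoubleType,
        LongType,
        StructField,
        StructType,
        TimestampType,
        _NumericType,
    )

    def compatible(table_t, data_t) -> bool:
        # Mirrors the leaf rules above: exact match, numeric-to-numeric, date/timestamp interchange.
        if table_t == data_t:
            return True
        if isinstance(table_t, _NumericType) and isinstance(data_t, _NumericType):
            return True
        if isinstance(table_t, (DateType, TimestampType)) and isinstance(data_t, (DateType, TimestampType)):
            return True
        return False

    table = StructType([StructField("ID", LongType()), StructField("TS", TimestampType())])
    data = StructType([StructField("id", DoubleType()), StructField("ts", DateType())])
    # With case-insensitive name matching, this append would pass validation.
    print(all(compatible(t.datatype, d.datatype) for t, d in zip(table.fields, data.fields)))
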
@@ -323,7 +530,7 @@ def create_iceberg_table(
     snowpark_session: snowpark.Session,
 ):
     table_schema = [
-        f"{_spark_to_snowflake_single_id(field.name)} {snowpark_to_iceberg_type(field.datatype)}"
+        f"{spark_to_sf_single_id(unquote_if_quoted(field.name), is_column = True)} {snowpark_to_iceberg_type(field.datatype)}"
         for field in schema.fields
     ]
 
@@ -374,26 +581,22 @@ def rewrite_df(input_df: snowpark.DataFrame, source: str) -> snowpark.DataFrame:
     return rewritten_df.select(object_construct(*construct_key_values))
 
 
-def handle_column_names(df: snowpark.DataFrame, source: str) -> snowpark.DataFrame:
+def handle_column_names(
+    container: DataFrameContainer, source: str
+) -> snowpark.DataFrame:
     """
-    Handle column names.
-
-    Quote column name in these scenarios:
-    0. Not write to table
-    1. Customer enabled case sensitivity in config
+    Handle column names before write so they match spark schema.
     """
-    if not hasattr(df, "_column_map") or source == "jdbc":
+    df = container.dataframe
+    if source == "jdbc":
         # don't change column names for jdbc sources as we directly use spark column names for writing to the destination tables.
         return df
-    column_map = df._column_map
-    case_sensitive = global_config.spark_sql_caseSensitive
-    for column in df.columns:
-        spark_column_name = unquote_if_quoted(
-            column_map.get_spark_column_name_from_snowpark_column_name(column)
+    column_map = container.column_map
+
+    for column in column_map.columns:
+        df = df.withColumnRenamed(
+            column.snowpark_name, quote_name_without_upper_casing(column.spark_name)
         )
-        if source in ("csv", "parquet", "json") or case_sensitive:
-            spark_column_name = f'"{spark_column_name}"'
-        df = df.withColumnRenamed(column, spark_column_name)
     return df
 
 
@@ -425,7 +628,7 @@ def _truncate_directory(directory_path: Path) -> None:
             shutil.rmtree(file)
 
 
-def check_snowflake_table_existance(
+def check_snowflake_table_existence(
     snowpark_table_name: str,
     snowpark_session: snowpark.Session,
 ):
@@ -3,6 +3,7 @@
 #
 
 from snowflake import snowpark
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.read.map_read_jdbc import (
     close_connection,
     create_connection,
@@ -14,7 +15,7 @@ from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 
 
 def map_write_jdbc(
-    input_df: snowpark.DataFrame,
+    container: DataFrameContainer,
     session: snowpark.Session,
     options: dict[str, str],
     write_mode: str,
@@ -38,7 +39,7 @@
 
     try:
         JdbcDataFrameWriter(session, jdbc_options).jdbc_write_dbapi(
-            input_df,
+            container,
             create_connection,
             close_connection,
             table=dbtable,
@@ -112,10 +112,38 @@ _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE = 128 * 1024 * 1024
 _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE = 64 * 1024  # 64kb
 
 
+def _sanitize_file_paths(text: str) -> str:
+    """
+    Sanitize file paths in error messages by replacing them with placeholders.
+    Only matches actual file paths, not module names or class names.
+    """
+    import re
+
+    # Pattern to match file paths in traceback "File" lines only
+    # This targets the specific format: File "/path/to/file.py", line XX
+    file_line_pattern = r'(File\s+["\'])([^"\']+)(["\'],\s+line\s+\d+)'
+
+    def replace_file_path(match):
+        return f"{match.group(1)}<redacted_file_path>{match.group(3)}"
+
+    return re.sub(file_line_pattern, replace_file_path, text)
+
+
 def _handle_exception(context, e: Exception):
     import traceback
 
-    traceback.print_exc()
+    # traceback.print_exc()
+    # SNOWFLAKE_SHOW_ERROR_TRACE controls sanitized traceback printing (default: false)
+    show_traceback = os.getenv("SNOWFLAKE_SHOW_ERROR_TRACE", "false").lower() == "true"
+
+    if show_traceback:
+        # Show detailed traceback (includes error info naturally)
+        error_traceback = traceback.format_exc()
+        sanitized_traceback = _sanitize_file_paths(error_traceback)
+        logger.error(sanitized_traceback)
+    else:
+        # Show only basic error information, no traceback
+        logger.error("Error: %s - %s", type(e).__name__, str(e))
 
     telemetry.report_request_failure(e)
 
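The sanitizer only rewrites the path inside traceback `File "...", line N` entries, leaving module and class names untouched. A quick self-contained check of the same pattern (the sample traceback text below is made up):

    import re

    file_line_pattern = r'(File\s+["\'])([^"\']+)(["\'],\s+line\s+\d+)'
    sample = 'File "/home/user/project/map_write.py", line 218, in map_write'
    print(re.sub(file_line_pattern, lambda m: f"{m.group(1)}<redacted_file_path>{m.group(3)}", sample))
    # -> File "<redacted_file_path>", line 218, in map_write
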
@@ -195,12 +223,13 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
         telemetry.initialize_request_summary(request)
         match request.WhichOneof("analyze"):
             case "schema":
-                snowpark_df = map_relation(request.schema.plan.root)
+                result = map_relation(request.schema.plan.root)
+                snowpark_df = result.dataframe
                 snowpark_schema: snowpark.types.StructType = snowpark_df.schema
                 schema = proto_base.AnalyzePlanResponse.Schema(
                     schema=types_proto.DataType(
                         **snowpark_to_proto_type(
-                            snowpark_schema, snowpark_df._column_map, snowpark_df
+                            snowpark_schema, result.column_map, snowpark_df
                         )
                     )
                 )
@@ -262,7 +291,8 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
                 # Snowflake only exposes simplified execution plans, similar to Spark's optimized logical plans.
                 # Snowpark provides the execution plan IFF the dataframe maps to a single query.
                 # TODO: Do we need to return a Spark-like plan?
-                snowpark_df = map_relation(request.explain.plan.root)
+                result = map_relation(request.explain.plan.root)
+                snowpark_df = result.dataframe
                 return proto_base.AnalyzePlanResponse(
                     session_id=request.session_id,
                     explain=proto_base.AnalyzePlanResponse.Explain(
@@ -951,6 +981,7 @@ def start_session(
     stop_event: threading.Event = None,
     snowpark_session: Optional[snowpark.Session] = None,
     connection_parameters: Optional[Dict[str, str]] = None,
+    max_grpc_message_size: int = _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE,
 ) -> threading.Thread | None:
     """
     Starts Spark Connect server connected to Snowflake. No-op if the Server is already running.
@@ -973,6 +1004,14 @@ def start_session(
         provided, the `snowpark_session` parameter must be None.
     """
     try:
+        # Changing the value of our global variable based on the grpc message size provided by the user.
+        global _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE
+        _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE = max_grpc_message_size
+
+        from pyspark.sql.connect.client import ChannelBuilder
+
+        ChannelBuilder.MAX_MESSAGE_LENGTH = max_grpc_message_size
+
         if os.environ.get("SPARK_ENV_LOADED"):
             raise RuntimeError(
                 "Snowpark Connect cannot be run inside of a Spark environment"
@@ -52,10 +52,6 @@ SNOWPARK_TYPE_NAME_TO_PYSPARK_TYPE_NAME = {
     snowpark.types.TimestampType.__name__: pyspark.sql.types.TimestampType.typeName(),
 }
 
-_STRUCT_MATCH_PATTERN = re.compile(r"struct<(.+)>", re.IGNORECASE)
-_STRUCT_REPLACE_PATTERN = re.compile(r"struct<[^>]*>", re.IGNORECASE)
-_MAP_REPLACE_PATTERN = re.compile(r"map<[^>]*>", re.IGNORECASE)
-
 
 @cache
 def _get_struct_type_class():
@@ -206,7 +202,7 @@ def snowpark_to_proto_type(
             if (
                 metadata is None
                 and df
-                and field.name in df._column_map.get_snowpark_columns()
+                and field.name in column_name_map.get_snowpark_columns()
             ):
                 try:
                     # check for collision using expr_id
@@ -328,6 +324,8 @@ def cast_to_match_snowpark_type(
             return str(content)
         case snowpark.types.VariantType:
             return str(content)
+        case snowpark.types.TimestampType:
+            return str(content)
         case _:
             raise SnowparkConnectNotImplementedError(
                 f"Unsupported snowpark data type in casting: {data_type}"
@@ -499,7 +497,7 @@ def map_snowpark_types_to_pyarrow_types(
         return pa.string()
     if pa.types.is_struct(pa_type):
         return pa.struct(
-            fields=[
+            [
                 pa.field(
                     field.name if not rename_struct_columns else str(i),
                     map_snowpark_types_to_pyarrow_types(
@@ -783,6 +781,8 @@ def map_simple_types(simple_type: str) -> snowpark.types.DataType:
             return snowpark.types.TimestampType()
         case "timestamp_ntz":
             return snowpark.types.TimestampType(snowpark.types.TimestampTimeZone.NTZ)
+        case "timestamp_ltz":
+            return snowpark.types.TimestampType(snowpark.types.TimestampTimeZone.LTZ)
         case "day_time_interval":
             # this is not a column type in snowflake so there won't be a dataframe column
             # with this, for now this type won't make any sense
@@ -869,23 +869,6 @@ def map_json_schema_to_snowpark(
     return map_simple_types(schema["type"])
 
 
-def _replace_complex_patterns(type_string):
-    # Check if entire string matches struct pattern "struct<col1 int, col2 int, col3 int, col4 int>"
-    type_string = re.sub(
-        r"decimal\s*\(\s*\d+\s*,\s*\d+\s*\)",
-        "decimal",
-        type_string,
-        flags=re.IGNORECASE,
-    )
-    struct_match = _STRUCT_MATCH_PATTERN.match(type_string)
-    if struct_match:
-        return struct_match.group(1).replace(":", " ")
-    # Replace 'struct<[^>]*>' with 'struct' and map<*> with map as we are only interested in column names.
-    type_string = _STRUCT_REPLACE_PATTERN.sub("struct", type_string)
-    type_string = _MAP_REPLACE_PATTERN.sub("map", type_string)
-    return type_string.replace(":", " ")
-
-
 def map_type_string_to_snowpark_type(type_string: str) -> snowpark.types.DataType:
     """
     Converts a pyspark type string like x: int or struct<x: int, y: string> etc. to a snowpark type.