snowpark-connect 0.26.0__py3-none-any.whl → 0.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of snowpark-connect might be problematic.

Files changed (42)
  1. snowflake/snowpark_connect/column_name_handler.py +3 -93
  2. snowflake/snowpark_connect/config.py +99 -4
  3. snowflake/snowpark_connect/dataframe_container.py +0 -6
  4. snowflake/snowpark_connect/expression/map_expression.py +31 -1
  5. snowflake/snowpark_connect/expression/map_sql_expression.py +22 -18
  6. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +22 -26
  7. snowflake/snowpark_connect/expression/map_unresolved_function.py +28 -10
  8. snowflake/snowpark_connect/expression/map_unresolved_star.py +2 -3
  9. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  10. snowflake/snowpark_connect/relation/map_extension.py +7 -1
  11. snowflake/snowpark_connect/relation/map_join.py +62 -258
  12. snowflake/snowpark_connect/relation/map_map_partitions.py +36 -77
  13. snowflake/snowpark_connect/relation/map_relation.py +8 -2
  14. snowflake/snowpark_connect/relation/map_show_string.py +2 -0
  15. snowflake/snowpark_connect/relation/map_sql.py +413 -15
  16. snowflake/snowpark_connect/relation/write/map_write.py +195 -114
  17. snowflake/snowpark_connect/resources_initializer.py +20 -5
  18. snowflake/snowpark_connect/server.py +20 -18
  19. snowflake/snowpark_connect/utils/artifacts.py +4 -5
  20. snowflake/snowpark_connect/utils/concurrent.py +4 -0
  21. snowflake/snowpark_connect/utils/context.py +41 -1
  22. snowflake/snowpark_connect/utils/describe_query_cache.py +57 -51
  23. snowflake/snowpark_connect/utils/identifiers.py +120 -0
  24. snowflake/snowpark_connect/utils/io_utils.py +21 -1
  25. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +86 -2
  26. snowflake/snowpark_connect/utils/scala_udf_utils.py +34 -43
  27. snowflake/snowpark_connect/utils/session.py +16 -26
  28. snowflake/snowpark_connect/utils/telemetry.py +53 -0
  29. snowflake/snowpark_connect/utils/udf_utils.py +66 -103
  30. snowflake/snowpark_connect/utils/udtf_helper.py +17 -7
  31. snowflake/snowpark_connect/version.py +2 -3
  32. {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/METADATA +2 -2
  33. {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/RECORD +41 -42
  34. snowflake/snowpark_connect/hidden_column.py +0 -39
  35. {snowpark_connect-0.26.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-connect +0 -0
  36. {snowpark_connect-0.26.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-session +0 -0
  37. {snowpark_connect-0.26.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-submit +0 -0
  38. {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/WHEEL +0 -0
  39. {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/LICENSE-binary +0 -0
  40. {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/LICENSE.txt +0 -0
  41. {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/NOTICE-binary +0 -0
  42. {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/relation/write/map_write.py
@@ -50,7 +50,9 @@ from snowflake.snowpark_connect.utils.identifiers import (
  spark_to_sf_single_id,
  split_fully_qualified_spark_name,
  )
+ from snowflake.snowpark_connect.utils.io_utils import get_table_type
  from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
+ from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
  from snowflake.snowpark_connect.utils.telemetry import (
  SnowparkConnectNotImplementedError,
  telemetry,
@@ -160,6 +162,29 @@ def map_write(request: proto_base.ExecutePlanRequest):
  compression_option = write_op.options.get("compression", "none")

  # Generate Spark-compatible filename or prefix
+ # we need a random prefix to support "append" mode
+ # otherwise copy into with overwrite=False will fail if the file already exists
+ overwrite = (
+ write_op.mode
+ == commands_proto.WriteOperation.SaveMode.SAVE_MODE_OVERWRITE
+ )
+
+ if overwrite:
+ try:
+ path_after_stage = (
+ write_path.split("/", 1)[1] if "/" in write_path else ""
+ )
+ if not path_after_stage or path_after_stage == "/":
+ logger.warning(
+ f"Skipping REMOVE for root path {write_path} - too broad scope"
+ )
+ else:
+ remove_command = f"REMOVE {write_path}/"
+ session.sql(remove_command).collect()
+ logger.info(f"Successfully cleared directory: {write_path}")
+ except Exception as e:
+ logger.warning(f"Could not clear directory {write_path}: {e}")
+
  if should_write_to_single_file:
  # Single file: generate complete filename with extension
  spark_filename = generate_spark_compatible_filename(
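Note: the added block above clears the target stage path before an overwrite write, while append writes now rely on a randomized filename prefix so `COPY INTO` with `OVERWRITE=FALSE` does not collide with existing files. A minimal sketch of the overwrite-time cleanup; the helper name and the "@my_stage/out" path are illustrative, not part of the package:

```python
# Minimal sketch of the overwrite-time cleanup added above, assuming an
# existing Snowpark `session`; helper name and stage path are illustrative.
from snowflake.snowpark import Session


def clear_stage_path_before_overwrite(session: Session, write_path: str) -> None:
    # Only REMOVE when there is a sub-path after the stage name, so a bare
    # stage root is never wiped.
    path_after_stage = write_path.split("/", 1)[1] if "/" in write_path else ""
    if not path_after_stage or path_after_stage == "/":
        return  # too broad a scope; skip the cleanup
    session.sql(f"REMOVE {write_path}/").collect()


# Example: clear_stage_path_before_overwrite(session, "@my_stage/out")
```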
@@ -178,10 +203,6 @@ def map_write(request: proto_base.ExecutePlanRequest):
  format_ext="", # No extension for prefix
  )
  temp_file_prefix_on_stage = f"{write_path}/{spark_filename_prefix}"
- overwrite = (
- write_op.mode
- == commands_proto.WriteOperation.SaveMode.SAVE_MODE_OVERWRITE
- )

  default_compression = "NONE" if write_op.source != "parquet" else "snappy"
  compression = write_op.options.get(
@@ -291,7 +312,10 @@ def map_write(request: proto_base.ExecutePlanRequest):

  match write_mode:
  case None | "error" | "errorifexists":
- if check_snowflake_table_existence(snowpark_table_name, session):
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if isinstance(table_schema_or_error, DataType): # Table exists
  raise AnalysisException(
  f"Table {snowpark_table_name} already exists"
  )
@@ -302,29 +326,45 @@ def map_write(request: proto_base.ExecutePlanRequest):
  snowpark_session=session,
  )
  _validate_schema_and_get_writer(
- input_df, "append", snowpark_table_name
+ input_df, "append", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="append",
  column_order=_column_order_for_write,
  )
  case "append":
- # TODO: SNOW-2299414 Fix the implementation of table type check
- # if check_table_type(snowpark_table_name, session) != "ICEBERG":
- # raise AnalysisException(
- # f"Table {snowpark_table_name} is not an iceberg table"
- # )
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if isinstance(table_schema_or_error, DataType): # Table exists
+ if get_table_type(snowpark_table_name, session) not in (
+ "ICEBERG",
+ "TABLE",
+ ):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not an iceberg table"
+ )
+ else:
+ create_iceberg_table(
+ snowpark_table_name=snowpark_table_name,
+ location=write_op.options.get("location", None),
+ schema=input_df.schema,
+ snowpark_session=session,
+ )
  _validate_schema_and_get_writer(
- input_df, "append", snowpark_table_name
+ input_df, "append", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="append",
  column_order=_column_order_for_write,
  )
  case "ignore":
- if not check_snowflake_table_existence(
+ table_schema_or_error = _get_table_schema_or_error(
  snowpark_table_name, session
- ):
+ )
+ if not isinstance(
+ table_schema_or_error, DataType
+ ): # Table not exists
  create_iceberg_table(
  snowpark_table_name=snowpark_table_name,
  location=write_op.options.get("location", None),
@@ -339,13 +379,17 @@ def map_write(request: proto_base.ExecutePlanRequest):
  column_order=_column_order_for_write,
  )
  case "overwrite":
- if check_snowflake_table_existence(snowpark_table_name, session):
- # TODO: SNOW-2299414 Fix the implementation of table type check
- # if check_table_type(snowpark_table_name, session) != "ICEBERG":
- # raise AnalysisException(
- # f"Table {snowpark_table_name} is not an iceberg table"
- # )
- pass
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if isinstance(table_schema_or_error, DataType): # Table exists
+ if get_table_type(snowpark_table_name, session) not in (
+ "ICEBERG",
+ "TABLE",
+ ):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not an iceberg table"
+ )
  else:
  create_iceberg_table(
  snowpark_table_name=snowpark_table_name,
@@ -354,7 +398,7 @@ def map_write(request: proto_base.ExecutePlanRequest):
  snowpark_session=session,
  )
  _validate_schema_and_get_writer(
- input_df, "truncate", snowpark_table_name
+ input_df, "truncate", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="truncate",
@@ -373,33 +417,49 @@ def map_write(request: proto_base.ExecutePlanRequest):
  ):
  match write_mode:
  case "overwrite":
- if check_snowflake_table_existence(
+ table_schema_or_error = _get_table_schema_or_error(
  snowpark_table_name, session
- ):
- # TODO: SNOW-2299414 Fix the implementation of table type check
- # if (
- # check_table_type(snowpark_table_name, session)
- # != "TABLE"
- # ):
- # raise AnalysisException(
- # f"Table {snowpark_table_name} is not a FDN table"
- # )
+ )
+ if isinstance(table_schema_or_error, DataType): # Table exists
+ if get_table_type(snowpark_table_name, session) not in (
+ "NORMAL",
+ "TABLE",
+ ):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not a FDN table"
+ )
  write_mode = "truncate"
  _validate_schema_and_get_writer(
- input_df, write_mode, snowpark_table_name
+ input_df,
+ write_mode,
+ snowpark_table_name,
+ table_schema_or_error,
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode=write_mode,
  column_order=_column_order_for_write,
  )
  case "append":
- # TODO: SNOW-2299414 Fix the implementation of table type check
- # if check_table_type(snowpark_table_name, session) != "TABLE":
- # raise AnalysisException(
- # f"Table {snowpark_table_name} is not a FDN table"
- # )
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if isinstance(
+ table_schema_or_error, DataType
+ ) and get_table_type( # Table exists
+ snowpark_table_name, session
+ ) not in (
+ "NORMAL",
+ "TABLE",
+ ):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not a FDN table"
+ )
+
  _validate_schema_and_get_writer(
- input_df, write_mode, snowpark_table_name
+ input_df,
+ write_mode,
+ snowpark_table_name,
+ table_schema_or_error,
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode=write_mode,
@@ -446,7 +506,10 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
  if write_op.provider.lower() == "iceberg":
  match write_op.mode:
  case commands_proto.WriteOperationV2.MODE_CREATE:
- if check_snowflake_table_existence(snowpark_table_name, session):
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if isinstance(table_schema_or_error, DataType): # Table exists
  raise AnalysisException(
  f"Table {snowpark_table_name} already exists"
  )
@@ -457,24 +520,29 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
  snowpark_session=session,
  )
  _validate_schema_and_get_writer(
- input_df, "append", snowpark_table_name
+ input_df, "append", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="append",
  column_order=_column_order_for_write,
  )
  case commands_proto.WriteOperationV2.MODE_APPEND:
- if not check_snowflake_table_existence(snowpark_table_name, session):
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if not isinstance(table_schema_or_error, DataType): # Table not exists
  raise AnalysisException(
  f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
  )
- # TODO: SNOW-2299414 Fix the implementation of table type check
- # if check_table_type(snowpark_table_name, session) != "ICEBERG":
- # raise AnalysisException(
- # f"Table {snowpark_table_name} is not an iceberg table"
- # )
+ if get_table_type(snowpark_table_name, session) not in (
+ "ICEBERG",
+ "TABLE",
+ ):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not an iceberg table"
+ )
  _validate_schema_and_get_writer(
- input_df, "append", snowpark_table_name
+ input_df, "append", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="append",
@@ -482,26 +550,33 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
  )
  case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
  # TODO: handle the filter condition for MODE_OVERWRITE
- if check_snowflake_table_existence(snowpark_table_name, session):
- # TODO: SNOW-2299414 Fix the implementation of table type check
- # if check_table_type(snowpark_table_name, session) != "ICEBERG":
- # raise AnalysisException(
- # f"Table {snowpark_table_name} is not an iceberg table"
- # )
- pass
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if isinstance(table_schema_or_error, DataType): # Table exists
+ if get_table_type(snowpark_table_name, session) not in (
+ "ICEBERG",
+ "TABLE",
+ ):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not an iceberg table"
+ )
  else:
  raise AnalysisException(
  f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
  )
  _validate_schema_and_get_writer(
- input_df, "truncate", snowpark_table_name
+ input_df, "truncate", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="truncate",
  column_order=_column_order_for_write,
  )
  case commands_proto.WriteOperationV2.MODE_REPLACE:
- if check_snowflake_table_existence(snowpark_table_name, session):
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if isinstance(table_schema_or_error, DataType): # Table exists
  create_iceberg_table(
  snowpark_table_name=snowpark_table_name,
  location=write_op.table_properties.get("location"),
@@ -514,7 +589,7 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
  f"Table {snowpark_table_name} does not exist"
  )
  _validate_schema_and_get_writer(
- input_df, "replace", snowpark_table_name
+ input_df, "replace", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="append",
@@ -550,17 +625,22 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
  column_order=_column_order_for_write,
  )
  case commands_proto.WriteOperationV2.MODE_APPEND:
- if not check_snowflake_table_existence(snowpark_table_name, session):
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if not isinstance(table_schema_or_error, DataType): # Table not exists
  raise AnalysisException(
  f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
  )
- # TODO: SNOW-2299414 Fix the implementation of table type check
- # if check_table_type(snowpark_table_name, session) != "TABLE":
- # raise AnalysisException(
- # f"Table {snowpark_table_name} is not a FDN table"
- # )
+ if get_table_type(snowpark_table_name, session) not in (
+ "NORMAL",
+ "TABLE",
+ ):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not a FDN table"
+ )
  _validate_schema_and_get_writer(
- input_df, "append", snowpark_table_name
+ input_df, "append", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="append",
@@ -568,31 +648,38 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
  )
  case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
  # TODO: handle the filter condition for MODE_OVERWRITE
- if check_snowflake_table_existence(snowpark_table_name, session):
- # TODO: SNOW-2299414 Fix the implementation of table type check
- # if check_table_type(snowpark_table_name, session) != "TABLE":
- # raise AnalysisException(
- # f"Table {snowpark_table_name} is not a FDN table"
- # )
- pass
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if isinstance(table_schema_or_error, DataType): # Table exists
+ if get_table_type(snowpark_table_name, session) not in (
+ "NORMAL",
+ "TABLE",
+ ):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not a FDN table"
+ )
  else:
  raise AnalysisException(
  f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
  )
  _validate_schema_and_get_writer(
- input_df, "truncate", snowpark_table_name
+ input_df, "truncate", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="truncate",
  column_order=_column_order_for_write,
  )
  case commands_proto.WriteOperationV2.MODE_REPLACE:
- if not check_snowflake_table_existence(snowpark_table_name, session):
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if not isinstance(table_schema_or_error, DataType): # Table not exists
  raise AnalysisException(
  f"Table {snowpark_table_name} does not exist"
  )
  _validate_schema_and_get_writer(
- input_df, "replace", snowpark_table_name
+ input_df, "replace", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="overwrite",
@@ -612,8 +699,20 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
  )


+ def _get_table_schema_or_error(
+ snowpark_table_name: str, snowpark_session: snowpark.Session
+ ) -> DataType | SnowparkSQLException:
+ try:
+ return snowpark_session.table(snowpark_table_name).schema
+ except SnowparkSQLException as e:
+ return e
+
+
  def _validate_schema_and_get_writer(
- input_df: snowpark.DataFrame, write_mode: str, snowpark_table_name: str
+ input_df: snowpark.DataFrame,
+ write_mode: str,
+ snowpark_table_name: str,
+ table_schema_or_error: DataType | SnowparkSQLException | None = None,
  ) -> snowpark.DataFrameWriter:
  if write_mode is not None and write_mode.lower() in (
  "replace",
@@ -622,16 +721,26 @@ def _validate_schema_and_get_writer(
  return input_df.write

  table_schema = None
- try:
- table_schema = (
- get_or_create_snowpark_session().table(snowpark_table_name).schema
- )
- except SnowparkSQLException as e:
- msg = e.message
- if "SQL compilation error" in msg and "does not exist" in msg:
- pass
- else:
- raise e
+ if table_schema_or_error is not None:
+ if isinstance(table_schema_or_error, SnowparkSQLException):
+ msg = table_schema_or_error.message
+ if "SQL compilation error" in msg and "does not exist" in msg:
+ pass
+ else:
+ raise table_schema_or_error
+ elif isinstance(table_schema_or_error, DataType):
+ table_schema = table_schema_or_error
+ else:
+ try:
+ table_schema = (
+ get_or_create_snowpark_session().table(snowpark_table_name).schema
+ )
+ except SnowparkSQLException as e:
+ msg = e.message
+ if "SQL compilation error" in msg and "does not exist" in msg:
+ pass
+ else:
+ raise e

  if table_schema is None:
  # If table does not exist, we can skip the schema validation
@@ -884,31 +993,3 @@ def _truncate_directory(directory_path: Path) -> None:
  file.unlink()
  elif file.is_dir():
  shutil.rmtree(file)
-
-
- def check_snowflake_table_existence(
- snowpark_table_name: str,
- snowpark_session: snowpark.Session,
- ):
- try:
- snowpark_session.sql(f"SELECT 1 FROM {snowpark_table_name} LIMIT 1").collect()
- return True
- except Exception:
- return False
-
-
- # TODO: SNOW-2299414 Fix the implementation of table type check
- # def check_table_type(
- # snowpark_table_name: str,
- # snowpark_session: snowpark.Session,
- # ) -> str:
- # # currently we only support iceberg table and FDN table
- # metadata = snowpark_session.sql(
- # f"SHOW TABLES LIKE '{unquote_if_quoted(snowpark_table_name)}';"
- # ).collect()
- # if metadata is None or len(metadata) == 0:
- # raise AnalysisException(f"Table {snowpark_table_name} does not exist")
- # metadata = metadata[0]
- # if metadata.as_dict().get("is_iceberg") == "Y":
- # return "ICEBERG"
- # return "TABLE"
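Note: across the hunks above, the old `SELECT 1 ... LIMIT 1` probe (`check_snowflake_table_existence`, removed here) is replaced by `_get_table_schema_or_error`, whose result is threaded into `_validate_schema_and_get_writer`, so a single schema lookup serves both as the existence check and as the input for schema validation. A condensed sketch of the pattern, with simplified names (the real logic lives in map_write.py):

```python
# Condensed sketch of the new existence-check pattern; names are simplified.
from snowflake.snowpark import Session
from snowflake.snowpark.exceptions import SnowparkSQLException
from snowflake.snowpark.types import DataType


def get_table_schema_or_error(
    session: Session, table_name: str
) -> DataType | SnowparkSQLException:
    try:
        # StructType (a DataType subclass) is returned when the table exists.
        return session.table(table_name).schema
    except SnowparkSQLException as e:
        # A "does not exist" compilation error doubles as the existence signal.
        return e


def table_exists(schema_or_error: DataType | SnowparkSQLException) -> bool:
    return isinstance(schema_or_error, DataType)
```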
snowflake/snowpark_connect/resources_initializer.py
@@ -12,6 +12,7 @@ _resources_initialized = threading.Event()
  _initializer_lock = threading.Lock()
  SPARK_VERSION = "3.5.6"
  RESOURCE_PATH = "/snowflake/snowpark_connect/resources"
+ _upload_jars = True # Flag to control whether to upload jars. Required for Scala UDFs.


  def initialize_resources() -> None:
@@ -57,10 +58,8 @@ def initialize_resources() -> None:
  f"spark-sql_2.12-{SPARK_VERSION}.jar",
  f"spark-connect-client-jvm_2.12-{SPARK_VERSION}.jar",
  f"spark-common-utils_2.12-{SPARK_VERSION}.jar",
+ "sas-scala-udf_2.12-0.1.0.jar",
  "json4s-ast_2.12-3.7.0-M11.jar",
- "json4s-native_2.12-3.7.0-M11.jar",
- "json4s-core_2.12-3.7.0-M11.jar",
- "paranamer-2.8.3.jar",
  ]

  for jar in jar_files:
@@ -80,9 +79,11 @@ def initialize_resources() -> None:
  ("Initialize Session Stage", initialize_session_stage), # Takes about 0.3s
  ("Initialize Session Catalog", initialize_catalog), # Takes about 1.2s
  ("Snowflake Connection Warm Up", warm_up_sf_connection), # Takes about 1s
- ("Upload Scala UDF Jars", upload_scala_udf_jars),
  ]

+ if _upload_jars:
+ resources.append(("Upload Scala UDF Jars", upload_scala_udf_jars))
+
  for name, resource_func in resources:
  resource_start = time.time()
  try:
@@ -113,4 +114,18 @@ def initialize_resources_async() -> threading.Thread:

  def wait_for_resource_initialization() -> None:
  with _initializer_lock:
- _resource_initializer.join()
+ _resource_initializer.join(timeout=300) # wait at most 300 seconds
+ if _resource_initializer.is_alive():
+ logger.error(
+ "Resource initialization failed - initializer thread has been running for over 300 seconds."
+ )
+ raise RuntimeError(
+ "Resource initialization failed - initializer thread has been running for over 300 seconds."
+ )
+
+
+ def set_upload_jars(upload: bool) -> None:
+ """Set whether to upload jars required for Scala UDFs. This should be set to False if Scala UDFs
+ are not used, to avoid the overhead of uploading jars."""
+ global _upload_jars
+ _upload_jars = upload
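Note: resource initialization now bounds its wait at 300 seconds, and the Scala UDF jar upload step can be switched off. Illustrative usage of the new toggle, assuming Scala UDFs are not needed in the workload (whether and where to call this is up to the host application):

```python
# Illustrative usage of the new set_upload_jars toggle; only worthwhile when
# Scala UDFs are not used, per the docstring above.
from snowflake.snowpark_connect.resources_initializer import (
    initialize_resources_async,
    set_upload_jars,
    wait_for_resource_initialization,
)

set_upload_jars(False)               # skip the "Upload Scala UDF Jars" step
initialize_resources_async()         # start the background initializer thread
wait_for_resource_initialization()   # now raises RuntimeError after 300 seconds
```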
snowflake/snowpark_connect/server.py
@@ -531,7 +531,10 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
  if name.endswith(".class"):
  # name is <dir>/<package>/<class_name>
  # we don't need the dir name, but require the package, so only remove dir
- class_files[name.split("/", 1)[-1]] = filepath
+ if os.name != "nt":
+ class_files[name.split("/", 1)[-1]] = filepath
+ else:
+ class_files[name.split("\\", 1)[-1]] = filepath
  continue
  session.file.put(
  filepath,
@@ -722,30 +725,33 @@ def _serve(
  # No need to start grpc server in TCM
  return

+ grpc_max_msg_size = get_int_from_env(
+ "SNOWFLAKE_GRPC_MAX_MESSAGE_SIZE",
+ _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE,
+ )
+ grpc_max_metadata_size = get_int_from_env(
+ "SNOWFLAKE_GRPC_MAX_METADATA_SIZE",
+ _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE,
+ )
  server_options = [
  (
  "grpc.max_receive_message_length",
- get_int_from_env(
- "SNOWFLAKE_GRPC_MAX_MESSAGE_SIZE",
- _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE,
- ),
+ grpc_max_msg_size,
  ),
  (
  "grpc.max_metadata_size",
- get_int_from_env(
- "SNOWFLAKE_GRPC_MAX_METADATA_SIZE",
- _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE,
- ),
+ grpc_max_metadata_size,
  ),
  (
  "grpc.absolute_max_metadata_size",
- get_int_from_env(
- "SNOWFLAKE_GRPC_MAX_METADATA_SIZE",
- _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE,
- )
- * 2,
+ grpc_max_metadata_size * 2,
  ),
  ]
+
+ from pyspark.sql.connect.client import ChannelBuilder
+
+ ChannelBuilder.MAX_MESSAGE_LENGTH = grpc_max_msg_size
+
  server = grpc.server(
  futures.ThreadPoolExecutor(max_workers=10), options=server_options
  )
@@ -1050,10 +1056,6 @@ def start_session(
  global _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE
  _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE = max_grpc_message_size

- from pyspark.sql.connect.client import ChannelBuilder
-
- ChannelBuilder.MAX_MESSAGE_LENGTH = max_grpc_message_size
-
  if os.environ.get("SPARK_ENV_LOADED"):
  raise RuntimeError(
  "Snowpark Connect cannot be run inside of a Spark environment"
snowflake/snowpark_connect/utils/artifacts.py
@@ -39,7 +39,7 @@ def write_temporary_artifact(
  if os.name != "nt":
  filepath = f"/tmp/sas-{session.session_id}/{name}"
  else:
- filepath = f"{tempfile.gettempdir()}/sas-{session.session_id}/{name}"
+ filepath = f"{tempfile.gettempdir()}\\sas-{session.session_id}\\{name}"
  # The name comes to us as a path (e.g. cache/<name>), so we need to create
  # the parent directory if it doesn't exist to avoid errors during writing.
  pathlib.Path(filepath).parent.mkdir(parents=True, exist_ok=True)
@@ -55,11 +55,10 @@ def write_class_files_to_stage(
  ) -> None:
  if os.name != "nt":
  filepath = f"/tmp/sas-{session.session_id}"
+ jar_name = f'{filepath}/{hashlib.sha256(str(files).encode("utf-8")).hexdigest()[:10]}.jar'
  else:
- filepath = f"{tempfile.gettempdir()}/sas-{session.session_id}"
- jar_name = (
- f'{filepath}/{hashlib.sha256(str(files).encode("utf-8")).hexdigest()[:10]}.jar'
- )
+ filepath = f"{tempfile.gettempdir()}\\sas-{session.session_id}"
+ jar_name = f'{filepath}\\{hashlib.sha256(str(files).encode("utf-8")).hexdigest()[:10]}.jar'
  with zipfile.ZipFile(jar_name, "w", zipfile.ZIP_DEFLATED) as jar:
  for name, path in files.items():
  jar.write(path, name)
snowflake/snowpark_connect/utils/concurrent.py
@@ -52,6 +52,10 @@ class SynchronizedDict(Mapping[K, V]):
  with self._lock.writer():
  self._dict[key] = value

+ def __delitem__(self, key: K) -> None:
+ with self._lock.writer():
+ del self._dict[key]
+
  def __contains__(self, key: K) -> bool:
  with self._lock.reader():
  return key in self._dict
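Note: `SynchronizedDict` now supports `del d[key]` under the write lock, matching `__setitem__`. A self-contained analogue using a plain `threading.Lock`, since the package's reader/writer lock implementation is not shown in this diff:

```python
# Self-contained analogue of the SynchronizedDict change: deletion takes the
# same write lock as insertion. A plain threading.Lock stands in for the
# package's reader/writer lock, which is not shown in this diff.
import threading
from typing import Dict, Generic, TypeVar

K = TypeVar("K")
V = TypeVar("V")


class SimpleSynchronizedDict(Generic[K, V]):
    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._dict: Dict[K, V] = {}

    def __setitem__(self, key: K, value: V) -> None:
        with self._lock:
            self._dict[key] = value

    def __delitem__(self, key: K) -> None:
        with self._lock:
            del self._dict[key]

    def __contains__(self, key: K) -> bool:
        with self._lock:
            return key in self._dict
```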