snowpark-connect 0.27.0__py3-none-any.whl → 0.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of snowpark-connect has been flagged as possibly problematic.
Files changed (35)
  1. snowflake/snowpark_connect/column_name_handler.py +3 -93
  2. snowflake/snowpark_connect/config.py +99 -1
  3. snowflake/snowpark_connect/dataframe_container.py +0 -6
  4. snowflake/snowpark_connect/expression/map_expression.py +22 -7
  5. snowflake/snowpark_connect/expression/map_sql_expression.py +22 -18
  6. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +4 -26
  7. snowflake/snowpark_connect/expression/map_unresolved_function.py +12 -3
  8. snowflake/snowpark_connect/expression/map_unresolved_star.py +2 -3
  9. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  10. snowflake/snowpark_connect/relation/map_extension.py +14 -10
  11. snowflake/snowpark_connect/relation/map_join.py +62 -258
  12. snowflake/snowpark_connect/relation/map_relation.py +5 -1
  13. snowflake/snowpark_connect/relation/map_sql.py +353 -16
  14. snowflake/snowpark_connect/relation/write/map_write.py +171 -110
  15. snowflake/snowpark_connect/resources_initializer.py +20 -5
  16. snowflake/snowpark_connect/server.py +16 -17
  17. snowflake/snowpark_connect/utils/concurrent.py +4 -0
  18. snowflake/snowpark_connect/utils/describe_query_cache.py +57 -51
  19. snowflake/snowpark_connect/utils/identifiers.py +120 -0
  20. snowflake/snowpark_connect/utils/io_utils.py +21 -1
  21. snowflake/snowpark_connect/utils/scala_udf_utils.py +34 -43
  22. snowflake/snowpark_connect/utils/session.py +16 -26
  23. snowflake/snowpark_connect/utils/telemetry.py +53 -0
  24. snowflake/snowpark_connect/version.py +1 -1
  25. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/METADATA +2 -2
  26. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/RECORD +34 -35
  27. snowflake/snowpark_connect/hidden_column.py +0 -39
  28. {snowpark_connect-0.27.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-connect +0 -0
  29. {snowpark_connect-0.27.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-session +0 -0
  30. {snowpark_connect-0.27.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-submit +0 -0
  31. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/WHEEL +0 -0
  32. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/LICENSE-binary +0 -0
  33. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/LICENSE.txt +0 -0
  34. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/NOTICE-binary +0 -0
  35. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/relation/write/map_write.py
@@ -50,6 +50,7 @@ from snowflake.snowpark_connect.utils.identifiers import (
  spark_to_sf_single_id,
  split_fully_qualified_spark_name,
  )
+ from snowflake.snowpark_connect.utils.io_utils import get_table_type
  from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
  from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
  from snowflake.snowpark_connect.utils.telemetry import (
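The newly imported get_table_type (defined in utils/io_utils.py, which also changed in this release, +21 -1, but is not shown in this excerpt) backs the table-type gates added throughout the hunks below. A minimal sketch of how that gate reads; assert_iceberg_target is a hypothetical name used only for illustration, and the accepted values are taken from the hunks that follow:

from pyspark.errors import AnalysisException
from snowflake.snowpark_connect.utils.io_utils import get_table_type

def assert_iceberg_target(table_name, session) -> None:
    # Mirrors the checks added below: only "ICEBERG" and "TABLE" results pass.
    if get_table_type(table_name, session) not in ("ICEBERG", "TABLE"):
        raise AnalysisException(f"Table {table_name} is not an iceberg table")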
@@ -311,7 +312,10 @@ def map_write(request: proto_base.ExecutePlanRequest):

  match write_mode:
  case None | "error" | "errorifexists":
- if check_snowflake_table_existence(snowpark_table_name, session):
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if isinstance(table_schema_or_error, DataType): # Table exists
  raise AnalysisException(
  f"Table {snowpark_table_name} already exists"
  )
@@ -322,29 +326,45 @@ def map_write(request: proto_base.ExecutePlanRequest):
  snowpark_session=session,
  )
  _validate_schema_and_get_writer(
- input_df, "append", snowpark_table_name
+ input_df, "append", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="append",
  column_order=_column_order_for_write,
  )
  case "append":
- # TODO: SNOW-2299414 Fix the implementation of table type check
- # if check_table_type(snowpark_table_name, session) != "ICEBERG":
- # raise AnalysisException(
- # f"Table {snowpark_table_name} is not an iceberg table"
- # )
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if isinstance(table_schema_or_error, DataType): # Table exists
+ if get_table_type(snowpark_table_name, session) not in (
+ "ICEBERG",
+ "TABLE",
+ ):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not an iceberg table"
+ )
+ else:
+ create_iceberg_table(
+ snowpark_table_name=snowpark_table_name,
+ location=write_op.options.get("location", None),
+ schema=input_df.schema,
+ snowpark_session=session,
+ )
  _validate_schema_and_get_writer(
- input_df, "append", snowpark_table_name
+ input_df, "append", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="append",
  column_order=_column_order_for_write,
  )
  case "ignore":
- if not check_snowflake_table_existence(
+ table_schema_or_error = _get_table_schema_or_error(
  snowpark_table_name, session
- ):
+ )
+ if not isinstance(
+ table_schema_or_error, DataType
+ ): # Table not exists
  create_iceberg_table(
  snowpark_table_name=snowpark_table_name,
  location=write_op.options.get("location", None),
@@ -359,13 +379,17 @@ def map_write(request: proto_base.ExecutePlanRequest):
  column_order=_column_order_for_write,
  )
  case "overwrite":
- if check_snowflake_table_existence(snowpark_table_name, session):
- # TODO: SNOW-2299414 Fix the implementation of table type check
- # if check_table_type(snowpark_table_name, session) != "ICEBERG":
- # raise AnalysisException(
- # f"Table {snowpark_table_name} is not an iceberg table"
- # )
- pass
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if isinstance(table_schema_or_error, DataType): # Table exists
+ if get_table_type(snowpark_table_name, session) not in (
+ "ICEBERG",
+ "TABLE",
+ ):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not an iceberg table"
+ )
  else:
  create_iceberg_table(
  snowpark_table_name=snowpark_table_name,
@@ -374,7 +398,7 @@ def map_write(request: proto_base.ExecutePlanRequest):
  snowpark_session=session,
  )
  _validate_schema_and_get_writer(
- input_df, "truncate", snowpark_table_name
+ input_df, "truncate", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="truncate",
@@ -393,33 +417,49 @@ def map_write(request: proto_base.ExecutePlanRequest):
  ):
  match write_mode:
  case "overwrite":
- if check_snowflake_table_existence(
+ table_schema_or_error = _get_table_schema_or_error(
  snowpark_table_name, session
- ):
- # TODO: SNOW-2299414 Fix the implementation of table type check
- # if (
- # check_table_type(snowpark_table_name, session)
- # != "TABLE"
- # ):
- # raise AnalysisException(
- # f"Table {snowpark_table_name} is not a FDN table"
- # )
+ )
+ if isinstance(table_schema_or_error, DataType): # Table exists
+ if get_table_type(snowpark_table_name, session) not in (
+ "NORMAL",
+ "TABLE",
+ ):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not a FDN table"
+ )
  write_mode = "truncate"
  _validate_schema_and_get_writer(
- input_df, write_mode, snowpark_table_name
+ input_df,
+ write_mode,
+ snowpark_table_name,
+ table_schema_or_error,
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode=write_mode,
  column_order=_column_order_for_write,
  )
  case "append":
- # TODO: SNOW-2299414 Fix the implementation of table type check
- # if check_table_type(snowpark_table_name, session) != "TABLE":
- # raise AnalysisException(
- # f"Table {snowpark_table_name} is not a FDN table"
- # )
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if isinstance(
+ table_schema_or_error, DataType
+ ) and get_table_type( # Table exists
+ snowpark_table_name, session
+ ) not in (
+ "NORMAL",
+ "TABLE",
+ ):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not a FDN table"
+ )
+
  _validate_schema_and_get_writer(
- input_df, write_mode, snowpark_table_name
+ input_df,
+ write_mode,
+ snowpark_table_name,
+ table_schema_or_error,
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode=write_mode,
@@ -466,7 +506,10 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
  if write_op.provider.lower() == "iceberg":
  match write_op.mode:
  case commands_proto.WriteOperationV2.MODE_CREATE:
- if check_snowflake_table_existence(snowpark_table_name, session):
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if isinstance(table_schema_or_error, DataType): # Table exists
  raise AnalysisException(
  f"Table {snowpark_table_name} already exists"
  )
@@ -477,24 +520,29 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
  snowpark_session=session,
  )
  _validate_schema_and_get_writer(
- input_df, "append", snowpark_table_name
+ input_df, "append", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="append",
  column_order=_column_order_for_write,
  )
  case commands_proto.WriteOperationV2.MODE_APPEND:
- if not check_snowflake_table_existence(snowpark_table_name, session):
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if not isinstance(table_schema_or_error, DataType): # Table not exists
  raise AnalysisException(
  f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
  )
- # TODO: SNOW-2299414 Fix the implementation of table type check
- # if check_table_type(snowpark_table_name, session) != "ICEBERG":
- # raise AnalysisException(
- # f"Table {snowpark_table_name} is not an iceberg table"
- # )
+ if get_table_type(snowpark_table_name, session) not in (
+ "ICEBERG",
+ "TABLE",
+ ):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not an iceberg table"
+ )
  _validate_schema_and_get_writer(
- input_df, "append", snowpark_table_name
+ input_df, "append", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="append",
@@ -502,26 +550,33 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
  )
  case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
  # TODO: handle the filter condition for MODE_OVERWRITE
- if check_snowflake_table_existence(snowpark_table_name, session):
- # TODO: SNOW-2299414 Fix the implementation of table type check
- # if check_table_type(snowpark_table_name, session) != "ICEBERG":
- # raise AnalysisException(
- # f"Table {snowpark_table_name} is not an iceberg table"
- # )
- pass
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if isinstance(table_schema_or_error, DataType): # Table exists
+ if get_table_type(snowpark_table_name, session) not in (
+ "ICEBERG",
+ "TABLE",
+ ):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not an iceberg table"
+ )
  else:
  raise AnalysisException(
  f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
  )
  _validate_schema_and_get_writer(
- input_df, "truncate", snowpark_table_name
+ input_df, "truncate", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="truncate",
  column_order=_column_order_for_write,
  )
  case commands_proto.WriteOperationV2.MODE_REPLACE:
- if check_snowflake_table_existence(snowpark_table_name, session):
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if isinstance(table_schema_or_error, DataType): # Table exists
  create_iceberg_table(
  snowpark_table_name=snowpark_table_name,
  location=write_op.table_properties.get("location"),
@@ -534,7 +589,7 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
  f"Table {snowpark_table_name} does not exist"
  )
  _validate_schema_and_get_writer(
- input_df, "replace", snowpark_table_name
+ input_df, "replace", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="append",
@@ -570,17 +625,22 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
  column_order=_column_order_for_write,
  )
  case commands_proto.WriteOperationV2.MODE_APPEND:
- if not check_snowflake_table_existence(snowpark_table_name, session):
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if not isinstance(table_schema_or_error, DataType): # Table not exists
  raise AnalysisException(
  f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
  )
- # TODO: SNOW-2299414 Fix the implementation of table type check
- # if check_table_type(snowpark_table_name, session) != "TABLE":
- # raise AnalysisException(
- # f"Table {snowpark_table_name} is not a FDN table"
- # )
+ if get_table_type(snowpark_table_name, session) not in (
+ "NORMAL",
+ "TABLE",
+ ):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not a FDN table"
+ )
  _validate_schema_and_get_writer(
- input_df, "append", snowpark_table_name
+ input_df, "append", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="append",
@@ -588,31 +648,38 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
  )
  case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
  # TODO: handle the filter condition for MODE_OVERWRITE
- if check_snowflake_table_existence(snowpark_table_name, session):
- # TODO: SNOW-2299414 Fix the implementation of table type check
- # if check_table_type(snowpark_table_name, session) != "TABLE":
- # raise AnalysisException(
- # f"Table {snowpark_table_name} is not a FDN table"
- # )
- pass
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if isinstance(table_schema_or_error, DataType): # Table exists
+ if get_table_type(snowpark_table_name, session) not in (
+ "NORMAL",
+ "TABLE",
+ ):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not a FDN table"
+ )
  else:
  raise AnalysisException(
  f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
  )
  _validate_schema_and_get_writer(
- input_df, "truncate", snowpark_table_name
+ input_df, "truncate", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="truncate",
  column_order=_column_order_for_write,
  )
  case commands_proto.WriteOperationV2.MODE_REPLACE:
- if not check_snowflake_table_existence(snowpark_table_name, session):
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ if not isinstance(table_schema_or_error, DataType): # Table not exists
  raise AnalysisException(
  f"Table {snowpark_table_name} does not exist"
  )
  _validate_schema_and_get_writer(
- input_df, "replace", snowpark_table_name
+ input_df, "replace", snowpark_table_name, table_schema_or_error
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode="overwrite",
@@ -632,8 +699,20 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
  )


+ def _get_table_schema_or_error(
+ snowpark_table_name: str, snowpark_session: snowpark.Session
+ ) -> DataType | SnowparkSQLException:
+ try:
+ return snowpark_session.table(snowpark_table_name).schema
+ except SnowparkSQLException as e:
+ return e
+
+
  def _validate_schema_and_get_writer(
- input_df: snowpark.DataFrame, write_mode: str, snowpark_table_name: str
+ input_df: snowpark.DataFrame,
+ write_mode: str,
+ snowpark_table_name: str,
+ table_schema_or_error: DataType | SnowparkSQLException | None = None,
  ) -> snowpark.DataFrameWriter:
  if write_mode is not None and write_mode.lower() in (
  "replace",
@@ -642,16 +721,26 @@ def _validate_schema_and_get_writer(
  return input_df.write

  table_schema = None
- try:
- table_schema = (
- get_or_create_snowpark_session().table(snowpark_table_name).schema
- )
- except SnowparkSQLException as e:
- msg = e.message
- if "SQL compilation error" in msg and "does not exist" in msg:
- pass
- else:
- raise e
+ if table_schema_or_error is not None:
+ if isinstance(table_schema_or_error, SnowparkSQLException):
+ msg = table_schema_or_error.message
+ if "SQL compilation error" in msg and "does not exist" in msg:
+ pass
+ else:
+ raise table_schema_or_error
+ elif isinstance(table_schema_or_error, DataType):
+ table_schema = table_schema_or_error
+ else:
+ try:
+ table_schema = (
+ get_or_create_snowpark_session().table(snowpark_table_name).schema
+ )
+ except SnowparkSQLException as e:
+ msg = e.message
+ if "SQL compilation error" in msg and "does not exist" in msg:
+ pass
+ else:
+ raise e

  if table_schema is None:
  # If table does not exist, we can skip the schema validation
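Taken together, the two hunks above replace the old per-write existence probe with a single schema lookup whose result is threaded into schema validation. A minimal standalone sketch of the pattern, assuming only the public Snowpark API; the names here are illustrative, not the packaged helpers:

from snowflake.snowpark import Session
from snowflake.snowpark.exceptions import SnowparkSQLException
from snowflake.snowpark.types import DataType


def schema_or_error(session: Session, table_name: str) -> DataType | SnowparkSQLException:
    # One describe query answers both questions: does the table exist, and what
    # is its schema? The exception is returned rather than raised, so the caller
    # can branch on existence and still re-raise unexpected SQL errors later.
    try:
        return session.table(table_name).schema  # StructType, a DataType subclass
    except SnowparkSQLException as e:
        return e


# Usage (requires a configured Snowflake connection):
# session = Session.builder.getOrCreate()
# result = schema_or_error(session, "MY_DB.MY_SCHEMA.MY_TABLE")
# exists = isinstance(result, DataType)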
@@ -904,31 +993,3 @@ def _truncate_directory(directory_path: Path) -> None:
  file.unlink()
  elif file.is_dir():
  shutil.rmtree(file)
-
-
- def check_snowflake_table_existence(
- snowpark_table_name: str,
- snowpark_session: snowpark.Session,
- ):
- try:
- snowpark_session.sql(f"SELECT 1 FROM {snowpark_table_name} LIMIT 1").collect()
- return True
- except Exception:
- return False
-
-
- # TODO: SNOW-2299414 Fix the implementation of table type check
- # def check_table_type(
- # snowpark_table_name: str,
- # snowpark_session: snowpark.Session,
- # ) -> str:
- # # currently we only support iceberg table and FDN table
- # metadata = snowpark_session.sql(
- # f"SHOW TABLES LIKE '{unquote_if_quoted(snowpark_table_name)}';"
- # ).collect()
- # if metadata is None or len(metadata) == 0:
- # raise AnalysisException(f"Table {snowpark_table_name} does not exist")
- # metadata = metadata[0]
- # if metadata.as_dict().get("is_iceberg") == "Y":
- # return "ICEBERG"
- # return "TABLE"

snowflake/snowpark_connect/resources_initializer.py
@@ -12,6 +12,7 @@ _resources_initialized = threading.Event()
  _initializer_lock = threading.Lock()
  SPARK_VERSION = "3.5.6"
  RESOURCE_PATH = "/snowflake/snowpark_connect/resources"
+ _upload_jars = True # Flag to control whether to upload jars. Required for Scala UDFs.


  def initialize_resources() -> None:
@@ -57,10 +58,8 @@ def initialize_resources() -> None:
  f"spark-sql_2.12-{SPARK_VERSION}.jar",
  f"spark-connect-client-jvm_2.12-{SPARK_VERSION}.jar",
  f"spark-common-utils_2.12-{SPARK_VERSION}.jar",
+ "sas-scala-udf_2.12-0.1.0.jar",
  "json4s-ast_2.12-3.7.0-M11.jar",
- "json4s-native_2.12-3.7.0-M11.jar",
- "json4s-core_2.12-3.7.0-M11.jar",
- "paranamer-2.8.3.jar",
  ]

  for jar in jar_files:
@@ -80,9 +79,11 @@ def initialize_resources() -> None:
  ("Initialize Session Stage", initialize_session_stage), # Takes about 0.3s
  ("Initialize Session Catalog", initialize_catalog), # Takes about 1.2s
  ("Snowflake Connection Warm Up", warm_up_sf_connection), # Takes about 1s
- ("Upload Scala UDF Jars", upload_scala_udf_jars),
  ]

+ if _upload_jars:
+ resources.append(("Upload Scala UDF Jars", upload_scala_udf_jars))
+
  for name, resource_func in resources:
  resource_start = time.time()
  try:
@@ -113,4 +114,18 @@ def initialize_resources_async() -> threading.Thread:

  def wait_for_resource_initialization() -> None:
  with _initializer_lock:
- _resource_initializer.join()
+ _resource_initializer.join(timeout=300) # wait at most 300 seconds
+ if _resource_initializer.is_alive():
+ logger.error(
+ "Resource initialization failed - initializer thread has been running for over 300 seconds."
+ )
+ raise RuntimeError(
+ "Resource initialization failed - initializer thread has been running for over 300 seconds."
+ )
+
+
+ def set_upload_jars(upload: bool) -> None:
+ """Set whether to upload jars required for Scala UDFs. This should be set to False if Scala UDFs
+ are not used, to avoid the overhead of uploading jars."""
+ global _upload_jars
+ _upload_jars = upload
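The new module-level _upload_jars flag and set_upload_jars() setter let deployments skip the Scala UDF jar upload entirely, and resource initialization now fails fast after 300 seconds instead of blocking forever. A hedged usage sketch; the import path follows the module shown in this diff, and whether it is considered public API is not stated here:

from snowflake.snowpark_connect.resources_initializer import (
    initialize_resources_async,
    set_upload_jars,
    wait_for_resource_initialization,
)

set_upload_jars(False)               # call before initialization if no Scala UDFs are used
initialize_resources_async()         # background initializer thread, as defined in this module
wait_for_resource_initialization()   # now raises RuntimeError after ~300 s rather than hanging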

snowflake/snowpark_connect/server.py
@@ -725,30 +725,33 @@ def _serve(
  # No need to start grpc server in TCM
  return

+ grpc_max_msg_size = get_int_from_env(
+ "SNOWFLAKE_GRPC_MAX_MESSAGE_SIZE",
+ _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE,
+ )
+ grpc_max_metadata_size = get_int_from_env(
+ "SNOWFLAKE_GRPC_MAX_METADATA_SIZE",
+ _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE,
+ )
  server_options = [
  (
  "grpc.max_receive_message_length",
- get_int_from_env(
- "SNOWFLAKE_GRPC_MAX_MESSAGE_SIZE",
- _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE,
- ),
+ grpc_max_msg_size,
  ),
  (
  "grpc.max_metadata_size",
- get_int_from_env(
- "SNOWFLAKE_GRPC_MAX_METADATA_SIZE",
- _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE,
- ),
+ grpc_max_metadata_size,
  ),
  (
  "grpc.absolute_max_metadata_size",
- get_int_from_env(
- "SNOWFLAKE_GRPC_MAX_METADATA_SIZE",
- _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE,
- )
- * 2,
+ grpc_max_metadata_size * 2,
  ),
  ]
+
+ from pyspark.sql.connect.client import ChannelBuilder
+
+ ChannelBuilder.MAX_MESSAGE_LENGTH = grpc_max_msg_size
+
  server = grpc.server(
  futures.ThreadPoolExecutor(max_workers=10), options=server_options
  )
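With this change the gRPC limits are read once in _serve() and shared, so the client-side ChannelBuilder.MAX_MESSAGE_LENGTH always matches the server options (the next hunk removes the old assignment from start_session). An illustrative way to raise both limits via the environment variables this hunk reads; the values below are examples, not defaults:

import os

# Must be set before the server process reaches _serve(); both the grpc.* server
# options and ChannelBuilder.MAX_MESSAGE_LENGTH are derived from these via
# get_int_from_env(), and grpc.absolute_max_metadata_size is 2x the metadata size.
os.environ["SNOWFLAKE_GRPC_MAX_MESSAGE_SIZE"] = str(256 * 1024 * 1024)  # 256 MiB
os.environ["SNOWFLAKE_GRPC_MAX_METADATA_SIZE"] = str(1024 * 1024)       # 1 MiB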
@@ -1053,10 +1056,6 @@ def start_session(
  global _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE
  _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE = max_grpc_message_size

- from pyspark.sql.connect.client import ChannelBuilder
-
- ChannelBuilder.MAX_MESSAGE_LENGTH = max_grpc_message_size
-
  if os.environ.get("SPARK_ENV_LOADED"):
  raise RuntimeError(
  "Snowpark Connect cannot be run inside of a Spark environment"

snowflake/snowpark_connect/utils/concurrent.py
@@ -52,6 +52,10 @@ class SynchronizedDict(Mapping[K, V]):
  with self._lock.writer():
  self._dict[key] = value

+ def __delitem__(self, key: K) -> None:
+ with self._lock.writer():
+ del self._dict[key]
+
  def __contains__(self, key: K) -> bool:
  with self._lock.reader():
  return key in self._dict
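SynchronizedDict now supports del d[key] under the writer lock, matching the existing __setitem__. A minimal self-contained sketch of the same idea using a plain threading.Lock; the packaged class uses a reader/writer lock and generic key/value types, and LockedDict is only a toy stand-in:

import threading


class LockedDict:
    """Toy stand-in for SynchronizedDict: every mutation happens under a lock."""

    def __init__(self) -> None:
        self._dict: dict = {}
        self._lock = threading.Lock()

    def __setitem__(self, key, value) -> None:
        with self._lock:
            self._dict[key] = value

    def __delitem__(self, key) -> None:
        # New in 0.28.0 on SynchronizedDict: deletion takes the write lock,
        # exactly like __setitem__ above.
        with self._lock:
            del self._dict[key]


d = LockedDict()
d["plan"] = "cached"
del d["plan"]  # raises KeyError if the key is absent, like a normal dict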