snowpark-connect 0.27.0__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of snowpark-connect has been flagged as possibly problematic.
- snowflake/snowpark_connect/column_name_handler.py +3 -93
- snowflake/snowpark_connect/config.py +99 -1
- snowflake/snowpark_connect/dataframe_container.py +0 -6
- snowflake/snowpark_connect/expression/map_expression.py +22 -7
- snowflake/snowpark_connect/expression/map_sql_expression.py +22 -18
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +4 -26
- snowflake/snowpark_connect/expression/map_unresolved_function.py +12 -3
- snowflake/snowpark_connect/expression/map_unresolved_star.py +2 -3
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/relation/map_extension.py +14 -10
- snowflake/snowpark_connect/relation/map_join.py +62 -258
- snowflake/snowpark_connect/relation/map_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_sql.py +353 -16
- snowflake/snowpark_connect/relation/write/map_write.py +171 -110
- snowflake/snowpark_connect/resources_initializer.py +20 -5
- snowflake/snowpark_connect/server.py +16 -17
- snowflake/snowpark_connect/utils/concurrent.py +4 -0
- snowflake/snowpark_connect/utils/describe_query_cache.py +57 -51
- snowflake/snowpark_connect/utils/identifiers.py +120 -0
- snowflake/snowpark_connect/utils/io_utils.py +21 -1
- snowflake/snowpark_connect/utils/scala_udf_utils.py +34 -43
- snowflake/snowpark_connect/utils/session.py +16 -26
- snowflake/snowpark_connect/utils/telemetry.py +53 -0
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/METADATA +2 -2
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/RECORD +34 -35
- snowflake/snowpark_connect/hidden_column.py +0 -39
- {snowpark_connect-0.27.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/relation/write/map_write.py

@@ -50,6 +50,7 @@ from snowflake.snowpark_connect.utils.identifiers import (
     spark_to_sf_single_id,
     split_fully_qualified_spark_name,
 )
+from snowflake.snowpark_connect.utils.io_utils import get_table_type
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import (
@@ -311,7 +312,10 @@ def map_write(request: proto_base.ExecutePlanRequest):
 
         match write_mode:
             case None | "error" | "errorifexists":
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
                     raise AnalysisException(
                         f"Table {snowpark_table_name} already exists"
                     )
@@ -322,29 +326,45 @@ def map_write(request: proto_base.ExecutePlanRequest):
                     snowpark_session=session,
                 )
                 _validate_schema_and_get_writer(
-                    input_df, "append", snowpark_table_name
+                    input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",
                     column_order=_column_order_for_write,
                 )
             case "append":
-
-
-
-
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
+                    if get_table_type(snowpark_table_name, session) not in (
+                        "ICEBERG",
+                        "TABLE",
+                    ):
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not an iceberg table"
+                        )
+                else:
+                    create_iceberg_table(
+                        snowpark_table_name=snowpark_table_name,
+                        location=write_op.options.get("location", None),
+                        schema=input_df.schema,
+                        snowpark_session=session,
+                    )
                 _validate_schema_and_get_writer(
-                    input_df, "append", snowpark_table_name
+                    input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",
                     column_order=_column_order_for_write,
                 )
             case "ignore":
-
+                table_schema_or_error = _get_table_schema_or_error(
                     snowpark_table_name, session
-                )
+                )
+                if not isinstance(
+                    table_schema_or_error, DataType
+                ):  # Table not exists
                     create_iceberg_table(
                         snowpark_table_name=snowpark_table_name,
                         location=write_op.options.get("location", None),
@@ -359,13 +379,17 @@ def map_write(request: proto_base.ExecutePlanRequest):
                     column_order=_column_order_for_write,
                 )
             case "overwrite":
-
-
-
-
-
-
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
+                    if get_table_type(snowpark_table_name, session) not in (
+                        "ICEBERG",
+                        "TABLE",
+                    ):
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not an iceberg table"
+                        )
                 else:
                     create_iceberg_table(
                         snowpark_table_name=snowpark_table_name,
@@ -374,7 +398,7 @@ def map_write(request: proto_base.ExecutePlanRequest):
                         snowpark_session=session,
                     )
                 _validate_schema_and_get_writer(
-                    input_df, "truncate", snowpark_table_name
+                    input_df, "truncate", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="truncate",
@@ -393,33 +417,49 @@ def map_write(request: proto_base.ExecutePlanRequest):
     ):
         match write_mode:
             case "overwrite":
-
+                table_schema_or_error = _get_table_schema_or_error(
                     snowpark_table_name, session
-                )
-
-
-
-
-
-
-
-
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
+                    if get_table_type(snowpark_table_name, session) not in (
+                        "NORMAL",
+                        "TABLE",
+                    ):
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not a FDN table"
+                        )
                 write_mode = "truncate"
                 _validate_schema_and_get_writer(
-                    input_df,
+                    input_df,
+                    write_mode,
+                    snowpark_table_name,
+                    table_schema_or_error,
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode=write_mode,
                     column_order=_column_order_for_write,
                 )
             case "append":
-
-
-
-
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(
+                    table_schema_or_error, DataType
+                ) and get_table_type(  # Table exists
+                    snowpark_table_name, session
+                ) not in (
+                    "NORMAL",
+                    "TABLE",
+                ):
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} is not a FDN table"
+                    )
+
                 _validate_schema_and_get_writer(
-                    input_df,
+                    input_df,
+                    write_mode,
+                    snowpark_table_name,
+                    table_schema_or_error,
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode=write_mode,
@@ -466,7 +506,10 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
     if write_op.provider.lower() == "iceberg":
         match write_op.mode:
             case commands_proto.WriteOperationV2.MODE_CREATE:
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
                     raise AnalysisException(
                         f"Table {snowpark_table_name} already exists"
                     )
@@ -477,24 +520,29 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     snowpark_session=session,
                 )
                 _validate_schema_and_get_writer(
-                    input_df, "append", snowpark_table_name
+                    input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",
                     column_order=_column_order_for_write,
                 )
             case commands_proto.WriteOperationV2.MODE_APPEND:
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if not isinstance(table_schema_or_error, DataType):  # Table not exists
                     raise AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
                     )
-
-
-
-
-
+                if get_table_type(snowpark_table_name, session) not in (
+                    "ICEBERG",
+                    "TABLE",
+                ):
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} is not an iceberg table"
+                    )
                 _validate_schema_and_get_writer(
-                    input_df, "append", snowpark_table_name
+                    input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",
@@ -502,26 +550,33 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                 )
             case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
                 # TODO: handle the filter condition for MODE_OVERWRITE
-
-
-
-
-
-
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
+                    if get_table_type(snowpark_table_name, session) not in (
+                        "ICEBERG",
+                        "TABLE",
+                    ):
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not an iceberg table"
+                        )
                 else:
                     raise AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
                     )
                 _validate_schema_and_get_writer(
-                    input_df, "truncate", snowpark_table_name
+                    input_df, "truncate", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="truncate",
                     column_order=_column_order_for_write,
                 )
             case commands_proto.WriteOperationV2.MODE_REPLACE:
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
                     create_iceberg_table(
                         snowpark_table_name=snowpark_table_name,
                         location=write_op.table_properties.get("location"),
@@ -534,7 +589,7 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                         f"Table {snowpark_table_name} does not exist"
                     )
                 _validate_schema_and_get_writer(
-                    input_df, "replace", snowpark_table_name
+                    input_df, "replace", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",
@@ -570,17 +625,22 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     column_order=_column_order_for_write,
                 )
             case commands_proto.WriteOperationV2.MODE_APPEND:
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if not isinstance(table_schema_or_error, DataType):  # Table not exists
                     raise AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
                     )
-
-
-
-
-
+                if get_table_type(snowpark_table_name, session) not in (
+                    "NORMAL",
+                    "TABLE",
+                ):
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} is not a FDN table"
+                    )
                 _validate_schema_and_get_writer(
-                    input_df, "append", snowpark_table_name
+                    input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",
@@ -588,31 +648,38 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                 )
             case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
                 # TODO: handle the filter condition for MODE_OVERWRITE
-
-
-
-
-
-
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
+                    if get_table_type(snowpark_table_name, session) not in (
+                        "NORMAL",
+                        "TABLE",
+                    ):
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not a FDN table"
+                        )
                 else:
                     raise AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
                     )
                 _validate_schema_and_get_writer(
-                    input_df, "truncate", snowpark_table_name
+                    input_df, "truncate", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="truncate",
                     column_order=_column_order_for_write,
                 )
             case commands_proto.WriteOperationV2.MODE_REPLACE:
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if not isinstance(table_schema_or_error, DataType):  # Table not exists
                     raise AnalysisException(
                         f"Table {snowpark_table_name} does not exist"
                     )
                 _validate_schema_and_get_writer(
-                    input_df, "replace", snowpark_table_name
+                    input_df, "replace", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="overwrite",
@@ -632,8 +699,20 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
     )
 
 
+def _get_table_schema_or_error(
+    snowpark_table_name: str, snowpark_session: snowpark.Session
+) -> DataType | SnowparkSQLException:
+    try:
+        return snowpark_session.table(snowpark_table_name).schema
+    except SnowparkSQLException as e:
+        return e
+
+
 def _validate_schema_and_get_writer(
-    input_df: snowpark.DataFrame,
+    input_df: snowpark.DataFrame,
+    write_mode: str,
+    snowpark_table_name: str,
+    table_schema_or_error: DataType | SnowparkSQLException | None = None,
 ) -> snowpark.DataFrameWriter:
     if write_mode is not None and write_mode.lower() in (
         "replace",
@@ -642,16 +721,26 @@ def _validate_schema_and_get_writer(
         return input_df.write
 
     table_schema = None
-
-
-
-
-
-
-
-
-
-
+    if table_schema_or_error is not None:
+        if isinstance(table_schema_or_error, SnowparkSQLException):
+            msg = table_schema_or_error.message
+            if "SQL compilation error" in msg and "does not exist" in msg:
+                pass
+            else:
+                raise table_schema_or_error
+        elif isinstance(table_schema_or_error, DataType):
+            table_schema = table_schema_or_error
+    else:
+        try:
+            table_schema = (
+                get_or_create_snowpark_session().table(snowpark_table_name).schema
+            )
+        except SnowparkSQLException as e:
+            msg = e.message
+            if "SQL compilation error" in msg and "does not exist" in msg:
+                pass
+            else:
+                raise e
 
     if table_schema is None:
         # If table does not exist, we can skip the schema validation
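The two hunks above are the heart of the map_write.py change: `_get_table_schema_or_error` performs one schema lookup and hands back either the table's schema (a `DataType`) or the raw `SnowparkSQLException`, and `_validate_schema_and_get_writer` accepts that result, treating only "does not exist" compilation errors as a missing table. A minimal, self-contained sketch of the pattern follows; the function name `schema_if_present` is illustrative and not part of the package.

```python
# Illustrative sketch of the pattern introduced above, not the shipped implementation:
# one schema lookup doubles as the existence check, and only "does not exist"
# compilation errors are treated as a missing table.
from snowflake.snowpark import Session
from snowflake.snowpark.exceptions import SnowparkSQLException
from snowflake.snowpark.types import DataType


def schema_if_present(session: Session, table_name: str) -> DataType | None:
    try:
        result: DataType | SnowparkSQLException = session.table(table_name).schema
    except SnowparkSQLException as e:
        result = e
    if isinstance(result, DataType):  # table exists; the schema is reused for writer validation
        return result
    if "SQL compilation error" in result.message and "does not exist" in result.message:
        return None  # table is missing; the caller may create it or skip validation
    raise result  # any other SQL error is unexpected and propagates
```

A caller branches on the returned value the same way the per-mode cases above branch on `isinstance(table_schema_or_error, DataType)`, avoiding a second round trip just to test existence.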
@@ -904,31 +993,3 @@ def _truncate_directory(directory_path: Path) -> None:
             file.unlink()
         elif file.is_dir():
             shutil.rmtree(file)
-
-
-def check_snowflake_table_existence(
-    snowpark_table_name: str,
-    snowpark_session: snowpark.Session,
-):
-    try:
-        snowpark_session.sql(f"SELECT 1 FROM {snowpark_table_name} LIMIT 1").collect()
-        return True
-    except Exception:
-        return False
-
-
-# TODO: SNOW-2299414 Fix the implementation of table type check
-# def check_table_type(
-#     snowpark_table_name: str,
-#     snowpark_session: snowpark.Session,
-# ) -> str:
-#     # currently we only support iceberg table and FDN table
-#     metadata = snowpark_session.sql(
-#         f"SHOW TABLES LIKE '{unquote_if_quoted(snowpark_table_name)}';"
-#     ).collect()
-#     if metadata is None or len(metadata) == 0:
-#         raise AnalysisException(f"Table {snowpark_table_name} does not exist")
-#     metadata = metadata[0]
-#     if metadata.as_dict().get("is_iceberg") == "Y":
-#         return "ICEBERG"
-#     return "TABLE"
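The `SELECT 1 ... LIMIT 1` existence probe and the commented-out `check_table_type` removed above are replaced by `get_table_type` from `utils/io_utils.py`, whose implementation is not part of this diff. Going by the removed comment block and the values the call sites compare against ("ICEBERG", "NORMAL", "TABLE"), a lookup of that shape could look roughly like the hypothetical sketch below; it is not the shipped helper.

```python
# Hypothetical sketch in the spirit of the removed check_table_type comment; the real
# get_table_type lives in snowflake/snowpark_connect/utils/io_utils.py and is not shown here.
from snowflake.snowpark import Session


def table_type_from_show_tables(session: Session, table_name: str) -> str:
    rows = session.sql(f"SHOW TABLES LIKE '{table_name}'").collect()
    if not rows:
        raise ValueError(f"Table {table_name} does not exist")
    # SHOW TABLES reports is_iceberg = 'Y' for Iceberg tables; anything else is a regular (FDN) table.
    return "ICEBERG" if rows[0].as_dict().get("is_iceberg") == "Y" else "TABLE"
```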
snowflake/snowpark_connect/resources_initializer.py

@@ -12,6 +12,7 @@ _resources_initialized = threading.Event()
 _initializer_lock = threading.Lock()
 SPARK_VERSION = "3.5.6"
 RESOURCE_PATH = "/snowflake/snowpark_connect/resources"
+_upload_jars = True  # Flag to control whether to upload jars. Required for Scala UDFs.
 
 
 def initialize_resources() -> None:
@@ -57,10 +58,8 @@ def initialize_resources() -> None:
         f"spark-sql_2.12-{SPARK_VERSION}.jar",
         f"spark-connect-client-jvm_2.12-{SPARK_VERSION}.jar",
         f"spark-common-utils_2.12-{SPARK_VERSION}.jar",
+        "sas-scala-udf_2.12-0.1.0.jar",
         "json4s-ast_2.12-3.7.0-M11.jar",
-        "json4s-native_2.12-3.7.0-M11.jar",
-        "json4s-core_2.12-3.7.0-M11.jar",
-        "paranamer-2.8.3.jar",
     ]
 
     for jar in jar_files:
@@ -80,9 +79,11 @@ def initialize_resources() -> None:
         ("Initialize Session Stage", initialize_session_stage),  # Takes about 0.3s
         ("Initialize Session Catalog", initialize_catalog),  # Takes about 1.2s
         ("Snowflake Connection Warm Up", warm_up_sf_connection),  # Takes about 1s
-        ("Upload Scala UDF Jars", upload_scala_udf_jars),
     ]
 
+    if _upload_jars:
+        resources.append(("Upload Scala UDF Jars", upload_scala_udf_jars))
+
     for name, resource_func in resources:
         resource_start = time.time()
         try:
@@ -113,4 +114,18 @@ def initialize_resources_async() -> threading.Thread:
 
 def wait_for_resource_initialization() -> None:
     with _initializer_lock:
-        _resource_initializer.join()
+        _resource_initializer.join(timeout=300)  # wait at most 300 seconds
+        if _resource_initializer.is_alive():
+            logger.error(
+                "Resource initialization failed - initializer thread has been running for over 300 seconds."
+            )
+            raise RuntimeError(
+                "Resource initialization failed - initializer thread has been running for over 300 seconds."
+            )
+
+
+def set_upload_jars(upload: bool) -> None:
+    """Set whether to upload jars required for Scala UDFs. This should be set to False if Scala UDFs
+    are not used, to avoid the overhead of uploading jars."""
+    global _upload_jars
+    _upload_jars = upload
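Together, the `_upload_jars` flag, `set_upload_jars`, and the conditional append of the "Upload Scala UDF Jars" step let a deployment that never registers Scala UDFs skip the jar upload entirely. A usage sketch follows; the functions are the ones shown in this diff, but calling `set_upload_jars` before the async initializer starts is an assumption based on when `initialize_resources` reads the flag.

```python
# Sketch: skip the Scala-UDF jar upload when Scala UDFs are not used.
from snowflake.snowpark_connect import resources_initializer

resources_initializer.set_upload_jars(False)                 # assumption: set before initialization starts
init_thread = resources_initializer.initialize_resources_async()
# ... other startup work ...
resources_initializer.wait_for_resource_initialization()     # now raises if initialization exceeds 300 s
```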
snowflake/snowpark_connect/server.py

@@ -725,30 +725,33 @@ def _serve(
         # No need to start grpc server in TCM
         return
 
+    grpc_max_msg_size = get_int_from_env(
+        "SNOWFLAKE_GRPC_MAX_MESSAGE_SIZE",
+        _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE,
+    )
+    grpc_max_metadata_size = get_int_from_env(
+        "SNOWFLAKE_GRPC_MAX_METADATA_SIZE",
+        _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE,
+    )
     server_options = [
         (
             "grpc.max_receive_message_length",
-
-                "SNOWFLAKE_GRPC_MAX_MESSAGE_SIZE",
-                _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE,
-            ),
+            grpc_max_msg_size,
         ),
         (
             "grpc.max_metadata_size",
-
-                "SNOWFLAKE_GRPC_MAX_METADATA_SIZE",
-                _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE,
-            ),
+            grpc_max_metadata_size,
         ),
         (
             "grpc.absolute_max_metadata_size",
-
-                "SNOWFLAKE_GRPC_MAX_METADATA_SIZE",
-                _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE,
-            )
-            * 2,
+            grpc_max_metadata_size * 2,
         ),
     ]
+
+    from pyspark.sql.connect.client import ChannelBuilder
+
+    ChannelBuilder.MAX_MESSAGE_LENGTH = grpc_max_msg_size
+
     server = grpc.server(
         futures.ThreadPoolExecutor(max_workers=10), options=server_options
     )
@@ -1053,10 +1056,6 @@ def start_session(
     global _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE
     _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE = max_grpc_message_size
 
-    from pyspark.sql.connect.client import ChannelBuilder
-
-    ChannelBuilder.MAX_MESSAGE_LENGTH = max_grpc_message_size
-
     if os.environ.get("SPARK_ENV_LOADED"):
         raise RuntimeError(
             "Snowpark Connect cannot be run inside of a Spark environment"
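With this change the server resolves both gRPC limits once through `get_int_from_env` and reuses the values for the server options and the client `ChannelBuilder`, instead of setting `ChannelBuilder.MAX_MESSAGE_LENGTH` separately in `start_session`. `get_int_from_env` itself is not shown in this diff; based on its name and call sites, its behavior is presumably equivalent to the sketch below, which is an assumption rather than the package's code.

```python
import os

# Assumed behaviour of get_int_from_env (the real helper is defined elsewhere in the package):
# read an integer override from the environment, falling back to the built-in default.
def get_int_from_env_sketch(name: str, default: int) -> int:
    raw = os.environ.get(name)
    return int(raw) if raw else default

# Example: exporting SNOWFLAKE_GRPC_MAX_MESSAGE_SIZE=268435456 would raise the limit to 256 MiB,
# feeding both grpc.max_receive_message_length and ChannelBuilder.MAX_MESSAGE_LENGTH.
```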
snowflake/snowpark_connect/utils/concurrent.py

@@ -52,6 +52,10 @@ class SynchronizedDict(Mapping[K, V]):
         with self._lock.writer():
             self._dict[key] = value
 
+    def __delitem__(self, key: K) -> None:
+        with self._lock.writer():
+            del self._dict[key]
+
     def __contains__(self, key: K) -> bool:
         with self._lock.reader():
             return key in self._dict
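With `__delitem__` added, entries can be removed from the shared mapping under the same writer lock that guards `__setitem__`. A small usage sketch follows; the no-argument constructor is an assumption, and only the methods visible in this diff are exercised.

```python
# Assumes SynchronizedDict() can be built without arguments; only __setitem__,
# __contains__ and the new __delitem__ from this diff are used.
from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict

cache = SynchronizedDict()
cache["plan-1"] = "cached result"   # writer lock
if "plan-1" in cache:               # reader lock
    del cache["plan-1"]             # new in 0.28.0: writer lock
```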