snowpark-connect 0.26.0__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
Potentially problematic release: this version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/column_name_handler.py +3 -93
- snowflake/snowpark_connect/config.py +99 -4
- snowflake/snowpark_connect/dataframe_container.py +0 -6
- snowflake/snowpark_connect/expression/map_expression.py +31 -1
- snowflake/snowpark_connect/expression/map_sql_expression.py +22 -18
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +22 -26
- snowflake/snowpark_connect/expression/map_unresolved_function.py +28 -10
- snowflake/snowpark_connect/expression/map_unresolved_star.py +2 -3
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/relation/map_extension.py +7 -1
- snowflake/snowpark_connect/relation/map_join.py +62 -258
- snowflake/snowpark_connect/relation/map_map_partitions.py +36 -77
- snowflake/snowpark_connect/relation/map_relation.py +8 -2
- snowflake/snowpark_connect/relation/map_show_string.py +2 -0
- snowflake/snowpark_connect/relation/map_sql.py +413 -15
- snowflake/snowpark_connect/relation/write/map_write.py +195 -114
- snowflake/snowpark_connect/resources_initializer.py +20 -5
- snowflake/snowpark_connect/server.py +20 -18
- snowflake/snowpark_connect/utils/artifacts.py +4 -5
- snowflake/snowpark_connect/utils/concurrent.py +4 -0
- snowflake/snowpark_connect/utils/context.py +41 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +57 -51
- snowflake/snowpark_connect/utils/identifiers.py +120 -0
- snowflake/snowpark_connect/utils/io_utils.py +21 -1
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +86 -2
- snowflake/snowpark_connect/utils/scala_udf_utils.py +34 -43
- snowflake/snowpark_connect/utils/session.py +16 -26
- snowflake/snowpark_connect/utils/telemetry.py +53 -0
- snowflake/snowpark_connect/utils/udf_utils.py +66 -103
- snowflake/snowpark_connect/utils/udtf_helper.py +17 -7
- snowflake/snowpark_connect/version.py +2 -3
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/METADATA +2 -2
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/RECORD +41 -42
- snowflake/snowpark_connect/hidden_column.py +0 -39
- {snowpark_connect-0.26.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.26.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.26.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/relation/write/map_write.py

@@ -50,7 +50,9 @@ from snowflake.snowpark_connect.utils.identifiers import (
     spark_to_sf_single_id,
     split_fully_qualified_spark_name,
 )
+from snowflake.snowpark_connect.utils.io_utils import get_table_type
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
+from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
     telemetry,

@@ -160,6 +162,29 @@ def map_write(request: proto_base.ExecutePlanRequest):
     compression_option = write_op.options.get("compression", "none")
 
     # Generate Spark-compatible filename or prefix
+    # we need a random prefix to support "append" mode
+    # otherwise copy into with overwrite=False will fail if the file already exists
+    overwrite = (
+        write_op.mode
+        == commands_proto.WriteOperation.SaveMode.SAVE_MODE_OVERWRITE
+    )
+
+    if overwrite:
+        try:
+            path_after_stage = (
+                write_path.split("/", 1)[1] if "/" in write_path else ""
+            )
+            if not path_after_stage or path_after_stage == "/":
+                logger.warning(
+                    f"Skipping REMOVE for root path {write_path} - too broad scope"
+                )
+            else:
+                remove_command = f"REMOVE {write_path}/"
+                session.sql(remove_command).collect()
+                logger.info(f"Successfully cleared directory: {write_path}")
+        except Exception as e:
+            logger.warning(f"Could not clear directory {write_path}: {e}")
+
     if should_write_to_single_file:
         # Single file: generate complete filename with extension
         spark_filename = generate_spark_compatible_filename(
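In isolation, the new overwrite handling reduces to the following sketch. The `clear_stage_directory` name and the bare `print` calls are illustrative only; an existing Snowpark `session` and a stage path such as `@my_stage/output` are assumed.

```python
# Minimal sketch of the stage-clearing step that overwrite mode now performs
# before writing files, so a later COPY INTO with OVERWRITE=FALSE does not
# collide with leftover files. Illustrative only; not the packaged code.
from snowflake.snowpark import Session


def clear_stage_directory(session: Session, write_path: str) -> None:
    # Everything after the first "/" is the path inside the stage.
    path_after_stage = write_path.split("/", 1)[1] if "/" in write_path else ""
    if not path_after_stage or path_after_stage == "/":
        # Refuse to REMOVE the stage root; the scope would be too broad.
        print(f"Skipping REMOVE for root path {write_path} - too broad scope")
        return
    try:
        session.sql(f"REMOVE {write_path}/").collect()
        print(f"Successfully cleared directory: {write_path}")
    except Exception as exc:
        # Cleanup is best effort; a failure here should not abort the write.
        print(f"Could not clear directory {write_path}: {exc}")
```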
@@ -178,10 +203,6 @@ def map_write(request: proto_base.ExecutePlanRequest):
             format_ext="",  # No extension for prefix
         )
         temp_file_prefix_on_stage = f"{write_path}/{spark_filename_prefix}"
-        overwrite = (
-            write_op.mode
-            == commands_proto.WriteOperation.SaveMode.SAVE_MODE_OVERWRITE
-        )
 
     default_compression = "NONE" if write_op.source != "parquet" else "snappy"
     compression = write_op.options.get(

@@ -291,7 +312,10 @@ def map_write(request: proto_base.ExecutePlanRequest):
 
     match write_mode:
         case None | "error" | "errorifexists":
-
+            table_schema_or_error = _get_table_schema_or_error(
+                snowpark_table_name, session
+            )
+            if isinstance(table_schema_or_error, DataType):  # Table exists
                 raise AnalysisException(
                     f"Table {snowpark_table_name} already exists"
                 )
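For reference, these branches back the standard Spark save modes on the client side; a minimal PySpark sketch, with an illustrative Spark Connect address and table name:

```python
# Client-side calls that exercise the save-mode branches above. The remote
# address and table name are illustrative only.
from pyspark.sql import SparkSession

spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "val"])

df.write.mode("errorifexists").saveAsTable("demo_db.demo_table")  # error if the table exists
df.write.mode("append").saveAsTable("demo_db.demo_table")         # append to an existing table
df.write.mode("ignore").saveAsTable("demo_db.demo_table")         # no-op if the table exists
df.write.mode("overwrite").saveAsTable("demo_db.demo_table")      # replace the table contents
```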
@@ -302,29 +326,45 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 snowpark_session=session,
             )
             _validate_schema_and_get_writer(
-                input_df, "append", snowpark_table_name
+                input_df, "append", snowpark_table_name, table_schema_or_error
             ).saveAsTable(
                 table_name=snowpark_table_name,
                 mode="append",
                 column_order=_column_order_for_write,
             )
         case "append":
-
-
-
-
-
+            table_schema_or_error = _get_table_schema_or_error(
+                snowpark_table_name, session
+            )
+            if isinstance(table_schema_or_error, DataType):  # Table exists
+                if get_table_type(snowpark_table_name, session) not in (
+                    "ICEBERG",
+                    "TABLE",
+                ):
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} is not an iceberg table"
+                    )
+            else:
+                create_iceberg_table(
+                    snowpark_table_name=snowpark_table_name,
+                    location=write_op.options.get("location", None),
+                    schema=input_df.schema,
+                    snowpark_session=session,
+                )
             _validate_schema_and_get_writer(
-                input_df, "append", snowpark_table_name
+                input_df, "append", snowpark_table_name, table_schema_or_error
             ).saveAsTable(
                 table_name=snowpark_table_name,
                 mode="append",
                 column_order=_column_order_for_write,
             )
         case "ignore":
-
+            table_schema_or_error = _get_table_schema_or_error(
                 snowpark_table_name, session
-            )
+            )
+            if not isinstance(
+                table_schema_or_error, DataType
+            ):  # Table not exists
                 create_iceberg_table(
                     snowpark_table_name=snowpark_table_name,
                     location=write_op.options.get("location", None),

@@ -339,13 +379,17 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 column_order=_column_order_for_write,
             )
         case "overwrite":
-
-
-
-
-
-
-
+            table_schema_or_error = _get_table_schema_or_error(
+                snowpark_table_name, session
+            )
+            if isinstance(table_schema_or_error, DataType):  # Table exists
+                if get_table_type(snowpark_table_name, session) not in (
+                    "ICEBERG",
+                    "TABLE",
+                ):
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} is not an iceberg table"
+                    )
             else:
                 create_iceberg_table(
                     snowpark_table_name=snowpark_table_name,

@@ -354,7 +398,7 @@ def map_write(request: proto_base.ExecutePlanRequest):
                     snowpark_session=session,
                 )
             _validate_schema_and_get_writer(
-                input_df, "truncate", snowpark_table_name
+                input_df, "truncate", snowpark_table_name, table_schema_or_error
             ).saveAsTable(
                 table_name=snowpark_table_name,
                 mode="truncate",

@@ -373,33 +417,49 @@ def map_write(request: proto_base.ExecutePlanRequest):
     ):
         match write_mode:
             case "overwrite":
-
+                table_schema_or_error = _get_table_schema_or_error(
                     snowpark_table_name, session
-                )
-
-
-
-
-
-
-
-
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
+                    if get_table_type(snowpark_table_name, session) not in (
+                        "NORMAL",
+                        "TABLE",
+                    ):
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not a FDN table"
+                        )
                 write_mode = "truncate"
                 _validate_schema_and_get_writer(
-                    input_df,
+                    input_df,
+                    write_mode,
+                    snowpark_table_name,
+                    table_schema_or_error,
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode=write_mode,
                     column_order=_column_order_for_write,
                 )
             case "append":
-
-
-
-
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(
+                    table_schema_or_error, DataType
+                ) and get_table_type(  # Table exists
+                    snowpark_table_name, session
+                ) not in (
+                    "NORMAL",
+                    "TABLE",
+                ):
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} is not a FDN table"
+                    )
+
                 _validate_schema_and_get_writer(
-                    input_df,
+                    input_df,
+                    write_mode,
+                    snowpark_table_name,
+                    table_schema_or_error,
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode=write_mode,
@@ -446,7 +506,10 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
     if write_op.provider.lower() == "iceberg":
         match write_op.mode:
             case commands_proto.WriteOperationV2.MODE_CREATE:
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
                     raise AnalysisException(
                         f"Table {snowpark_table_name} already exists"
                     )

@@ -457,24 +520,29 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     snowpark_session=session,
                 )
                 _validate_schema_and_get_writer(
-                    input_df, "append", snowpark_table_name
+                    input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",
                     column_order=_column_order_for_write,
                 )
             case commands_proto.WriteOperationV2.MODE_APPEND:
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if not isinstance(table_schema_or_error, DataType):  # Table not exists
                     raise AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
                     )
-
-
-
-
-
+                if get_table_type(snowpark_table_name, session) not in (
+                    "ICEBERG",
+                    "TABLE",
+                ):
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} is not an iceberg table"
+                    )
                 _validate_schema_and_get_writer(
-                    input_df, "append", snowpark_table_name
+                    input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",

@@ -482,26 +550,33 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                 )
             case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
                 # TODO: handle the filter condition for MODE_OVERWRITE
-
-
-
-
-
-
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
+                    if get_table_type(snowpark_table_name, session) not in (
+                        "ICEBERG",
+                        "TABLE",
+                    ):
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not an iceberg table"
+                        )
                 else:
                     raise AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
                     )
                 _validate_schema_and_get_writer(
-                    input_df, "truncate", snowpark_table_name
+                    input_df, "truncate", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="truncate",
                     column_order=_column_order_for_write,
                 )
             case commands_proto.WriteOperationV2.MODE_REPLACE:
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
                     create_iceberg_table(
                         snowpark_table_name=snowpark_table_name,
                         location=write_op.table_properties.get("location"),

@@ -514,7 +589,7 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                         f"Table {snowpark_table_name} does not exist"
                     )
                 _validate_schema_and_get_writer(
-                    input_df, "replace", snowpark_table_name
+                    input_df, "replace", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",

@@ -550,17 +625,22 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     column_order=_column_order_for_write,
                 )
             case commands_proto.WriteOperationV2.MODE_APPEND:
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if not isinstance(table_schema_or_error, DataType):  # Table not exists
                     raise AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
                     )
-
-
-
-
-
+                if get_table_type(snowpark_table_name, session) not in (
+                    "NORMAL",
+                    "TABLE",
+                ):
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} is not a FDN table"
+                    )
                 _validate_schema_and_get_writer(
-                    input_df, "append", snowpark_table_name
+                    input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",

@@ -568,31 +648,38 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                 )
             case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
                 # TODO: handle the filter condition for MODE_OVERWRITE
-
-
-
-
-
-
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
+                    if get_table_type(snowpark_table_name, session) not in (
+                        "NORMAL",
+                        "TABLE",
+                    ):
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not a FDN table"
+                        )
                 else:
                     raise AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
                     )
                 _validate_schema_and_get_writer(
-                    input_df, "truncate", snowpark_table_name
+                    input_df, "truncate", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="truncate",
                     column_order=_column_order_for_write,
                 )
             case commands_proto.WriteOperationV2.MODE_REPLACE:
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if not isinstance(table_schema_or_error, DataType):  # Table not exists
                     raise AnalysisException(
                         f"Table {snowpark_table_name} does not exist"
                     )
                 _validate_schema_and_get_writer(
-                    input_df, "replace", snowpark_table_name
+                    input_df, "replace", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="overwrite",
@@ -612,8 +699,20 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                 )
 
 
+def _get_table_schema_or_error(
+    snowpark_table_name: str, snowpark_session: snowpark.Session
+) -> DataType | SnowparkSQLException:
+    try:
+        return snowpark_session.table(snowpark_table_name).schema
+    except SnowparkSQLException as e:
+        return e
+
+
 def _validate_schema_and_get_writer(
-    input_df: snowpark.DataFrame,
+    input_df: snowpark.DataFrame,
+    write_mode: str,
+    snowpark_table_name: str,
+    table_schema_or_error: DataType | SnowparkSQLException | None = None,
 ) -> snowpark.DataFrameWriter:
     if write_mode is not None and write_mode.lower() in (
         "replace",
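The new helper folds the existence check into the schema lookup: a single `session.table(...).schema` call either returns the schema or raises, and callers distinguish the two with `isinstance(..., DataType)`. A standalone sketch of the same pattern, with illustrative names:

```python
# Sketch of the "schema or error" pattern: one metadata lookup doubles as an
# existence check, since describing a missing table raises
# SnowparkSQLException. Names here are illustrative.
from snowflake.snowpark import Session
from snowflake.snowpark.exceptions import SnowparkSQLException
from snowflake.snowpark.types import DataType, StructType


def schema_or_error(session: Session, table_name: str) -> StructType | SnowparkSQLException:
    try:
        return session.table(table_name).schema
    except SnowparkSQLException as exc:
        return exc


def table_exists(result: StructType | SnowparkSQLException) -> bool:
    # A DataType result means the describe succeeded, i.e. the table exists.
    return isinstance(result, DataType)
```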
@@ -622,16 +721,26 @@ def _validate_schema_and_get_writer(
         return input_df.write
 
     table_schema = None
-
-
-
-
-
-
-
-
-
-
+    if table_schema_or_error is not None:
+        if isinstance(table_schema_or_error, SnowparkSQLException):
+            msg = table_schema_or_error.message
+            if "SQL compilation error" in msg and "does not exist" in msg:
+                pass
+            else:
+                raise table_schema_or_error
+        elif isinstance(table_schema_or_error, DataType):
+            table_schema = table_schema_or_error
+    else:
+        try:
+            table_schema = (
+                get_or_create_snowpark_session().table(snowpark_table_name).schema
+            )
+        except SnowparkSQLException as e:
+            msg = e.message
+            if "SQL compilation error" in msg and "does not exist" in msg:
+                pass
+            else:
+                raise e
 
     if table_schema is None:
         # If table does not exist, we can skip the schema validation
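Passing the earlier result into the validator avoids a second describe query; the branching boils down to the following sketch (illustrative helper name, not the packaged function):

```python
# Sketch of how a pre-fetched "schema or error" result feeds schema validation
# without another DESCRIBE round trip. Illustrative only.
from snowflake.snowpark.exceptions import SnowparkSQLException
from snowflake.snowpark.types import DataType, StructType


def schema_for_validation(result: StructType | SnowparkSQLException | None) -> StructType | None:
    if isinstance(result, DataType):
        return result  # table exists: validate the incoming DataFrame against this schema
    if isinstance(result, SnowparkSQLException):
        if "SQL compilation error" in result.message and "does not exist" in result.message:
            return None  # table is missing: skip validation, the writer will create it
        raise result  # any other SQL error is surfaced to the caller
    return None  # nothing pre-fetched here; the packaged validator fetches the schema itself
```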
@@ -884,31 +993,3 @@ def _truncate_directory(directory_path: Path) -> None:
             file.unlink()
         elif file.is_dir():
             shutil.rmtree(file)
-
-
-def check_snowflake_table_existence(
-    snowpark_table_name: str,
-    snowpark_session: snowpark.Session,
-):
-    try:
-        snowpark_session.sql(f"SELECT 1 FROM {snowpark_table_name} LIMIT 1").collect()
-        return True
-    except Exception:
-        return False
-
-
-# TODO: SNOW-2299414 Fix the implementation of table type check
-# def check_table_type(
-#     snowpark_table_name: str,
-#     snowpark_session: snowpark.Session,
-# ) -> str:
-#     # currently we only support iceberg table and FDN table
-#     metadata = snowpark_session.sql(
-#         f"SHOW TABLES LIKE '{unquote_if_quoted(snowpark_table_name)}';"
-#     ).collect()
-#     if metadata is None or len(metadata) == 0:
-#         raise AnalysisException(f"Table {snowpark_table_name} does not exist")
-#     metadata = metadata[0]
-#     if metadata.as_dict().get("is_iceberg") == "Y":
-#         return "ICEBERG"
-#     return "TABLE"
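The removed `SELECT 1` probe and the commented-out `check_table_type` prototype are superseded by `get_table_type` in `utils/io_utils.py`, which is not shown in this hunk. Going by the prototype above, a table-type check along these lines is plausible; this is a hedged sketch, not the shipped helper:

```python
# Hedged sketch in the spirit of the commented-out check_table_type above;
# the shipped get_table_type lives in utils/io_utils.py and may differ.
from snowflake.snowpark import Session


def guess_table_type(session: Session, table_name: str) -> str:
    rows = session.sql(f"SHOW TABLES LIKE '{table_name}'").collect()
    if not rows:
        raise ValueError(f"Table {table_name} does not exist")
    # SHOW TABLES reports is_iceberg as "Y"/"N" for each table.
    return "ICEBERG" if rows[0].as_dict().get("is_iceberg") == "Y" else "TABLE"
```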
snowflake/snowpark_connect/resources_initializer.py

@@ -12,6 +12,7 @@ _resources_initialized = threading.Event()
 _initializer_lock = threading.Lock()
 SPARK_VERSION = "3.5.6"
 RESOURCE_PATH = "/snowflake/snowpark_connect/resources"
+_upload_jars = True  # Flag to control whether to upload jars. Required for Scala UDFs.
 
 
 def initialize_resources() -> None:

@@ -57,10 +58,8 @@ def initialize_resources() -> None:
         f"spark-sql_2.12-{SPARK_VERSION}.jar",
         f"spark-connect-client-jvm_2.12-{SPARK_VERSION}.jar",
         f"spark-common-utils_2.12-{SPARK_VERSION}.jar",
+        "sas-scala-udf_2.12-0.1.0.jar",
         "json4s-ast_2.12-3.7.0-M11.jar",
-        "json4s-native_2.12-3.7.0-M11.jar",
-        "json4s-core_2.12-3.7.0-M11.jar",
-        "paranamer-2.8.3.jar",
     ]
 
     for jar in jar_files:

@@ -80,9 +79,11 @@ def initialize_resources() -> None:
         ("Initialize Session Stage", initialize_session_stage),  # Takes about 0.3s
         ("Initialize Session Catalog", initialize_catalog),  # Takes about 1.2s
         ("Snowflake Connection Warm Up", warm_up_sf_connection),  # Takes about 1s
-        ("Upload Scala UDF Jars", upload_scala_udf_jars),
     ]
 
+    if _upload_jars:
+        resources.append(("Upload Scala UDF Jars", upload_scala_udf_jars))
+
     for name, resource_func in resources:
         resource_start = time.time()
         try:

@@ -113,4 +114,18 @@ def initialize_resources_async() -> threading.Thread:
 
 def wait_for_resource_initialization() -> None:
     with _initializer_lock:
-        _resource_initializer.join()
+        _resource_initializer.join(timeout=300)  # wait at most 300 seconds
+        if _resource_initializer.is_alive():
+            logger.error(
+                "Resource initialization failed - initializer thread has been running for over 300 seconds."
+            )
+            raise RuntimeError(
+                "Resource initialization failed - initializer thread has been running for over 300 seconds."
+            )
+
+
+def set_upload_jars(upload: bool) -> None:
+    """Set whether to upload jars required for Scala UDFs. This should be set to False if Scala UDFs
+    are not used, to avoid the overhead of uploading jars."""
+    global _upload_jars
+    _upload_jars = upload
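Deployments that never register Scala UDFs can now opt out of the jar upload; a minimal usage sketch, with the module path taken from this wheel's layout, called before resource initialization starts:

```python
# Skip uploading the Scala UDF jars when Scala UDFs are not used, avoiding the
# upload overhead during resource initialization. Call this before resources
# are initialized.
from snowflake.snowpark_connect.resources_initializer import set_upload_jars

set_upload_jars(False)
```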
snowflake/snowpark_connect/server.py

@@ -531,7 +531,10 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
                 if name.endswith(".class"):
                     # name is <dir>/<package>/<class_name>
                     # we don't need the dir name, but require the package, so only remove dir
-
+                    if os.name != "nt":
+                        class_files[name.split("/", 1)[-1]] = filepath
+                    else:
+                        class_files[name.split("\\", 1)[-1]] = filepath
                     continue
                 session.file.put(
                     filepath,
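The same "drop the leading directory, keep the package-qualified class path" step can also be written without branching on `os.name`; a hedged sketch of an alternative formulation, not what the wheel ships:

```python
# OS-agnostic alternative for stripping the leading directory from an entry
# such as "dir/com/example/Foo.class" (or its backslash form on Windows),
# keeping the package-qualified remainder.
from pathlib import PurePath


def strip_leading_dir(name: str) -> str:
    parts = PurePath(name.replace("\\", "/")).parts
    return "/".join(parts[1:]) if len(parts) > 1 else name
```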
@@ -722,30 +725,33 @@ def _serve(
         # No need to start grpc server in TCM
         return
 
+    grpc_max_msg_size = get_int_from_env(
+        "SNOWFLAKE_GRPC_MAX_MESSAGE_SIZE",
+        _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE,
+    )
+    grpc_max_metadata_size = get_int_from_env(
+        "SNOWFLAKE_GRPC_MAX_METADATA_SIZE",
+        _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE,
+    )
     server_options = [
         (
             "grpc.max_receive_message_length",
-
-                "SNOWFLAKE_GRPC_MAX_MESSAGE_SIZE",
-                _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE,
-            ),
+            grpc_max_msg_size,
         ),
         (
             "grpc.max_metadata_size",
-
-                "SNOWFLAKE_GRPC_MAX_METADATA_SIZE",
-                _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE,
-            ),
+            grpc_max_metadata_size,
         ),
         (
             "grpc.absolute_max_metadata_size",
-
-                "SNOWFLAKE_GRPC_MAX_METADATA_SIZE",
-                _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE,
-            )
-            * 2,
+            grpc_max_metadata_size * 2,
         ),
     ]
+
+    from pyspark.sql.connect.client import ChannelBuilder
+
+    ChannelBuilder.MAX_MESSAGE_LENGTH = grpc_max_msg_size
+
     server = grpc.server(
         futures.ThreadPoolExecutor(max_workers=10), options=server_options
     )
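`get_int_from_env` itself is not part of this hunk; reading an integer override from the environment with a fallback presumably looks something like the following hedged sketch:

```python
# Hedged sketch of the env-override pattern used for the gRPC size limits;
# the actual get_int_from_env helper is not shown in this diff and may differ.
import os


def get_int_from_env(name: str, default: int) -> int:
    value = os.environ.get(name)
    if value is None:
        return default
    try:
        return int(value)
    except ValueError:
        return default
```

Resolving the sizes once lets the same values configure both the gRPC server options and `ChannelBuilder.MAX_MESSAGE_LENGTH`, which is what the hunk above does.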
@@ -1050,10 +1056,6 @@ def start_session(
     global _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE
     _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE = max_grpc_message_size
 
-    from pyspark.sql.connect.client import ChannelBuilder
-
-    ChannelBuilder.MAX_MESSAGE_LENGTH = max_grpc_message_size
-
     if os.environ.get("SPARK_ENV_LOADED"):
         raise RuntimeError(
             "Snowpark Connect cannot be run inside of a Spark environment"
snowflake/snowpark_connect/utils/artifacts.py

@@ -39,7 +39,7 @@ def write_temporary_artifact(
     if os.name != "nt":
         filepath = f"/tmp/sas-{session.session_id}/{name}"
     else:
-        filepath = f"{tempfile.gettempdir()}
+        filepath = f"{tempfile.gettempdir()}\\sas-{session.session_id}\\{name}"
     # The name comes to us as a path (e.g. cache/<name>), so we need to create
     # the parent directory if it doesn't exist to avoid errors during writing.
     pathlib.Path(filepath).parent.mkdir(parents=True, exist_ok=True)

@@ -55,11 +55,10 @@ def write_class_files_to_stage(
 ) -> None:
     if os.name != "nt":
         filepath = f"/tmp/sas-{session.session_id}"
+        jar_name = f'{filepath}/{hashlib.sha256(str(files).encode("utf-8")).hexdigest()[:10]}.jar'
     else:
-        filepath = f"{tempfile.gettempdir()}
-
-        f'{filepath}/{hashlib.sha256(str(files).encode("utf-8")).hexdigest()[:10]}.jar'
-    )
+        filepath = f"{tempfile.gettempdir()}\\sas-{session.session_id}"
+        jar_name = f'{filepath}\\{hashlib.sha256(str(files).encode("utf-8")).hexdigest()[:10]}.jar'
     with zipfile.ZipFile(jar_name, "w", zipfile.ZIP_DEFLATED) as jar:
         for name, path in files.items():
             jar.write(path, name)
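Building the per-session artifact paths with `pathlib` would avoid separate POSIX and Windows string templates; a hedged sketch of that alternative (the wheel keeps the explicit `os.name` branches shown above):

```python
# Cross-platform alternative for the per-session artifact directory; a sketch
# only, the packaged code hard-codes "/tmp" on POSIX and backslashes on Windows.
import os
import tempfile
from pathlib import Path


def artifact_path(session_id: str, name: str) -> Path:
    base = Path("/tmp") if os.name != "nt" else Path(tempfile.gettempdir())
    path = base / f"sas-{session_id}" / name
    path.parent.mkdir(parents=True, exist_ok=True)
    return path
```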
snowflake/snowpark_connect/utils/concurrent.py

@@ -52,6 +52,10 @@ class SynchronizedDict(Mapping[K, V]):
         with self._lock.writer():
             self._dict[key] = value
 
+    def __delitem__(self, key: K) -> None:
+        with self._lock.writer():
+            del self._dict[key]
+
     def __contains__(self, key: K) -> bool:
         with self._lock.reader():
             return key in self._dict
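With `__delitem__` in place, entries can be removed with the usual `del` syntax while still holding the write side of the lock. A standalone sketch of the pattern, simplified to a plain `threading.Lock` instead of the class's reader/writer lock:

```python
# Simplified sketch of the synchronized-dict pattern: every mutation,
# including deletion, runs under the write lock. The packaged SynchronizedDict
# uses a reader/writer lock; a single Lock keeps this sketch short.
import threading
from typing import Dict, Generic, TypeVar

K = TypeVar("K")
V = TypeVar("V")


class LockedDict(Generic[K, V]):
    def __init__(self) -> None:
        self._dict: Dict[K, V] = {}
        self._lock = threading.Lock()

    def __setitem__(self, key: K, value: V) -> None:
        with self._lock:
            self._dict[key] = value

    def __delitem__(self, key: K) -> None:
        with self._lock:
            del self._dict[key]

    def __contains__(self, key: K) -> bool:
        with self._lock:
            return key in self._dict


cache: LockedDict[str, int] = LockedDict()
cache["a"] = 1
del cache["a"]  # deletion is now supported, mirroring the new __delitem__
```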