snowpark-connect 0.22.1__py3-none-any.whl → 0.24.0__py3-none-any.whl
This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of snowpark-connect has been flagged as possibly problematic.
- snowflake/snowpark_connect/config.py +0 -11
- snowflake/snowpark_connect/error/error_utils.py +7 -0
- snowflake/snowpark_connect/error/exceptions.py +4 -0
- snowflake/snowpark_connect/expression/function_defaults.py +207 -0
- snowflake/snowpark_connect/expression/hybrid_column_map.py +192 -0
- snowflake/snowpark_connect/expression/literal.py +14 -12
- snowflake/snowpark_connect/expression/map_cast.py +20 -4
- snowflake/snowpark_connect/expression/map_expression.py +18 -2
- snowflake/snowpark_connect/expression/map_extension.py +12 -2
- snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +32 -5
- snowflake/snowpark_connect/expression/map_unresolved_function.py +69 -10
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/accessors.py +1281 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/functions.py +203 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/utils.py +202 -0
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +8 -8
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +4 -2
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +127 -21
- snowflake/snowpark_connect/relation/map_aggregate.py +57 -5
- snowflake/snowpark_connect/relation/map_column_ops.py +6 -5
- snowflake/snowpark_connect/relation/map_extension.py +65 -31
- snowflake/snowpark_connect/relation/map_local_relation.py +8 -1
- snowflake/snowpark_connect/relation/map_row_ops.py +2 -0
- snowflake/snowpark_connect/relation/map_sql.py +22 -5
- snowflake/snowpark_connect/relation/read/map_read.py +2 -1
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +8 -1
- snowflake/snowpark_connect/relation/read/reader_config.py +9 -0
- snowflake/snowpark_connect/relation/write/map_write.py +243 -68
- snowflake/snowpark_connect/server.py +25 -5
- snowflake/snowpark_connect/type_mapping.py +2 -2
- snowflake/snowpark_connect/utils/env_utils.py +55 -0
- snowflake/snowpark_connect/utils/session.py +21 -0
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/spark_decoder.py +1 -1
- {snowpark_connect-0.22.1.dist-info → snowpark_connect-0.24.0.dist-info}/METADATA +2 -2
- {snowpark_connect-0.22.1.dist-info → snowpark_connect-0.24.0.dist-info}/RECORD +44 -39
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
- {snowpark_connect-0.22.1.data → snowpark_connect-0.24.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.22.1.data → snowpark_connect-0.24.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.22.1.data → snowpark_connect-0.24.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.22.1.dist-info → snowpark_connect-0.24.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.22.1.dist-info → snowpark_connect-0.24.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.22.1.dist-info → snowpark_connect-0.24.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.22.1.dist-info → snowpark_connect-0.24.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.22.1.dist-info → snowpark_connect-0.24.0.dist-info}/top_level.txt +0 -0
@@ -234,6 +234,10 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 column_order=_column_order_for_write,
             )
         case "append":
+            if check_table_type(snowpark_table_name, session) != "ICEBERG":
+                raise AnalysisException(
+                    f"Table {snowpark_table_name} is not an iceberg table"
+                )
             _validate_schema_and_get_writer(
                 input_df, "append", snowpark_table_name
             ).saveAsTable(
@@ -260,7 +264,10 @@ def map_write(request: proto_base.ExecutePlanRequest):
             )
         case "overwrite":
             if check_snowflake_table_existence(snowpark_table_name, session):
-                … [1 removed line not shown in this view]
+                if check_table_type(snowpark_table_name, session) != "ICEBERG":
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} is not an iceberg table"
+                    )
             else:
                 create_iceberg_table(
                     snowpark_table_name=snowpark_table_name,
@@ -269,10 +276,10 @@ def map_write(request: proto_base.ExecutePlanRequest):
                     snowpark_session=session,
                 )
             _validate_schema_and_get_writer(
-                input_df, "
+                input_df, "truncate", snowpark_table_name
             ).saveAsTable(
                 table_name=snowpark_table_name,
-                mode="
+                mode="truncate",
                 column_order=_column_order_for_write,
             )
         case _:
@@ -286,13 +293,46 @@ def map_write(request: proto_base.ExecutePlanRequest):
             write_op.table.save_method
             == commands_proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_SAVE_AS_TABLE
         ):
-            … [7 removed lines not shown in this view]
+            match write_mode:
+                case "overwrite":
+                    if check_snowflake_table_existence(
+                        snowpark_table_name, session
+                    ):
+                        if (
+                            check_table_type(snowpark_table_name, session)
+                            != "TABLE"
+                        ):
+                            raise AnalysisException(
+                                f"Table {snowpark_table_name} is not a FDN table"
+                            )
+                        write_mode = "truncate"
+                    _validate_schema_and_get_writer(
+                        input_df, write_mode, snowpark_table_name
+                    ).saveAsTable(
+                        table_name=snowpark_table_name,
+                        mode=write_mode,
+                        column_order=_column_order_for_write,
+                    )
+                case "append":
+                    if check_table_type(snowpark_table_name, session) != "TABLE":
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not a FDN table"
+                        )
+                    _validate_schema_and_get_writer(
+                        input_df, write_mode, snowpark_table_name
+                    ).saveAsTable(
+                        table_name=snowpark_table_name,
+                        mode=write_mode,
+                        column_order=_column_order_for_write,
+                    )
+                case _:
+                    _validate_schema_and_get_writer(
+                        input_df, write_mode, snowpark_table_name
+                    ).saveAsTable(
+                        table_name=snowpark_table_name,
+                        mode=write_mode,
+                        column_order=_column_order_for_write,
+                    )
         elif (
             write_op.table.save_method
             == commands_proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_INSERT_INTO
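A minimal sketch (not part of the diff) of the client-side call that reaches the save-as-table branch above, assuming an already-configured Snowpark Connect Spark session and a hypothetical endpoint and table name:

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()  # endpoint assumed
    df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "val"])

    # Sent to the server as a WriteOperation with TABLE_SAVE_METHOD_SAVE_AS_TABLE.
    # With the new branch: if MY_TABLE already exists as a regular ("FDN") Snowflake
    # table, "overwrite" is rewritten to a Snowpark "truncate" write; if it exists
    # but is not an FDN table, an AnalysisException is raised instead.
    df.write.mode("overwrite").saveAsTable("MY_TABLE")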
@@ -312,21 +352,6 @@ def map_write(request: proto_base.ExecutePlanRequest):
 
 def map_write_v2(request: proto_base.ExecutePlanRequest):
     write_op = request.plan.command.write_operation_v2
-    match write_op.mode:
-        case commands_proto.WriteOperationV2.MODE_APPEND:
-            write_mode = "append"
-        case commands_proto.WriteOperationV2.MODE_CREATE:
-            write_mode = "errorifexists"
-        case commands_proto.WriteOperationV2.MODE_OVERWRITE:
-            write_mode = "overwrite"
-        case commands_proto.WriteOperationV2.MODE_REPLACE:
-            write_mode = "overwrite"
-        case commands_proto.WriteOperationV2.MODE_CREATE_OR_REPLACE:
-            write_mode = "overwrite"
-        case _:
-            raise SnowparkConnectNotImplementedError(
-                f"Write operation {write_op.mode} not implemented."
-            )
 
     snowpark_table_name = _spark_to_snowflake(write_op.table_name)
     result = map_relation(write_op.input)
@@ -338,55 +363,176 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
             "Write operation V2 only support table writing now"
         )
 
-    # For OVERWRITE and APPEND modes, check if table exists first - Spark requires table to exist for these operations
-    if write_op.mode in (
-        commands_proto.WriteOperationV2.MODE_OVERWRITE,
-        commands_proto.WriteOperationV2.MODE_APPEND,
-    ):
-        if not check_snowflake_table_existence(snowpark_table_name, session):
-            raise AnalysisException(
-                f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found. "
-                f"Verify the spelling and correctness of the schema and catalog.\n"
-            )
-
     if write_op.provider.lower() == "iceberg":
-        … [23 removed lines not shown in this view]
+        match write_op.mode:
+            case commands_proto.WriteOperationV2.MODE_CREATE:
+                if check_snowflake_table_existence(snowpark_table_name, session):
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} already exists"
+                    )
+                create_iceberg_table(
+                    snowpark_table_name=snowpark_table_name,
+                    location=write_op.table_properties.get("location"),
+                    schema=input_df.schema,
+                    snowpark_session=session,
+                )
+                _validate_schema_and_get_writer(
+                    input_df, "append", snowpark_table_name
+                ).saveAsTable(
+                    table_name=snowpark_table_name,
+                    mode="append",
+                    column_order=_column_order_for_write,
+                )
+            case commands_proto.WriteOperationV2.MODE_APPEND:
+                if not check_snowflake_table_existence(snowpark_table_name, session):
+                    raise AnalysisException(
+                        f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
+                    )
+                if check_table_type(snowpark_table_name, session) != "ICEBERG":
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} is not an iceberg table"
+                    )
+                _validate_schema_and_get_writer(
+                    input_df, "append", snowpark_table_name
+                ).saveAsTable(
+                    table_name=snowpark_table_name,
+                    mode="append",
+                    column_order=_column_order_for_write,
+                )
+            case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
+                # TODO: handle the filter condition for MODE_OVERWRITE
+                if check_snowflake_table_existence(snowpark_table_name, session):
+                    if check_table_type(snowpark_table_name, session) != "ICEBERG":
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not an iceberg table"
+                        )
+                else:
+                    raise AnalysisException(
+                        f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
+                    )
+                _validate_schema_and_get_writer(
+                    input_df, "truncate", snowpark_table_name
+                ).saveAsTable(
+                    table_name=snowpark_table_name,
+                    mode="truncate",
+                    column_order=_column_order_for_write,
+                )
+            case commands_proto.WriteOperationV2.MODE_REPLACE:
+                if check_snowflake_table_existence(snowpark_table_name, session):
+                    create_iceberg_table(
+                        snowpark_table_name=snowpark_table_name,
+                        location=write_op.table_properties.get("location"),
+                        schema=input_df.schema,
+                        snowpark_session=session,
+                        mode="replace",
+                    )
+                else:
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} does not exist"
+                    )
+                _validate_schema_and_get_writer(
+                    input_df, "replace", snowpark_table_name
+                ).saveAsTable(
+                    table_name=snowpark_table_name,
+                    mode="append",
+                    column_order=_column_order_for_write,
+                )
+            case commands_proto.WriteOperationV2.MODE_CREATE_OR_REPLACE:
+                create_iceberg_table(
+                    snowpark_table_name=snowpark_table_name,
+                    location=write_op.table_properties.get("location"),
+                    schema=input_df.schema,
+                    snowpark_session=session,
+                    mode="create_or_replace",
+                )
+                _validate_schema_and_get_writer(
+                    input_df, "create_or_replace", snowpark_table_name
+                ).saveAsTable(
+                    table_name=snowpark_table_name,
+                    mode="append",
+                    column_order=_column_order_for_write,
+                )
+            case _:
+                raise SnowparkConnectNotImplementedError(
+                    f"Write mode {commands_proto.WriteOperationV2.Mode.Name(write_op.mode)} is not supported"
+                )
     else:
-        … [7 removed lines not shown in this view]
+        match write_op.mode:
+            case commands_proto.WriteOperationV2.MODE_CREATE:
+                _validate_schema_and_get_writer(
+                    input_df, "errorifexists", snowpark_table_name
+                ).saveAsTable(
+                    table_name=snowpark_table_name,
+                    mode="errorifexists",
+                    column_order=_column_order_for_write,
+                )
+            case commands_proto.WriteOperationV2.MODE_APPEND:
+                if not check_snowflake_table_existence(snowpark_table_name, session):
+                    raise AnalysisException(
+                        f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
+                    )
+                if check_table_type(snowpark_table_name, session) != "TABLE":
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} is not a FDN table"
+                    )
+                _validate_schema_and_get_writer(
+                    input_df, "append", snowpark_table_name
+                ).saveAsTable(
+                    table_name=snowpark_table_name,
+                    mode="append",
+                    column_order=_column_order_for_write,
+                )
+            case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
+                # TODO: handle the filter condition for MODE_OVERWRITE
+                if check_snowflake_table_existence(snowpark_table_name, session):
+                    if check_table_type(snowpark_table_name, session) != "TABLE":
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not a FDN table"
+                        )
+                else:
+                    raise AnalysisException(
+                        f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
+                    )
+                _validate_schema_and_get_writer(
+                    input_df, "truncate", snowpark_table_name
+                ).saveAsTable(
+                    table_name=snowpark_table_name,
+                    mode="truncate",
+                    column_order=_column_order_for_write,
+                )
+            case commands_proto.WriteOperationV2.MODE_REPLACE:
+                if not check_snowflake_table_existence(snowpark_table_name, session):
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} does not exist"
+                    )
+                _validate_schema_and_get_writer(
+                    input_df, "replace", snowpark_table_name
+                ).saveAsTable(
+                    table_name=snowpark_table_name,
+                    mode="overwrite",
+                    column_order=_column_order_for_write,
+                )
+            case commands_proto.WriteOperationV2.MODE_CREATE_OR_REPLACE:
+                _validate_schema_and_get_writer(
+                    input_df, "create_or_replace", snowpark_table_name
+                ).saveAsTable(
+                    table_name=snowpark_table_name,
+                    mode="overwrite",
+                    column_order=_column_order_for_write,
+                )
+            case _:
+                raise SnowparkConnectNotImplementedError(
+                    f"Write mode {commands_proto.WriteOperationV2.Mode.Name(write_op.mode)} is not supported"
+                )
 
 
 def _validate_schema_and_get_writer(
     input_df: snowpark.DataFrame, write_mode: str, snowpark_table_name: str
 ) -> snowpark.DataFrameWriter:
-    if write_mode
+    if write_mode is not None and write_mode.lower() in (
+        "replace",
+        "create_or_replace",
+    ):
         return input_df.write
 
     table_schema = None
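For orientation, an illustrative mapping (not from the diff) between the PySpark DataFrameWriterV2 calls and the WriteOperationV2 modes handled above; `df` and the table name are assumed, and `tableProperty("location", ...)` is what feeds `write_op.table_properties.get("location")`:

    df.writeTo("db.events").using("iceberg").tableProperty("location", "events/").create()  # MODE_CREATE
    df.writeTo("db.events").append()                  # MODE_APPEND
    df.writeTo("db.events").overwritePartitions()     # MODE_OVERWRITE_PARTITIONS
    df.writeTo("db.events").replace()                 # MODE_REPLACE
    df.writeTo("db.events").createOrReplace()         # MODE_CREATE_OR_REPLACE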
@@ -528,6 +674,7 @@ def create_iceberg_table(
     location: str,
     schema: StructType,
     snowpark_session: snowpark.Session,
+    mode: str = "create",
 ):
     table_schema = [
         f"{spark_to_sf_single_id(unquote_if_quoted(field.name), is_column = True)} {snowpark_to_iceberg_type(field.datatype)}"
@@ -550,8 +697,20 @@ def create_iceberg_table(
         else f"EXTERNAL_VOLUME = '{config_external_volume}'"
     )
 
+    match mode:
+        case "create":
+            create_sql = "CREATE"
+        case "replace":
+            # There's no replace for iceberg table, so we use create or replace
+            create_sql = "CREATE OR REPLACE"
+        case "create_or_replace":
+            create_sql = "CREATE OR REPLACE"
+        case _:
+            raise SnowparkConnectNotImplementedError(
+                f"Write mode {mode} is not supported for iceberg table"
+            )
     sql = f"""
-        … [1 removed line not shown in this view]
+        {create_sql} ICEBERG TABLE {snowpark_table_name} ({",".join(table_schema)})
         CATALOG = 'SNOWFLAKE'
         {external_volume}
         {base_location};
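A hypothetical rendering of the SQL template above for mode="create_or_replace"; the table name, column definitions, external volume, and base location are invented for illustration only:

    sql = """
        CREATE OR REPLACE ICEBERG TABLE MY_DB.PUBLIC.EVENTS (ID long, TS timestamp)
        CATALOG = 'SNOWFLAKE'
        EXTERNAL_VOLUME = 'MY_EXT_VOL'
        BASE_LOCATION = 'events/';
    """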
@@ -637,3 +796,19 @@ def check_snowflake_table_existence(
         return True
     except Exception:
         return False
+
+
+def check_table_type(
+    snowpark_table_name: str,
+    snowpark_session: snowpark.Session,
+) -> str:
+    # currently we only support iceberg table and FDN table
+    metadata = snowpark_session.sql(
+        f"SHOW TABLES LIKE '{unquote_if_quoted(snowpark_table_name)}';"
+    ).collect()
+    if metadata is None or len(metadata) == 0:
+        raise AnalysisException(f"Table {snowpark_table_name} does not exist")
+    metadata = metadata[0]
+    if metadata.as_dict().get("is_iceberg") == "Y":
+        return "ICEBERG"
+    return "TABLE"
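A standalone sketch (not part of the package) of the signal check_table_type relies on: SHOW TABLES exposes an "is_iceberg" column in its result set. The connection parameters and table name are assumed:

    from snowflake.snowpark import Session

    session = Session.builder.configs(connection_parameters).create()  # connection_parameters assumed
    row = session.sql("SHOW TABLES LIKE 'EVENTS';").collect()[0]
    print(row.as_dict().get("is_iceberg"))  # "Y" for Iceberg tables, otherwise a regular (FDN) table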
@@ -83,6 +83,7 @@ from snowflake.snowpark_connect.utils.context import (
     set_session_id,
     set_spark_version,
 )
+from snowflake.snowpark_connect.utils.env_utils import get_int_from_env
 from snowflake.snowpark_connect.utils.interrupt import (
     interrupt_all_queries,
     interrupt_queries_with_tag,
@@ -700,11 +701,27 @@ def _serve(
         return
 
     server_options = [
-        (
-            … [1 removed line not shown in this view]
+        (
+            "grpc.max_receive_message_length",
+            get_int_from_env(
+                "SNOWFLAKE_GRPC_MAX_MESSAGE_SIZE",
+                _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE,
+            ),
+        ),
+        (
+            "grpc.max_metadata_size",
+            get_int_from_env(
+                "SNOWFLAKE_GRPC_MAX_METADATA_SIZE",
+                _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE,
+            ),
+        ),
         (
             "grpc.absolute_max_metadata_size",
-            … [1 removed line not shown in this view]
+            get_int_from_env(
+                "SNOWFLAKE_GRPC_MAX_METADATA_SIZE",
+                _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE,
+            )
+            * 2,
         ),
     ]
     server = grpc.server(
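A minimal sketch of overriding the new gRPC limits via the environment variables introduced above; the variable names come from the diff, the values are illustrative, and they must be set before the server process starts:

    import os

    os.environ["SNOWFLAKE_GRPC_MAX_MESSAGE_SIZE"] = str(256 * 1024 * 1024)   # 256 MiB receive limit
    os.environ["SNOWFLAKE_GRPC_MAX_METADATA_SIZE"] = str(1 * 1024 * 1024)    # 1 MiB metadata limit
    # then start the Snowpark Connect server as usual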
@@ -812,8 +829,11 @@ class UnixDomainSocketChannelBuilder(ChannelBuilder):
     Spark Connect gRPC channel builder for Unix domain sockets
     """
 
-    def __init__(
-        url: str =
+    def __init__(
+        self, url: str = None, channelOptions: Optional[List[Tuple[str, Any]]] = None
+    ) -> None:
+        if url is None:
+            url = get_client_url()
         if url[:6] != "unix:/" or len(url) < 7:
             raise PySparkValueError(
                 error_class="INVALID_CONNECT_URL",
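A sketch of how the updated builder can be constructed now that the URL defaults to get_client_url(); the socket path is made up, and the URL must still start with "unix:/":

    builder = UnixDomainSocketChannelBuilder()  # falls back to get_client_url()
    builder = UnixDomainSocketChannelBuilder("unix:/tmp/spark-connect.sock")  # explicit socket path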
@@ -59,7 +59,7 @@ def _get_struct_type_class():
 
 
 @cache
-def
+def get_python_sql_utils_class():
     return jpype.JClass("org.apache.spark.sql.api.python.PythonSQLUtils")
 
 
@@ -70,7 +70,7 @@ def parse_ddl_with_spark_scala(ddl_string: str) -> pyspark.sql.types.DataType:
     This mimics pysparks.ddl parsing logic pyspark.sql.types._py_parse_datatype_string
     """
     struct_type_class = _get_struct_type_class()
-    python_sql_utils =
+    python_sql_utils = get_python_sql_utils_class()
 
     try:
         # DDL format, "fieldname datatype, fieldname datatype".
@@ -0,0 +1,55 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+"""
+Environment variable utilities for Snowpark Connect.
+"""
+
+import os
+
+from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
+
+
+def get_int_from_env(env_var: str, default: int) -> int:
+    """
+    Safely get integer value from environment variable with fallback to default.
+
+    Args:
+        env_var: Environment variable name
+        default: Default integer value if env var is not set or invalid
+
+    Returns:
+        Integer value from environment variable or default
+
+    Raises:
+        TypeError: If default is not an integer
+
+    Examples:
+        >>> get_int_from_env("MAX_WORKERS", 10)
+        10
+        >>> os.environ["MAX_WORKERS"] = "20"
+        >>> get_int_from_env("MAX_WORKERS", 10)
+        20
+        >>> os.environ["MAX_WORKERS"] = "invalid"
+        >>> get_int_from_env("MAX_WORKERS", 10)  # logs warning, returns 10
+        10
+    """
+    # Validate that default is actually an integer
+    if not isinstance(default, int):
+        raise TypeError(
+            f"Default value must be an integer, got {type(default).__name__}: {default}"
+        )
+
+    value = os.getenv(env_var)
+    if value is None:
+        return default
+
+    try:
+        return int(value)
+    except ValueError:
+        logger.warning(
+            f"Invalid integer value for environment variable {env_var}: '{value}', "
+            f"using default: {default}"
+        )
+        return default
@@ -73,6 +73,27 @@ def configure_snowpark_session(session: snowpark.Session):
     session.connection.arrow_number_to_decimal_setter = True
     session.custom_package_usage_config["enabled"] = True
 
+    default_fallback_timezone = "UTC"
+    if global_config.spark_sql_session_timeZone is None:
+        try:
+            result = session.sql("SHOW PARAMETERS LIKE 'TIMEZONE'").collect()
+            if result and len(result) > 0:
+                value = result[0]["value"]
+                logger.warning(
+                    f"Using Snowflake session timezone parameter as fallback: {value}"
+                )
+            else:
+                value = default_fallback_timezone
+                logger.warning(
+                    f"Could not determine timezone from parameters, defaulting to {default_fallback_timezone}"
+                )
+        except Exception as e:
+            value = default_fallback_timezone
+            logger.warning(
+                f"Could not query Snowflake timezone parameter ({e}), defaulting to {default_fallback_timezone}"
+            )
+        global_config.spark_sql_session_timeZone = value
+
     session_params = {
         "TIMESTAMP_TYPE_MAPPING": "TIMESTAMP_LTZ",
         "TIMEZONE": f"'{global_config.spark_sql_session_timeZone}'",
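The fallback above only runs when no session timezone was configured. A sketch, assuming `spark` is a Snowpark Connect Spark session, of setting the standard Spark config so the "SHOW PARAMETERS LIKE 'TIMEZONE'" query is never needed:

    spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")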
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: snowpark-connect
-Version: 0.22.1
+Version: 0.24.0
 Summary: Snowpark Connect for Spark
 Author: Snowflake, Inc
 License: Apache License, Version 2.0
@@ -16,7 +16,7 @@ Requires-Dist: jpype1
 Requires-Dist: protobuf<5.0,>=4.25.3
 Requires-Dist: s3fs>=2025.3.0
 Requires-Dist: snowflake.core<2,>=1.0.5
-Requires-Dist: snowflake-snowpark-python[pandas]
+Requires-Dist: snowflake-snowpark-python[pandas]<1.38.0,>=1.37.0
 Requires-Dist: sqlglot>=26.3.8
 Requires-Dist: jaydebeapi
 Requires-Dist: aiobotocore~=2.23.0