snowpark-connect 0.26.0__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of snowpark-connect was flagged as potentially problematic by the registry's analysis (see the registry page for details).
- snowflake/snowpark_connect/column_name_handler.py +3 -93
- snowflake/snowpark_connect/config.py +99 -4
- snowflake/snowpark_connect/dataframe_container.py +0 -6
- snowflake/snowpark_connect/expression/map_expression.py +31 -1
- snowflake/snowpark_connect/expression/map_sql_expression.py +22 -18
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +22 -26
- snowflake/snowpark_connect/expression/map_unresolved_function.py +28 -10
- snowflake/snowpark_connect/expression/map_unresolved_star.py +2 -3
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/relation/map_extension.py +7 -1
- snowflake/snowpark_connect/relation/map_join.py +62 -258
- snowflake/snowpark_connect/relation/map_map_partitions.py +36 -77
- snowflake/snowpark_connect/relation/map_relation.py +8 -2
- snowflake/snowpark_connect/relation/map_show_string.py +2 -0
- snowflake/snowpark_connect/relation/map_sql.py +413 -15
- snowflake/snowpark_connect/relation/write/map_write.py +195 -114
- snowflake/snowpark_connect/resources_initializer.py +20 -5
- snowflake/snowpark_connect/server.py +20 -18
- snowflake/snowpark_connect/utils/artifacts.py +4 -5
- snowflake/snowpark_connect/utils/concurrent.py +4 -0
- snowflake/snowpark_connect/utils/context.py +41 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +57 -51
- snowflake/snowpark_connect/utils/identifiers.py +120 -0
- snowflake/snowpark_connect/utils/io_utils.py +21 -1
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +86 -2
- snowflake/snowpark_connect/utils/scala_udf_utils.py +34 -43
- snowflake/snowpark_connect/utils/session.py +16 -26
- snowflake/snowpark_connect/utils/telemetry.py +53 -0
- snowflake/snowpark_connect/utils/udf_utils.py +66 -103
- snowflake/snowpark_connect/utils/udtf_helper.py +17 -7
- snowflake/snowpark_connect/version.py +2 -3
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/METADATA +2 -2
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/RECORD +41 -42
- snowflake/snowpark_connect/hidden_column.py +0 -39
- {snowpark_connect-0.26.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.26.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.26.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.28.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/relation/map_show_string.py

@@ -12,6 +12,7 @@ from snowflake.snowpark._internal.analyzer import analyzer_utils
 from snowflake.snowpark.functions import col
 from snowflake.snowpark.types import DateType, StringType, StructField, StructType
 from snowflake.snowpark_connect.column_name_handler import set_schema_getter
+from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.map_relation import map_relation
 
@@ -33,6 +34,7 @@ def map_show_string(rel: relation_proto.Relation) -> pandas.DataFrame:
         truncate=rel.show_string.truncate,
         vertical=rel.show_string.vertical,
         _spark_column_names=input_df_container.column_map.get_spark_columns(),
+        _spark_session_tz=global_config.spark_sql_session_timeZone,
     )
     return pandas.DataFrame({"show_string": [show_string]})
 
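Note (not part of the diff): the new _spark_session_tz argument threads the Spark session time zone into show-string rendering. A minimal usage sketch, assuming an already-created snowpark-connect Spark session named spark (the column name ts is invented):

    from pyspark.sql.functions import current_timestamp

    # Standard Spark SQL setting; the diff reads it via global_config.spark_sql_session_timeZone.
    spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
    df = spark.range(1).select(current_timestamp().alias("ts"))
    df.show()  # timestamp values are rendered in the configured session time zone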
snowflake/snowpark_connect/relation/map_sql.py

@@ -4,7 +4,7 @@
 
 import re
 from collections.abc import MutableMapping, MutableSequence
-from contextlib import contextmanager
+from contextlib import contextmanager, suppress
 from contextvars import ContextVar
 from functools import reduce
 
@@ -30,10 +30,13 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
 )
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 from snowflake.snowpark._internal.utils import is_sql_select_statement, quote_name
+from snowflake.snowpark.functions import when_matched, when_not_matched
 from snowflake.snowpark_connect.config import (
     auto_uppercase_non_column_identifiers,
+    check_table_supports_operation,
     get_boolean_session_config_param,
     global_config,
+    record_table_metadata,
     set_config_param,
     unset_config_param,
 )
@@ -56,8 +59,10 @@ from snowflake.snowpark_connect.utils.context import (
     _accessing_temp_object,
     gen_sql_plan_id,
     get_session_id,
+    get_sql_plan,
     push_evaluating_sql_scope,
     push_sql_scope,
+    set_plan_id_map,
     set_sql_args,
     set_sql_plan_name,
 )
@@ -67,6 +72,7 @@ from snowflake.snowpark_connect.utils.telemetry import (
     telemetry,
 )
 
+from .. import column_name_handler
 from ..expression.map_sql_expression import (
     _window_specs,
     as_java_list,
@@ -202,6 +208,9 @@ def _rename_columns(
 def _create_table_as_select(logical_plan, mode: str) -> None:
     # TODO: for as select create tables we'd map multi layer identifier here
     name = get_relation_identifier_name(logical_plan.name())
+    full_table_identifier = get_relation_identifier_name(
+        logical_plan.name(), is_multi_part=True
+    )
     comment = logical_plan.tableSpec().comment()
 
     container = execute_logical_plan(logical_plan.query())
@@ -222,6 +231,15 @@ def _create_table_as_select(logical_plan, mode: str) -> None:
         mode=mode,
     )
 
+    # Record table metadata for CREATE TABLE AS SELECT
+    # These are typically considered v2 tables and support RENAME COLUMN
+    record_table_metadata(
+        table_identifier=full_table_identifier,
+        table_type="v2",
+        data_source="default",
+        supports_column_rename=True,
+    )
+
 
 def _spark_field_to_sql(field: jpype.JObject, is_column: bool) -> str:
     # Column names will be uppercased according to "snowpark.connect.sql.identifiers.auto-uppercase",
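Note (not part of the diff): record_table_metadata and check_table_supports_operation are new helpers imported from snowflake/snowpark_connect/config.py, which grows by 99 lines in this release. Their implementation is not shown in this section; the sketch below is only an assumption of the shape such a registry could take, inferred from the call sites in map_sql.py.

    # Hypothetical sketch; the real config.py implementation may differ.
    _table_metadata: dict = {}

    def record_table_metadata(
        table_identifier: str,
        table_type: str,
        data_source: str,
        supports_column_rename: bool,
    ) -> None:
        # Remember how a table was created so later ALTER TABLE handling can
        # decide whether an operation is allowed for Spark compatibility.
        _table_metadata[table_identifier] = {
            "table_type": table_type,
            "data_source": data_source,
            "supports_column_rename": supports_column_rename,
        }

    def check_table_supports_operation(table_identifier: str, operation: str) -> bool:
        # Unknown tables are not restricted; known v1 tables may reject renames.
        meta = _table_metadata.get(table_identifier)
        if meta is None:
            return True
        if operation == "rename_column":
            return meta["supports_column_rename"]
        return True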
@@ -299,6 +317,65 @@ def _remove_column_data_type(node):
     return node
 
 
+def _get_condition_from_action(action, column_mapping, typer):
+    condition = None
+    if action.condition().isDefined():
+        (_, condition_typed_col,) = map_single_column_expression(
+            map_logical_plan_expression(action.condition().get()),
+            column_mapping,
+            typer,
+        )
+        condition = condition_typed_col.col
+    return condition
+
+
+def _get_assignments_from_action(
+    action,
+    column_mapping_source,
+    column_mapping_target,
+    typer_source,
+    typer_target,
+):
+    assignments = dict()
+    if (
+        action.getClass().getSimpleName() == "InsertAction"
+        or action.getClass().getSimpleName() == "UpdateAction"
+    ):
+        incoming_assignments = as_java_list(action.assignments())
+        for assignment in incoming_assignments:
+            (_, key_typ_col) = map_single_column_expression(
+                map_logical_plan_expression(assignment.key()),
+                column_mapping=column_mapping_target,
+                typer=typer_target,
+            )
+            key_name = typer_target.df.select(key_typ_col.col).columns[0]
+
+            (_, val_typ_col) = map_single_column_expression(
+                map_logical_plan_expression(assignment.value()),
+                column_mapping=column_mapping_source,
+                typer=typer_source,
+            )
+
+            assignments[key_name] = val_typ_col.col
+    elif (
+        action.getClass().getSimpleName() == "InsertStarAction"
+        or action.getClass().getSimpleName() == "UpdateStarAction"
+    ):
+        if len(column_mapping_source.columns) != len(column_mapping_target.columns):
+            raise ValueError(
+                "source and target must have the same number of columns for InsertStarAction or UpdateStarAction"
+            )
+        for i, col in enumerate(column_mapping_target.columns):
+            if assignments.get(col.snowpark_name) is not None:
+                raise SnowparkConnectNotImplementedError(
+                    "UpdateStarAction or InsertStarAction is not supported with duplicate columns."
+                )
+            assignments[col.snowpark_name] = snowpark_fn.col(
+                column_mapping_source.columns[i].snowpark_name
+            )
+    return assignments
+
+
 def map_sql_to_pandas_df(
     sql_string: str,
     named_args: MutableMapping[str, expressions_proto.Expression.Literal],
@@ -420,6 +497,9 @@ def map_sql_to_pandas_df(
             )
 
             name = get_relation_identifier_name(logical_plan.name())
+            full_table_identifier = get_relation_identifier_name(
+                logical_plan.name(), is_multi_part=True
+            )
             columns = ", ".join(
                 _spark_field_to_sql(f, True)
                 for f in logical_plan.tableSchema().fields()
@@ -430,10 +510,48 @@ def map_sql_to_pandas_df(
                 if comment_opt.isDefined()
                 else ""
             )
+
+            # Extract data source for metadata tracking
+            data_source = "default"
+
+            with suppress(Exception):
+                # Get data source from tableSpec.provider() (for USING clause)
+                if hasattr(logical_plan, "tableSpec"):
+                    table_spec = logical_plan.tableSpec()
+                    if hasattr(table_spec, "provider"):
+                        provider_opt = table_spec.provider()
+                        if provider_opt.isDefined():
+                            data_source = str(provider_opt.get()).lower()
+                        else:
+                            # Fall back to checking properties for FORMAT
+                            table_properties = table_spec.properties()
+                            if not table_properties.isEmpty():
+                                for prop in table_properties.get():
+                                    if str(prop.key()) == "FORMAT":
+                                        data_source = str(prop.value()).lower()
+                                        break
+
             # NOTE: We are intentionally ignoring any FORMAT=... parameters here.
             session.sql(
                 f"CREATE {replace_table} TABLE {if_not_exists}{name} ({columns}) {comment}"
             ).collect()
+
+            # Record table metadata for Spark compatibility
+            # Tables created with explicit schema are considered v1 tables
+            # v1 tables with certain data sources don't support RENAME COLUMN in OSS Spark
+            supports_rename = data_source not in (
+                "parquet",
+                "csv",
+                "json",
+                "orc",
+                "avro",
+            )
+            record_table_metadata(
+                table_identifier=full_table_identifier,
+                table_type="v1",
+                data_source=data_source,
+                supports_column_rename=supports_rename,
+            )
         case "CreateTableAsSelect":
             mode = "ignore" if logical_plan.ignoreIfExists() else "errorifexists"
             _create_table_as_select(logical_plan, mode=mode)
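Note (not part of the diff): the data-source detection above only affects the recorded metadata; the CREATE TABLE statement sent to Snowflake is unchanged. Illustrative statements (table and column names are invented), assuming a snowpark-connect session named spark:

    # Explicit schema plus a file-based USING clause is recorded as a v1 table
    # whose columns cannot be renamed later (matching OSS Spark behavior):
    spark.sql("CREATE TABLE sales (id INT, amount DOUBLE) USING parquet")

    # CREATE TABLE AS SELECT is recorded as a v2 table that keeps rename support:
    spark.sql("CREATE TABLE sales_copy AS SELECT * FROM sales")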
@@ -727,15 +845,147 @@ def map_sql_to_pandas_df(
                 f"INSERT {overwrite_str} INTO {name} {cols_str} {final_query}",
             ).collect()
         case "MergeIntoTable":
-            …
-            …
-                + "Reason: This command is a platform-specific SQL extension and is not part of the standard Apache Spark specification that this interface uses."
+            source_df_container = map_relation(
+                map_logical_plan_relation(logical_plan.sourceTable())
             )
+            source_df = source_df_container.dataframe
+            plan_id = gen_sql_plan_id()
+            target_df_container = map_relation(
+                map_logical_plan_relation(logical_plan.targetTable(), plan_id)
+            )
+            target_df = target_df_container.dataframe
+
+            if (
+                logical_plan.targetTable().getClass().getSimpleName()
+                == "UnresolvedRelation"
+            ):
+                target_table_name = _spark_to_snowflake(
+                    logical_plan.targetTable().multipartIdentifier()
+                )
+            else:
+                target_table_name = _spark_to_snowflake(
+                    logical_plan.targetTable().child().multipartIdentifier()
+                )
+
+            target_table = session.table(target_table_name)
+            target_table_columns = target_table.columns
+            target_df_spark_names = []
+            for target_table_col, target_df_col in zip(
+                target_table_columns, target_df_container.column_map.columns
+            ):
+                target_df = target_df.with_column_renamed(
+                    target_df_col.snowpark_name,
+                    target_table_col,
+                )
+                target_df_spark_names.append(target_df_col.spark_name)
+            target_df_container = DataFrameContainer.create_with_column_mapping(
+                dataframe=target_df,
+                spark_column_names=target_df_spark_names,
+                snowpark_column_names=target_table_columns,
+            )
+
+            set_plan_id_map(plan_id, target_df_container)
+
+            joined_df_before_condition: snowpark.DataFrame = source_df.join(
+                target_df
+            )
+
+            column_mapping_for_conditions = column_name_handler.JoinColumnNameMap(
+                source_df_container.column_map,
+                target_df_container.column_map,
+            )
+            typer_for_expressions = ExpressionTyper(joined_df_before_condition)
+
+            (_, merge_condition_typed_col,) = map_single_column_expression(
+                map_logical_plan_expression(logical_plan.mergeCondition()),
+                column_mapping=column_mapping_for_conditions,
+                typer=typer_for_expressions,
+            )
+
+            clauses = []
+
+            for matched_action in as_java_list(logical_plan.matchedActions()):
+                condition = _get_condition_from_action(
+                    matched_action,
+                    column_mapping_for_conditions,
+                    typer_for_expressions,
+                )
+                if matched_action.getClass().getSimpleName() == "DeleteAction":
+                    clauses.append(when_matched(condition).delete())
+                elif (
+                    matched_action.getClass().getSimpleName() == "UpdateAction"
+                    or matched_action.getClass().getSimpleName()
+                    == "UpdateStarAction"
+                ):
+                    assignments = _get_assignments_from_action(
+                        matched_action,
+                        source_df_container.column_map,
+                        target_df_container.column_map,
+                        ExpressionTyper(source_df),
+                        ExpressionTyper(target_df),
+                    )
+                    clauses.append(when_matched(condition).update(assignments))
+
+            for not_matched_action in as_java_list(
+                logical_plan.notMatchedActions()
+            ):
+                condition = _get_condition_from_action(
+                    not_matched_action,
+                    column_mapping_for_conditions,
+                    typer_for_expressions,
+                )
+                if (
+                    not_matched_action.getClass().getSimpleName() == "InsertAction"
+                    or not_matched_action.getClass().getSimpleName()
+                    == "InsertStarAction"
+                ):
+                    assignments = _get_assignments_from_action(
+                        not_matched_action,
+                        source_df_container.column_map,
+                        target_df_container.column_map,
+                        ExpressionTyper(source_df),
+                        ExpressionTyper(target_df),
+                    )
+                    clauses.append(when_not_matched(condition).insert(assignments))
+
+            if not as_java_list(logical_plan.notMatchedBySourceActions()).isEmpty():
+                raise SnowparkConnectNotImplementedError(
+                    "Snowflake does not support 'not matched by source' actions in MERGE statements."
+                )
+
+            target_table.merge(source_df, merge_condition_typed_col.col, clauses)
         case "DeleteFromTable":
-            …
-            …
-            …
+            df_container = map_relation(
+                map_logical_plan_relation(logical_plan.table())
+            )
+            name = get_relation_identifier_name(logical_plan.table(), True)
+            table = session.table(name)
+            table_columns = table.columns
+            df = df_container.dataframe
+            spark_names = []
+            for table_col, df_col in zip(
+                table_columns, df_container.column_map.columns
+            ):
+                df = df.with_column_renamed(
+                    df_col.snowpark_name,
+                    table_col,
+                )
+                spark_names.append(df_col.spark_name)
+            df_container = DataFrameContainer.create_with_column_mapping(
+                dataframe=df,
+                spark_column_names=spark_names,
+                snowpark_column_names=table_columns,
+            )
+            df = df_container.dataframe
+            (
+                condition_column_name,
+                condition_typed_col,
+            ) = map_single_column_expression(
+                map_logical_plan_expression(logical_plan.condition()),
+                df_container.column_map,
+                ExpressionTyper(df),
             )
+            table.delete(condition_typed_col.col)
         case "UpdateTable":
             # Databricks/Delta-specific extension not supported by SAS.
             # Provide an actionable, clear error.
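Note (not part of the diff): the effect of this hunk is that MERGE INTO no longer raises a not-implemented error; it is translated into a Snowpark Table.merge call built from when_matched/when_not_matched clauses, while WHEN NOT MATCHED BY SOURCE clauses still raise SnowparkConnectNotImplementedError. A statement of the kind this path now handles (invented table and column names):

    spark.sql("""
        MERGE INTO target t
        USING source s
        ON t.id = s.id
        WHEN MATCHED AND s.op = 'D' THEN DELETE
        WHEN MATCHED THEN UPDATE SET t.amount = s.amount
        WHEN NOT MATCHED THEN INSERT (id, amount) VALUES (s.id, s.amount)
    """)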
@@ -744,7 +994,20 @@ def map_sql_to_pandas_df(
                 + "Reason: This command is a platform-specific SQL extension and is not part of the standard Apache Spark specification that this interface uses."
             )
         case "RenameColumn":
-            …
+            full_table_identifier = get_relation_identifier_name(
+                logical_plan.table(), True
+            )
+
+            # Check Spark compatibility for RENAME COLUMN operation
+            if not check_table_supports_operation(
+                full_table_identifier, "rename_column"
+            ):
+                raise AnalysisException(
+                    f"ALTER TABLE RENAME COLUMN is not supported for table '{full_table_identifier}'. "
+                    f"This table was created as a v1 table with a data source that doesn't support column renaming. "
+                    f"To enable this operation, set 'enable_snowflake_extension_behavior' to 'true'."
+                )
+
             column_obj = logical_plan.column()
             old_column_name = ".".join(
                 spark_to_sf_single_id(str(part), is_column=True)
@@ -754,7 +1017,7 @@ def map_sql_to_pandas_df(
             case_insensitive_name = next(
                 (
                     f.name
-                    for f in session.table(
+                    for f in session.table(full_table_identifier).schema.fields
                     if f.name.lower() == old_column_name.lower()
                 ),
                 None,
@@ -766,7 +1029,7 @@ def map_sql_to_pandas_df(
             )
 
             # Pass through to Snowflake
-            snowflake_sql = f"ALTER TABLE {
+            snowflake_sql = f"ALTER TABLE {full_table_identifier} RENAME COLUMN {old_column_name} TO {new_column_name}"
             session.sql(snowflake_sql).collect()
         case "RenameTable":
             name = get_relation_identifier_name(logical_plan.child(), True)
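Note (not part of the diff): combined with the table metadata recorded at CREATE TABLE time, the rename path now behaves roughly as follows (invented names):

    spark.sql("CREATE TABLE t1 (c1 INT) USING parquet")   # recorded as v1, rename not supported
    spark.sql("ALTER TABLE t1 RENAME COLUMN c1 TO c2")    # raises AnalysisException

    spark.sql("CREATE TABLE t2 AS SELECT 1 AS c1")        # recorded as v2, rename supported
    spark.sql("ALTER TABLE t2 RENAME COLUMN c1 TO c2")    # passes through to Snowflake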
@@ -997,6 +1260,90 @@ def get_sql_passthrough() -> bool:
     return get_boolean_session_config_param("snowpark.connect.sql.passthrough")
 
 
+def change_default_to_public(name: str) -> str:
+    """
+    Change the namespace to PUBLIC when given name is DEFAULT
+    :param name: Given namespace
+    :return: if name is DEFAULT return PUBLIC otherwise name
+    """
+    if name.startswith('"'):
+        if name.upper() == '"DEFAULT"':
+            return name.replace("DEFAULT", "PUBLIC")
+    elif name.upper() == "DEFAULT":
+        return "PUBLIC"
+    return name
+
+
+def _preprocess_identifier_calls(sql_query: str) -> str:
+    """
+    Pre-process SQL query to resolve IDENTIFIER() calls before Spark parsing.
+
+    Transforms: IDENTIFIER('abs')(c2) -> abs(c2)
+    Transforms: IDENTIFIER('COAL' || 'ESCE')(NULL, 1) -> COALESCE(NULL, 1)
+
+    This preserves all function arguments in their original positions, eliminating
+    the need to reconstruct them at the expression level.
+    """
+    import re
+
+    # Pattern to match IDENTIFIER(...) followed by optional function call arguments
+    # This captures both the identifier expression and any trailing arguments
+    # Note: We need to be careful about whitespace preservation
+    identifier_pattern = r"IDENTIFIER\s*\(\s*([^)]+)\s*\)(\s*)(\([^)]*\))?"
+
+    def resolve_identifier_match(match):
+        identifier_expr_str = match.group(1).strip()
+        whitespace = match.group(2) if match.group(2) else ""
+        function_args = match.group(3) if match.group(3) else ""
+
+        try:
+            # Handle string concatenation FIRST: IDENTIFIER('COAL' || 'ESCE')
+            # (Must check this before simple strings since it also starts/ends with quotes)
+            if "||" in identifier_expr_str:
+                # Parse basic string concatenation with proper quote handling
+                parts = []
+                split_parts = identifier_expr_str.split("||")
+                for part in split_parts:
+                    part = part.strip()
+                    if part.startswith("'") and part.endswith("'"):
+                        unquoted = part[1:-1]  # Remove quotes from each part
+                        parts.append(unquoted)
+                    else:
+                        # Non-string parts - return original for safety
+                        return match.group(0)
+                resolved_name = "".join(parts)  # Concatenate the unquoted parts
+
+            # Handle simple string literals: IDENTIFIER('abs')
+            elif identifier_expr_str.startswith("'") and identifier_expr_str.endswith(
+                "'"
+            ):
+                resolved_name = identifier_expr_str[1:-1]  # Remove quotes
+
+            else:
+                # Complex expressions not supported yet - return original
+                return match.group(0)
+
+            # Return resolved function call with preserved arguments and whitespace
+            if function_args:
+                # Function call case: IDENTIFIER('abs')(c1) -> abs(c1)
+                result = f"{resolved_name}{function_args}"
+            else:
+                # Column reference case: IDENTIFIER('c1') FROM -> c1 FROM (preserve whitespace)
+                result = f"{resolved_name}{whitespace}"
+            return result
+
+        except Exception:
+            # Return original to avoid breaking the query
+            return match.group(0)
+
+    # Apply the transformation
+    processed_query = re.sub(
+        identifier_pattern, resolve_identifier_match, sql_query, flags=re.IGNORECASE
+    )
+
+    return processed_query
+
+
 def map_sql(
     rel: relation_proto.Relation,
 ) -> DataFrameContainer:
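Note (not part of the diff): _preprocess_identifier_calls rewrites the SQL text before it reaches the Spark parser; the examples below follow directly from its docstring and the regex shown above:

    _preprocess_identifier_calls("SELECT IDENTIFIER('abs')(c2) FROM t")
    # -> "SELECT abs(c2) FROM t"

    _preprocess_identifier_calls("SELECT IDENTIFIER('COAL' || 'ESCE')(NULL, 1)")
    # -> "SELECT COALESCE(NULL, 1)"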
@@ -1008,7 +1355,6 @@ def map_sql(
     In passthough mode as True, SAS calls session.sql() and not calling Spark Parser.
     This is to mitigate any issue not covered by spark logical plan to protobuf conversion.
     """
-
     snowpark_connect_sql_passthrough = get_sql_passthrough()
 
     if not snowpark_connect_sql_passthrough:
@@ -1353,6 +1699,7 @@ def map_logical_plan_relation(
                 left_input=map_logical_plan_relation(children[0]),
                 right_input=map_logical_plan_relation(children[1]),
                 set_op_type=relation_proto.SetOperation.SET_OP_TYPE_UNION,
+                is_all=True,
                 by_name=rel.byName(),
                 allow_missing_columns=rel.allowMissingCol(),
             )
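Note (not part of the diff): is_all=True matches the semantics of Spark's logical Union node, which keeps duplicate rows; deduplication only happens when the plan wraps the union in a Distinct. A quick illustration with the DataFrame API (invented data):

    df1 = spark.createDataFrame([(1, "a")], ["id", "v"])
    df2 = spark.createDataFrame([("a", 1)], ["v", "id"])
    # Columns are matched by name and duplicate rows are kept (UNION ALL semantics).
    df1.unionByName(df2).count()  # 2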
@@ -1701,7 +2048,50 @@ def map_logical_plan_relation(
             _window_specs.get()[key] = window_spec
             proto = map_logical_plan_relation(rel.child())
         case "Generate":
-            …
+            # Generate creates a nested Project relation (see lines 1785-1790) without
+            # setting its plan_id field. When this Project is later processed by map_project
+            # (map_column_ops.py), it uses rel.common.plan_id which defaults to 0 for unset
+            # protobuf fields. This means all columns from the Generate operation (both exploded
+            # columns and passthrough columns) will have plan_id=0 in their names.
+            #
+            # If Generate's child is a SubqueryAlias whose inner relation was processed
+            # with a non-zero plan_id, there will be a mismatch between:
+            # - The columns referenced in the Project (expecting plan_id from SubqueryAlias's child)
+            # - The actual column names created by Generate's Project (using plan_id=0)
+
+            # Therefore, when Generate has a SubqueryAlias child, we explicitly process the inner
+            # relation with plan_id=0 to match what Generate's Project will use. This only applies when
+            # the immediate child of Generate is a SubqueryAlias and preserves existing registrations (like CTEs),
+            # so it won't affect other patterns.
+
+            child_class = str(rel.child().getClass().getSimpleName())
+
+            if child_class == "SubqueryAlias":
+                alias = str(rel.child().alias())
+
+                # Check if this alias was already registered during initial SQL parsing
+                existing_plan_id = get_sql_plan(alias)
+
+                if existing_plan_id is not None:
+                    # Use the existing plan_id to maintain consistency with prior registration
+                    used_plan_id = existing_plan_id
+                else:
+                    # Use plan_id=0 to match what the nested Project will use (protobuf default)
+                    used_plan_id = 0
+                    set_sql_plan_name(alias, used_plan_id)
+
+                # Process the inner child with the determined plan_id
+                inner_child = map_logical_plan_relation(
+                    rel.child().child(), plan_id=used_plan_id
+                )
+                input_relation = relation_proto.Relation(
+                    subquery_alias=relation_proto.SubqueryAlias(
+                        input=inner_child,
+                        alias=alias,
+                    )
+                )
+            else:
+                input_relation = map_logical_plan_relation(rel.child())
             generator_output_list = as_java_list(rel.generatorOutput())
             generator_output_list_expressions = [
                 map_logical_plan_expression(e) for e in generator_output_list
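Note (not part of the diff): the pattern this guards is a generator (for example explode) applied on top of an aliased subquery, roughly of the following shape (invented names):

    spark.sql("""
        SELECT t.id, x
        FROM (SELECT id, arr FROM base_table) AS t
        LATERAL VIEW explode(t.arr) ex AS x
    """)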
@@ -1784,8 +2174,11 @@ def map_logical_plan_relation(
 
 
 def get_relation_identifier_name(name_obj, is_multi_part: bool = False) -> str:
-    if name_obj.getClass().getSimpleName()
-        …
+    if name_obj.getClass().getSimpleName() in (
+        "PlanWithUnresolvedIdentifier",
+        "ExpressionWithUnresolvedIdentifier",
+    ):
+        # IDENTIFIER(<table_name>), or IDENTIFIER(<method name>)
         expr_proto = map_logical_plan_expression(name_obj.identifierExpr())
         session = snowpark.Session.get_active_session()
         m = ColumnNameMap([], [], None)
@@ -1797,7 +2190,12 @@ def get_relation_identifier_name(name_obj, is_multi_part: bool = False) -> str:
         )
     else:
         if is_multi_part:
-            …
+            try:
+                # Try multipartIdentifier first for full catalog.database.table
+                name = _spark_to_snowflake(name_obj.multipartIdentifier())
+            except AttributeError:
+                # Fallback to nameParts if multipartIdentifier not available
+                name = _spark_to_snowflake(name_obj.nameParts())
         else:
             name = _spark_to_snowflake(name_obj.nameParts())
 