snowpark-connect 0.27.0__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/column_name_handler.py +3 -93
- snowflake/snowpark_connect/config.py +99 -1
- snowflake/snowpark_connect/dataframe_container.py +0 -6
- snowflake/snowpark_connect/expression/map_expression.py +22 -7
- snowflake/snowpark_connect/expression/map_sql_expression.py +22 -18
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +4 -26
- snowflake/snowpark_connect/expression/map_unresolved_function.py +12 -3
- snowflake/snowpark_connect/expression/map_unresolved_star.py +2 -3
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/relation/map_extension.py +14 -10
- snowflake/snowpark_connect/relation/map_join.py +62 -258
- snowflake/snowpark_connect/relation/map_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_sql.py +353 -16
- snowflake/snowpark_connect/relation/write/map_write.py +171 -110
- snowflake/snowpark_connect/resources_initializer.py +20 -5
- snowflake/snowpark_connect/server.py +16 -17
- snowflake/snowpark_connect/utils/concurrent.py +4 -0
- snowflake/snowpark_connect/utils/describe_query_cache.py +57 -51
- snowflake/snowpark_connect/utils/identifiers.py +120 -0
- snowflake/snowpark_connect/utils/io_utils.py +21 -1
- snowflake/snowpark_connect/utils/scala_udf_utils.py +34 -43
- snowflake/snowpark_connect/utils/session.py +16 -26
- snowflake/snowpark_connect/utils/telemetry.py +53 -0
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/METADATA +2 -2
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/RECORD +34 -35
- snowflake/snowpark_connect/hidden_column.py +0 -39
- {snowpark_connect-0.27.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/top_level.txt +0 -0
@@ -4,7 +4,7 @@
 
 import re
 from collections.abc import MutableMapping, MutableSequence
-from contextlib import contextmanager
+from contextlib import contextmanager, suppress
 from contextvars import ContextVar
 from functools import reduce
 
@@ -30,10 +30,13 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
 )
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 from snowflake.snowpark._internal.utils import is_sql_select_statement, quote_name
+from snowflake.snowpark.functions import when_matched, when_not_matched
 from snowflake.snowpark_connect.config import (
     auto_uppercase_non_column_identifiers,
+    check_table_supports_operation,
     get_boolean_session_config_param,
     global_config,
+    record_table_metadata,
     set_config_param,
     unset_config_param,
 )
@@ -59,6 +62,7 @@ from snowflake.snowpark_connect.utils.context import (
     get_sql_plan,
     push_evaluating_sql_scope,
     push_sql_scope,
+    set_plan_id_map,
     set_sql_args,
     set_sql_plan_name,
 )
@@ -68,6 +72,7 @@ from snowflake.snowpark_connect.utils.telemetry import (
     telemetry,
 )
 
+from .. import column_name_handler
 from ..expression.map_sql_expression import (
     _window_specs,
     as_java_list,
@@ -203,6 +208,9 @@ def _rename_columns(
 def _create_table_as_select(logical_plan, mode: str) -> None:
     # TODO: for as select create tables we'd map multi layer identifier here
     name = get_relation_identifier_name(logical_plan.name())
+    full_table_identifier = get_relation_identifier_name(
+        logical_plan.name(), is_multi_part=True
+    )
     comment = logical_plan.tableSpec().comment()
 
     container = execute_logical_plan(logical_plan.query())
@@ -223,6 +231,15 @@ def _create_table_as_select(logical_plan, mode: str) -> None:
         mode=mode,
     )
 
+    # Record table metadata for CREATE TABLE AS SELECT
+    # These are typically considered v2 tables and support RENAME COLUMN
+    record_table_metadata(
+        table_identifier=full_table_identifier,
+        table_type="v2",
+        data_source="default",
+        supports_column_rename=True,
+    )
+
 
 def _spark_field_to_sql(field: jpype.JObject, is_column: bool) -> str:
     # Column names will be uppercased according to "snowpark.connect.sql.identifiers.auto-uppercase",
@@ -300,6 +317,65 @@ def _remove_column_data_type(node):
     return node
 
 
+def _get_condition_from_action(action, column_mapping, typer):
+    condition = None
+    if action.condition().isDefined():
+        (_, condition_typed_col,) = map_single_column_expression(
+            map_logical_plan_expression(action.condition().get()),
+            column_mapping,
+            typer,
+        )
+        condition = condition_typed_col.col
+    return condition
+
+
+def _get_assignments_from_action(
+    action,
+    column_mapping_source,
+    column_mapping_target,
+    typer_source,
+    typer_target,
+):
+    assignments = dict()
+    if (
+        action.getClass().getSimpleName() == "InsertAction"
+        or action.getClass().getSimpleName() == "UpdateAction"
+    ):
+        incoming_assignments = as_java_list(action.assignments())
+        for assignment in incoming_assignments:
+            (_, key_typ_col) = map_single_column_expression(
+                map_logical_plan_expression(assignment.key()),
+                column_mapping=column_mapping_target,
+                typer=typer_target,
+            )
+            key_name = typer_target.df.select(key_typ_col.col).columns[0]
+
+            (_, val_typ_col) = map_single_column_expression(
+                map_logical_plan_expression(assignment.value()),
+                column_mapping=column_mapping_source,
+                typer=typer_source,
+            )
+
+            assignments[key_name] = val_typ_col.col
+    elif (
+        action.getClass().getSimpleName() == "InsertStarAction"
+        or action.getClass().getSimpleName() == "UpdateStarAction"
+    ):
+        if len(column_mapping_source.columns) != len(column_mapping_target.columns):
+            raise ValueError(
+                "source and target must have the same number of columns for InsertStarAction or UpdateStarAction"
+            )
+        for i, col in enumerate(column_mapping_target.columns):
+            if assignments.get(col.snowpark_name) is not None:
+                raise SnowparkConnectNotImplementedError(
+                    "UpdateStarAction or InsertStarAction is not supported with duplicate columns."
+                )
+            assignments[col.snowpark_name] = snowpark_fn.col(
+                column_mapping_source.columns[i].snowpark_name
+            )
+    return assignments
+
+
 def map_sql_to_pandas_df(
     sql_string: str,
     named_args: MutableMapping[str, expressions_proto.Expression.Literal],
@@ -421,6 +497,9 @@ def map_sql_to_pandas_df(
             )
 
             name = get_relation_identifier_name(logical_plan.name())
+            full_table_identifier = get_relation_identifier_name(
+                logical_plan.name(), is_multi_part=True
+            )
             columns = ", ".join(
                 _spark_field_to_sql(f, True)
                 for f in logical_plan.tableSchema().fields()
@@ -431,10 +510,48 @@ def map_sql_to_pandas_df(
                 if comment_opt.isDefined()
                 else ""
             )
+
+            # Extract data source for metadata tracking
+            data_source = "default"
+
+            with suppress(Exception):
+                # Get data source from tableSpec.provider() (for USING clause)
+                if hasattr(logical_plan, "tableSpec"):
+                    table_spec = logical_plan.tableSpec()
+                    if hasattr(table_spec, "provider"):
+                        provider_opt = table_spec.provider()
+                        if provider_opt.isDefined():
+                            data_source = str(provider_opt.get()).lower()
+                        else:
+                            # Fall back to checking properties for FORMAT
+                            table_properties = table_spec.properties()
+                            if not table_properties.isEmpty():
+                                for prop in table_properties.get():
+                                    if str(prop.key()) == "FORMAT":
+                                        data_source = str(prop.value()).lower()
+                                        break
+
             # NOTE: We are intentionally ignoring any FORMAT=... parameters here.
             session.sql(
                 f"CREATE {replace_table} TABLE {if_not_exists}{name} ({columns}) {comment}"
             ).collect()
+
+            # Record table metadata for Spark compatibility
+            # Tables created with explicit schema are considered v1 tables
+            # v1 tables with certain data sources don't support RENAME COLUMN in OSS Spark
+            supports_rename = data_source not in (
+                "parquet",
+                "csv",
+                "json",
+                "orc",
+                "avro",
+            )
+            record_table_metadata(
+                table_identifier=full_table_identifier,
+                table_type="v1",
+                data_source=data_source,
+                supports_column_rename=supports_rename,
+            )
         case "CreateTableAsSelect":
             mode = "ignore" if logical_plan.ignoreIfExists() else "errorifexists"
             _create_table_as_select(logical_plan, mode=mode)
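
The two record_table_metadata calls above encode a simple compatibility rule: CTAS tables are tracked as v2 and always renameable, while v1 tables (explicit schema) backed by file formats are not. The sketch below only restates that rule for illustration; supports_column_rename() here is a hypothetical stand-in, not an API exposed by the package.

```python
# Illustrative sketch of the rule recorded via record_table_metadata above.
UNSUPPORTED_RENAME_SOURCES = {"parquet", "csv", "json", "orc", "avro"}

def supports_column_rename(table_type: str, data_source: str) -> bool:
    if table_type == "v2":  # CREATE TABLE AS SELECT path
        return True
    # v1 tables (explicit schema) depend on the data source
    return data_source.lower() not in UNSUPPORTED_RENAME_SOURCES

assert supports_column_rename("v2", "default") is True
assert supports_column_rename("v1", "default") is True
assert supports_column_rename("v1", "parquet") is False
```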
@@ -543,7 +660,6 @@ def map_sql_to_pandas_df(
             rows = session.sql(f"DESCRIBE TABLE {name}").collect()
         case "DescribeNamespace":
             name = get_relation_identifier_name(logical_plan.namespace(), True)
-            name = change_default_to_public(name)
             rows = session.sql(f"DESCRIBE SCHEMA {name}").collect()
             if not rows:
                 rows = None
@@ -729,15 +845,147 @@ def map_sql_to_pandas_df(
                 f"INSERT {overwrite_str} INTO {name} {cols_str} {final_query}",
             ).collect()
         case "MergeIntoTable":
-
-
-                + "Reason: This command is a platform-specific SQL extension and is not part of the standard Apache Spark specification that this interface uses."
+            source_df_container = map_relation(
+                map_logical_plan_relation(logical_plan.sourceTable())
             )
+            source_df = source_df_container.dataframe
+            plan_id = gen_sql_plan_id()
+            target_df_container = map_relation(
+                map_logical_plan_relation(logical_plan.targetTable(), plan_id)
+            )
+            target_df = target_df_container.dataframe
+
+            if (
+                logical_plan.targetTable().getClass().getSimpleName()
+                == "UnresolvedRelation"
+            ):
+                target_table_name = _spark_to_snowflake(
+                    logical_plan.targetTable().multipartIdentifier()
+                )
+            else:
+                target_table_name = _spark_to_snowflake(
+                    logical_plan.targetTable().child().multipartIdentifier()
+                )
+
+            target_table = session.table(target_table_name)
+            target_table_columns = target_table.columns
+            target_df_spark_names = []
+            for target_table_col, target_df_col in zip(
+                target_table_columns, target_df_container.column_map.columns
+            ):
+                target_df = target_df.with_column_renamed(
+                    target_df_col.snowpark_name,
+                    target_table_col,
+                )
+                target_df_spark_names.append(target_df_col.spark_name)
+            target_df_container = DataFrameContainer.create_with_column_mapping(
+                dataframe=target_df,
+                spark_column_names=target_df_spark_names,
+                snowpark_column_names=target_table_columns,
+            )
+
+            set_plan_id_map(plan_id, target_df_container)
+
+            joined_df_before_condition: snowpark.DataFrame = source_df.join(
+                target_df
+            )
+
+            column_mapping_for_conditions = column_name_handler.JoinColumnNameMap(
+                source_df_container.column_map,
+                target_df_container.column_map,
+            )
+            typer_for_expressions = ExpressionTyper(joined_df_before_condition)
+
+            (_, merge_condition_typed_col,) = map_single_column_expression(
+                map_logical_plan_expression(logical_plan.mergeCondition()),
+                column_mapping=column_mapping_for_conditions,
+                typer=typer_for_expressions,
+            )
+
+            clauses = []
+
+            for matched_action in as_java_list(logical_plan.matchedActions()):
+                condition = _get_condition_from_action(
+                    matched_action,
+                    column_mapping_for_conditions,
+                    typer_for_expressions,
+                )
+                if matched_action.getClass().getSimpleName() == "DeleteAction":
+                    clauses.append(when_matched(condition).delete())
+                elif (
+                    matched_action.getClass().getSimpleName() == "UpdateAction"
+                    or matched_action.getClass().getSimpleName()
+                    == "UpdateStarAction"
+                ):
+                    assignments = _get_assignments_from_action(
+                        matched_action,
+                        source_df_container.column_map,
+                        target_df_container.column_map,
+                        ExpressionTyper(source_df),
+                        ExpressionTyper(target_df),
+                    )
+                    clauses.append(when_matched(condition).update(assignments))
+
+            for not_matched_action in as_java_list(
+                logical_plan.notMatchedActions()
+            ):
+                condition = _get_condition_from_action(
+                    not_matched_action,
+                    column_mapping_for_conditions,
+                    typer_for_expressions,
+                )
+                if (
+                    not_matched_action.getClass().getSimpleName() == "InsertAction"
+                    or not_matched_action.getClass().getSimpleName()
+                    == "InsertStarAction"
+                ):
+                    assignments = _get_assignments_from_action(
+                        not_matched_action,
+                        source_df_container.column_map,
+                        target_df_container.column_map,
+                        ExpressionTyper(source_df),
+                        ExpressionTyper(target_df),
+                    )
+                    clauses.append(when_not_matched(condition).insert(assignments))
+
+            if not as_java_list(logical_plan.notMatchedBySourceActions()).isEmpty():
+                raise SnowparkConnectNotImplementedError(
+                    "Snowflake does not support 'not matched by source' actions in MERGE statements."
+                )
+
+            target_table.merge(source_df, merge_condition_typed_col.col, clauses)
         case "DeleteFromTable":
-
-
-                + "Reason: This command is a platform-specific SQL extension and is not part of the standard Apache Spark specification that this interface uses."
+            df_container = map_relation(
+                map_logical_plan_relation(logical_plan.table())
             )
+            name = get_relation_identifier_name(logical_plan.table(), True)
+            table = session.table(name)
+            table_columns = table.columns
+            df = df_container.dataframe
+            spark_names = []
+            for table_col, df_col in zip(
+                table_columns, df_container.column_map.columns
+            ):
+                df = df.with_column_renamed(
+                    df_col.snowpark_name,
+                    table_col,
+                )
+                spark_names.append(df_col.spark_name)
+            df_container = DataFrameContainer.create_with_column_mapping(
+                dataframe=df,
+                spark_column_names=spark_names,
+                snowpark_column_names=table_columns,
+            )
+            df = df_container.dataframe
+            (
+                condition_column_name,
+                condition_typed_col,
+            ) = map_single_column_expression(
+                map_logical_plan_expression(logical_plan.condition()),
+                df_container.column_map,
+                ExpressionTyper(df),
+            )
+            table.delete(condition_typed_col.col)
         case "UpdateTable":
            # Databricks/Delta-specific extension not supported by SAS.
            # Provide an actionable, clear error.
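
For context, the two branches above replace "not implemented" errors with real translations: MERGE INTO becomes a snowpark Table.merge with when_matched()/when_not_matched() clauses, and DELETE FROM becomes Table.delete. The rough sketch below shows the kind of client statements this enables; the endpoint URL and table names are assumptions for illustration, and it presumes the source and target tables already exist.

```python
from pyspark.sql import SparkSession

# Sketch only: endpoint URL and table names are illustrative assumptions.
spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

# Handled by the new "MergeIntoTable" branch.
spark.sql("""
    MERGE INTO target t
    USING source s
    ON t.id = s.id
    WHEN MATCHED THEN UPDATE SET t.value = s.value
    WHEN NOT MATCHED THEN INSERT (id, value) VALUES (s.id, s.value)
""")

# Handled by the new "DeleteFromTable" branch.
spark.sql("DELETE FROM target WHERE value IS NULL")
```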
@@ -746,7 +994,20 @@ def map_sql_to_pandas_df(
                 + "Reason: This command is a platform-specific SQL extension and is not part of the standard Apache Spark specification that this interface uses."
             )
         case "RenameColumn":
-
+            full_table_identifier = get_relation_identifier_name(
+                logical_plan.table(), True
+            )
+
+            # Check Spark compatibility for RENAME COLUMN operation
+            if not check_table_supports_operation(
+                full_table_identifier, "rename_column"
+            ):
+                raise AnalysisException(
+                    f"ALTER TABLE RENAME COLUMN is not supported for table '{full_table_identifier}'. "
+                    f"This table was created as a v1 table with a data source that doesn't support column renaming. "
+                    f"To enable this operation, set 'enable_snowflake_extension_behavior' to 'true'."
+                )
+
             column_obj = logical_plan.column()
             old_column_name = ".".join(
                 spark_to_sf_single_id(str(part), is_column=True)
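
Combined with the metadata recorded at CREATE TABLE time, this check makes RENAME COLUMN mirror OSS Spark's restriction on v1 file-format tables. A sketch, assuming the same kind of Spark Connect session as above and an illustrative table name:

```python
from pyspark.sql import SparkSession

# Sketch only: endpoint URL and table name are illustrative assumptions.
spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

# Recorded as a v1 parquet table by the CreateTable branch above.
spark.sql("CREATE TABLE events (id INT, label STRING) USING parquet")

# Per the new check, this is rejected with AnalysisException unless
# 'enable_snowflake_extension_behavior' is set to 'true' (quoting the error message).
spark.sql("ALTER TABLE events RENAME COLUMN label TO name")
```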
@@ -756,7 +1017,7 @@ def map_sql_to_pandas_df(
             case_insensitive_name = next(
                 (
                     f.name
-                    for f in session.table(
+                    for f in session.table(full_table_identifier).schema.fields
                     if f.name.lower() == old_column_name.lower()
                 ),
                 None,
@@ -768,7 +1029,7 @@ def map_sql_to_pandas_df(
             )
 
             # Pass through to Snowflake
-            snowflake_sql = f"ALTER TABLE {
+            snowflake_sql = f"ALTER TABLE {full_table_identifier} RENAME COLUMN {old_column_name} TO {new_column_name}"
             session.sql(snowflake_sql).collect()
         case "RenameTable":
             name = get_relation_identifier_name(logical_plan.child(), True)
@@ -795,7 +1056,6 @@ def map_sql_to_pandas_df(
         case "SetCatalogAndNamespace":
             # TODO: add catalog setting here
             name = get_relation_identifier_name(logical_plan.child(), True)
-            name = change_default_to_public(name)
             session.sql(f"USE SCHEMA {name}").collect()
         case "SetCommand":
             kv_result_tuple = logical_plan.kv().get()
@@ -804,7 +1064,6 @@ def map_sql_to_pandas_df(
             set_config_param(get_session_id(), key, val, session)
         case "SetNamespaceCommand":
             name = _spark_to_snowflake(logical_plan.namespace())
-            name = change_default_to_public(name)
             session.sql(f"USE SCHEMA {name}").collect()
         case "SetNamespaceLocation" | "SetNamespaceProperties":
             raise SnowparkConnectNotImplementedError(
@@ -1015,6 +1274,76 @@ def change_default_to_public(name: str) -> str:
     return name
 
 
+def _preprocess_identifier_calls(sql_query: str) -> str:
+    """
+    Pre-process SQL query to resolve IDENTIFIER() calls before Spark parsing.
+
+    Transforms: IDENTIFIER('abs')(c2) -> abs(c2)
+    Transforms: IDENTIFIER('COAL' || 'ESCE')(NULL, 1) -> COALESCE(NULL, 1)
+
+    This preserves all function arguments in their original positions, eliminating
+    the need to reconstruct them at the expression level.
+    """
+    import re
+
+    # Pattern to match IDENTIFIER(...) followed by optional function call arguments
+    # This captures both the identifier expression and any trailing arguments
+    # Note: We need to be careful about whitespace preservation
+    identifier_pattern = r"IDENTIFIER\s*\(\s*([^)]+)\s*\)(\s*)(\([^)]*\))?"
+
+    def resolve_identifier_match(match):
+        identifier_expr_str = match.group(1).strip()
+        whitespace = match.group(2) if match.group(2) else ""
+        function_args = match.group(3) if match.group(3) else ""
+
+        try:
+            # Handle string concatenation FIRST: IDENTIFIER('COAL' || 'ESCE')
+            # (Must check this before simple strings since it also starts/ends with quotes)
+            if "||" in identifier_expr_str:
+                # Parse basic string concatenation with proper quote handling
+                parts = []
+                split_parts = identifier_expr_str.split("||")
+                for part in split_parts:
+                    part = part.strip()
+                    if part.startswith("'") and part.endswith("'"):
+                        unquoted = part[1:-1]  # Remove quotes from each part
+                        parts.append(unquoted)
+                    else:
+                        # Non-string parts - return original for safety
+                        return match.group(0)
+                resolved_name = "".join(parts)  # Concatenate the unquoted parts
+
+            # Handle simple string literals: IDENTIFIER('abs')
+            elif identifier_expr_str.startswith("'") and identifier_expr_str.endswith(
+                "'"
+            ):
+                resolved_name = identifier_expr_str[1:-1]  # Remove quotes
+
+            else:
+                # Complex expressions not supported yet - return original
+                return match.group(0)
+
+            # Return resolved function call with preserved arguments and whitespace
+            if function_args:
+                # Function call case: IDENTIFIER('abs')(c1) -> abs(c1)
+                result = f"{resolved_name}{function_args}"
+            else:
+                # Column reference case: IDENTIFIER('c1') FROM -> c1 FROM (preserve whitespace)
+                result = f"{resolved_name}{whitespace}"
+            return result
+
+        except Exception:
+            # Return original to avoid breaking the query
+            return match.group(0)
+
+    # Apply the transformation
+    processed_query = re.sub(
+        identifier_pattern, resolve_identifier_match, sql_query, flags=re.IGNORECASE
+    )
+
+    return processed_query
+
+
 def map_sql(
     rel: relation_proto.Relation,
 ) -> DataFrameContainer:
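
The new helper is plain string preprocessing, so its effect is easy to see in isolation. The snippet below is a standalone illustration (not an import from the package) that reuses the same regex, reduced to the simple-literal case; anything more complex is left untouched, as in the function above.

```python
import re

# Same pattern as _preprocess_identifier_calls above, applied standalone.
pattern = r"IDENTIFIER\s*\(\s*([^)]+)\s*\)(\s*)(\([^)]*\))?"

def _resolve(m: re.Match) -> str:
    expr, ws, args = m.group(1).strip(), m.group(2) or "", m.group(3) or ""
    if expr.startswith("'") and expr.endswith("'") and "||" not in expr:
        name = expr[1:-1]  # strip the quotes around the literal
        return f"{name}{args}" if args else f"{name}{ws}"
    return m.group(0)  # leave anything more complex untouched

print(re.sub(pattern, _resolve, "SELECT IDENTIFIER('abs')(c2) FROM t", flags=re.IGNORECASE))
# -> SELECT abs(c2) FROM t
```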
@@ -1845,8 +2174,11 @@ def map_logical_plan_relation(
 
 
 def get_relation_identifier_name(name_obj, is_multi_part: bool = False) -> str:
-    if name_obj.getClass().getSimpleName()
-
+    if name_obj.getClass().getSimpleName() in (
+        "PlanWithUnresolvedIdentifier",
+        "ExpressionWithUnresolvedIdentifier",
+    ):
+        # IDENTIFIER(<table_name>), or IDENTIFIER(<method name>)
         expr_proto = map_logical_plan_expression(name_obj.identifierExpr())
         session = snowpark.Session.get_active_session()
         m = ColumnNameMap([], [], None)
@@ -1858,7 +2190,12 @@ def get_relation_identifier_name(name_obj, is_multi_part: bool = False) -> str:
         )
     else:
         if is_multi_part:
-
+            try:
+                # Try multipartIdentifier first for full catalog.database.table
+                name = _spark_to_snowflake(name_obj.multipartIdentifier())
+            except AttributeError:
+                # Fallback to nameParts if multipartIdentifier not available
+                name = _spark_to_snowflake(name_obj.nameParts())
         else:
             name = _spark_to_snowflake(name_obj.nameParts())
 