snowpark-connect 0.20.2__py3-none-any.whl → 0.22.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +3 -2
- snowflake/snowpark_connect/column_name_handler.py +6 -65
- snowflake/snowpark_connect/config.py +47 -17
- snowflake/snowpark_connect/dataframe_container.py +242 -0
- snowflake/snowpark_connect/error/error_utils.py +25 -0
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +13 -23
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +9 -5
- snowflake/snowpark_connect/expression/map_extension.py +2 -1
- snowflake/snowpark_connect/expression/map_udf.py +4 -4
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +8 -7
- snowflake/snowpark_connect/expression/map_unresolved_function.py +481 -170
- snowflake/snowpark_connect/expression/map_unresolved_star.py +8 -8
- snowflake/snowpark_connect/expression/map_update_fields.py +1 -1
- snowflake/snowpark_connect/expression/typer.py +6 -6
- snowflake/snowpark_connect/proto/control_pb2.py +17 -16
- snowflake/snowpark_connect/proto/control_pb2.pyi +17 -17
- snowflake/snowpark_connect/proto/control_pb2_grpc.py +12 -63
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +15 -14
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +19 -14
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +27 -26
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +74 -68
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +5 -5
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +25 -17
- snowflake/snowpark_connect/relation/map_aggregate.py +170 -61
- snowflake/snowpark_connect/relation/map_catalog.py +2 -2
- snowflake/snowpark_connect/relation/map_column_ops.py +227 -145
- snowflake/snowpark_connect/relation/map_crosstab.py +25 -6
- snowflake/snowpark_connect/relation/map_extension.py +81 -56
- snowflake/snowpark_connect/relation/map_join.py +72 -63
- snowflake/snowpark_connect/relation/map_local_relation.py +35 -20
- snowflake/snowpark_connect/relation/map_map_partitions.py +24 -17
- snowflake/snowpark_connect/relation/map_relation.py +22 -16
- snowflake/snowpark_connect/relation/map_row_ops.py +232 -146
- snowflake/snowpark_connect/relation/map_sample_by.py +15 -8
- snowflake/snowpark_connect/relation/map_show_string.py +42 -5
- snowflake/snowpark_connect/relation/map_sql.py +141 -237
- snowflake/snowpark_connect/relation/map_stats.py +88 -39
- snowflake/snowpark_connect/relation/map_subquery_alias.py +13 -14
- snowflake/snowpark_connect/relation/map_udtf.py +10 -13
- snowflake/snowpark_connect/relation/read/map_read.py +8 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +7 -7
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +7 -7
- snowflake/snowpark_connect/relation/read/map_read_json.py +19 -8
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -7
- snowflake/snowpark_connect/relation/read/map_read_socket.py +7 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +25 -16
- snowflake/snowpark_connect/relation/read/map_read_text.py +7 -7
- snowflake/snowpark_connect/relation/read/reader_config.py +1 -0
- snowflake/snowpark_connect/relation/utils.py +11 -5
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +15 -12
- snowflake/snowpark_connect/relation/write/map_write.py +259 -56
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +3 -2
- snowflake/snowpark_connect/server.py +43 -4
- snowflake/snowpark_connect/type_mapping.py +6 -23
- snowflake/snowpark_connect/utils/cache.py +27 -22
- snowflake/snowpark_connect/utils/context.py +33 -17
- snowflake/snowpark_connect/utils/describe_query_cache.py +2 -9
- snowflake/snowpark_connect/utils/{attribute_handling.py → identifiers.py} +47 -0
- snowflake/snowpark_connect/utils/session.py +41 -38
- snowflake/snowpark_connect/utils/telemetry.py +214 -63
- snowflake/snowpark_connect/utils/udxf_import_utils.py +14 -0
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/__init__.py +0 -0
- snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.py +36 -0
- snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.pyi +156 -0
- snowflake/snowpark_decoder/dp_session.py +111 -0
- snowflake/snowpark_decoder/spark_decoder.py +76 -0
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/METADATA +6 -4
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/RECORD +83 -69
- snowpark_connect-0.22.1.dist-info/licenses/LICENSE-binary +568 -0
- snowpark_connect-0.22.1.dist-info/licenses/NOTICE-binary +1533 -0
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/top_level.txt +1 -0
- spark/__init__.py +0 -0
- spark/connect/__init__.py +0 -0
- spark/connect/envelope_pb2.py +31 -0
- spark/connect/envelope_pb2.pyi +46 -0
- snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
- {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/licenses/LICENSE.txt +0 -0
snowflake/snowpark_connect/relation/map_sql.py
@@ -14,7 +14,10 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
 import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 import sqlglot
 from google.protobuf.any_pb2 import Any
-from pyspark.errors.exceptions.base import AnalysisException
+from pyspark.errors.exceptions.base import (
+    AnalysisException,
+    UnsupportedOperationException,
+)
 from sqlglot.expressions import ColumnDef, DataType, FileFormatProperty, Identifier

 import snowflake.snowpark.functions as snowpark_fn
@@ -26,15 +29,15 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     unquote_if_quoted,
 )
 from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
-from snowflake.snowpark._internal.utils import is_sql_select_statement
-from snowflake.snowpark.functions import when_matched, when_not_matched
+from snowflake.snowpark._internal.utils import is_sql_select_statement, quote_name
 from snowflake.snowpark_connect.config import (
-    auto_uppercase_ddl,
+    auto_uppercase_non_column_identifiers,
     get_boolean_session_config_param,
     global_config,
     set_config_param,
     unset_config_param,
 )
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.expression.map_expression import (
     ColumnNameMap,
     map_single_column_expression,
@@ -55,16 +58,15 @@ from snowflake.snowpark_connect.utils.context import (
     get_session_id,
     push_evaluating_sql_scope,
     push_sql_scope,
-    set_plan_id_map,
     set_sql_args,
     set_sql_plan_name,
 )
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
+    telemetry,
 )

-from .. import column_name_handler
 from ..expression.map_sql_expression import (
     _window_specs,
     as_java_list,
@@ -72,6 +74,7 @@ from ..expression.map_sql_expression import (
     map_logical_plan_expression,
     sql_parser,
 )
+from ..utils.identifiers import spark_to_sf_single_id

 _ctes = ContextVar[dict[str, relation_proto.Relation]]("_ctes", default={})

@@ -159,38 +162,33 @@ def parse_pos_args(
     return dict(zip(sorted(positions), pos_args))


-def execute_logical_plan(logical_plan) -> snowpark.DataFrame:
+def execute_logical_plan(logical_plan) -> DataFrameContainer:
     proto = map_logical_plan_relation(logical_plan)
+    telemetry.report_parsed_sql_plan(proto)
     with push_evaluating_sql_scope():
         return map_relation(proto)


-def _spark_to_snowflake_single_id(name: str) -> str:
-    name = quote_name_without_upper_casing(name)
-    return name.upper() if auto_uppercase_ddl() else name
-
-
 def _spark_to_snowflake(multipart_id: jpype.JObject) -> str:
     return ".".join(
-        _spark_to_snowflake_single_id(str(part)) for part in as_java_list(multipart_id)
+        spark_to_sf_single_id(str(part)) for part in as_java_list(multipart_id)
     )


 def _rename_columns(
-    df: snowpark.DataFrame, user_specified_columns
+    df: snowpark.DataFrame, user_specified_columns, column_map: ColumnNameMap
 ) -> snowpark.DataFrame:
     user_columns = [str(col._1()) for col in as_java_list(user_specified_columns)]

     if user_columns:
         columns = zip(df.columns, user_columns)
     else:
-        columns =
+        columns = column_map.snowpark_to_spark_map().items()

     for orig_column, user_column in columns:
         df = df.with_column_renamed(
-            orig_column,
+            orig_column, spark_to_sf_single_id(user_column, is_column=True)
         )
-
     return df

@@ -199,11 +197,12 @@ def _create_table_as_select(logical_plan, mode: str) -> None:
     name = get_relation_identifier_name(logical_plan.name())
     comment = logical_plan.tableSpec().comment()

-
-
+    container = execute_logical_plan(logical_plan.query())
+    df = container.dataframe
+    columns = container.column_map.snowpark_to_spark_map().items()
     for orig_column, user_column in columns:
         df = df.with_column_renamed(
-            orig_column,
+            orig_column, spark_to_sf_single_id(user_column, is_column=True)
         )

     # TODO escaping should be handled by snowpark. remove when SNOW-2210271 is done
@@ -218,11 +217,11 @@ def _create_table_as_select(logical_plan, mode: str) -> None:


 def _spark_field_to_sql(field: jpype.JObject, is_column: bool) -> str:
-    # Column names will be uppercased according to "snowpark.connect.auto-uppercase
+    # Column names will be uppercased according to "snowpark.connect.sql.identifiers.auto-uppercase",
     # and struct fields will be left as is. This should allow users to use the same names
     # in spark and Snowflake in most cases.
     if is_column:
-        name =
+        name = spark_to_sf_single_id(str(field.name()), is_column=True)
     else:
         name = quote_name_without_upper_casing(str(field.name()))
     data_type_str = _spark_datatype_to_sql(field.dataType())
@@ -257,10 +256,12 @@ def _normalize_identifiers(node):
     The identifiers need to be uppercased to match Snowflake's behaviour. Users can disable this by setting
     the `snowpark.connect.auto_uppercase_ddl` config to False.
     """
-    if isinstance(node, Identifier):
-
-
-
+    if not isinstance(node, Identifier):
+        return node
+    elif auto_uppercase_non_column_identifiers():
+        return Identifier(this=node.this.upper(), quoted=True)
+    else:
+        return Identifier(this=node.this, quoted=True)


 def _remove_file_format_property(node):
@@ -328,7 +329,10 @@ def map_sql_to_pandas_df(
             # Build Snowflake SQL from logical plan attributes
             for col in as_java_list(columns_to_add):
                 # Follow the same pattern as AlterColumn for column name extraction
-                col_name = ".".join(
+                col_name = ".".join(
+                    spark_to_sf_single_id(part, is_column=True)
+                    for part in as_java_list(col.name())
+                )
                 col_type = _spark_datatype_to_sql(col.dataType())
                 snowflake_sql = (
                     f"ALTER TABLE {table_name} ADD COLUMN {col_name} {col_type}"
@@ -341,9 +345,22 @@ def map_sql_to_pandas_df(

             # Extract actual column name
             column_name = ".".join(
-
+                spark_to_sf_single_id(part, is_column=True)
+                for part in as_java_list(column_obj.name())
             )

+            if not global_config.spark_sql_caseSensitive:
+                case_insensitive_name = next(
+                    (
+                        f.name
+                        for f in session.table(table_name).schema.fields
+                        if f.name.lower() == column_name.lower()
+                    ),
+                    None,
+                )
+                if case_insensitive_name:
+                    column_name = case_insensitive_name
+
             # Build ALTER COLUMN command from logical plan attributes
             alter_parts = []

@@ -381,9 +398,7 @@ def map_sql_to_pandas_df(
             if_not_exists = "IF NOT EXISTS " if logical_plan.ifNotExists() else ""
             session.sql(f"CREATE SCHEMA {if_not_exists}{name}").collect()
             if previous_name is not None:
-                session.sql(
-                    f"USE SCHEMA {_spark_to_snowflake_single_id(previous_name)}"
-                ).collect()
+                session.sql(f"USE SCHEMA {quote_name(previous_name)}").collect()
             else:
                 # TODO: Unset the schema
                 pass
@@ -443,7 +458,8 @@ def map_sql_to_pandas_df(
             else:
                 object_name: str = as_java_list(logical_plan.child().nameParts())[0]
                 _accessing_temp_object.set(False)
-
+                df_container = execute_logical_plan(logical_plan.query())
+                df = df_container.dataframe
                 if _accessing_temp_object.get():
                     raise AnalysisException(
                         f"[INVALID_TEMP_OBJ_REFERENCE] Cannot create the persistent object `{CURRENT_CATALOG_NAME}`.`{current_schema}`.`{object_name}` "
@@ -454,7 +470,9 @@ def map_sql_to_pandas_df(
             name = get_relation_identifier_name(logical_plan.child())
             comment = logical_plan.comment()

-            df = _rename_columns(
+            df = _rename_columns(
+                df, logical_plan.userSpecifiedColumns(), df_container.column_map
+            )

             # TODO: Support logical_plan.replace() == False
             df.create_or_replace_view(
@@ -464,7 +482,8 @@ def map_sql_to_pandas_df(
                 else None,
             )
         case "CreateViewCommand":
-
+            df_container = execute_logical_plan(logical_plan.plan())
+            df = df_container.dataframe
             tmp_views = _get_current_temp_objects()
             tmp_views.add(
                 (
@@ -475,7 +494,7 @@ def map_sql_to_pandas_df(
             )

             name = str(logical_plan.name().identifier())
-            name =
+            name = spark_to_sf_single_id(name)
             if isinstance(
                 logical_plan.viewType(),
                 jpype.JClass(
@@ -490,7 +509,9 @@ def map_sql_to_pandas_df(
                 else None
             )

-            df = _rename_columns(
+            df = _rename_columns(
+                df, logical_plan.userSpecifiedColumns(), df_container.column_map
+            )

             if logical_plan.replace():
                 df.create_or_replace_temp_view(
@@ -504,6 +525,7 @@ def map_sql_to_pandas_df(
                 )
         case "DescribeColumn":
             name = get_relation_identifier_name(logical_plan.column())
+            # todo double check if this is correct
             rows = session.sql(f"DESCRIBE TABLE {name}").collect()
         case "DescribeNamespace":
             name = get_relation_identifier_name(logical_plan.namespace(), True)
@@ -521,11 +543,12 @@ def map_sql_to_pandas_df(
             # This gets the schema without executing the query (similar to Spark's DESCRIBE QUERY)
             # Get the inner query plan and convert it to SQL
             inner_query_plan = logical_plan.plan()
-
+            df_container = execute_logical_plan(inner_query_plan)
+            df = df_container.dataframe
             schema = df.schema

             # Get original Spark column names using the column map from the original DataFrame
-            spark_columns =
+            spark_columns = df_container.column_map.get_spark_columns()
             data = []
             for i, field in enumerate(schema.fields):
                 # Use original Spark column name from column map
@@ -615,9 +638,9 @@ def map_sql_to_pandas_df(
                 "UnresolvedHaving",
                 "Distinct",
             ):
-                expr = execute_logical_plan(
-
-                ][0]
+                expr = execute_logical_plan(
+                    logical_plan.logicalPlan()
+                ).dataframe.queries["queries"][0]
                 final_sql = f"EXPLAIN USING TEXT {expr}"
                 rows = session.sql(final_sql).collect()
             elif (
@@ -626,7 +649,7 @@ def map_sql_to_pandas_df(
             ):
                 expr = execute_logical_plan(
                     logical_plan.logicalPlan().query()
-                ).queries["queries"][0]
+                ).dataframe.queries["queries"][0]
                 final_sql = f"EXPLAIN USING TEXT {expr}"
                 rows = session.sql(final_sql).collect()
             else:
@@ -635,7 +658,8 @@ def map_sql_to_pandas_df(
                     f"{logical_plan_name} is not supported yet with EXPLAIN."
                 )
         case "InsertIntoStatement":
-
+            df_container = execute_logical_plan(logical_plan.query())
+            df = df_container.dataframe
             queries = df.queries["queries"]
             if len(queries) != 1:
                 raise SnowparkConnectNotImplementedError(
@@ -645,7 +669,7 @@ def map_sql_to_pandas_df(
             name = get_relation_identifier_name(logical_plan.table(), True)

             user_columns = [
-
+                spark_to_sf_single_id(str(col), is_column=True)
                 for col in as_java_list(logical_plan.userSpecifiedCols())
             ]
             overwrite_str = "OVERWRITE" if logical_plan.overwrite() else ""
@@ -690,192 +714,43 @@ def map_sql_to_pandas_df(
                 f"INSERT {overwrite_str} INTO {name} {cols_str} {final_query}",
             ).collect()
         case "MergeIntoTable":
-
-
-
-                if action.condition().isDefined():
-                    (_, condition_typed_col,) = map_single_column_expression(
-                        map_logical_plan_expression(
-                            matched_action.condition().get()
-                        ),
-                        column_mapping,
-                        typer,
-                    )
-                    condition = condition_typed_col.col
-                return condition
-
-            def _get_assignments_from_action(
-                action,
-                column_mapping_source,
-                column_mapping_target,
-                typer_source,
-                typer_target,
-            ):
-                assignments = dict()
-                if (
-                    action.getClass().getSimpleName() == "InsertAction"
-                    or action.getClass().getSimpleName() == "UpdateAction"
-                ):
-                    incoming_assignments = as_java_list(action.assignments())
-                    for assignment in incoming_assignments:
-                        (key_name, _) = map_single_column_expression(
-                            map_logical_plan_expression(assignment.key()),
-                            column_mapping=column_mapping_target,
-                            typer=typer_source,
-                        )
-
-                        (_, val_typ_col) = map_single_column_expression(
-                            map_logical_plan_expression(assignment.value()),
-                            column_mapping=column_mapping_source,
-                            typer=typer_target,
-                        )
-
-                        assignments[key_name] = val_typ_col.col
-                elif (
-                    action.getClass().getSimpleName() == "InsertStarAction"
-                    or action.getClass().getSimpleName() == "UpdateStarAction"
-                ):
-                    if len(column_mapping_source.columns) != len(
-                        column_mapping_target.columns
-                    ):
-                        raise ValueError(
-                            "source and target must have the same number of columns for InsertStarAction or UpdateStarAction"
-                        )
-                    for i, col in enumerate(column_mapping_target.columns):
-                        if assignments.get(col.snowpark_name) is not None:
-                            raise SnowparkConnectNotImplementedError(
-                                "UpdateStarAction or InsertStarAction is not supported with duplicate columns."
-                            )
-                        assignments[col.snowpark_name] = snowpark_fn.col(
-                            column_mapping_source.columns[i].snowpark_name
-                        )
-                return assignments
-
-            source_df = map_relation(
-                map_logical_plan_relation(logical_plan.sourceTable())
-            )
-
-            plan_id = gen_sql_plan_id()
-
-            target_df = map_relation(
-                map_logical_plan_relation(logical_plan.targetTable(), plan_id)
-            )
-            for col in target_df._column_map.columns:
-                target_df = target_df.with_column_renamed(
-                    col.snowpark_name, _spark_to_snowflake_single_id(col.spark_name)
-                )
-            target_df = column_name_handler.with_column_map(
-                target_df, target_df.columns, target_df.columns
-            )
-
-            set_plan_id_map(plan_id, target_df)
-
-            joined_df_before_condition: snowpark.DataFrame = source_df.join(
-                target_df
-            )
-
-            column_mapping_for_conditions = column_name_handler.JoinColumnNameMap(
-                source_df,
-                target_df,
-            )
-            typer_for_expressions = ExpressionTyper(joined_df_before_condition)
-
-            (_, merge_condition_typed_col,) = map_single_column_expression(
-                map_logical_plan_expression(logical_plan.mergeCondition()),
-                column_mapping=column_mapping_for_conditions,
-                typer=typer_for_expressions,
-            )
-
-            clauses = []
-
-            for matched_action in as_java_list(logical_plan.matchedActions()):
-                condition = _get_condition_from_action(
-                    matched_action,
-                    column_mapping_for_conditions,
-                    typer_for_expressions,
-                )
-                if matched_action.getClass().getSimpleName() == "DeleteAction":
-                    clauses.append(when_matched(condition).delete())
-                elif (
-                    matched_action.getClass().getSimpleName() == "UpdateAction"
-                    or matched_action.getClass().getSimpleName()
-                    == "UpdateStarAction"
-                ):
-                    assignments = _get_assignments_from_action(
-                        matched_action,
-                        source_df._column_map,
-                        target_df._column_map,
-                        ExpressionTyper(source_df),
-                        ExpressionTyper(target_df),
-                    )
-                    clauses.append(when_matched(condition).update(assignments))
-
-            for not_matched_action in as_java_list(
-                logical_plan.notMatchedActions()
-            ):
-                condition = _get_condition_from_action(
-                    not_matched_action,
-                    column_mapping_for_conditions,
-                    typer_for_expressions,
-                )
-                if (
-                    not_matched_action.getClass().getSimpleName() == "InsertAction"
-                    or not_matched_action.getClass().getSimpleName()
-                    == "InsertStarAction"
-                ):
-                    assignments = _get_assignments_from_action(
-                        not_matched_action,
-                        source_df._column_map,
-                        target_df._column_map,
-                        ExpressionTyper(source_df),
-                        ExpressionTyper(target_df),
-                    )
-                    clauses.append(when_not_matched(condition).insert(assignments))
-
-            if not as_java_list(logical_plan.notMatchedBySourceActions()).isEmpty():
-                raise SnowparkConnectNotImplementedError(
-                    "Snowflake does not support 'not matched by source' actions in MERGE statements."
-                )
-
-            if (
-                logical_plan.targetTable().getClass().getSimpleName()
-                == "UnresolvedRelation"
-            ):
-                target_table_name = _spark_to_snowflake(
-                    logical_plan.targetTable().multipartIdentifier()
-                )
-            else:
-                target_table_name = _spark_to_snowflake(
-                    logical_plan.targetTable().child().multipartIdentifier()
-                )
-            session.table(target_table_name).merge(
-                source_df, merge_condition_typed_col.col, clauses
+            raise UnsupportedOperationException(
+                "[UNSUPPORTED_SQL_EXTENSION] The MERGE INTO command failed.\n"
+                + "Reason: This command is a platform-specific SQL extension and is not part of the standard Apache Spark specification that this interface uses."
             )
         case "DeleteFromTable":
-
-
-
-
-
-
-
-
-
-
-                condition_typed_col,
-            ) = map_single_column_expression(
-                map_logical_plan_expression(logical_plan.condition()),
-                df._column_map,
-                ExpressionTyper(df),
+            raise UnsupportedOperationException(
+                "[UNSUPPORTED_SQL_EXTENSION] The DELETE FROM command failed.\n"
+                + "Reason: This command is a platform-specific SQL extension and is not part of the standard Apache Spark specification that this interface uses."
+            )
+        case "UpdateTable":
+            # Databricks/Delta-specific extension not supported by SAS.
+            # Provide an actionable, clear error.
+            raise UnsupportedOperationException(
+                "[UNSUPPORTED_SQL_EXTENSION] The UPDATE TABLE command failed.\n"
+                + "Reason: This command is a platform-specific SQL extension and is not part of the standard Apache Spark specification that this interface uses."
             )
-            session.table(name).delete(condition_typed_col.col)
         case "RenameColumn":
             table_name = get_relation_identifier_name(logical_plan.table(), True)
             column_obj = logical_plan.column()
             old_column_name = ".".join(
-                str(part)
+                spark_to_sf_single_id(str(part), is_column=True)
+                for part in as_java_list(column_obj.name())
+            )
+            if not global_config.spark_sql_caseSensitive:
+                case_insensitive_name = next(
+                    (
+                        f.name
+                        for f in session.table(table_name).schema.fields
+                        if f.name.lower() == old_column_name.lower()
+                    ),
+                    None,
+                )
+                if case_insensitive_name:
+                    old_column_name = case_insensitive_name
+            new_column_name = spark_to_sf_single_id(
+                str(logical_plan.newName()), is_column=True
             )
-            new_column_name = str(logical_plan.newName())

             # Pass through to Snowflake
             snowflake_sql = f"ALTER TABLE {table_name} RENAME COLUMN {old_column_name} TO {new_column_name}"
@@ -971,6 +846,32 @@ def map_sql_to_pandas_df(
             else:
                 rows = session.sql("SHOW TABLES").collect()

+            # Return empty DataFrame with proper schema if no results
+            if not rows:
+                if class_name == "ShowTableExtended":
+                    return (
+                        pandas.DataFrame(
+                            {
+                                "namespace": [""],
+                                "tableName": [""],
+                                "isTemporary": [""],
+                                "information": [""],
+                            }
+                        ),
+                        "",
+                    )
+                else:
+                    return (
+                        pandas.DataFrame(
+                            {
+                                "namespace": [""],
+                                "tableName": [""],
+                                "isTemporary": [""],
+                            }
+                        ),
+                        "",
+                    )
+
             # Apply pattern filtering if pattern is provided
             # This is workaround to filter using Python regex.
             if pattern and rows:
@@ -1020,9 +921,7 @@ def map_sql_to_pandas_df(
             if db_name and multi_part_len == 2:
                 # Check db_name is same as in the full table name
                 if (
-                    _spark_to_snowflake_single_id(
-                        str(db_and_table_name[0])
-                    ).casefold()
+                    spark_to_sf_single_id(str(db_and_table_name[0])).casefold()
                     != db_name.casefold()
                 ):
                     raise AnalysisException(
@@ -1075,11 +974,7 @@ def map_sql_to_pandas_df(
     if _is_sql_select_statement_helper(sql_string):
         return None, None
     session = snowpark.Session.get_active_session()
-
-    columns = sql_df.columns
-    column_name_handler.with_column_map(sql_df, columns, columns)
-    rows = sql_df.collect()
-
+    rows = session.sql(sql_string).collect()
     if rows:
         return pandas.DataFrame(rows), ""
     return pandas.DataFrame({"": [""]}), ""
@@ -1089,7 +984,9 @@ def get_sql_passthrough() -> bool:
     return get_boolean_session_config_param("snowpark.connect.sql.passthrough")


-def map_sql(rel: relation_proto.Relation) -> snowpark.DataFrame:
+def map_sql(
+    rel: relation_proto.Relation,
+) -> DataFrameContainer:
     """
     Map a SQL string to a DataFrame.

@@ -1112,7 +1009,11 @@ def map_sql(rel: relation_proto.Relation) -> snowpark.DataFrame:
     session = snowpark.Session.get_active_session()
     sql_df = session.sql(rel.sql.query)
     columns = sql_df.columns
-    return
+    return DataFrameContainer.create_with_column_mapping(
+        dataframe=sql_df,
+        spark_column_names=columns,
+        snowpark_column_names=columns,
+    )


 def map_logical_plan_relation(
@@ -1453,8 +1354,9 @@ def map_logical_plan_relation(

         # Need to find ids which are not part of values and remaining cols of df
         input_rel = map_logical_plan_relation(rel.child())
-
-
+        result = map_relation(input_rel)
+        input_df: snowpark.DataFrame = result.dataframe
+        column_map = result.column_map
         typer = ExpressionTyper(input_df)
         unpivot_spark_names = []
         for v in values:
@@ -1744,8 +1646,8 @@ def map_logical_plan_relation(
         # )

         # This is a workaround to fix the bug in snowpark where if we select posexplode with *, it would return wrong columns
-
-        spark_columns =
+        input_container = map_relation(input_relation)
+        spark_columns = input_container.column_map.get_spark_columns()
         column_expressions = [
             expressions_proto.Expression(
                 unresolved_attribute=expressions_proto.Expression.UnresolvedAttribute(
@@ -1796,7 +1698,9 @@ def get_relation_identifier_name(name_obj, is_multi_part: bool = False) -> str:
         expr = map_single_column_expression(
             expr_proto, m, ExpressionTyper.dummy_typer(session)
         )
-        name =
+        name = spark_to_sf_single_id(
+            session.range(1).select(expr[1].col).collect()[0][0]
+        )
     else:
         if is_multi_part:
             name = _spark_to_snowflake(name_obj.multipartIdentifier())