snowpark-connect 0.31.0__py3-none-any.whl → 0.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/column_name_handler.py +73 -100
- snowflake/snowpark_connect/column_qualifier.py +47 -0
- snowflake/snowpark_connect/dataframe_container.py +3 -2
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +4 -2
- snowflake/snowpark_connect/expression/map_expression.py +5 -4
- snowflake/snowpark_connect/expression/map_extension.py +12 -6
- snowflake/snowpark_connect/expression/map_sql_expression.py +38 -3
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +5 -5
- snowflake/snowpark_connect/expression/map_unresolved_function.py +869 -107
- snowflake/snowpark_connect/expression/map_unresolved_star.py +9 -7
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +4 -1
- snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
- snowflake/snowpark_connect/relation/map_column_ops.py +4 -3
- snowflake/snowpark_connect/relation/map_extension.py +10 -9
- snowflake/snowpark_connect/relation/map_join.py +5 -2
- snowflake/snowpark_connect/relation/map_sql.py +33 -1
- snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
- snowflake/snowpark_connect/relation/read/map_read_table.py +6 -3
- snowflake/snowpark_connect/relation/write/map_write.py +29 -14
- snowflake/snowpark_connect/server.py +1 -2
- snowflake/snowpark_connect/type_mapping.py +36 -3
- snowflake/snowpark_connect/typed_column.py +8 -6
- snowflake/snowpark_connect/utils/session.py +19 -3
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +1 -1
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +5 -2
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +36 -37
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
- {snowpark_connect-0.31.0.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.31.0.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.31.0.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0

snowflake/snowpark_connect/expression/map_unresolved_star.py
@@ -11,6 +11,7 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
 )
 from snowflake.snowpark.types import StructType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.error.error_codes import ErrorCodes
 from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
@@ -28,7 +29,7 @@ def check_struct_and_get_field_datatype(field_name, schema):
         else:
             return None
     else:
-        None
+        return None


 def map_unresolved_star(
@@ -55,16 +56,17 @@ def map_unresolved_star(
         return spark_names, typed_column

     # scenario where it is expanding * to mulitple columns
-    spark_names = []
-    snowpark_names = []
-    qualifiers = []
+    spark_names: list[str] = []
+    snowpark_names: list[str] = []
+    qualifiers: list[set[ColumnQualifier]] = []

+    target_qualifier = ColumnQualifier(tuple(name_parts[:-1]))
     (
         spark_names,
         snowpark_names,
         qualifiers,
     ) = column_mapping.get_spark_and_snowpark_columns_with_qualifier_for_qualifier(
-
+        target_qualifier
     )

     if len(spark_names) == 0:
@@ -75,7 +77,7 @@ def map_unresolved_star(
             snowpark_names,
             qualifiers,
         ) = column_mapping_for_outer_df.get_spark_and_snowpark_columns_with_qualifier_for_qualifier(
-
+            target_qualifier
         )
         if len(spark_names) > 0:
             break
@@ -141,7 +143,7 @@ def map_unresolved_star(
             final_sql_expr,
             lambda final_sql_expr=final_sql_expr: typer.type(final_sql_expr),
         )
-        typed_column.set_multi_col_qualifiers([
+        typed_column.set_multi_col_qualifiers([set() for _ in spark_names])
         return spark_names, typed_column
     else:
         result_exp = snowpark_fn.sql_expr(
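
Together with the alias and read-path changes below, these qualifier-aware star-expansion changes are what lets a qualified star such as "a.*" resolve against an aliased input. A minimal Spark-side sketch of the behavior this targets (hypothetical `spark` session, DataFrame `df`, and column `id`; assumes a Spark Connect session served by Snowpark Connect):

    from pyspark.sql.functions import col

    a = df.alias("a")   # the alias attaches the ("a",) qualifier to every column
    b = df.alias("b")
    joined = a.join(b, col("a.id") == col("b.id"))
    joined.select("a.*")  # the unresolved star carries name_parts ("a",), matched as a ColumnQualifier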

snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py
@@ -19,6 +19,7 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
 )
 from snowflake.snowpark.functions import lit
 from snowflake.snowpark.types import BooleanType, StringType
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.config import (
     auto_uppercase_non_column_identifiers,
     global_config,
@@ -743,7 +744,9 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         sp_schema = proto_to_snowpark_type(schema)
         columns = [c.name for c in schema.struct.fields]
         table_name_parts = split_fully_qualified_spark_name(tableName)
-        qualifiers = [
+        qualifiers: list[set[ColumnQualifier]] = [
+            {ColumnQualifier(tuple(table_name_parts))} for _ in columns
+        ]
         column_types = [f.datatype for f in sp_schema.fields]
         return DataFrameContainer.create_with_column_mapping(
             dataframe=session.createDataFrame([], sp_schema),

snowflake/snowpark_connect/relation/map_aggregate.py
@@ -16,6 +16,7 @@ from snowflake.snowpark.types import DataType
 from snowflake.snowpark_connect.column_name_handler import (
     make_column_names_snowpark_compatible,
 )
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
 from snowflake.snowpark_connect.expression.map_expression import (
@@ -200,7 +201,9 @@ def map_pivot_aggregate(
         dataframe=result.select(*column_selectors),
         spark_column_names=reordered_spark_names,
         snowpark_column_names=reordered_snowpark_names,
-        column_qualifiers=[
+        column_qualifiers=[
+            {ColumnQualifier.no_qualifier()} for _ in reordered_spark_names
+        ],
         parent_column_name_map=input_container.column_map,
         snowpark_column_types=reordered_types,
     )
@@ -349,7 +352,7 @@ class _ColumnMetadata:
     spark_name: str
     snowpark_name: str
     data_type: DataType
-    qualifiers:
+    qualifiers: set[ColumnQualifier]


 @dataclass(frozen=True)
@@ -385,7 +388,7 @@ class _Columns:
             col.spark_name for col in self.grouping_columns + self.aggregation_columns
         ]

-    def get_qualifiers(self) -> list[
+    def get_qualifiers(self) -> list[set[ColumnQualifier]]:
         return [
             col.qualifiers for col in self.grouping_columns + self.aggregation_columns
         ]
@@ -429,7 +432,7 @@ def map_aggregate_helper(
                 new_name,
                 None if skip_alias else alias,
                 None if pivot else snowpark_column.typ,
-                snowpark_column.get_qualifiers(),
+                qualifiers=snowpark_column.get_qualifiers(),
             )
         )

@@ -469,7 +472,7 @@ def map_aggregate_helper(
                 new_name,
                 None if skip_alias else alias,
                 agg_col_typ,
-
+                qualifiers={ColumnQualifier.no_qualifier()},
             )
         )


snowflake/snowpark_connect/relation/map_column_ops.py
@@ -29,6 +29,7 @@ from snowflake.snowpark.column import Column
 from snowflake.snowpark.table_function import _ExplodeFunctionCall
 from snowflake.snowpark.types import DataType, StructField, StructType, _NumericType
 from snowflake.snowpark_connect.column_name_handler import (
+    ColumnQualifier,
     make_column_names_snowpark_compatible,
 )
 from snowflake.snowpark_connect.config import global_config
@@ -1014,7 +1015,7 @@ def map_unpivot(
     column_project = []
     column_reverse_project = []
     snowpark_columns = []
-    qualifiers = []
+    qualifiers: list[set[ColumnQualifier]] = []
     for c in input_container.column_map.get_snowpark_columns():
         c_name = snowpark_functions_col(c, input_container.column_map).get_name()
         if c_name in unpivot_col_names:
@@ -1042,7 +1043,7 @@ def map_unpivot(
             )
             snowpark_columns.append(c)
             qualifiers.append(
-                input_container.column_map.
+                input_container.column_map.get_qualifiers_for_spark_column(c)
             )

     # Without the case when postprocessing, the result Spark dataframe is:
@@ -1087,7 +1088,7 @@ def map_unpivot(
         snowpark_functions_col(snowpark_value_column_name, input_container.column_map)
     )
     snowpark_columns.append(snowpark_value_column_name)
-    qualifiers.extend([
+    qualifiers.extend([set() for _ in range(2)])

     result = (
         input_df.select(*column_project)

snowflake/snowpark_connect/relation/map_extension.py
@@ -15,6 +15,7 @@ from snowflake.snowpark_connect.column_name_handler import (
     ColumnNameMap,
     make_column_names_snowpark_compatible,
 )
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.config import get_boolean_session_config_param
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.error.error_codes import ErrorCodes
@@ -178,7 +179,7 @@ def get_udtf_project(relation: relation_proto.Relation) -> bool:

 def handle_udtf_with_table_arguments(
     udtf_info: snowflake_proto.UDTFWithTableArguments,
-) ->
+) -> DataFrameContainer:
     """
     Handle UDTF with one or more table arguments using Snowpark's join_table_function.
     For multiple table arguments, this creates a Cartesian product of all input tables.
@@ -286,7 +287,7 @@ def handle_lateral_join_with_udtf(
     left_result: DataFrameContainer,
     udtf_relation: relation_proto.Relation,
     udtf_info: tuple[snowpark.udtf.UserDefinedTableFunction, list],
-) ->
+) -> DataFrameContainer:
     """
     Handle lateral join with UDTF on the right side using join_table_function.
     """
@@ -319,7 +320,7 @@ def handle_lateral_join_with_udtf(

 def map_aggregate(
     aggregate: snowflake_proto.Aggregate, plan_id: int
-) ->
+) -> DataFrameContainer:
     input_container = map_relation(aggregate.input)
     input_df: snowpark.DataFrame = input_container.dataframe

@@ -363,7 +364,7 @@ def map_aggregate(
         return new_names[0], snowpark_column

     raw_groupings: list[tuple[str, TypedColumn]] = []
-    raw_aggregations: list[tuple[str, TypedColumn,
+    raw_aggregations: list[tuple[str, TypedColumn, set[ColumnQualifier]]] = []

     if not is_group_by_all:
         raw_groupings = [_map_column(exp) for exp in aggregate.grouping_expressions]
@@ -401,11 +402,11 @@ def map_aggregate(
         col = _map_column(exp)
         if exp.WhichOneof("expr_type") == "unresolved_attribute":
             spark_name = col[0]
-            qualifiers
-
-            )
+            qualifiers: set[
+                ColumnQualifier
+            ] = input_container.column_map.get_qualifiers_for_spark_column(spark_name)
         else:
-            qualifiers =
+            qualifiers = set()

         raw_aggregations.append((col[0], col[1], qualifiers))

@@ -438,7 +439,7 @@ def map_aggregate(
     spark_columns: list[str] = []
     snowpark_columns: list[str] = []
     snowpark_column_types: list[snowpark_types.DataType] = []
-    all_qualifiers: list[
+    all_qualifiers: list[set[ColumnQualifier]] = []

     # Use grouping columns directly without aliases
     groupings = [col.col for _, col in raw_groupings]

snowflake/snowpark_connect/relation/map_join.py
@@ -10,6 +10,7 @@ from pyspark.errors import AnalysisException
 import snowflake.snowpark.functions as snowpark_fn
 from snowflake import snowpark
 from snowflake.snowpark_connect.column_name_handler import JoinColumnNameMap
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.constants import COLUMN_METADATA_COLLISION_KEY
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
@@ -267,8 +268,10 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
         ] # this is to make sure we only remove the column once
     ]

-    qualifiers = list(
-
+    qualifiers: list[set[ColumnQualifier]] = list(
+        left_container.column_map.get_qualifiers()
+    ) + [
+        {right_container.column_map.get_qualifier_for_spark_column(spark_col)}
         for i, spark_col in enumerate(
             right_container.column_map.get_spark_columns()
         )

snowflake/snowpark_connect/relation/map_sql.py
@@ -1343,6 +1343,33 @@ def map_sql_to_pandas_df(
             )
             SNOWFLAKE_CATALOG.refreshTable(table_name_unquoted)

+            return pandas.DataFrame({"": [""]}), ""
+        case "RepairTable":
+            # No-Op. Snowflake doesn't have explicit partitions to repair.
+            table_relation = logical_plan.child()
+            db_and_table_name = as_java_list(table_relation.multipartIdentifier())
+            multi_part_len = len(db_and_table_name)
+
+            if multi_part_len == 1:
+                table_name = db_and_table_name[0]
+                db_name = None
+                full_table_name = table_name
+            else:
+                db_name = db_and_table_name[0]
+                table_name = db_and_table_name[1]
+                full_table_name = db_name + "." + table_name
+
+            df = SNOWFLAKE_CATALOG.tableExists(table_name, db_name)
+
+            table_exist = df.iloc[0, 0]
+
+            if not table_exist:
+                exception = AnalysisException(
+                    f"[TABLE_OR_VIEW_NOT_FOUND] Table not found `{full_table_name}`."
+                )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                raise exception
+
             return pandas.DataFrame({"": [""]}), ""
         case _:
             execute_logical_plan(logical_plan)
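
The new RepairTable branch turns partition-repair statements into a no-op that only checks table existence, since Snowflake tables have no partition metadata to recover. A Spark-side sketch (table names are hypothetical; assumes a Spark Connect session served by Snowpark Connect):

    spark.sql("MSCK REPAIR TABLE sales")          # no-op once the table is confirmed to exist
    spark.sql("MSCK REPAIR TABLE no_such_table")  # raises AnalysisException: [TABLE_OR_VIEW_NOT_FOUND]
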
@@ -1483,7 +1510,12 @@ def map_sql(
     snowpark_connect_sql_passthrough, sql_stmt = is_valid_passthrough_sql(rel.sql.query)

     if not snowpark_connect_sql_passthrough:
-
+        # Changed from parseQuery to parsePlan as Spark parseQuery() call generating wrong logical plan for
+        # query like this: SELECT cast('3.4' as decimal(38, 18)) UNION SELECT 'foo'
+        # As such other place in this file we use parsePlan.
+        # Main difference between parsePlan() and parseQuery() is, parsePlan() can be called for any SQL statement, while
+        # parseQuery() can only be called for query statements.
+        logical_plan = sql_parser().parsePlan(sql_stmt)

     parsed_pos_args = parse_pos_args(logical_plan, rel.sql.pos_args)
     set_sql_args(rel.sql.args, parsed_pos_args)

snowflake/snowpark_connect/relation/map_subquery_alias.py
@@ -4,6 +4,7 @@

 import pyspark.sql.connect.proto.relations_pb2 as relation_proto

+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.map_relation import map_relation

@@ -18,7 +19,9 @@ def map_alias(
     # we set reuse_parsed_plan=False because we need new expr_id for the attributes (output columns) in aliased snowpark dataframe
     # reuse_parsed_plan will lead to ambiguous column name for operations like joining two dataframes that are aliased from the same dataframe
     input_container = map_relation(rel.subquery_alias.input, reuse_parsed_plan=False)
-    qualifiers = [
+    qualifiers = [
+        {ColumnQualifier((alias,))} for _ in input_container.column_map.columns
+    ]

     return DataFrameContainer.create_with_column_mapping(
         dataframe=input_container.dataframe,

snowflake/snowpark_connect/relation/read/map_read_table.py
@@ -16,6 +16,7 @@ from snowflake.snowpark_connect.column_name_handler import (
     ColumnNameMap,
     make_column_names_snowpark_compatible,
 )
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.config import auto_uppercase_non_column_identifiers
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.error.error_codes import ErrorCodes
@@ -58,7 +59,7 @@ def post_process_df(
         spark_column_names=true_names,
         snowpark_column_names=snowpark_column_names,
         snowpark_column_types=[f.datatype for f in df.schema.fields],
-        column_qualifiers=[name_parts
+        column_qualifiers=[{ColumnQualifier(tuple(name_parts))} for _ in true_names]
         if source_table_name
         else None,
     )
@@ -94,8 +95,10 @@ def _get_temporary_view(
         spark_column_names=temp_view.column_map.get_spark_columns(),
         snowpark_column_names=snowpark_column_names,
         column_metadata=temp_view.column_map.column_metadata,
-        column_qualifiers=[
-
+        column_qualifiers=[
+            {ColumnQualifier(tuple(split_fully_qualified_spark_name(table_name)))}
+            for _ in range(len(temp_view.column_map.get_spark_columns()))
+        ],
         parent_column_name_map=temp_view.column_map.get_parent_column_name_map(),
     )


snowflake/snowpark_connect/relation/write/map_write.py
@@ -16,7 +16,7 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     unquote_if_quoted,
 )
 from snowflake.snowpark.exceptions import SnowparkSQLException
-from snowflake.snowpark.functions import col, lit, object_construct, sql_expr
+from snowflake.snowpark.functions import col, lit, object_construct, sql_expr, when
 from snowflake.snowpark.types import (
     ArrayType,
     DataType,
@@ -1083,20 +1083,35 @@ def rewrite_df(input_df: snowpark.DataFrame, source: str) -> snowpark.DataFrame:
     json: construct the dataframe to 1 column in json format
         1. Append columns which represents the column name
         2. Use object_construct to aggregate the dataframe into 1 column
-
+    csv:
+        Use "" to replace empty string
     """
-
-
-
-
-
-
-
-
-
-
-
-
+    match source:
+        case "json":
+            rand_salt = random_string(10, "_")
+            rewritten_df = input_df.with_columns(
+                [co + rand_salt for co in input_df.columns],
+                [lit(unquote_if_quoted(co)) for co in input_df.columns],
+            )
+            construct_key_values = []
+            for co in input_df.columns:
+                construct_key_values.append(col(co + rand_salt))
+                construct_key_values.append(col(co))
+            return rewritten_df.select(object_construct(*construct_key_values))
+        case "csv":
+            new_cols = []
+            for co in input_df.columns:
+                if isinstance(input_df.schema[co].datatype, StringType):
+                    new_col = col(co)
+                    new_col = when(
+                        new_col.isNotNull() & (new_col == ""), lit('""')
+                    ).otherwise(new_col)
+                    new_cols.append(new_col.alias(co))
+                else:
+                    new_cols.append(col(co))
+            return input_df.select(new_cols)
+        case _:
+            return input_df


 def handle_column_names(
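
The new csv branch of rewrite_df keeps empty (but non-null) string values distinguishable in CSV output by substituting a literal '""'. A minimal sketch of the same expression outside rewrite_df, assuming a Snowpark DataFrame `df` with a string column "C":

    from snowflake.snowpark.functions import col, lit, when

    c = col("C")
    quoted_empty = when(c.isNotNull() & (c == ""), lit('""')).otherwise(c).alias("C")
    # df.select(quoted_empty) writes empty strings as "" instead of an empty field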

snowflake/snowpark_connect/server.py
@@ -158,9 +158,8 @@ def _handle_exception(context, e: Exception):
     logger.error("Error: %s - %s", type(e).__name__, str(e))

     telemetry.report_request_failure(e)
-
     if tcm.TCM_MODE:
-        #
+        # spark decoder will catch the error and return it to GS gracefully
         attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
         raise e


snowflake/snowpark_connect/type_mapping.py
@@ -582,11 +582,14 @@ def map_snowpark_types_to_pyarrow_types(
             attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
             raise exception
         case snowpark.types.TimestampType:
-
-
+            # Check if pa_type has unit attribute (it should be a timestamp type)
+            unit = pa_type.unit if hasattr(pa_type, "unit") else "us"
+            tz = pa_type.tz if hasattr(pa_type, "tz") else None
+
+            # Spark truncates nanosecond precision to microseconds
             if unit == "ns":
-                # Spark truncates nanosecond precision to microseconds
                 unit = "us"
+
             return pa.timestamp(unit, tz=tz)
         case snowpark.types.VariantType:
             return pa.string()
@@ -670,6 +673,9 @@ def map_pyarrow_to_snowpark_types(pa_type: pa.DataType) -> snowpark.types.DataType:
         return snowpark.types.TimestampType()
     elif pa.types.is_null(pa_type):
         return snowpark.types.NullType()
+    elif pa.types.is_duration(pa_type):
+        # Map PyArrow duration[us] to DayTimeIntervalType
+        return snowpark.types.DayTimeIntervalType()
     else:
         exception = SnowparkConnectNotImplementedError(
             f"Unsupported PyArrow data type: {pa_type}"
@@ -892,6 +898,33 @@ def map_simple_types(simple_type: str) -> snowpark.types.DataType:
             return snowpark.types.YearMonthIntervalType()
         case type_name if _INTERVAL_DAYTIME_PATTERN_RE.match(type_name):
             return snowpark.types.DayTimeIntervalType()
+        # Year-Month interval cases
+        case "interval year":
+            return snowpark.types.YearMonthIntervalType(0)
+        case "interval month":
+            return snowpark.types.YearMonthIntervalType(1)
+        case "interval year to month":
+            return snowpark.types.YearMonthIntervalType(0, 1)
+        case "interval day":
+            return snowpark.types.DayTimeIntervalType(0)
+        case "interval hour":
+            return snowpark.types.DayTimeIntervalType(1)
+        case "interval minute":
+            return snowpark.types.DayTimeIntervalType(2)
+        case "interval second":
+            return snowpark.types.DayTimeIntervalType(3)
+        case "interval day to hour":
+            return snowpark.types.DayTimeIntervalType(0, 1)
+        case "interval day to minute":
+            return snowpark.types.DayTimeIntervalType(0, 2)
+        case "interval day to second":
+            return snowpark.types.DayTimeIntervalType(0, 3)
+        case "interval hour to minute":
+            return snowpark.types.DayTimeIntervalType(1, 2)
+        case "interval hour to second":
+            return snowpark.types.DayTimeIntervalType(1, 3)
+        case "interval minute to second":
+            return snowpark.types.DayTimeIntervalType(2, 3)
         case _:
             if simple_type.startswith("decimal"):
                 precision = int(simple_type.split("(")[1].split(",")[0])
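
The added match arms map Spark's interval DDL strings onto Snowpark interval types with explicit (start_field, end_field) bounds, following Spark's field numbering (YEAR=0, MONTH=1; DAY=0, HOUR=1, MINUTE=2, SECOND=3). A minimal sketch of the same convention (assumes the interval types used in the diff are importable from snowflake.snowpark.types; the helper below is illustrative, not part of the package):

    from snowflake.snowpark.types import DayTimeIntervalType, YearMonthIntervalType

    _YM = {"year": 0, "month": 1}
    _DT = {"day": 0, "hour": 1, "minute": 2, "second": 3}

    def interval_ddl_to_snowpark(type_name: str):
        # "interval day to second" -> DayTimeIntervalType(0, 3); "interval year" -> YearMonthIntervalType(0)
        parts = type_name.removeprefix("interval ").split(" to ")
        if parts[0] in _YM:
            return YearMonthIntervalType(*(_YM[p] for p in parts))
        return DayTimeIntervalType(*(_DT[p] for p in parts))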

snowflake/snowpark_connect/typed_column.py
@@ -8,6 +8,7 @@ from functools import cached_property
 import snowflake.snowpark.functions as snowpark_fn
 from snowflake import snowpark
 from snowflake.snowpark.column import Column
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier

 _EMPTY_COLUMN = Column("")

@@ -44,11 +45,11 @@ class TypedColumn:
     def alias(self, alias_name: str):
         return TypedColumn(self.col.alias(alias_name), self._type_resolver)

-    def set_qualifiers(self, qualifiers:
+    def set_qualifiers(self, qualifiers: set[ColumnQualifier]) -> None:
         self.qualifiers = qualifiers

-    def get_qualifiers(self) ->
-        return getattr(self, "qualifiers",
+    def get_qualifiers(self) -> set[ColumnQualifier]:
+        return getattr(self, "qualifiers", {ColumnQualifier.no_qualifier()})

     def set_catalog_database_info(self, catalog_database_info: dict[str, str]) -> None:
         self._catalog_database_info = catalog_database_info
@@ -63,12 +64,13 @@ class TypedColumn:
     def get_database(self) -> str | None:
         return self._catalog_database_info.get("database")

-    def set_multi_col_qualifiers(self, qualifiers: list[
+    def set_multi_col_qualifiers(self, qualifiers: list[set[ColumnQualifier]]) -> None:
         self.multi_col_qualifiers = qualifiers

-    def get_multi_col_qualifiers(self, num_columns) -> list[
+    def get_multi_col_qualifiers(self, num_columns) -> list[set[ColumnQualifier]]:
         if not hasattr(self, "multi_col_qualifiers"):
-
+
+            return [{ColumnQualifier.no_qualifier()} for i in range(num_columns)]
         assert (
             len(self.multi_col_qualifiers) == num_columns
         ), f"Expected {num_columns} multi-column qualifiers, got {len(self.multi_col_qualifiers)}"
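
Most of the signature changes in this file (and the annotations above) revolve around the new ColumnQualifier value: columns now carry a set of qualifiers instead of bare name-part lists. A minimal sketch of how the constructors seen in this diff fit together (only the constructor and no_qualifier() appear above; everything else about the class is an assumption):

    from snowflake.snowpark_connect.column_qualifier import ColumnQualifier

    table_qual = ColumnQualifier(("db", "tbl"))   # columns read from table db.tbl
    alias_qual = ColumnQualifier(("a",))          # columns of a dataframe aliased as "a"
    derived = ColumnQualifier.no_qualifier()      # placeholder for computed/aggregated columns

    # qualifiers travel as sets, so ColumnQualifier is used as a hashable value
    qualifiers: set[ColumnQualifier] = {table_qual, alias_qual}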

snowflake/snowpark_connect/utils/session.py
@@ -23,6 +23,13 @@ from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import telemetry
 from snowflake.snowpark_connect.utils.udf_cache import init_builtin_udf_cache

+SKIP_SESSION_CONFIGURATION = False
+
+
+def skip_session_configuration(skip: bool):
+    global SKIP_SESSION_CONFIGURATION
+    SKIP_SESSION_CONFIGURATION = skip
+

 # Suppress experimental warnings from snowflake.snowpark logger
 def _filter_experimental_warnings(record):
@@ -57,6 +64,8 @@ def configure_snowpark_session(session: snowpark.Session):
         global_config,
     )

+    global SKIP_SESSION_CONFIGURATION
+
     logger.info(f"Configuring session {session}")

     telemetry.initialize(session)
@@ -124,9 +133,16 @@ def configure_snowpark_session(session: snowpark.Session):
         "QUERY_TAG": f"'{query_tag}'",
     }

-    session.
-
-
+    # SNOW-2245971: Stored procedures inside Native Apps run as Execute As Owner and hence cannot set session params.
+    if not SKIP_SESSION_CONFIGURATION:
+        session.sql(
+            f"ALTER SESSION SET {', '.join([f'{k} = {v}' for k, v in session_params.items()])}"
+        ).collect()
+    else:
+        session_param_names = ", ".join(session_params.keys())
+        logger.info(
+            f"Skipping Snowpark Connect session configuration as requested. Please make sure following session parameters are set correctly: {session_param_names}"
+        )

     # Instrument the snowpark session to use a cache for describe queries.
     instrument_session_for_describe_cache(session)
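
The new SKIP_SESSION_CONFIGURATION switch lets owner's-rights callers (for example, a stored procedure inside a Native App) opt out of the ALTER SESSION issued by configure_snowpark_session and set the listed parameters themselves. A minimal sketch, assuming both helpers are importable from snowflake.snowpark_connect.utils.session as shown above:

    from snowflake.snowpark_connect.utils.session import (
        configure_snowpark_session,
        skip_session_configuration,
    )

    def configure_inside_native_app(session):
        skip_session_configuration(True)     # flip the module-level flag
        configure_snowpark_session(session)  # logs the parameter names instead of running ALTER SESSION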

{snowpark_connect-0.31.0.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: snowpark-connect
-Version: 0.31.0
+Version: 0.32.0
 Summary: Snowpark Connect for Spark
 Author: Snowflake, Inc
 License: Apache License, Version 2.0
@@ -13,7 +13,7 @@ Requires-Dist: certifi>=2025.1.31
 Requires-Dist: cloudpickle
 Requires-Dist: fsspec[http]
 Requires-Dist: jpype1
-Requires-Dist: protobuf<
+Requires-Dist: protobuf<6.32.0,>=4.25.3
 Requires-Dist: s3fs>=2025.3.0
 Requires-Dist: snowflake.core<2,>=1.0.5
 Requires-Dist: snowflake-snowpark-python[pandas]<1.41.0,==1.40.0
@@ -29,11 +29,14 @@ Requires-Dist: grpcio-status<1.63,>=1.56.0
 Requires-Dist: googleapis-common-protos>=1.56.4
 Requires-Dist: numpy<2,>=1.15
 Requires-Dist: gcsfs>=2025.9.0
+Provides-Extra: jdk
+Requires-Dist: jdk4py==17.0.9.2; extra == "jdk"
 Dynamic: author
 Dynamic: description
 Dynamic: description-content-type
 Dynamic: license
 Dynamic: license-file
+Dynamic: provides-extra
 Dynamic: requires-dist
 Dynamic: requires-python
 Dynamic: summary