snowpark-connect 0.26.0__py3-none-any.whl → 0.27.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowpark_connect/config.py +0 -3
- snowflake/snowpark_connect/expression/map_expression.py +15 -0
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +18 -0
- snowflake/snowpark_connect/expression/map_unresolved_function.py +16 -7
- snowflake/snowpark_connect/relation/map_extension.py +9 -7
- snowflake/snowpark_connect/relation/map_map_partitions.py +36 -77
- snowflake/snowpark_connect/relation/map_relation.py +3 -1
- snowflake/snowpark_connect/relation/map_show_string.py +2 -0
- snowflake/snowpark_connect/relation/map_sql.py +63 -2
- snowflake/snowpark_connect/relation/write/map_write.py +24 -4
- snowflake/snowpark_connect/server.py +4 -1
- snowflake/snowpark_connect/utils/artifacts.py +4 -5
- snowflake/snowpark_connect/utils/context.py +41 -1
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +86 -2
- snowflake/snowpark_connect/utils/udf_utils.py +66 -103
- snowflake/snowpark_connect/utils/udtf_helper.py +17 -7
- snowflake/snowpark_connect/version.py +2 -3
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.27.0.dist-info}/METADATA +2 -2
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.27.0.dist-info}/RECORD +27 -27
- {snowpark_connect-0.26.0.data → snowpark_connect-0.27.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.26.0.data → snowpark_connect-0.27.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.26.0.data → snowpark_connect-0.27.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.27.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.27.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.27.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.27.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.26.0.dist-info → snowpark_connect-0.27.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/config.py
@@ -171,9 +171,6 @@ class GlobalConfig:
         "spark.app.name": lambda session, name: setattr(
             session, "query_tag", f"Spark-Connect-App-Name={name}"
         ),
-        "snowpark.connect.udf.packages": lambda session, packages: session.add_packages(
-            *packages.strip("[] ").split(",")
-        ),
         "snowpark.connect.udf.imports": lambda session, imports: parse_imports(
             session, imports
         ),
snowflake/snowpark_connect/expression/map_expression.py
@@ -6,6 +6,7 @@ import datetime
 from collections import defaultdict
 
 import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
+from pyspark.errors.exceptions.connect import AnalysisException
 
 import snowflake.snowpark.functions as snowpark_fn
 from snowflake import snowpark
@@ -34,6 +35,7 @@ from snowflake.snowpark_connect.type_mapping import (
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import (
     gen_sql_plan_id,
+    get_current_lambda_params,
     is_function_argument_being_resolved,
     is_lambda_being_resolved,
 )
@@ -271,6 +273,19 @@ def map_expression(
         case "unresolved_function":
             return map_func.map_unresolved_function(exp, column_mapping, typer)
         case "unresolved_named_lambda_variable":
+            # Validate that this lambda variable is in scope
+            var_name = exp.unresolved_named_lambda_variable.name_parts[0]
+            current_params = get_current_lambda_params()
+
+            if current_params and var_name not in current_params:
+                raise AnalysisException(
+                    f"Reference to non-lambda variable '{var_name}' within lambda function. "
+                    f"Lambda functions can only access their own parameters. "
+                    f"Available lambda parameters are: {current_params}. "
+                    f"If '{var_name}' is an outer scope lambda variable from a nested lambda, "
+                    f"that is an unsupported feature in Snowflake SQL."
+                )
+
             col = snowpark_fn.Column(
                 UnresolvedAttribute(exp.unresolved_named_lambda_variable.name_parts[0])
             )
snowflake/snowpark_connect/expression/map_unresolved_attribute.py
@@ -22,6 +22,7 @@ from snowflake.snowpark_connect.utils.context import (
     get_is_evaluating_sql,
     get_outer_dataframes,
     get_plan_id_map,
+    is_lambda_being_resolved,
     resolve_lca_alias,
 )
 from snowflake.snowpark_connect.utils.identifiers import (
@@ -356,6 +357,23 @@ def map_unresolved_attribute(
         return (unqualified_name, typed_col)
 
     if snowpark_name is None:
+        # Check if we're inside a lambda and trying to reference an outer column
+        # This catches direct column references (not lambda variables)
+        if is_lambda_being_resolved() and column_mapping:
+            # Check if this column exists in the outer scope (not lambda params)
+            outer_col_name = (
+                column_mapping.get_snowpark_column_name_from_spark_column_name(
+                    attr_name, allow_non_exists=True
+                )
+            )
+            if outer_col_name:
+                # This is an outer scope column being referenced inside a lambda
+                raise AnalysisException(
+                    f"Reference to non-lambda variable '{attr_name}' within lambda function. "
+                    f"Lambda functions can only access their own parameters. "
+                    f"If '{attr_name}' is a table column, it must be passed as an explicit parameter to the enclosing function."
+                )
+
         if has_plan_id:
            raise AnalysisException(
                 f'[RESOLVED_REFERENCE_COLUMN_NOT_FOUND] The column "{attr_name}" does not exist in the target dataframe.'
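Both checks enforce the same rule: a lambda passed to a higher-order function may only reference its own parameters. A minimal PySpark sketch of the pattern that is now rejected up front; the DataFrame name `df` and the columns `xs` and `offset` are hypothetical:

    from pyspark.sql import functions as F

    # df has an array column "xs" and a scalar column "offset" (illustrative names)
    ok = df.select(F.transform("xs", lambda x: x + 1))                  # uses only the lambda parameter
    bad = df.select(F.transform("xs", lambda x: x + F.col("offset")))   # references an outer column inside the lambda
    # The second form is rejected with the new AnalysisException
    # ("Reference to non-lambda variable 'offset' within lambda function...").
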
snowflake/snowpark_connect/expression/map_unresolved_function.py
@@ -711,6 +711,9 @@ def map_unresolved_function(
                 "-",
             )
             match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
+                case (DateType(), NullType()) | (NullType(), DateType()):
+                    result_type = LongType()
+                    result_exp = snowpark_fn.lit(None).cast(result_type)
                 case (NullType(), _) | (_, NullType()):
                     result_type = _get_add_sub_result_type(
                         snowpark_typed_args[0].typ,
@@ -724,7 +727,10 @@ def map_unresolved_function(
                     result_type = LongType()
                     result_exp = snowpark_args[0] - snowpark_args[1]
                 case (DateType(), StringType()):
-                    if
+                    if (
+                        hasattr(snowpark_typed_args[1].col._expr1, "pretty_name")
+                        and "INTERVAL" == snowpark_typed_args[1].col._expr1.pretty_name
+                    ):
                         result_type = TimestampType()
                         result_exp = snowpark_args[0] - snowpark_args[1]
                     else:
@@ -2421,7 +2427,7 @@ def map_unresolved_function(
                 "try_to_date",
                 snowpark_fn.cast(
                     truncated_date,
-                    TimestampType(
+                    TimestampType(),
                 ),
                 snowpark_args[1],
             )
@@ -8725,7 +8731,7 @@ def _resolve_function_with_lambda(
     artificial_df = Session.get_active_session().create_dataframe([], schema)
     set_schema_getter(artificial_df, lambda: schema)
 
-    with resolving_lambda_function():
+    with resolving_lambda_function(names):
         return map_expression(
             (
                 lambda_exp.lambda_function.function
@@ -9911,7 +9917,10 @@ def _get_spark_function_name(
             return f"({date_param_name1} {operation_op} {date_param_name2})"
         case (StringType(), DateType()):
             date_param_name2 = _get_literal_param_name(exp, 1, snowpark_arg_names[1])
-            if
+            if (
+                hasattr(col1.col._expr1, "pretty_name")
+                and "INTERVAL" == col1.col._expr1.pretty_name
+            ):
                 return f"{date_param_name2} {operation_op} {snowpark_arg_names[0]}"
             elif global_config.spark_sql_ansi_enabled and function_name == "+":
                 return f"{operation_func}(cast({date_param_name2} as date), cast({snowpark_arg_names[0]} as double))"
@@ -9919,9 +9928,9 @@ def _get_spark_function_name(
             return f"({snowpark_arg_names[0]} {operation_op} {date_param_name2})"
         case (DateType(), StringType()):
             date_param_name1 = _get_literal_param_name(exp, 0, snowpark_arg_names[0])
-            if (
-
-
+            if global_config.spark_sql_ansi_enabled or (
+                hasattr(col2.col._expr1, "pretty_name")
+                and "INTERVAL" == col2.col._expr1.pretty_name
             ):
                 return f"{date_param_name1} {operation_op} {snowpark_arg_names[1]}"
             else:
snowflake/snowpark_connect/relation/map_extension.py
@@ -23,6 +23,7 @@ from snowflake.snowpark_connect.relation.map_relation import map_relation
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import (
     get_sql_aggregate_function_count,
+    not_resolving_fun_args,
     push_outer_dataframe,
     set_current_grouping_columns,
 )
@@ -335,14 +336,15 @@ def map_aggregate(
     typer = ExpressionTyper(input_df)
 
     def _map_column(exp: expression_proto.Expression) -> tuple[str, TypedColumn]:
-
-
-
-        if len(new_names) != 1:
-            raise SnowparkConnectNotImplementedError(
-                "Multi-column aggregate expressions are not supported"
+        with not_resolving_fun_args():
+            new_names, snowpark_column = map_expression(
+                exp, input_container.column_map, typer
             )
-
+            if len(new_names) != 1:
+                raise SnowparkConnectNotImplementedError(
+                    "Multi-column aggregate expressions are not supported"
+                )
+            return new_names[0], snowpark_column
 
     raw_groupings: list[tuple[str, TypedColumn]] = []
     raw_aggregations: list[tuple[str, TypedColumn]] = []
snowflake/snowpark_connect/relation/map_map_partitions.py
@@ -8,28 +8,20 @@ from pyspark.sql.connect.proto.expressions_pb2 import CommonInlineUserDefinedFun
 import snowflake.snowpark.functions as snowpark_fn
 from snowflake import snowpark
 from snowflake.snowpark.types import StructType
-from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.constants import MAP_IN_ARROW_EVAL_TYPE
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.map_relation import map_relation
 from snowflake.snowpark_connect.type_mapping import proto_to_snowpark_type
-from snowflake.snowpark_connect.utils.
-from snowflake.snowpark_connect.utils.
-
-
-    require_creating_udf_in_sproc,
-    udf_check,
-)
-from snowflake.snowpark_connect.utils.udf_utils import (
-    ProcessCommonInlineUserDefinedFunction,
+from snowflake.snowpark_connect.utils.context import map_partitions_depth
+from snowflake.snowpark_connect.utils.pandas_udtf_utils import (
+    create_pandas_udtf,
+    create_pandas_udtf_with_arrow,
 )
+from snowflake.snowpark_connect.utils.udf_helper import udf_check
 from snowflake.snowpark_connect.utils.udtf_helper import (
     create_pandas_udtf_in_sproc,
     require_creating_udtf_in_sproc,
 )
-from snowflake.snowpark_connect.utils.udxf_import_utils import (
-    get_python_udxf_import_files,
-)
 
 
 def map_map_partitions(
@@ -41,18 +33,10 @@ def map_map_partitions(
     This is a simple wrapper around the `mapInPandas` method in Snowpark.
     """
     input_container = map_relation(rel.map_partitions.input)
-    input_df = input_container.dataframe
     udf_proto = rel.map_partitions.func
     udf_check(udf_proto)
 
-
-    if (
-        udf_proto.WhichOneof("function") == "python_udf"
-        and udf_proto.python_udf.eval_type == MAP_IN_ARROW_EVAL_TYPE
-    ):
-        return _map_in_arrow_with_pandas_udtf(input_container, udf_proto)
-    else:
-        return _map_partitions_with_udf(input_df, udf_proto)
+    return _map_with_pandas_udtf(input_container, udf_proto)
 
 
 def _call_udtf(
@@ -71,12 +55,17 @@ def _call_udtf(
 
     udtf_columns = input_df.columns + [snowpark_fn.col("_DUMMY_PARTITION_KEY")]
 
-
-        snowpark_fn.
-        partition_by=[snowpark_fn.col("_DUMMY_PARTITION_KEY")]
-    )
+    tfc = snowpark_fn.call_table_function(udtf_name, *udtf_columns).over(
+        partition_by=[snowpark_fn.col("_DUMMY_PARTITION_KEY")]
     )
 
+    # Use map_partitions_depth only when mapping non nested map_partitions
+    # When mapping chained functions additional column casting is necessary
+    if map_partitions_depth() == 1:
+        result_df_with_dummy = input_df_with_dummy.join_table_function(tfc)
+    else:
+        result_df_with_dummy = input_df_with_dummy.select(tfc)
+
     output_cols = [field.name for field in return_type.fields]
 
     # Only return the output columns.
@@ -90,7 +79,7 @@ def _call_udtf(
     )
 
 
-def
+def _map_with_pandas_udtf(
     input_df_container: DataFrameContainer,
     udf_proto: CommonInlineUserDefinedFunction,
 ) -> snowpark.DataFrame:
@@ -100,59 +89,29 @@ def _map_in_arrow_with_pandas_udtf(
     input_df = input_df_container.dataframe
     input_schema = input_df.schema
     spark_column_names = input_df_container.column_map.get_spark_columns()
-    return_type = proto_to_snowpark_type(
+    return_type = proto_to_snowpark_type(
+        udf_proto.python_udf.output_type
+        if udf_proto.WhichOneof("function") == "python_udf"
+        else udf_proto.scalar_scala_udf.outputType
+    )
+
+    # Check if this is mapInArrow (eval_type == 207)
+    map_in_arrow = (
+        udf_proto.WhichOneof("function") == "python_udf"
+        and udf_proto.python_udf.eval_type == MAP_IN_ARROW_EVAL_TYPE
+    )
     if require_creating_udtf_in_sproc(udf_proto):
         udtf_name = create_pandas_udtf_in_sproc(
             udf_proto, spark_column_names, input_schema, return_type
         )
     else:
-
-
-
-
+        if map_in_arrow:
+            map_udtf = create_pandas_udtf_with_arrow(
+                udf_proto, spark_column_names, input_schema, return_type
+            )
+        else:
+            map_udtf = create_pandas_udtf(
+                udf_proto, spark_column_names, input_schema, return_type
+            )
+        udtf_name = map_udtf.name
     return _call_udtf(udtf_name, input_df, return_type)
-
-
-def _map_partitions_with_udf(
-    input_df: snowpark.DataFrame, udf_proto
-) -> snowpark.DataFrame:
-    """
-    Original UDF-based approach for non-mapInArrow map_partitions cases.
-    """
-    input_column_names = input_df.columns
-    kwargs = {
-        "common_inline_user_defined_function": udf_proto,
-        "input_types": [f.datatype for f in input_df.schema.fields],
-        "called_from": "map_map_partitions",
-        "udf_name": "spark_map_partitions_udf",
-        "input_column_names": input_column_names,
-        "replace": True,
-        "return_type": proto_to_snowpark_type(
-            udf_proto.python_udf.output_type
-            if udf_proto.WhichOneof("function") == "python_udf"
-            else udf_proto.scalar_scala_udf.outputType
-        ),
-        "udf_packages": global_config.get("snowpark.connect.udf.packages", ""),
-        "udf_imports": get_python_udxf_import_files(input_df.session),
-    }
-
-    if require_creating_udf_in_sproc(udf_proto):
-        snowpark_udf = process_udf_in_sproc(**kwargs)
-    else:
-        udf_processor = ProcessCommonInlineUserDefinedFunction(**kwargs)
-        udf = udf_processor.create_udf()
-        snowpark_udf = SnowparkUDF(
-            name=udf.name,
-            input_types=udf._input_types,
-            return_type=udf._return_type,
-            original_return_type=None,
-        )
-    udf_column_name = "UDF_OUTPUT"
-    snowpark_columns = [snowpark_fn.col(name) for name in input_df.columns]
-    result = input_df.select(snowpark_fn.call_udf(snowpark_udf.name, *snowpark_columns))
-    return DataFrameContainer.create_with_column_mapping(
-        dataframe=result,
-        spark_column_names=[udf_column_name],
-        snowpark_column_names=[udf_column_name],
-        snowpark_column_types=[snowpark_udf.return_type],
-    )
snowflake/snowpark_connect/relation/map_relation.py
@@ -15,6 +15,7 @@ from snowflake.snowpark_connect.utils.cache import (
 from snowflake.snowpark_connect.utils.context import (
     get_plan_id_map,
     get_session_id,
+    push_map_partitions,
     push_operation_scope,
     set_is_aggregate_function,
     set_plan_id_map,
@@ -180,7 +181,8 @@ def map_relation(
             )
             return cached_df
         case "map_partitions":
-
+            with push_map_partitions():
+                result = map_map_partitions.map_map_partitions(rel)
         case "offset":
             result = map_row_ops.map_offset(rel)
         case "project":
snowflake/snowpark_connect/relation/map_show_string.py
@@ -12,6 +12,7 @@ from snowflake.snowpark._internal.analyzer import analyzer_utils
 from snowflake.snowpark.functions import col
 from snowflake.snowpark.types import DateType, StringType, StructField, StructType
 from snowflake.snowpark_connect.column_name_handler import set_schema_getter
+from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.map_relation import map_relation
 
@@ -33,6 +34,7 @@ def map_show_string(rel: relation_proto.Relation) -> pandas.DataFrame:
         truncate=rel.show_string.truncate,
         vertical=rel.show_string.vertical,
         _spark_column_names=input_df_container.column_map.get_spark_columns(),
+        _spark_session_tz=global_config.spark_sql_session_timeZone,
     )
     return pandas.DataFrame({"show_string": [show_string]})
 
snowflake/snowpark_connect/relation/map_sql.py
@@ -56,6 +56,7 @@ from snowflake.snowpark_connect.utils.context import (
     _accessing_temp_object,
     gen_sql_plan_id,
     get_session_id,
+    get_sql_plan,
     push_evaluating_sql_scope,
     push_sql_scope,
     set_sql_args,
@@ -542,6 +543,7 @@ def map_sql_to_pandas_df(
             rows = session.sql(f"DESCRIBE TABLE {name}").collect()
         case "DescribeNamespace":
             name = get_relation_identifier_name(logical_plan.namespace(), True)
+            name = change_default_to_public(name)
             rows = session.sql(f"DESCRIBE SCHEMA {name}").collect()
             if not rows:
                 rows = None
@@ -793,6 +795,7 @@ def map_sql_to_pandas_df(
         case "SetCatalogAndNamespace":
             # TODO: add catalog setting here
             name = get_relation_identifier_name(logical_plan.child(), True)
+            name = change_default_to_public(name)
             session.sql(f"USE SCHEMA {name}").collect()
         case "SetCommand":
             kv_result_tuple = logical_plan.kv().get()
@@ -801,6 +804,7 @@ def map_sql_to_pandas_df(
             set_config_param(get_session_id(), key, val, session)
         case "SetNamespaceCommand":
             name = _spark_to_snowflake(logical_plan.namespace())
+            name = change_default_to_public(name)
             session.sql(f"USE SCHEMA {name}").collect()
         case "SetNamespaceLocation" | "SetNamespaceProperties":
             raise SnowparkConnectNotImplementedError(
@@ -997,6 +1001,20 @@ def get_sql_passthrough() -> bool:
     return get_boolean_session_config_param("snowpark.connect.sql.passthrough")
 
 
+def change_default_to_public(name: str) -> str:
+    """
+    Change the namespace to PUBLIC when given name is DEFAULT
+    :param name: Given namespace
+    :return: if name is DEFAULT return PUBLIC otherwise name
+    """
+    if name.startswith('"'):
+        if name.upper() == '"DEFAULT"':
+            return name.replace("DEFAULT", "PUBLIC")
+    elif name.upper() == "DEFAULT":
+        return "PUBLIC"
+    return name
+
+
 def map_sql(
     rel: relation_proto.Relation,
 ) -> DataFrameContainer:
@@ -1008,7 +1026,6 @@ def map_sql(
     In passthough mode as True, SAS calls session.sql() and not calling Spark Parser.
     This is to mitigate any issue not covered by spark logical plan to protobuf conversion.
     """
-
     snowpark_connect_sql_passthrough = get_sql_passthrough()
 
     if not snowpark_connect_sql_passthrough:
@@ -1353,6 +1370,7 @@ def map_logical_plan_relation(
                 left_input=map_logical_plan_relation(children[0]),
                 right_input=map_logical_plan_relation(children[1]),
                 set_op_type=relation_proto.SetOperation.SET_OP_TYPE_UNION,
+                is_all=True,
                 by_name=rel.byName(),
                 allow_missing_columns=rel.allowMissingCol(),
             )
@@ -1701,7 +1719,50 @@ def map_logical_plan_relation(
             _window_specs.get()[key] = window_spec
             proto = map_logical_plan_relation(rel.child())
         case "Generate":
-
+            # Generate creates a nested Project relation (see lines 1785-1790) without
+            # setting its plan_id field. When this Project is later processed by map_project
+            # (map_column_ops.py), it uses rel.common.plan_id which defaults to 0 for unset
+            # protobuf fields. This means all columns from the Generate operation (both exploded
+            # columns and passthrough columns) will have plan_id=0 in their names.
+            #
+            # If Generate's child is a SubqueryAlias whose inner relation was processed
+            # with a non-zero plan_id, there will be a mismatch between:
+            # - The columns referenced in the Project (expecting plan_id from SubqueryAlias's child)
+            # - The actual column names created by Generate's Project (using plan_id=0)
+
+            # Therefore, when Generate has a SubqueryAlias child, we explicitly process the inner
+            # relation with plan_id=0 to match what Generate's Project will use. This only applies when
+            # the immediate child of Generate is a SubqueryAlias and preserves existing registrations (like CTEs),
+            # so it won't affect other patterns.
+
+            child_class = str(rel.child().getClass().getSimpleName())
+
+            if child_class == "SubqueryAlias":
+                alias = str(rel.child().alias())
+
+                # Check if this alias was already registered during initial SQL parsing
+                existing_plan_id = get_sql_plan(alias)
+
+                if existing_plan_id is not None:
+                    # Use the existing plan_id to maintain consistency with prior registration
+                    used_plan_id = existing_plan_id
+                else:
+                    # Use plan_id=0 to match what the nested Project will use (protobuf default)
+                    used_plan_id = 0
+                set_sql_plan_name(alias, used_plan_id)
+
+                # Process the inner child with the determined plan_id
+                inner_child = map_logical_plan_relation(
+                    rel.child().child(), plan_id=used_plan_id
+                )
+                input_relation = relation_proto.Relation(
+                    subquery_alias=relation_proto.SubqueryAlias(
+                        input=inner_child,
+                        alias=alias,
+                    )
+                )
+            else:
+                input_relation = map_logical_plan_relation(rel.child())
             generator_output_list = as_java_list(rel.generatorOutput())
             generator_output_list_expressions = [
                 map_logical_plan_expression(e) for e in generator_output_list
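The new change_default_to_public helper is applied wherever map_sql resolves a namespace (DESCRIBE NAMESPACE, SET CATALOG AND NAMESPACE, SET NAMESPACE), so Spark's default schema name is mapped onto Snowflake's PUBLIC schema before the USE/DESCRIBE statement runs. The expected mapping, shown as illustrative calls:

    change_default_to_public("DEFAULT")      # -> "PUBLIC"
    change_default_to_public('"DEFAULT"')    # -> '"PUBLIC"'  (quoted identifiers keep their quotes)
    change_default_to_public("SALES")        # -> "SALES"     (anything else passes through unchanged)
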
snowflake/snowpark_connect/relation/write/map_write.py
@@ -51,6 +51,7 @@ from snowflake.snowpark_connect.utils.identifiers import (
     split_fully_qualified_spark_name,
 )
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
+from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
     telemetry,
@@ -160,6 +161,29 @@ def map_write(request: proto_base.ExecutePlanRequest):
     compression_option = write_op.options.get("compression", "none")
 
     # Generate Spark-compatible filename or prefix
+    # we need a random prefix to support "append" mode
+    # otherwise copy into with overwrite=False will fail if the file already exists
+    overwrite = (
+        write_op.mode
+        == commands_proto.WriteOperation.SaveMode.SAVE_MODE_OVERWRITE
+    )
+
+    if overwrite:
+        try:
+            path_after_stage = (
+                write_path.split("/", 1)[1] if "/" in write_path else ""
+            )
+            if not path_after_stage or path_after_stage == "/":
+                logger.warning(
+                    f"Skipping REMOVE for root path {write_path} - too broad scope"
+                )
+            else:
+                remove_command = f"REMOVE {write_path}/"
+                session.sql(remove_command).collect()
+                logger.info(f"Successfully cleared directory: {write_path}")
+        except Exception as e:
+            logger.warning(f"Could not clear directory {write_path}: {e}")
+
     if should_write_to_single_file:
         # Single file: generate complete filename with extension
         spark_filename = generate_spark_compatible_filename(
@@ -178,10 +202,6 @@ def map_write(request: proto_base.ExecutePlanRequest):
             format_ext="",  # No extension for prefix
         )
         temp_file_prefix_on_stage = f"{write_path}/{spark_filename_prefix}"
-        overwrite = (
-            write_op.mode
-            == commands_proto.WriteOperation.SaveMode.SAVE_MODE_OVERWRITE
-        )
 
         default_compression = "NONE" if write_op.source != "parquet" else "snappy"
         compression = write_op.options.get(
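For file-based writes the save mode now acts on the target stage directory itself: overwrite first issues a Snowflake REMOVE for that directory (skipped with a warning when the path would be the stage root), while append keeps relying on the randomized filename prefix so the subsequent COPY INTO with overwrite=False never collides with existing files. A hedged sketch of the effect, where df and output_path are hypothetical and output_path resolves to a stage location such as @my_stage/reports/daily:

    # Client side (illustrative):
    df.write.mode("overwrite").parquet(output_path)
    # Server side, before uploading the new files, roughly:
    #   REMOVE @my_stage/reports/daily/
    # In "append" mode no REMOVE is issued; new files simply land under a fresh random prefix.
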
snowflake/snowpark_connect/server.py
@@ -531,7 +531,10 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
                 if name.endswith(".class"):
                     # name is <dir>/<package>/<class_name>
                     # we don't need the dir name, but require the package, so only remove dir
-
+                    if os.name != "nt":
+                        class_files[name.split("/", 1)[-1]] = filepath
+                    else:
+                        class_files[name.split("\\", 1)[-1]] = filepath
                     continue
                 session.file.put(
                     filepath,
snowflake/snowpark_connect/utils/artifacts.py
@@ -39,7 +39,7 @@ def write_temporary_artifact(
     if os.name != "nt":
         filepath = f"/tmp/sas-{session.session_id}/{name}"
     else:
-        filepath = f"{tempfile.gettempdir()}
+        filepath = f"{tempfile.gettempdir()}\\sas-{session.session_id}\\{name}"
     # The name comes to us as a path (e.g. cache/<name>), so we need to create
     # the parent directory if it doesn't exist to avoid errors during writing.
     pathlib.Path(filepath).parent.mkdir(parents=True, exist_ok=True)
@@ -55,11 +55,10 @@ def write_class_files_to_stage(
 ) -> None:
     if os.name != "nt":
         filepath = f"/tmp/sas-{session.session_id}"
+        jar_name = f'{filepath}/{hashlib.sha256(str(files).encode("utf-8")).hexdigest()[:10]}.jar'
     else:
-        filepath = f"{tempfile.gettempdir()}
-
-        f'{filepath}/{hashlib.sha256(str(files).encode("utf-8")).hexdigest()[:10]}.jar'
-    )
+        filepath = f"{tempfile.gettempdir()}\\sas-{session.session_id}"
+        jar_name = f'{filepath}\\{hashlib.sha256(str(files).encode("utf-8")).hexdigest()[:10]}.jar'
     with zipfile.ZipFile(jar_name, "w", zipfile.ZIP_DEFLATED) as jar:
         for name, path in files.items():
             jar.write(path, name)
snowflake/snowpark_connect/utils/context.py
@@ -30,6 +30,9 @@ _sql_aggregate_function_count = ContextVar[int](
     "_contains_aggregate_function", default=0
 )
 
+# Context for parsing map_partitions
+_map_partitions_stack = ContextVar[int]("_map_partitions_stack", default=0)
+
 # We have to generate our own plan IDs that are different from Spark's.
 # Spark plan IDs start at 0, so pick a "big enough" number to avoid overlaps.
 _STARTING_SQL_PLAN_ID = 0x80000000
@@ -49,6 +52,7 @@ _spark_client_type_regex = re.compile(r"spark/(?P<spark_version>\d+\.\d+\.\d+)")
 _current_operation = ContextVar[str]("_current_operation", default="default")
 _resolving_fun_args = ContextVar[bool]("_resolving_fun_args", default=False)
 _resolving_lambda_fun = ContextVar[bool]("_resolving_lambdas", default=False)
+_current_lambda_params = ContextVar[list[str]]("_current_lambda_params", default=[])
 
 _is_window_enabled = ContextVar[bool]("_is_window_enabled", default=False)
 _is_in_pivot = ContextVar[bool]("_is_in_pivot", default=False)
@@ -206,6 +210,16 @@ def push_evaluating_join_condition(join_type, left_keys, right_keys):
         _is_evaluating_join_condition.set(prev)
 
 
+@contextmanager
+def push_map_partitions():
+    _map_partitions_stack.set(_map_partitions_stack.get() + 1)
+    yield
+
+
+def map_partitions_depth() -> int:
+    return _map_partitions_stack.get()
+
+
 @contextmanager
 def push_sql_scope():
     """
@@ -238,16 +252,21 @@ def push_operation_scope(operation: str):
 
 
 @contextmanager
-def resolving_lambda_function():
+def resolving_lambda_function(param_names: list[str] = None):
     """
     Context manager that sets a flag indicating lambda function is being resolved.
+    Also tracks the lambda parameter names for validation.
     """
     prev = _resolving_lambda_fun.get()
+    prev_params = _current_lambda_params.get()
     try:
         _resolving_lambda_fun.set(True)
+        if param_names is not None:
+            _current_lambda_params.set(param_names)
         yield
     finally:
         _resolving_lambda_fun.set(prev)
+        _current_lambda_params.set(prev_params)
 
 
 def is_lambda_being_resolved() -> bool:
@@ -257,6 +276,13 @@ def is_lambda_being_resolved() -> bool:
     return _resolving_lambda_fun.get()
 
 
+def get_current_lambda_params() -> list[str]:
+    """
+    Returns the current lambda parameter names.
+    """
+    return _current_lambda_params.get()
+
+
 @contextmanager
 def resolving_fun_args():
     """
@@ -270,6 +296,19 @@ def resolving_fun_args():
     _resolving_fun_args.set(prev)
 
 
+@contextmanager
+def not_resolving_fun_args():
+    """
+    Context manager that sets a flag indicating function arguments are *not* being resolved.
+    """
+    prev = _resolving_fun_args.get()
+    try:
+        _resolving_fun_args.set(False)
+        yield
+    finally:
+        _resolving_fun_args.set(prev)
+
+
 def is_function_argument_being_resolved() -> bool:
     """
     Returns True if function arguments are being resolved.
@@ -350,6 +389,7 @@ def clear_context_data() -> None:
 
     _next_sql_plan_id.set(_STARTING_SQL_PLAN_ID)
     _sql_plan_name_map.set({})
+    _map_partitions_stack.set(0)
    _sql_aggregate_function_count.set(0)
     _sql_named_args.set({})
     _sql_pos_args.set({})
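The depth counter follows the same ContextVar pattern as the other flags in this module: map_relation bumps it once per map_partitions relation it enters, _call_udtf then branches on whether it is handling the outermost call, and the counter is only reset by clear_context_data rather than decremented on exit. A self-contained sketch of that pattern, independent of the package (the names push and depth are illustrative):

    from contextlib import contextmanager
    from contextvars import ContextVar

    _depth = ContextVar("_depth", default=0)

    @contextmanager
    def push():
        # mirrors push_map_partitions: increment on entry, no decrement on exit
        _depth.set(_depth.get() + 1)
        yield

    def depth() -> int:
        return _depth.get()

    with push():
        print(depth())      # 1 -> outermost map_partitions: join_table_function path
        with push():
            print(depth())  # 2 -> nested map_partitions: plain select of the UDTF output
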
snowflake/snowpark_connect/utils/pandas_udtf_utils.py
@@ -87,9 +87,93 @@ def get_map_in_arrow_udtf(
 def create_pandas_udtf(
     udtf_proto: CommonInlineUserDefinedFunction,
     spark_column_names: list[str],
-    input_schema: StructType
-    return_schema: StructType
+    input_schema: StructType,
+    return_schema: StructType,
+):
+    user_function, _ = cloudpickle.loads(udtf_proto.python_udf.command)
+    output_column_names = [field.name for field in return_schema.fields]
+    output_column_original_names = [
+        field.original_column_identifier for field in return_schema.fields
+    ]
+
+    class MapPandasUDTF:
+        def __init__(self) -> None:
+            self.user_function = user_function
+            self.output_column_names = output_column_names
+            self.spark_column_names = spark_column_names
+            self.output_column_original_names = output_column_original_names
+
+        def end_partition(self, df: pd.DataFrame):
+            if df.empty:
+                empty_df = pd.DataFrame(columns=self.output_column_names)
+                yield empty_df
+                return
+
+            df_without_dummy = df.drop(
+                columns=["_DUMMY_PARTITION_KEY"], errors="ignore"
+            )
+            df_without_dummy.columns = self.spark_column_names
+            result_iterator = self.user_function(
+                [pd.DataFrame([row]) for _, row in df_without_dummy.iterrows()]
+            )
+
+            if not isinstance(result_iterator, Iterator) and not hasattr(
+                result_iterator, "__iter__"
+            ):
+                raise RuntimeError(
+                    f"snowpark_connect::UDF_RETURN_TYPE Return type of the user-defined function should be "
+                    f"iterator of pandas.DataFrame, but is {type(result_iterator).__name__}"
+                )
+
+            output_df = pd.concat(result_iterator)
+            generated_output_column_names = list(output_df.columns)
+
+            missing_columns = []
+            for original_column in self.output_column_original_names:
+                if original_column not in generated_output_column_names:
+                    missing_columns.append(original_column)
+
+            if missing_columns:
+                unexpected_columns = [
+                    column
+                    for column in generated_output_column_names
+                    if column not in self.output_column_original_names
+                ]
+                raise RuntimeError(
+                    f"[RESULT_COLUMNS_MISMATCH_FOR_PANDAS_UDF] Column names of the returned pandas.DataFrame do not match specified schema. Missing: {', '.join(sorted(missing_columns))}. Unexpected: {', '.join(sorted(unexpected_columns))}"
+                    "."
+                )
+            reordered_df = output_df[self.output_column_original_names]
+            reordered_df.columns = self.output_column_names
+            yield reordered_df
+
+    return snowpark_fn.pandas_udtf(
+        MapPandasUDTF,
+        output_schema=PandasDataFrameType(
+            [field.datatype for field in return_schema.fields],
+            [field.name for field in return_schema.fields],
+        ),
+        input_types=[
+            PandasDataFrameType(
+                [field.datatype for field in input_schema.fields] + [IntegerType()]
+            )
+        ],
+        input_names=[field.name for field in input_schema.fields]
+        + ["_DUMMY_PARTITION_KEY"],
+        name="map_pandas_udtf",
+        replace=True,
+        packages=["pandas"],
+        is_permanent=False,
+    )
+
+
+def create_pandas_udtf_with_arrow(
+    udtf_proto: CommonInlineUserDefinedFunction,
+    spark_column_names: list[str],
+    input_schema: StructType,
+    return_schema: StructType,
 ) -> str | snowpark.udtf.UserDefinedTableFunction:
+
     user_function, _ = cloudpickle.loads(udtf_proto.python_udf.command)
     output_column_names = [field.name for field in return_schema.fields]
 
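On the client side this UDTF path backs DataFrame.mapInPandas and mapInArrow: the pickled user function receives an iterator of pandas DataFrames for a partition and must yield DataFrames whose columns match the declared schema, otherwise the UDTF raises the RESULT_COLUMNS_MISMATCH error shown above. A minimal conforming function, with hypothetical column names that would match a declared "id long, value double" schema:

    import pandas as pd

    def double_value(batches):
        # batches: an iterator of pandas.DataFrame, one or more per partition
        for batch in batches:
            yield pd.DataFrame({"id": batch["id"], "value": batch["value"] * 2})

    # spark_df.mapInPandas(double_value, schema="id long, value double")
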
snowflake/snowpark_connect/utils/udf_utils.py
@@ -6,7 +6,6 @@
 # So its dependencies are restricted to pandas, snowpark, and, pyspark
 import functools
 import inspect
-from typing import Iterator
 
 import pandas
 import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
@@ -198,111 +197,75 @@ class ProcessCommonInlineUserDefinedFunction:
 
         needs_struct_conversion = isinstance(self._original_return_type, StructType)
 
-
-
-
-
+        if not needs_struct_conversion:
+            return snowpark_fn.udf(
+                create_null_safe_wrapper(callable_func),
+                return_type=self._return_type,
+                input_types=self._input_types,
+                name=self._udf_name,
+                replace=self._replace,
+                packages=packages,
+                imports=imports,
+                immutable=self._is_deterministic,
+            )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            return result
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        is_pandas_udf, _, return_types, _ = extract_return_input_types(
+            callable_func,
+            self._original_return_type,
+            self._input_types,
+            TempObjectType.FUNCTION,
+        )
+        if is_pandas_udf and isinstance(return_types, PandasDataFrameType):
+            # Snowpark Python UDFs only support returning a Pandas Series.
+            # We change the return type to make the input callable compatible with Snowpark Python UDFs,
+            # and then in the wrapper function we convert the pandas DataFrame of the
+            # original callable to a Pandas Series.
+            original_callable.__annotations__["return"] = pandas.Series
+
+        field_names = [field.name for field in self._original_return_type.fields]
+
+        def struct_wrapper(*args):
+            result = callable_func(*args)
+            if isinstance(result, (tuple, list)):
+                # Convert tuple/list to dict using struct field names
+                if len(result) == len(field_names):
+                    return dict(zip(field_names, result))
+            return result
+
+        def pandas_struct_wrapper(*args):
+            # inspired by the following snowpark modin code to handle Pandas int/bool/null data in Snowflake VariantType
+            # https://github.com/snowflakedb/snowpark-python/blob/e095d5a54f3a697416c3f1df87d239def47a5495/src/snowflake/snowpark/modin/plugin/_internal/apply_utils.py#L1309-L1366
+            def convert_to_snowflake_compatible_type(value):
+                import numpy as np
+                from pandas.api.types import is_scalar
+
+                if is_scalar(value) and pandas.isna(value):
+                    return None
+
+                return (
+                    int(value)
+                    if np.issubdtype(type(value), np.integer)
+                    else (
+                        bool(value) if np.issubdtype(type(value), np.bool_) else value
                     )
-
-        is_pandas_udf, _, return_types, _ = extract_return_input_types(
-            callable_func,
-            self._original_return_type,
-            self._input_types,
-            TempObjectType.FUNCTION,
                 )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            return result
-
-        def pandas_struct_wrapper(*args):
-            # inspired by the following snowpark modin code to handle Pandas int/bool/null data in Snowflake VariantType
-            # https://github.com/snowflakedb/snowpark-python/blob/e095d5a54f3a697416c3f1df87d239def47a5495/src/snowflake/snowpark/modin/plugin/_internal/apply_utils.py#L1309-L1366
-            def convert_to_snowflake_compatible_type(value):
-                import numpy as np
-                from pandas.api.types import is_scalar
-
-                if is_scalar(value) and pandas.isna(value):
-                    return None
-
-                return (
-                    int(value)
-                    if np.issubdtype(type(value), np.integer)
-                    else (
-                        bool(value)
-                        if np.issubdtype(type(value), np.bool_)
-                        else value
-                    )
-                )
-
-            result = callable_func(*args)
-            assert (
-                len(result) == 1
-            ), "Expected result to be a single row DataFrame"
-            # df.applymap doesn't help here, the original type was preserved, hence we convert each value
-            row_data = [
-                convert_to_snowflake_compatible_type(value)
-                for value in result.iloc[0].tolist()
-            ]
-            result = pandas.Series([dict(zip(field_names, row_data))])
-            return result
-
-        if is_pandas_udf:
-            udf_function = pandas_struct_wrapper
-            if isinstance(return_types, PandasDataFrameType):
-                udf_function.__annotations__ = original_callable.__annotations__
-        else:
-            udf_function = create_null_safe_wrapper(struct_wrapper)
+
+            result = callable_func(*args)
+            assert len(result) == 1, "Expected result to be a single row DataFrame"
+            # df.applymap doesn't help here, the original type was preserved, hence we convert each value
+            row_data = [
+                convert_to_snowflake_compatible_type(value)
+                for value in result.iloc[0].tolist()
+            ]
+            result = pandas.Series([dict(zip(field_names, row_data))])
+            return result
+
+        if is_pandas_udf:
+            udf_function = pandas_struct_wrapper
+            if isinstance(return_types, PandasDataFrameType):
+                udf_function.__annotations__ = original_callable.__annotations__
+        else:
+            udf_function = create_null_safe_wrapper(struct_wrapper)
 
         return snowpark_fn.udf(
             udf_function,
snowflake/snowpark_connect/utils/udtf_helper.py
@@ -261,15 +261,25 @@ def create(session, b64_str, spark_column_names_json_str, input_schema_json_str,
     udf_proto = CommonInlineUserDefinedFunction()
     udf_proto.ParseFromString(restored_bytes)
 
+    if not input_schema_json_str:
+        raise ValueError("Input schema is required for pandas UDTF.")
+    if not return_schema_json_str:
+        raise ValueError("Return schema is required for pandas UDTF.")
+
     spark_column_names = json.loads(spark_column_names_json_str)
-    input_schema = StructType.fromJson(json.loads(input_schema_json_str))
-    return_schema = StructType.fromJson(json.loads(return_schema_json_str))
+    input_schema = StructType.fromJson(json.loads(input_schema_json_str))
+    return_schema = StructType.fromJson(json.loads(return_schema_json_str))
 
-
-
-
-
-
+    map_in_arrow = udf_proto.WhichOneof("function") == "python_udf" and udf_proto.python_udf.eval_type == 207
+    if map_in_arrow:
+        map_udtf = create_pandas_udtf_with_arrow(
+            udf_proto, spark_column_names, input_schema, return_schema
+        )
+    else:
+        map_udtf = create_pandas_udtf(
+            udf_proto, spark_column_names, input_schema, return_schema
+        )
+    return map_udtf.name
 $$;
 """
 session.sql(create_udtf_sproc_sql).collect()
{snowpark_connect-0.26.0.dist-info → snowpark_connect-0.27.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: snowpark-connect
-Version: 0.26.0
+Version: 0.27.0
 Summary: Snowpark Connect for Spark
 Author: Snowflake, Inc
 License: Apache License, Version 2.0
@@ -16,7 +16,7 @@ Requires-Dist: jpype1
 Requires-Dist: protobuf<5.0,>=4.25.3
 Requires-Dist: s3fs>=2025.3.0
 Requires-Dist: snowflake.core<2,>=1.0.5
-Requires-Dist: snowflake-snowpark-python[pandas]<1.
+Requires-Dist: snowflake-snowpark-python[pandas]<1.39.0,==1.38.0
 Requires-Dist: sqlglot>=26.3.8
 Requires-Dist: jaydebeapi
 Requires-Dist: aiobotocore~=2.23.0
{snowpark_connect-0.26.0.dist-info → snowpark_connect-0.27.0.dist-info}/RECORD
@@ -1,6 +1,6 @@
 snowflake/snowpark_connect/__init__.py,sha256=Sml4x1LTNnxZyw6nnDeJrZWUi3eUAR46Rsw6N-wHUSA,605
 snowflake/snowpark_connect/column_name_handler.py,sha256=_bTrguwdiF_cqFvoihFU7f42lNqtJ3Af-9fPvSLbf_A,31919
-snowflake/snowpark_connect/config.py,sha256=
+snowflake/snowpark_connect/config.py,sha256=3rdbc05u0IcivwsdUwV4Wg9u6Tf-AhPT2INIinhv1PY,24948
 snowflake/snowpark_connect/constants.py,sha256=FBDxNUxdqWxnf6d5-eanHlYdFFyQqCqvNyZG-uOiO6Q,598
 snowflake/snowpark_connect/control_server.py,sha256=mz3huYo84hgqUB6maZxu3LYyGq7vVL1nv7-7-MjuSYY,1956
 snowflake/snowpark_connect/dataframe_container.py,sha256=sCzMxzb6UIJz1mVpwzSYeVBiqtSzZv5n_pn7FVhbRv0,9114
@@ -9,12 +9,12 @@ snowflake/snowpark_connect/date_time_format_mapping.py,sha256=qtQ-JTGR1VRWM2oxM1
 snowflake/snowpark_connect/empty_dataframe.py,sha256=aKO6JkYnArWCpLGcn9BzvTspw2k_c6eAM0mQImAY0J0,428
 snowflake/snowpark_connect/hidden_column.py,sha256=k56-e97vqQmoOFtOtIPy6rSu8mZAz-toKHoIBXYW3CU,1322
 snowflake/snowpark_connect/resources_initializer.py,sha256=njE4_L82evaCwFYPGBUX6OEVaHqww42oGXXmzUylt0I,4424
-snowflake/snowpark_connect/server.py,sha256=
+snowflake/snowpark_connect/server.py,sha256=uYw0pbpWmgfRkrgI5Q5vFsu3Ng-5_H3SR86KkC13S8M,51108
 snowflake/snowpark_connect/start_server.py,sha256=udegO0rk2FeSnXsIcCIYQW3VRlGDjB_cU8lJ8xSzuM8,942
 snowflake/snowpark_connect/tcm.py,sha256=ftncZFbVO-uyWMhF1_HYKQykB7KobHEYoyQsYbQj1EM,203
 snowflake/snowpark_connect/type_mapping.py,sha256=6Hg-h1iVzVB_FnwG3Sjl-UGr2Itrs4LxVb2Pz5Ue-YA,41566
 snowflake/snowpark_connect/typed_column.py,sha256=Tavii8b4zMj5IWOvN6tlOVmC80W6eQct0pC_tF2nlhU,3867
-snowflake/snowpark_connect/version.py,sha256=
+snowflake/snowpark_connect/version.py,sha256=7e7JVBr1dPQ6_J46i0ARFv1O8q5XD9BxRNnNSMkrI0M,118
 snowflake/snowpark_connect/analyze_plan/__init__.py,sha256=xsIE96jDASko3F-MeNf4T4Gg5ufthS8CejeiJDfri0M,76
 snowflake/snowpark_connect/analyze_plan/map_tree_string.py,sha256=Q3ZD-Z7uForrF7W3mSAjwaiEcIv2KDXr5jPfVbromVg,1470
 snowflake/snowpark_connect/error/__init__.py,sha256=oQo6k4zztLmNF1c5IvJLcS99J6RWY9KBTN3RJ2pKimg,249
@@ -30,13 +30,13 @@ snowflake/snowpark_connect/expression/function_defaults.py,sha256=WEnzc_uzZZltcf
 snowflake/snowpark_connect/expression/hybrid_column_map.py,sha256=2jItaXnFnqcaOIiHKxscFLj7hi2zQE7F3xcher8Zo2U,7614
 snowflake/snowpark_connect/expression/literal.py,sha256=wk5NnLR85SLHe7GoAvSzMTW0B-3yhAiRz4F5SfU2REs,4459
 snowflake/snowpark_connect/expression/map_cast.py,sha256=uxNukg9op0i_sKLhY43fJJJ2-SKZ-8wkRMRBiPikJ0c,14799
-snowflake/snowpark_connect/expression/map_expression.py,sha256=
+snowflake/snowpark_connect/expression/map_expression.py,sha256=Jj8boZ3rSypGM26_g5yL4DJ1z5L_p70_-E5XNriLZN0,14949
 snowflake/snowpark_connect/expression/map_extension.py,sha256=Qm1Jn3JunswD_hO7ornvKpR6WcM3UKO_JpZE6ovH3VM,4939
 snowflake/snowpark_connect/expression/map_sql_expression.py,sha256=bHl7_YrKnWy0k1gMz02HYF8lTZKoamQdv5nFCHEfbzI,25824
 snowflake/snowpark_connect/expression/map_udf.py,sha256=_om_Kqxm_sm3RhwP4DZbGPUpArX90MoJQm-KwEWrTiQ,8034
-snowflake/snowpark_connect/expression/map_unresolved_attribute.py,sha256=
+snowflake/snowpark_connect/expression/map_unresolved_attribute.py,sha256=fnkCQF-Ts-Dw93AlwjzQdd9BAR5J4uldML8dZ3J_RMI,19576
 snowflake/snowpark_connect/expression/map_unresolved_extract_value.py,sha256=A-m-RczZW6xHMjgYR5RV_vzMTpNBRoH3Tk_A1V8z_pk,5382
-snowflake/snowpark_connect/expression/map_unresolved_function.py,sha256=
+snowflake/snowpark_connect/expression/map_unresolved_function.py,sha256=ZnltrpyzLNn4eu0Eg7axYZVsixt9wTtA0JrAVSh9Ojw,477459
 snowflake/snowpark_connect/expression/map_unresolved_star.py,sha256=XNJurGS5RLAaIAcalGGBbPJujqg8YZru_RsoB61m0WQ,8865
 snowflake/snowpark_connect/expression/map_update_fields.py,sha256=bMuZjCOE5wgrZv3ApsoiwEUv-lPgnMWUk-DFX11QuX0,6710
 snowflake/snowpark_connect/expression/map_window_function.py,sha256=apJxtWSnLogMG0F_En67JoGrGRGYQ0I8O0xPRk4TR_o,12152
@@ -401,15 +401,15 @@ snowflake/snowpark_connect/relation/map_aggregate.py,sha256=KElVYO62V3jkU8ldTCfT
 snowflake/snowpark_connect/relation/map_catalog.py,sha256=mcx6An4fqHAxy2OhOC66Xe_0aRtYPDGkBEgMK5CfaXU,5822
 snowflake/snowpark_connect/relation/map_column_ops.py,sha256=grleA0S6jyia6T5IGbPOEdz0zI0TkkV7jENzlfzoQlQ,49017
 snowflake/snowpark_connect/relation/map_crosstab.py,sha256=H_J8-IARK6zMEUFrOjKif1St6M20gvBAnP0EuArFHGg,2422
-snowflake/snowpark_connect/relation/map_extension.py,sha256=
+snowflake/snowpark_connect/relation/map_extension.py,sha256=aZUKnOhpVUrgt8-hRbP7JdVfA453vg9YaWaVKQVEsC4,22271
 snowflake/snowpark_connect/relation/map_join.py,sha256=oINd7AMa_O0bLPqsFFeKnZfE0anKa2RAR9QYh3qdbbY,25042
 snowflake/snowpark_connect/relation/map_local_relation.py,sha256=VBfwBT75GQUe01UOZptwcYsI7zZxaIMQyTOG6kmVuJ0,15219
-snowflake/snowpark_connect/relation/map_map_partitions.py,sha256=
-snowflake/snowpark_connect/relation/map_relation.py,sha256=
+snowflake/snowpark_connect/relation/map_map_partitions.py,sha256=JGiPnqmFFNdHAx54dHyOUwo9JZzyggDd1yLBmu-hasQ,4307
+snowflake/snowpark_connect/relation/map_relation.py,sha256=LaISHm5KrLFb4VSgTT2n-9VL4LB21gaN9xl38qzZmT0,12541
 snowflake/snowpark_connect/relation/map_row_ops.py,sha256=x1Jqircy4I0iiSljx3zbq0YxwGvGzPcXIY8_nhtl2PM,30528
 snowflake/snowpark_connect/relation/map_sample_by.py,sha256=8ALQbeUsB89sI3uiUFqG3w1A4TtOzOAL4umdKp6-c38,1530
-snowflake/snowpark_connect/relation/map_show_string.py,sha256=
-snowflake/snowpark_connect/relation/map_sql.py,sha256=
+snowflake/snowpark_connect/relation/map_show_string.py,sha256=GgKg0qp1pGqSC7TuFedTU4IYaIm-Fx23OJ1LfkcGOHw,3382
+snowflake/snowpark_connect/relation/map_sql.py,sha256=BcgZMiyxDzy5cH8in_-O7819b3qGGaDxYNl0C_6Eqec,86496
 snowflake/snowpark_connect/relation/map_stats.py,sha256=kqRYvix8RfluTKx1cAy9JhBUv6arYQHgfxpP1R4QwBM,13985
 snowflake/snowpark_connect/relation/map_subquery_alias.py,sha256=rHgE9XUzuWWkjNPtJz3Sxzz2aFo690paHKZh9frqPXk,1456
 snowflake/snowpark_connect/relation/map_udtf.py,sha256=cfDnbZ3TRJ6eb0EVResu6GL-OwQpaEabWLbrhgWnkRw,13316
@@ -433,21 +433,21 @@ snowflake/snowpark_connect/relation/read/reader_config.py,sha256=PMh1R5IjqqTwiAA
 snowflake/snowpark_connect/relation/read/utils.py,sha256=rIIM6d2WXHh7MLGyHNiRc9tS8b0dmyFQr7rHepIYJOU,4111
 snowflake/snowpark_connect/relation/write/__init__.py,sha256=xsIE96jDASko3F-MeNf4T4Gg5ufthS8CejeiJDfri0M,76
 snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py,sha256=GI9FyGZuQQNV-6Q8Ob-Xr0im3iAPdH-Jkyx8bjwbOuE,11931
-snowflake/snowpark_connect/relation/write/map_write.py,sha256=
+snowflake/snowpark_connect/relation/write/map_write.py,sha256=02e5JkrS6yr8KYGyGehsSfLZLCNDJHUg8tAVkCUAEgk,40376
 snowflake/snowpark_connect/relation/write/map_write_jdbc.py,sha256=1nOWRgjtZzfRwnSRGFP9V6mqBVlGhSBr2KHGHbe4JMU,1404
 snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar,sha256=tVyOp6tXxu9nm6SDufwQiGzfH3pnuh_7PowsMZxOolY,9773
 snowflake/snowpark_connect/utils/__init__.py,sha256=xsIE96jDASko3F-MeNf4T4Gg5ufthS8CejeiJDfri0M,76
-snowflake/snowpark_connect/utils/artifacts.py,sha256=
+snowflake/snowpark_connect/utils/artifacts.py,sha256=TkHZ2uNfZiphgtG91V1_c_h9yP9dP677BXUMymboCss,2498
 snowflake/snowpark_connect/utils/cache.py,sha256=bAyoNBW6Z1ui9BuppDywbQeG6fdju4L-owFHzySOTnk,3382
 snowflake/snowpark_connect/utils/concurrent.py,sha256=2UrQfJPWFrtpx131514t-9aXFDphbGI3N2pumLrltNk,3543
-snowflake/snowpark_connect/utils/context.py,sha256=
+snowflake/snowpark_connect/utils/context.py,sha256=W9j9eC-lbGp7tfXWhnvI88CVOcLGspYEhEgxGPYVbYE,13288
 snowflake/snowpark_connect/utils/describe_query_cache.py,sha256=2VcPgGP9bUpdIhnN2s_MOG8oGHKX0hS0rT7Y26MJb3A,9001
 snowflake/snowpark_connect/utils/env_utils.py,sha256=g__Uio5ae20Tm1evahIHdJUXQYPmjNUT_kYPSIy5JDU,1488
 snowflake/snowpark_connect/utils/external_udxf_cache.py,sha256=eSZHMbjTxnkg78IlbG5P1Vno6j5ag_FSI0c4Xi2UyPs,1044
 snowflake/snowpark_connect/utils/identifiers.py,sha256=YgtVIQGuUnnTiNdtRficdBwUICWaWkJltjOPnTnfrak,3881
 snowflake/snowpark_connect/utils/interrupt.py,sha256=_awhdrzF1KQO-EQThneEcfMg3Zxed4p3HtMpkcAb6ek,2790
 snowflake/snowpark_connect/utils/io_utils.py,sha256=noBlKpJvzEA6iwLjFgBVGlCLlzjZ16-w0fsGimTyBAQ,1039
-snowflake/snowpark_connect/utils/pandas_udtf_utils.py,sha256=
+snowflake/snowpark_connect/utils/pandas_udtf_utils.py,sha256=3WA_9IVRZL8fnwIHo048LTg62-bPGfCDUZzYd-zjzQQ,7564
 snowflake/snowpark_connect/utils/profiling.py,sha256=ttdHzQUYarvTqJASLNuKFIax7ejO39Tv1mHKl0QjRkg,1519
 snowflake/snowpark_connect/utils/scala_udf_utils.py,sha256=YDHmhqsjgBeuKdv1TaInEUUOvrnbrU3_toQY_MfEygk,22895
 snowflake/snowpark_connect/utils/session.py,sha256=dxt5VoqsQqBZd3OaEpoPfA0U-uNFWDP-HWtrcoey7_w,7770
@@ -455,8 +455,8 @@ snowflake/snowpark_connect/utils/snowpark_connect_logging.py,sha256=23bvbALGqixJ
 snowflake/snowpark_connect/utils/telemetry.py,sha256=BRDX1eRXcww2SAiBlVr2fQVR2I6gH50qSCFkOop_x4M,20736
 snowflake/snowpark_connect/utils/udf_cache.py,sha256=8K7kASEhvpnp-l1hjzovjyboUzKctDq7PiGXRcNv6Lg,12125
 snowflake/snowpark_connect/utils/udf_helper.py,sha256=g-TxTs4ARyJWYgADrosfQQG-ykBBQdm1g5opslxJq_E,12563
-snowflake/snowpark_connect/utils/udf_utils.py,sha256=
-snowflake/snowpark_connect/utils/udtf_helper.py,sha256=
+snowflake/snowpark_connect/utils/udf_utils.py,sha256=pxERcJKum2M5jHxPqsl1NgHFAqZV4RxoEnSLxJV5ups,12009
+snowflake/snowpark_connect/utils/udtf_helper.py,sha256=9B_1iOckfFXQfVv-UHerIJ32fDd4qucKaHGqxtBEi4w,14969
 snowflake/snowpark_connect/utils/udtf_utils.py,sha256=wHO5V0BXRQOLqAYos1vGt8bbdH7jBvD2gwspWywjTtY,33110
 snowflake/snowpark_connect/utils/udxf_import_utils.py,sha256=pPtcaGsyh0tUdy0aAvNqTj04jqPKlEcGmvaZDP9O8Gc,536
 snowflake/snowpark_connect/utils/xxhash64.py,sha256=ysJRxhBPf25LeNhM1RK_H36MWl6q6C6vBRHa-jIna_A,7477
@@ -465,17 +465,17 @@ snowflake/snowpark_decoder/dp_session.py,sha256=HIr3TfKgYl5zqaGR5xpFU9ZVkcaTB9I8
 snowflake/snowpark_decoder/spark_decoder.py,sha256=EQiCvBiqB736Bc17o3gnYGtcYVcyfGxroO5e1kbe1Co,2885
 snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.py,sha256=2eSDqeyfMvmIJ6_rF663DrEe1dg_anrP4OpVJNTJHaQ,2598
 snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.pyi,sha256=aIH23k52bXdw5vO3RtM5UcOjDPaWsJFx1SRUSk3qOK8,6142
-snowpark_connect-0.
-snowpark_connect-0.
-snowpark_connect-0.
-snowpark_connect-0.
-snowpark_connect-0.
-snowpark_connect-0.
+snowpark_connect-0.27.0.data/scripts/snowpark-connect,sha256=yZ94KqbWACxnwV8mpg8NjILvvRNjnF8B3cs3ZFNuIM4,1546
+snowpark_connect-0.27.0.data/scripts/snowpark-session,sha256=NMAHSonTo-nmOZSkQNlszUC0jLJ8QWEDUsUmMe2UAOw,190
+snowpark_connect-0.27.0.data/scripts/snowpark-submit,sha256=Zd98H9W_d0dIqMSkQLdHyW5G3myxF0t4c3vNBt2nD6A,12056
+snowpark_connect-0.27.0.dist-info/licenses/LICENSE-binary,sha256=fmBlX39HwTlBUyiKEznaLZGuxQy-7ndLLG_rTXjF02Y,22916
+snowpark_connect-0.27.0.dist-info/licenses/LICENSE.txt,sha256=Ff9cPv4xu0z7bnMTHzo4vDncOShsy33w4oJMA2xjn6c,11365
+snowpark_connect-0.27.0.dist-info/licenses/NOTICE-binary,sha256=elMF8brgGNJwOz8YdorzBF6-U8ZhR8F-77FfGkZng7U,57843
 spark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 spark/connect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 spark/connect/envelope_pb2.py,sha256=7Gc6OUA3vaCuTCIKamb_Iiw7W9jPTcWNEv1im20eWHM,2726
 spark/connect/envelope_pb2.pyi,sha256=VXTJSPpcxzB_dWqVdvPY4KkPhJfh0WmkX7SNHWoLhx0,3358
-snowpark_connect-0.
-snowpark_connect-0.
-snowpark_connect-0.
-snowpark_connect-0.
+snowpark_connect-0.27.0.dist-info/METADATA,sha256=w6hmGXXIiI6a74FuRcX7WiRBQP2ZlTVA_oqU1Lp8P0k,1594
+snowpark_connect-0.27.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+snowpark_connect-0.27.0.dist-info/top_level.txt,sha256=ExnWqVpoTHRG99fu_AxXZVOz8c-De7nNu0yFCGylM8I,16
+snowpark_connect-0.27.0.dist-info/RECORD,,

{snowpark_connect-0.26.0.data → snowpark_connect-0.27.0.data}/scripts/snowpark-connect RENAMED
File without changes
{snowpark_connect-0.26.0.data → snowpark_connect-0.27.0.data}/scripts/snowpark-session RENAMED
File without changes
{snowpark_connect-0.26.0.data → snowpark_connect-0.27.0.data}/scripts/snowpark-submit RENAMED
File without changes
{snowpark_connect-0.26.0.dist-info → snowpark_connect-0.27.0.dist-info}/WHEEL RENAMED
File without changes
{snowpark_connect-0.26.0.dist-info → snowpark_connect-0.27.0.dist-info}/licenses/LICENSE-binary RENAMED
File without changes
{snowpark_connect-0.26.0.dist-info → snowpark_connect-0.27.0.dist-info}/licenses/LICENSE.txt RENAMED
File without changes
{snowpark_connect-0.26.0.dist-info → snowpark_connect-0.27.0.dist-info}/licenses/NOTICE-binary RENAMED
File without changes
{snowpark_connect-0.26.0.dist-info → snowpark_connect-0.27.0.dist-info}/top_level.txt RENAMED
File without changes