snowpark-connect 0.20.2__py3-none-any.whl → 0.22.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic. Click here for more details.
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +3 -2
- snowflake/snowpark_connect/column_name_handler.py +6 -65
- snowflake/snowpark_connect/config.py +47 -17
- snowflake/snowpark_connect/dataframe_container.py +242 -0
- snowflake/snowpark_connect/error/error_utils.py +25 -0
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +13 -23
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +9 -5
- snowflake/snowpark_connect/expression/map_extension.py +2 -1
- snowflake/snowpark_connect/expression/map_udf.py +4 -4
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +8 -7
- snowflake/snowpark_connect/expression/map_unresolved_function.py +481 -170
- snowflake/snowpark_connect/expression/map_unresolved_star.py +8 -8
- snowflake/snowpark_connect/expression/map_update_fields.py +1 -1
- snowflake/snowpark_connect/expression/typer.py +6 -6
- snowflake/snowpark_connect/proto/control_pb2.py +17 -16
- snowflake/snowpark_connect/proto/control_pb2.pyi +17 -17
- snowflake/snowpark_connect/proto/control_pb2_grpc.py +12 -63
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +15 -14
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +19 -14
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +27 -26
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +74 -68
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +5 -5
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +25 -17
- snowflake/snowpark_connect/relation/map_aggregate.py +170 -61
- snowflake/snowpark_connect/relation/map_catalog.py +2 -2
- snowflake/snowpark_connect/relation/map_column_ops.py +227 -145
- snowflake/snowpark_connect/relation/map_crosstab.py +25 -6
- snowflake/snowpark_connect/relation/map_extension.py +81 -56
- snowflake/snowpark_connect/relation/map_join.py +72 -63
- snowflake/snowpark_connect/relation/map_local_relation.py +35 -20
- snowflake/snowpark_connect/relation/map_map_partitions.py +24 -17
- snowflake/snowpark_connect/relation/map_relation.py +22 -16
- snowflake/snowpark_connect/relation/map_row_ops.py +232 -146
- snowflake/snowpark_connect/relation/map_sample_by.py +15 -8
- snowflake/snowpark_connect/relation/map_show_string.py +42 -5
- snowflake/snowpark_connect/relation/map_sql.py +141 -237
- snowflake/snowpark_connect/relation/map_stats.py +88 -39
- snowflake/snowpark_connect/relation/map_subquery_alias.py +13 -14
- snowflake/snowpark_connect/relation/map_udtf.py +10 -13
- snowflake/snowpark_connect/relation/read/map_read.py +8 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +7 -7
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +7 -7
- snowflake/snowpark_connect/relation/read/map_read_json.py +19 -8
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -7
- snowflake/snowpark_connect/relation/read/map_read_socket.py +7 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +25 -16
- snowflake/snowpark_connect/relation/read/map_read_text.py +7 -7
- snowflake/snowpark_connect/relation/read/reader_config.py +1 -0
- snowflake/snowpark_connect/relation/utils.py +11 -5
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +15 -12
- snowflake/snowpark_connect/relation/write/map_write.py +259 -56
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +3 -2
- snowflake/snowpark_connect/server.py +43 -4
- snowflake/snowpark_connect/type_mapping.py +6 -23
- snowflake/snowpark_connect/utils/cache.py +27 -22
- snowflake/snowpark_connect/utils/context.py +33 -17
- snowflake/snowpark_connect/utils/describe_query_cache.py +2 -9
- snowflake/snowpark_connect/utils/{attribute_handling.py → identifiers.py} +47 -0
- snowflake/snowpark_connect/utils/session.py +41 -38
- snowflake/snowpark_connect/utils/telemetry.py +214 -63
- snowflake/snowpark_connect/utils/udxf_import_utils.py +14 -0
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/__init__.py +0 -0
- snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.py +36 -0
- snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.pyi +156 -0
- snowflake/snowpark_decoder/dp_session.py +111 -0
- snowflake/snowpark_decoder/spark_decoder.py +76 -0
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/METADATA +6 -4
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/RECORD +83 -69
- snowpark_connect-0.22.1.dist-info/licenses/LICENSE-binary +568 -0
- snowpark_connect-0.22.1.dist-info/licenses/NOTICE-binary +1533 -0
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/top_level.txt +1 -0
- spark/__init__.py +0 -0
- spark/connect/__init__.py +0 -0
- spark/connect/envelope_pb2.py +31 -0
- spark/connect/envelope_pb2.pyi +46 -0
- snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
- {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -1,21 +1,13 @@
|
|
|
1
1
|
#
|
|
2
2
|
# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
|
|
3
3
|
#
|
|
4
|
-
|
|
5
4
|
from collections import Counter
|
|
6
5
|
|
|
7
6
|
import pyspark.sql.connect.proto.base_pb2 as proto_base
|
|
8
7
|
import pyspark.sql.connect.proto.relations_pb2 as relation_proto
|
|
9
8
|
|
|
10
|
-
from snowflake.snowpark._internal.analyzer.analyzer_utils import (
|
|
11
|
-
quote_name_without_upper_casing,
|
|
12
|
-
)
|
|
13
9
|
from snowflake.snowpark_connect.column_name_handler import ColumnNames
|
|
14
|
-
from snowflake.snowpark_connect.config import (
|
|
15
|
-
auto_uppercase_ddl,
|
|
16
|
-
global_config,
|
|
17
|
-
sessions_config,
|
|
18
|
-
)
|
|
10
|
+
from snowflake.snowpark_connect.config import global_config, sessions_config
|
|
19
11
|
from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
|
|
20
12
|
from snowflake.snowpark_connect.execute_plan.utils import pandas_to_arrow_batches_bytes
|
|
21
13
|
from snowflake.snowpark_connect.expression import map_udf
|
|
@@ -24,24 +16,23 @@ from snowflake.snowpark_connect.relation.map_relation import map_relation
|
|
|
24
16
|
from snowflake.snowpark_connect.relation.map_sql import map_sql_to_pandas_df
|
|
25
17
|
from snowflake.snowpark_connect.relation.write.map_write import map_write, map_write_v2
|
|
26
18
|
from snowflake.snowpark_connect.utils.context import get_session_id
|
|
19
|
+
from snowflake.snowpark_connect.utils.identifiers import (
|
|
20
|
+
spark_to_sf_single_id,
|
|
21
|
+
spark_to_sf_single_id_with_unquoting,
|
|
22
|
+
)
|
|
27
23
|
from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
|
|
28
24
|
from snowflake.snowpark_connect.utils.telemetry import (
|
|
29
25
|
SnowparkConnectNotImplementedError,
|
|
30
26
|
)
|
|
31
27
|
|
|
32
28
|
|
|
33
|
-
def _spark_to_snowflake_single_id(name: str) -> str:
|
|
34
|
-
name = quote_name_without_upper_casing(name)
|
|
35
|
-
return name.upper() if auto_uppercase_ddl() else name
|
|
36
|
-
|
|
37
|
-
|
|
38
29
|
def _create_column_rename_map(
|
|
39
30
|
columns: list[ColumnNames], rename_duplicated: bool
|
|
40
31
|
) -> dict:
|
|
41
32
|
if rename_duplicated is False:
|
|
42
33
|
# if we are not renaming duplicated columns, we can just return the original names
|
|
43
34
|
return {
|
|
44
|
-
col.snowpark_name:
|
|
35
|
+
col.snowpark_name: spark_to_sf_single_id(col.spark_name, is_column=True)
|
|
45
36
|
for col in columns
|
|
46
37
|
}
|
|
47
38
|
|
|
@@ -64,7 +55,7 @@ def _create_column_rename_map(
|
|
|
64
55
|
|
|
65
56
|
if len(renamed_cols) == 0:
|
|
66
57
|
return {
|
|
67
|
-
col.snowpark_name:
|
|
58
|
+
col.snowpark_name: spark_to_sf_single_id(col.spark_name, is_column=True)
|
|
68
59
|
for col in not_renamed_cols
|
|
69
60
|
}
|
|
70
61
|
|
|
@@ -95,12 +86,9 @@ def map_execution_command(
|
|
|
95
86
|
match request.plan.command.WhichOneof("command_type"):
|
|
96
87
|
case "create_dataframe_view":
|
|
97
88
|
req = request.plan.command.create_dataframe_view
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
input_df, "_column_map"
|
|
102
|
-
), "input_df does not have the _column_map attribute"
|
|
103
|
-
column_map = input_df._column_map
|
|
89
|
+
input_df_container = map_relation(req.input)
|
|
90
|
+
input_df = input_df_container.dataframe
|
|
91
|
+
column_map = input_df_container.column_map
|
|
104
92
|
|
|
105
93
|
session_config = sessions_config[get_session_id()]
|
|
106
94
|
duplicate_column_names_handling_mode = session_config[
|
|
@@ -133,7 +121,9 @@ def map_execution_command(
|
|
|
133
121
|
view_name = [global_config.spark_sql_globalTempDatabase, req.name]
|
|
134
122
|
else:
|
|
135
123
|
view_name = [req.name]
|
|
136
|
-
view_name = [
|
|
124
|
+
view_name = [
|
|
125
|
+
spark_to_sf_single_id_with_unquoting(part) for part in view_name
|
|
126
|
+
]
|
|
137
127
|
|
|
138
128
|
if req.replace:
|
|
139
129
|
input_df.create_or_replace_temp_view(view_name)
|
|
@@ -20,6 +20,7 @@ from snowflake.snowpark._internal.utils import (
|
|
|
20
20
|
create_or_update_statement_params_with_query_tag,
|
|
21
21
|
)
|
|
22
22
|
from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
|
|
23
|
+
from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
|
|
23
24
|
from snowflake.snowpark_connect.execute_plan.utils import (
|
|
24
25
|
arrow_table_to_arrow_bytes,
|
|
25
26
|
pandas_to_arrow_batches_bytes,
|
|
@@ -89,13 +90,16 @@ def to_arrow_batch_iter(result_df: snowpark.DataFrame) -> Iterator[Table]:
|
|
|
89
90
|
def map_execution_root(
|
|
90
91
|
request: proto_base.ExecutePlanRequest,
|
|
91
92
|
) -> Iterator[proto_base.ExecutePlanResponse | QueryResult]:
|
|
92
|
-
|
|
93
|
+
result: DataFrameContainer | pandas.DataFrame = map_relation(request.plan.root)
|
|
94
|
+
if isinstance(result, pandas.DataFrame):
|
|
95
|
+
result_df = result
|
|
96
|
+
else:
|
|
97
|
+
result_df = result.dataframe
|
|
98
|
+
|
|
93
99
|
if isinstance(result_df, snowpark.DataFrame):
|
|
94
100
|
snowpark_schema = result_df.schema
|
|
95
|
-
schema = snowpark_to_proto_type(
|
|
96
|
-
|
|
97
|
-
)
|
|
98
|
-
spark_columns = result_df._column_map.get_spark_columns()
|
|
101
|
+
schema = snowpark_to_proto_type(snowpark_schema, result.column_map, result_df)
|
|
102
|
+
spark_columns = result.column_map.get_spark_columns()
|
|
99
103
|
if tcm.TCM_MODE:
|
|
100
104
|
# TCM result handling:
|
|
101
105
|
# - small result (only one batch): just return the executePlanResponse
|
|
@@ -58,7 +58,8 @@ def map_extension(
|
|
|
58
58
|
from snowflake.snowpark_connect.relation.map_relation import map_relation
|
|
59
59
|
|
|
60
60
|
with push_evaluating_sql_scope():
|
|
61
|
-
|
|
61
|
+
df_container = map_relation(extension.subquery_expression.input)
|
|
62
|
+
df = df_container.dataframe
|
|
62
63
|
|
|
63
64
|
queries = df.queries["queries"]
|
|
64
65
|
if len(queries) != 1:
|
|
@@ -13,10 +13,7 @@ from snowflake.snowpark_connect.config import global_config
|
|
|
13
13
|
from snowflake.snowpark_connect.expression.typer import ExpressionTyper
|
|
14
14
|
from snowflake.snowpark_connect.type_mapping import proto_to_snowpark_type
|
|
15
15
|
from snowflake.snowpark_connect.typed_column import TypedColumn
|
|
16
|
-
from snowflake.snowpark_connect.utils.session import (
|
|
17
|
-
get_or_create_snowpark_session,
|
|
18
|
-
get_python_udxf_import_files,
|
|
19
|
-
)
|
|
16
|
+
from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
|
|
20
17
|
from snowflake.snowpark_connect.utils.udf_helper import (
|
|
21
18
|
SnowparkUDF,
|
|
22
19
|
gen_input_types,
|
|
@@ -28,6 +25,9 @@ from snowflake.snowpark_connect.utils.udf_helper import (
|
|
|
28
25
|
from snowflake.snowpark_connect.utils.udf_utils import (
|
|
29
26
|
ProcessCommonInlineUserDefinedFunction,
|
|
30
27
|
)
|
|
28
|
+
from snowflake.snowpark_connect.utils.udxf_import_utils import (
|
|
29
|
+
get_python_udxf_import_files,
|
|
30
|
+
)
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
def process_udf_return_type(
|
|
@@ -17,15 +17,15 @@ from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
|
|
|
17
17
|
from snowflake.snowpark_connect.config import global_config
|
|
18
18
|
from snowflake.snowpark_connect.expression.typer import ExpressionTyper
|
|
19
19
|
from snowflake.snowpark_connect.typed_column import TypedColumn
|
|
20
|
-
from snowflake.snowpark_connect.utils.attribute_handling import (
|
|
21
|
-
split_fully_qualified_spark_name,
|
|
22
|
-
)
|
|
23
20
|
from snowflake.snowpark_connect.utils.context import (
|
|
24
21
|
get_is_evaluating_sql,
|
|
25
22
|
get_outer_dataframes,
|
|
26
23
|
get_plan_id_map,
|
|
27
24
|
resolve_lca_alias,
|
|
28
25
|
)
|
|
26
|
+
from snowflake.snowpark_connect.utils.identifiers import (
|
|
27
|
+
split_fully_qualified_spark_name,
|
|
28
|
+
)
|
|
29
29
|
|
|
30
30
|
SPARK_QUOTED = re.compile("^(`.*`)$", re.DOTALL)
|
|
31
31
|
|
|
@@ -46,11 +46,12 @@ def map_unresolved_attribute(
|
|
|
46
46
|
|
|
47
47
|
if has_plan_id:
|
|
48
48
|
plan_id = exp.unresolved_attribute.plan_id
|
|
49
|
-
|
|
49
|
+
target_df_container = get_plan_id_map(plan_id)
|
|
50
|
+
target_df = target_df_container.dataframe
|
|
50
51
|
assert (
|
|
51
52
|
target_df is not None
|
|
52
53
|
), f"resolving an attribute of a unresolved dataframe {plan_id}"
|
|
53
|
-
column_mapping =
|
|
54
|
+
column_mapping = target_df_container.column_map
|
|
54
55
|
typer = ExpressionTyper(target_df)
|
|
55
56
|
|
|
56
57
|
def get_col(snowpark_name):
|
|
@@ -146,8 +147,8 @@ def map_unresolved_attribute(
|
|
|
146
147
|
name_parts[0], allow_non_exists=True
|
|
147
148
|
)
|
|
148
149
|
if snowpark_name is None:
|
|
149
|
-
for
|
|
150
|
-
snowpark_name =
|
|
150
|
+
for outer_df_container in get_outer_dataframes():
|
|
151
|
+
snowpark_name = outer_df_container.column_map.get_snowpark_column_name_from_spark_column_name(
|
|
151
152
|
name_parts[0], allow_non_exists=True
|
|
152
153
|
)
|
|
153
154
|
if snowpark_name is not None:
|