snowpark-connect 0.20.2__py3-none-any.whl → 0.22.1__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
Potentially problematic release: this version of snowpark-connect has been flagged as potentially problematic.
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +3 -2
- snowflake/snowpark_connect/column_name_handler.py +6 -65
- snowflake/snowpark_connect/config.py +47 -17
- snowflake/snowpark_connect/dataframe_container.py +242 -0
- snowflake/snowpark_connect/error/error_utils.py +25 -0
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +13 -23
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +9 -5
- snowflake/snowpark_connect/expression/map_extension.py +2 -1
- snowflake/snowpark_connect/expression/map_udf.py +4 -4
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +8 -7
- snowflake/snowpark_connect/expression/map_unresolved_function.py +481 -170
- snowflake/snowpark_connect/expression/map_unresolved_star.py +8 -8
- snowflake/snowpark_connect/expression/map_update_fields.py +1 -1
- snowflake/snowpark_connect/expression/typer.py +6 -6
- snowflake/snowpark_connect/proto/control_pb2.py +17 -16
- snowflake/snowpark_connect/proto/control_pb2.pyi +17 -17
- snowflake/snowpark_connect/proto/control_pb2_grpc.py +12 -63
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +15 -14
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +19 -14
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +27 -26
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +74 -68
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +5 -5
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +25 -17
- snowflake/snowpark_connect/relation/map_aggregate.py +170 -61
- snowflake/snowpark_connect/relation/map_catalog.py +2 -2
- snowflake/snowpark_connect/relation/map_column_ops.py +227 -145
- snowflake/snowpark_connect/relation/map_crosstab.py +25 -6
- snowflake/snowpark_connect/relation/map_extension.py +81 -56
- snowflake/snowpark_connect/relation/map_join.py +72 -63
- snowflake/snowpark_connect/relation/map_local_relation.py +35 -20
- snowflake/snowpark_connect/relation/map_map_partitions.py +24 -17
- snowflake/snowpark_connect/relation/map_relation.py +22 -16
- snowflake/snowpark_connect/relation/map_row_ops.py +232 -146
- snowflake/snowpark_connect/relation/map_sample_by.py +15 -8
- snowflake/snowpark_connect/relation/map_show_string.py +42 -5
- snowflake/snowpark_connect/relation/map_sql.py +141 -237
- snowflake/snowpark_connect/relation/map_stats.py +88 -39
- snowflake/snowpark_connect/relation/map_subquery_alias.py +13 -14
- snowflake/snowpark_connect/relation/map_udtf.py +10 -13
- snowflake/snowpark_connect/relation/read/map_read.py +8 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +7 -7
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +7 -7
- snowflake/snowpark_connect/relation/read/map_read_json.py +19 -8
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -7
- snowflake/snowpark_connect/relation/read/map_read_socket.py +7 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +25 -16
- snowflake/snowpark_connect/relation/read/map_read_text.py +7 -7
- snowflake/snowpark_connect/relation/read/reader_config.py +1 -0
- snowflake/snowpark_connect/relation/utils.py +11 -5
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +15 -12
- snowflake/snowpark_connect/relation/write/map_write.py +259 -56
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +3 -2
- snowflake/snowpark_connect/server.py +43 -4
- snowflake/snowpark_connect/type_mapping.py +6 -23
- snowflake/snowpark_connect/utils/cache.py +27 -22
- snowflake/snowpark_connect/utils/context.py +33 -17
- snowflake/snowpark_connect/utils/describe_query_cache.py +2 -9
- snowflake/snowpark_connect/utils/{attribute_handling.py → identifiers.py} +47 -0
- snowflake/snowpark_connect/utils/session.py +41 -38
- snowflake/snowpark_connect/utils/telemetry.py +214 -63
- snowflake/snowpark_connect/utils/udxf_import_utils.py +14 -0
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/__init__.py +0 -0
- snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.py +36 -0
- snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.pyi +156 -0
- snowflake/snowpark_decoder/dp_session.py +111 -0
- snowflake/snowpark_decoder/spark_decoder.py +76 -0
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/METADATA +6 -4
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/RECORD +83 -69
- snowpark_connect-0.22.1.dist-info/licenses/LICENSE-binary +568 -0
- snowpark_connect-0.22.1.dist-info/licenses/NOTICE-binary +1533 -0
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/top_level.txt +1 -0
- spark/__init__.py +0 -0
- spark/connect/__init__.py +0 -0
- spark/connect/envelope_pb2.py +31 -0
- spark/connect/envelope_pb2.pyi +46 -0
- snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
- {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/licenses/LICENSE.txt +0 -0
snowflake/snowpark_connect/relation/read/map_read_table.py

@@ -11,12 +11,13 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     unquote_if_quoted,
 )
 from snowflake.snowpark.exceptions import SnowparkSQLException
-from snowflake.snowpark_connect.column_name_handler import
-from snowflake.snowpark_connect.config import
+from snowflake.snowpark_connect.column_name_handler import ALREADY_QUOTED
+from snowflake.snowpark_connect.config import auto_uppercase_non_column_identifiers
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.read.utils import (
     rename_columns_as_snowflake_standard,
 )
-from snowflake.snowpark_connect.utils.
+from snowflake.snowpark_connect.utils.identifiers import (
     split_fully_qualified_spark_name,
 )
 from snowflake.snowpark_connect.utils.session import _get_current_snowpark_session
@@ -27,9 +28,16 @@ from snowflake.snowpark_connect.utils.telemetry import (
 
 def post_process_df(
     df: snowpark.DataFrame, plan_id: int, source_table_name: str = None
-) ->
+) -> DataFrameContainer:
+    def _lower_or_unquote(string):
+        return (
+            string[1:-1].replace('""', '"')
+            if ALREADY_QUOTED.match(string)
+            else string.lower()
+        )
+
     try:
-        true_names = list(map(lambda x:
+        true_names = list(map(lambda x: _lower_or_unquote(x), df.columns))
         renamed_df, snowpark_column_names = rename_columns_as_snowflake_standard(
             df, plan_id
         )
@@ -44,11 +52,11 @@ def post_process_df(
     if current_schema:
         name_parts = [unquote_if_quoted(current_schema)] + name_parts
 
-    return
-        renamed_df,
-        true_names,
-        snowpark_column_names,
-        [f.datatype for f in df.schema.fields],
+    return DataFrameContainer.create_with_column_mapping(
+        dataframe=renamed_df,
+        spark_column_names=true_names,
+        snowpark_column_names=snowpark_column_names,
+        snowpark_column_types=[f.datatype for f in df.schema.fields],
         column_qualifiers=[name_parts] * len(true_names)
         if source_table_name
         else None,
@@ -66,19 +74,18 @@ def post_process_df(
 
 def get_table_from_name(
     table_name: str, session: snowpark.Session, plan_id: int
-) ->
+) -> DataFrameContainer:
+    """Get table from name returning a container."""
     snowpark_name = ".".join(
         quote_name_without_upper_casing(part)
        for part in split_fully_qualified_spark_name(table_name)
    )
 
-    if
+    if auto_uppercase_non_column_identifiers():
         snowpark_name = snowpark_name.upper()
 
     df = session.read.table(snowpark_name)
-
-    post_processed_df._table_name = table_name
-    return post_processed_df
+    return post_process_df(df, plan_id, table_name)
 
 
 def get_table_from_query(
@@ -88,7 +95,9 @@ def get_table_from_query(
     return post_process_df(df, plan_id)
 
 
-def map_read_table(
+def map_read_table(
+    rel: relation_proto.Relation,
+) -> DataFrameContainer:
     """
     Read a table into a Snowpark DataFrame.
     """
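For context, a minimal standalone sketch of the column-name normalization the new `_lower_or_unquote` helper performs in `post_process_df`: already-quoted Snowflake identifiers keep their case with the outer quotes stripped, everything else is folded to lower case. The `ALREADY_QUOTED` regex below is a hypothetical stand-in; the real pattern lives in `column_name_handler` and is not part of this diff.

import re

# Hypothetical stand-in for column_name_handler.ALREADY_QUOTED: a fully
# quoted Snowflake identifier (the real pattern is not shown in this diff).
ALREADY_QUOTED = re.compile(r'^"(?:[^"]|"")*"$')


def _lower_or_unquote(name: str) -> str:
    # Quoted identifiers keep their case: strip the outer quotes and collapse
    # escaped double quotes. Unquoted identifiers fold to lower case, matching
    # Spark's default column naming.
    if ALREADY_QUOTED.match(name):
        return name[1:-1].replace('""', '"')
    return name.lower()


print(_lower_or_unquote('"MyCol"'))  # MyCol
print(_lower_or_unquote('"a""b"'))   # a"b
print(_lower_or_unquote('AMOUNT'))   # amount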
snowflake/snowpark_connect/relation/read/map_read_text.py

@@ -7,7 +7,7 @@ import typing
 import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 
 from snowflake import snowpark
-from snowflake.snowpark_connect.
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.read.utils import (
     get_spark_column_names_from_snowpark_columns,
     rename_columns_as_snowflake_standard,
@@ -71,7 +71,7 @@ def map_read_text(
     schema: snowpark.types.StructType | None,
     session: snowpark.Session,
     paths: list[str],
-) ->
+) -> DataFrameContainer:
     """
     Read a TEXT file into a Snowpark DataFrame.
     """
@@ -98,9 +98,9 @@ def map_read_text(
     renamed_df, snowpark_column_names = rename_columns_as_snowflake_standard(
         df, rel.common.plan_id
     )
-    return
-        renamed_df,
-        spark_column_names,
-        snowpark_column_names,
-        [f.datatype for f in df.schema.fields],
+    return DataFrameContainer.create_with_column_mapping(
+        dataframe=renamed_df,
+        spark_column_names=spark_column_names,
+        snowpark_column_names=snowpark_column_names,
+        snowpark_column_types=[f.datatype for f in df.schema.fields],
     )
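The read paths above stop returning a positional tuple and instead return a `DataFrameContainer` built via `create_with_column_mapping`. A rough, illustrative stand-in for that shape is sketched below; the real class is the new 242-line `dataframe_container.py` and the real column map lives in `column_name_handler.py`, neither of which is shown in this diff.

from dataclasses import dataclass

# Illustrative stand-ins only: field names and behavior beyond what the hunks
# above rely on are assumptions, not the package's actual implementation.
@dataclass
class _ColumnMap:
    spark_columns: list
    snowpark_columns: list

    def get_spark_columns(self):
        return self.spark_columns

    def get_snowpark_columns(self):
        return self.snowpark_columns


@dataclass
class DataFrameContainer:
    dataframe: object  # snowpark.DataFrame in the real code
    column_map: _ColumnMap

    @classmethod
    def create_with_column_mapping(
        cls,
        dataframe,
        spark_column_names,
        snowpark_column_names,
        snowpark_column_types,
        column_qualifiers=None,
    ):
        # The real implementation also tracks types and qualifiers; this
        # sketch keeps only what callers in these hunks read back.
        return cls(dataframe, _ColumnMap(spark_column_names, snowpark_column_names))


# Callers now unpack the container instead of a positional tuple:
# df = container.dataframe
# spark_names = container.column_map.get_spark_columns()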
snowflake/snowpark_connect/relation/utils.py

@@ -32,6 +32,7 @@ from snowflake.snowpark_connect.column_name_handler import (
     ColumnNameMap,
     make_column_names_snowpark_compatible,
 )
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.map_relation import map_relation
 
 TYPE_MAP_FOR_TO_SCHEMA = {
@@ -91,7 +92,9 @@ TYPE_MAP_FOR_TO_SCHEMA = {
 
 
 def get_df_with_partition_row_number(
-
+    container: DataFrameContainer,
+    plan_id: int | None,
+    row_number_column_name: str,
 ) -> snowpark.DataFrame:
     """
     Add a row number for each row in each partition for the given df, where
@@ -106,21 +109,24 @@ def get_df_with_partition_row_number(
     | c| 4|            | c| 4| 0 |
     +---+---+            +---+---+------------+
     """
+    df = container.dataframe
+    column_map = container.column_map
+
     row_number_snowpark_column_name = make_column_names_snowpark_compatible(
-        [row_number_column_name], plan_id, len(
+        [row_number_column_name], plan_id, len(column_map.get_spark_columns())
     )[0]
     row_number_snowpark_column = (
         snowpark_fn.row_number()
         .over(
             snowpark.window.Window.partition_by(
-                *
+                *column_map.get_snowpark_columns()
             ).order_by(snowpark_fn.lit(1))
         )
         .alias(row_number_snowpark_column_name)
     )
 
     df_with_partition_number = df.select(
-        *
+        *column_map.get_snowpark_columns(), row_number_snowpark_column
     )
     return df_with_partition_number
 
@@ -197,7 +203,7 @@ def get_semantic_string(rel: relation_proto.Relation) -> str:
     """
     queries = [
         query
-        for query_list in map_relation(rel)._plan.execution_queries.values()
+        for query_list in map_relation(rel).dataframe._plan.execution_queries.values()
         for query in query_list
     ]
 
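The reworked `get_df_with_partition_row_number` now takes the container, reads `container.dataframe` and `container.column_map`, and appends a `row_number()` window column partitioned by every column and ordered by a constant. A minimal sketch of that windowing, assuming an existing Snowpark DataFrame and its column names (the real helper derives both from the container):

from snowflake.snowpark import Window
from snowflake.snowpark import functions as snowpark_fn


def add_partition_row_number(df, partition_cols, row_number_col):
    # Number rows within each group of identical rows; ordering by a constant
    # makes the numbering arbitrary but stable enough for de-duplication-style
    # bookkeeping, mirroring the expression built in the hunk above.
    row_number = (
        snowpark_fn.row_number()
        .over(Window.partition_by(*partition_cols).order_by(snowpark_fn.lit(1)))
        .alias(row_number_col)
    )
    # Keep the original columns and append the per-partition row number.
    return df.select(*partition_cols, row_number)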
snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py

@@ -10,6 +10,7 @@ import snowflake.snowpark
 from snowflake import snowpark
 from snowflake.snowpark import DataFrameWriter
 from snowflake.snowpark.dataframe import DataFrame
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.read import jdbc_read_dbapi
 from snowflake.snowpark_connect.relation.read.jdbc_read_dbapi import JdbcDialect
 from snowflake.snowpark_connect.relation.read.utils import Connection
@@ -36,7 +37,7 @@ class JdbcDataFrameWriter(DataFrameWriter):
 
     def jdbc_write_dbapi(
         self,
-
+        container: DataFrameContainer,
         create_connection: Callable[[dict[str, str]], "Connection"],
         close_connection: Callable[[Connection], None],
         table: str,
@@ -46,6 +47,7 @@ class JdbcDataFrameWriter(DataFrameWriter):
         Write a Snowpark Dataframe data into table of a JDBC datasource.
         """
 
+        input_df = container.dataframe
         conn = create_connection(self.jdbc_options)
         try:
             url = self.jdbc_options.get("url", None)
@@ -53,32 +55,32 @@ class JdbcDataFrameWriter(DataFrameWriter):
 
             table_exist = self._does_table_exist(conn, table)
             insert_query = self._generate_insert_query(
-
+                container,
                 table,
             )
 
             match write_mode:
                 case "append":
                     if not table_exist:
-                        self._create_table(conn, table,
+                        self._create_table(conn, table, container, jdbc_dialect)
                 case "errorifexists":
                     if table_exist:
                         raise ValueError(
                             "table is already exist and write mode is ERROR_IF_EXISTS"
                         )
                     else:
-                        self._create_table(conn, table,
+                        self._create_table(conn, table, container, jdbc_dialect)
                 case "overwrite":
                     if table_exist:
                         self._drop_table(conn, table)
-                    self._create_table(conn, table,
+                    self._create_table(conn, table, container, jdbc_dialect)
                 case "ignore":
                     if table_exist:
                         # With Ignore write mode, if table already exists, the save operation is expected
                         # to not save the contents of the DataFrame and to not change the existing data.
                         return
                     else:
-                        self._create_table(conn, table,
+                        self._create_table(conn, table, container, jdbc_dialect)
                 case _:
                     raise ValueError(f"Invalid write mode value{write_mode}")
 
@@ -92,14 +94,14 @@ class JdbcDataFrameWriter(DataFrameWriter):
         finally:
             close_connection(conn)
 
-    def _generate_insert_query(self,
+    def _generate_insert_query(self, container: DataFrameContainer, table: str) -> str:
         """
         Generates INSERT statement with placeholders.
-        :param
+        :param container: Snowpark dataframe container
         :param table: JDBC datasource table name
         :return: INSERT SQL statement
         """
-        true_names =
+        true_names = container.column_map.get_spark_columns()
         # quote each column name to match PySpark's case-sensitive column naming behavior.
         quoted_column_names = ",".join([f'"{col}"' for col in true_names])
         place_holders = ",".join(["?"] * len(true_names))
@@ -145,7 +147,7 @@ class JdbcDataFrameWriter(DataFrameWriter):
         self,
         conn: Connection,
         table: str,
-
+        container,
         jdbc_dialect: JdbcDialect,
     ) -> None:
         """
@@ -154,14 +156,15 @@ class JdbcDataFrameWriter(DataFrameWriter):
 
         :param conn: A Python DBAPI connection over JDBC connection
         :param table: DBC datasource table name
-        :param
+        :param container: Snowpark dataframe container
         :param jdbc_dialect: JDBC specific dialect
        :return: None
         """
+        input_df = container.dataframe
         columns_str = ""
         fields = input_df.schema.fields
         total_columns = len(fields)
-        column_map =
+        column_map = container.column_map
 
         column_index = 0
         for field in fields:
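`_generate_insert_query` now reads the Spark-visible column names from `container.column_map.get_spark_columns()`, quotes each one, and emits one `?` placeholder per column for DBAPI parameter binding. A small sketch of that shape; only the quoting and placeholder logic is visible in the hunk above, so the exact INSERT layout here is an assumption.

def generate_insert_query(table, spark_column_names):
    # Quote each column name to preserve PySpark's case-sensitive naming, and
    # emit one "?" placeholder per column for DBAPI parameter binding.
    quoted_columns = ",".join(f'"{col}"' for col in spark_column_names)
    placeholders = ",".join(["?"] * len(spark_column_names))
    # Assumed SQL template; the real statement built by the writer may differ.
    return f"INSERT INTO {table} ({quoted_columns}) VALUES ({placeholders})"


print(generate_insert_query("orders", ["id", "Amount"]))
# INSERT INTO orders ("id","Amount") VALUES (?,?)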