snowpark-connect 0.27.0__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
- snowflake/snowpark_connect/client/__init__.py +15 -0
- snowflake/snowpark_connect/client/error_utils.py +30 -0
- snowflake/snowpark_connect/client/exceptions.py +36 -0
- snowflake/snowpark_connect/client/query_results.py +90 -0
- snowflake/snowpark_connect/client/server.py +680 -0
- snowflake/snowpark_connect/client/utils/__init__.py +10 -0
- snowflake/snowpark_connect/client/utils/session.py +85 -0
- snowflake/snowpark_connect/column_name_handler.py +404 -243
- snowflake/snowpark_connect/column_qualifier.py +43 -0
- snowflake/snowpark_connect/config.py +237 -23
- snowflake/snowpark_connect/constants.py +2 -0
- snowflake/snowpark_connect/dataframe_container.py +102 -8
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +172 -23
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
- snowflake/snowpark_connect/expression/literal.py +37 -13
- snowflake/snowpark_connect/expression/map_cast.py +123 -5
- snowflake/snowpark_connect/expression/map_expression.py +80 -27
- snowflake/snowpark_connect/expression/map_extension.py +322 -12
- snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
- snowflake/snowpark_connect/expression/map_udf.py +85 -20
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
- snowflake/snowpark_connect/expression/map_unresolved_function.py +2748 -746
- snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
- snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +110 -10
- snowflake/snowpark_connect/relation/map_aggregate.py +196 -255
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
- snowflake/snowpark_connect/relation/map_extension.py +263 -29
- snowflake/snowpark_connect/relation/map_join.py +683 -442
- snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
- snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
- snowflake/snowpark_connect/relation/map_relation.py +48 -19
- snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
- snowflake/snowpark_connect/relation/map_show_string.py +13 -6
- snowflake/snowpark_connect/relation/map_sql.py +1233 -222
- snowflake/snowpark_connect/relation/map_stats.py +48 -9
- snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
- snowflake/snowpark_connect/relation/read/map_read.py +134 -43
- snowflake/snowpark_connect/relation/read/map_read_csv.py +255 -45
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +320 -85
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +142 -27
- snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +82 -5
- snowflake/snowpark_connect/relation/read/map_read_text.py +18 -3
- snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
- snowflake/snowpark_connect/relation/read/reader_config.py +36 -3
- snowflake/snowpark_connect/relation/read/utils.py +50 -5
- snowflake/snowpark_connect/relation/stage_locator.py +91 -55
- snowflake/snowpark_connect/relation/utils.py +128 -5
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +929 -319
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
- snowflake/snowpark_connect/resources_initializer.py +110 -48
- snowflake/snowpark_connect/server.py +546 -456
- snowflake/snowpark_connect/server_common/__init__.py +500 -0
- snowflake/snowpark_connect/snowflake_session.py +65 -0
- snowflake/snowpark_connect/start_server.py +53 -5
- snowflake/snowpark_connect/type_mapping.py +349 -27
- snowflake/snowpark_connect/typed_column.py +9 -7
- snowflake/snowpark_connect/utils/artifacts.py +9 -8
- snowflake/snowpark_connect/utils/cache.py +49 -27
- snowflake/snowpark_connect/utils/concurrent.py +36 -1
- snowflake/snowpark_connect/utils/context.py +187 -37
- snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
- snowflake/snowpark_connect/utils/identifiers.py +137 -3
- snowflake/snowpark_connect/utils/io_utils.py +57 -1
- snowflake/snowpark_connect/utils/java_stored_procedure.py +125 -0
- snowflake/snowpark_connect/utils/java_udaf_utils.py +303 -0
- snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
- snowflake/snowpark_connect/utils/jvm_udf_utils.py +248 -0
- snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +101 -332
- snowflake/snowpark_connect/utils/sequence.py +21 -0
- snowflake/snowpark_connect/utils/session.py +64 -28
- snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
- snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
- snowflake/snowpark_connect/utils/telemetry.py +163 -22
- snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
- snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
- snowflake/snowpark_connect/utils/udf_cache.py +117 -41
- snowflake/snowpark_connect/utils/udf_helper.py +39 -37
- snowflake/snowpark_connect/utils/udf_utils.py +133 -14
- snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
- snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +6 -2
- snowflake/snowpark_decoder/spark_decoder.py +12 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-submit +2 -2
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/METADATA +14 -7
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/RECORD +129 -167
- snowflake/snowpark_connect/hidden_column.py +0 -39
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/expression/map_unresolved_star.py
CHANGED
```diff
@@ -9,8 +9,11 @@ import snowflake.snowpark.functions as snowpark_fn
 from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     quote_name_without_upper_casing,
 )
-from snowflake.snowpark.types import StructType
+from snowflake.snowpark.types import StringType, StructType, VariantType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import get_outer_dataframes
@@ -26,7 +29,7 @@ def check_struct_and_get_field_datatype(field_name, schema):
         else:
             return None
     else:
-        None
+        return None


 def map_unresolved_star(
@@ -34,7 +37,6 @@ def map_unresolved_star(
     column_mapping: ColumnNameMap,
     typer: ExpressionTyper,
 ) -> tuple[list[str], TypedColumn]:
-
     if exp.unresolved_star.HasField("unparsed_target"):
         unparsed_target = exp.unresolved_star.unparsed_target
         name_parts = split_fully_qualified_spark_name(unparsed_target)
@@ -54,16 +56,17 @@ def map_unresolved_star(
         return spark_names, typed_column

     # scenario where it is expanding * to mulitple columns
-    spark_names = []
-    snowpark_names = []
-    qualifiers = []
+    spark_names: list[str] = []
+    snowpark_names: list[str] = []
+    qualifiers: list[set[ColumnQualifier]] = []

+    target_qualifier = ColumnQualifier(tuple(name_parts[:-1]))
     (
         spark_names,
         snowpark_names,
         qualifiers,
     ) = column_mapping.get_spark_and_snowpark_columns_with_qualifier_for_qualifier(
-
+        target_qualifier
     )

     if len(spark_names) == 0:
@@ -74,7 +77,7 @@ def map_unresolved_star(
             snowpark_names,
             qualifiers,
         ) = column_mapping_for_outer_df.get_spark_and_snowpark_columns_with_qualifier_for_qualifier(
-
+            target_qualifier
         )
         if len(spark_names) > 0:
             break
@@ -103,7 +106,7 @@ def map_unresolved_star(
             prefix_candidate_str = f"{prefix_candidate_str}.{name_parts[i]}"
         prefix_candidate = (
             column_mapping.get_snowpark_column_name_from_spark_column_name(
-                prefix_candidate_str, allow_non_exists=True
+                prefix_candidate_str, allow_non_exists=True
             )
         )
         if prefix_candidate is None:
@@ -140,29 +143,86 @@ def map_unresolved_star(
             final_sql_expr,
             lambda final_sql_expr=final_sql_expr: typer.type(final_sql_expr),
         )
-        typed_column.set_multi_col_qualifiers([
+        typed_column.set_multi_col_qualifiers([set() for _ in spark_names])
         return spark_names, typed_column
     else:
-
-
-        )
+        snowpark_columns = column_mapping.get_snowpark_columns()
+        result_exp = snowpark_fn.sql_expr(", ".join(snowpark_columns))
         spark_names = column_mapping.get_spark_columns()
         typed_column = TypedColumn(
-            result_exp,
+            result_exp,
+            lambda: [f.datatype for f in typer.df.schema if f.name in snowpark_columns],
         )
         typed_column.set_multi_col_qualifiers(column_mapping.get_qualifiers())
         return spark_names, typed_column

-
+    exception = AnalysisException(
         f"[UNRESOLVED_STAR] The unresolved star expression {exp} is not supported."
     )
+    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+    raise exception
+
+
+def map_unresolved_star_as_single_column(
+    exp: expressions_proto.Expression,
+    column_mapping: ColumnNameMap,
+    typer: ExpressionTyper,
+) -> tuple[str, TypedColumn]:
+    """
+    Similar to map_unresolved_star but returns a single tuple containing
+    a combined spark column name and a TypedColumn representing a struct instead of many columns. .
+    If star resolves to single column, it works the same.
+    """
+    if exp.unresolved_star.HasField("unparsed_target"):
+        names, tc = map_unresolved_star(exp, column_mapping, typer)
+        assert len(names) == 1, "Expected single column"
+        return names[0], tc
+    else:
+        snowpark_columns = column_mapping.get_snowpark_columns()
+        spark_names = column_mapping.get_spark_columns()
+
+        if len(spark_names) == 1:
+            names, tc = map_unresolved_star(exp, column_mapping, typer)
+            return names[0], tc
+
+        fields_cols = [
+            (
+                spark_name,
+                TypedColumn(
+                    snowpark_fn.sql_expr(snowpark_name),
+                    lambda snowpark_name=snowpark_name: typer.type(
+                        snowpark_fn.sql_expr(snowpark_name)
+                    ),
+                ),
+            )
+            for spark_name, snowpark_name in zip(spark_names, snowpark_columns)
+        ]
+
+        result_exp = snowpark_fn.object_construct_keep_null(
+            *[
+                name_with_col
+                for name, typed_col in fields_cols
+                for name_with_col in (
+                    snowpark_fn.lit(name),
+                    typed_col.column(to_semi_structure=True),
+                )
+            ]
+        ).cast(VariantType())
+
+        combined_spark_name = "value"
+        typed_column = TypedColumn(
+            result_exp,
+            lambda: [VariantType()],
+        )
+        typed_column.set_multi_col_qualifiers([set() for _ in spark_names])
+        return combined_spark_name, typed_column


 def map_unresolved_star_struct(
     exp: expressions_proto.Expression,
     column_mapping: ColumnNameMap,
     typer: ExpressionTyper,
-) -> tuple[list[str], list]:
+) -> tuple[list[str], list[TypedColumn]]:
     unparsed_target = exp.unresolved_star.unparsed_target
     name_parts = split_fully_qualified_spark_name(unparsed_target)

@@ -170,7 +230,7 @@ def map_unresolved_star_struct(
         len(name_parts) > 1 and name_parts[-1] == "*"
     ), f"Unable to parse unparsed_target {unparsed_target}"

-    expanded_args = []
+    expanded_args: list[TypedColumn] = []
     for i in range(0, len(name_parts) - 1):
         if i == 0:
             prefix_candidate_str = name_parts[i]
@@ -181,7 +241,7 @@ def map_unresolved_star_struct(
             prefix_candidate_str = f"{prefix_candidate_str}.{name_parts[i]}"
         prefix_candidate = (
             column_mapping.get_snowpark_column_name_from_spark_column_name(
-                prefix_candidate_str, allow_non_exists=True
+                prefix_candidate_str, allow_non_exists=True
             )
         )
         if prefix_candidate is None:
@@ -207,13 +267,17 @@ def map_unresolved_star_struct(
         if prefix_candidate is None:
             continue

-        spark_names = candidate_leaf_field.names
+        spark_names: list[str] = candidate_leaf_field.names
         prefix_candidate = ":".join(fields)

-        for
-
-
+        for struct_field in candidate_leaf_field.fields:
+            lit_col = snowpark_fn.lit(struct_field.name)
+            expanded_args.append(TypedColumn(lit_col, lambda: [StringType()]))
+            field_snowpark_name = f"{prefix_candidate}:{struct_field.name}"
             field_col = snowpark_fn.sql_expr(field_snowpark_name)
-
+            field_type = struct_field.datatype
+            expanded_args.append(
+                TypedColumn(field_col, lambda field_type=field_type: [field_type])
+            )

     return spark_names, expanded_args
```
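The new map_unresolved_star_as_single_column packs an expanded `*` into a single VARIANT value by interleaving literal field names with column expressions in an OBJECT_CONSTRUCT_KEEP_NULL call. Below is an illustrative sketch of that pattern using only public Snowpark APIs; it assumes an already-created Snowpark `session`, and the table and column names are made up:

```python
# Sketch only (not package code): the key/value interleaving used above.
# Assumes an existing Snowpark `session`; columns "id" and "name" are hypothetical.
import snowflake.snowpark.functions as fn
from snowflake.snowpark.types import VariantType

df = session.create_dataframe([(1, "a"), (2, "b")], schema=["id", "name"])

# lit(field_name), value, lit(field_name), value, ... -> one VARIANT object per row
packed = fn.object_construct_keep_null(
    fn.lit("id"), df["id"],
    fn.lit("name"), df["name"],
).cast(VariantType())

df.select(packed.alias("value")).show()  # e.g. {"id": 1, "name": "a"}
```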
snowflake/snowpark_connect/expression/map_update_fields.py
CHANGED
```diff
@@ -6,8 +6,17 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
 from pyspark.errors.exceptions.base import AnalysisException

 import snowflake.snowpark.functions as snowpark_fn
-from snowflake.snowpark.types import
+from snowflake.snowpark.types import (
+    DataType,
+    MapType,
+    StringType,
+    StructField,
+    StructType,
+    VariantType,
+)
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.identifiers import (
@@ -39,9 +48,11 @@ def update_field_in_schema(
                 field.name, updated_subschema, field.nullable, _is_column=False
             )
         else:
-
+            exception = AnalysisException(
                 message=f"[FIELD_NOT_FOUND] No such struct field `{field_str}` in `{field}`"
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+            raise exception
         field_updated = True
     else:
         new_field = field  # leave unchanged
@@ -59,9 +70,11 @@ def update_field_in_schema(
     # if the value type is None that means we want to drop the field and spark does not throw an error if the field does not exists
     # but if the value type is not None, it means we should add or update this field which has already been covered above
     # if we reach this code, it means the field should have existed
-
+    exception = AnalysisException(
         message=f"[FIELD_NOT_FOUND] No such struct field `{field_str}`"
     )
+    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+    raise exception
     return StructType(new_fields)


@@ -99,13 +112,16 @@ def map_update_fields(
     )

     if not isinstance(struct_typed_column.typ, StructType):
-
+        exception = AnalysisException(
             f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "update_fields({struct_name}, ...)" due to data type mismatch: Parameter 1 requires the "STRUCT" type'
         )
+        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+        raise exception

     final_schema = struct_typed_column.typ
     value_column_list = []
-
+    # Snowflake UDFs don't support StructType/MapType, convert to VariantType
+    input_types_to_the_udf = [VariantType()]
     update_operation_strs = []
     array_of_named_parts = []
     for field_expression, value_expression in zip(field_expressions, value_expressions):
@@ -129,7 +145,11 @@ def map_update_fields(
             )
             update_operation_strs.append(f"WithField({value_spark_name})")
             value_column_list.append(value_typed_column.col)
-
+            # Convert StructType/MapType to VariantType for Snowflake UDFs (ArrayType is supported)
+            if isinstance(value_typed_column.typ, (StructType, MapType)):
+                input_types_to_the_udf.append(VariantType())
+            else:
+                input_types_to_the_udf.append(value_typed_column.typ)

         array_of_named_parts.append(name_parts)

@@ -137,28 +157,60 @@ def map_update_fields(
     final_name = f"update_fields({struct_name}, {update_operations_str})"

     if len(final_schema.fields) == 0:
-
+        exception = AnalysisException(
             f'[DATATYPE_MISMATCH.CANNOT_DROP_ALL_FIELDS] Cannot resolve "{final_name}" due to data type mismatch: Cannot drop all fields in struct.'
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception

+    # Snowflake UDFs don't support StructType, so we use VariantType
+    # The result will be automatically cast back to the struct type
     @snowpark_fn.udf(
         input_types=input_types_to_the_udf,
-        return_type=
+        return_type=VariantType(),
     )
     def _update(dictionary, *array_of_value):
+        if dictionary is None:
+            return None
+
+        # Recursively copy to create mutable dict from Snowflake's VARIANT objects
+        def make_mutable_copy(obj):
+            if obj is None:
+                return None
+            elif isinstance(obj, dict):
+                return {k: make_mutable_copy(v) for k, v in obj.items()}
+            elif isinstance(obj, (list, tuple)):
+                return [make_mutable_copy(item) for item in obj]
+            else:
+                return obj
+
+        result = make_mutable_copy(dictionary)
+
         for fields_array, value in zip(array_of_named_parts, array_of_value):
-            current =
+            current = result
             for k in fields_array[:-1]:
                 current = current.get(k)
-
-
-            else:
-                current[fields_array[-1]] = value
-        return dictionary
+                if current is None:
+                    break

-
-
-
-
+            if current is not None and isinstance(current, dict):
+                if value == "_SNOWPARK_CONNECT_UPDATE_FIELD_DROP_":
+                    current.pop(fields_array[-1], None)
+                else:
+                    current[fields_array[-1]] = value
+
+        return result
+
+    # Cast inputs to VARIANT (Snowflake UDFs don't support complex types directly)
+    struct_as_variant = struct_typed_column.col.cast(VariantType())
+    variant_value_list = [
+        col.cast(VariantType()) if isinstance(udf_type, VariantType) else col
+        for col, udf_type in zip(value_column_list, input_types_to_the_udf[1:])
+    ]
+
+    udf_result = _update(struct_as_variant, *variant_value_list)
+
+    # Cast the VariantType result back to the target StructType
+    final_exp = udf_result.cast(final_schema)

     return [final_name], TypedColumn(final_exp, lambda: typer.type(final_exp))
```
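Because the UDF receives the struct as a VARIANT-decoded dict, the rewritten `_update` body first deep-copies its input and then walks each field path, honoring a drop sentinel so that dropping a missing field is not an error. The same logic runs as plain Python; here is a runnable sketch with simplified names (`update` stands in for the UDF body, and the sentinel string matches the one in the diff):

```python
# Runnable pure-Python sketch of the UDF logic added above.
DROP = "_SNOWPARK_CONNECT_UPDATE_FIELD_DROP_"

def make_mutable_copy(obj):
    # Recursively copy so nested dicts/lists become mutable plain objects.
    if obj is None:
        return None
    elif isinstance(obj, dict):
        return {k: make_mutable_copy(v) for k, v in obj.items()}
    elif isinstance(obj, (list, tuple)):
        return [make_mutable_copy(item) for item in obj]
    return obj

def update(dictionary, paths, values):
    if dictionary is None:
        return None
    result = make_mutable_copy(dictionary)
    for path, value in zip(paths, values):
        current = result
        for key in path[:-1]:            # descend to the parent of the target field
            current = current.get(key)
            if current is None:
                break
        if isinstance(current, dict):
            if value == DROP:
                current.pop(path[-1], None)   # drop: missing field is silently ignored
            else:
                current[path[-1]] = value     # add or overwrite the leaf field
    return result

row = {"a": {"b": 1, "c": 2}}
print(update(row, [["a", "b"], ["a", "c"]], [42, DROP]))  # {'a': {'b': 42}}
```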
snowflake/snowpark_connect/expression/map_window_function.py
CHANGED
```diff
@@ -6,7 +6,11 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto

 from snowflake import snowpark
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
-from snowflake.snowpark_connect.error.
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import (
+    SparkException,
+    attach_custom_error_code,
+)
 from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
@@ -29,6 +33,8 @@ SPARK_RANKING_FUNCTIONS = frozenset(
     ]
 )

+RANGE_BASED_WINDOW_FRAME_ONLY_SNOWFLAKE_FUNCTIONS = frozenset(["percent_rank"])
+
 CAPITAL_FUNCTION_NAMES = frozenset(["rank()", "dense_rank()", "percent_rank()"])


@@ -128,6 +134,11 @@ def map_window_function(
         case expressions_proto.Expression.Window.WindowFrame.FrameType.FRAME_TYPE_ROW:
             frame_name.append("ROWS BETWEEN")
             frame_type_func_string = "rows_between"
+            if proto_func_name in RANGE_BASED_WINDOW_FRAME_ONLY_SNOWFLAKE_FUNCTIONS:
+                # Seems like Snowflake and Spark have different understanding of some functions. For those,
+                # Spark only allows rows_between while Snowflake only allows range_between. To be compatible
+                # with Spark, we have to use range_between here.
+                frame_type_func_string = "range_between"
             lower_name, lower = parse_frame_boundary(
                 exp.window.frame_spec.lower, is_upper=False
             )
@@ -138,9 +149,11 @@ def map_window_function(
                 lower != snowpark.Window.UNBOUNDED_PRECEDING
                 or upper != snowpark.Window.CURRENT_ROW
             ):
-
+                exception = SparkException.invalid_ranking_function_window_frame(
                     window_frame=f"specifiedwindowframe(RowFrame, {lower_name}, {upper_name})"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

             is_unbounded = (
                 lower == snowpark.Window.UNBOUNDED_PRECEDING
@@ -165,9 +178,11 @@ def map_window_function(
             orders = orders[:1]

         if proto_func_name in SPARK_RANKING_FUNCTIONS:
-
+            exception = SparkException.invalid_ranking_function_window_frame(
                 window_frame=f"specifiedwindowframe(RangeFrame, {lower_name}, {upper_name})"
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+            raise exception

         is_unbounded = (
             lower == snowpark.Window.UNBOUNDED_PRECEDING
```
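The `RANGE_BASED_WINDOW_FRAME_ONLY_SNOWFLAKE_FUNCTIONS` special case means a Spark `percent_rank` over a ROWS frame is emitted to Snowflake with a RANGE frame. A sketch of the translated call in Snowpark terms, assuming an existing `session` and made-up data (whether Snowflake accepts the frame is per the comment in the diff):

```python
# Sketch of the frame swap: Spark writes percent_rank() with a ROWS frame,
# but the mapper emits range_between instead of rows_between for Snowflake.
import snowflake.snowpark.functions as fn
from snowflake.snowpark import Window

df = session.create_dataframe([(1,), (2,), (2,), (4,)], schema=["v"])

# ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW on the Spark side
# becomes a RANGE frame on the Snowflake side:
win = Window.order_by("v").range_between(
    Window.UNBOUNDED_PRECEDING, Window.CURRENT_ROW
)
df.select(df["v"], fn.percent_rank().over(win).alias("pr")).show()
```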
snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar}
RENAMED
Binary file
snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py
CHANGED
```diff
@@ -49,7 +49,7 @@ def main(infile: IO, outfile: IO) -> None:
     )

     spark_connect_session = SparkSession.builder.remote(connect_url).getOrCreate()
-    spark_connect_session._client.
+    spark_connect_session._client.session_id = session_id  # type: ignore[attr-defined]

     # TODO(SPARK-44460): Pass credentials.
     # TODO(SPARK-44461): Enable Process Isolation
```
snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py
CHANGED
```diff
@@ -57,7 +57,7 @@ def main(infile: IO, outfile: IO) -> None:
     )

     spark_connect_session = SparkSession.builder.remote(connect_url).getOrCreate()
-    spark_connect_session._client.
+    spark_connect_session._client.session_id = session_id  # type: ignore[attr-defined]

     # TODO(SPARK-44460): Pass credentials.
     # TODO(SPARK-44461): Enable Process Isolation
```
snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py
CHANGED
```diff
@@ -16,7 +16,7 @@ from pyspark.sql.connect.proto import expressions_pb2 as spark_dot_connect_dot_expressions__pb2
 from pyspark.sql.connect.proto import relations_pb2 as spark_dot_connect_dot_relations__pb2


-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1esnowflake_expression_ext.proto\x12\rsnowflake.ext\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\"\
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1esnowflake_expression_ext.proto\x12\rsnowflake.ext\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\"\xde\x01\n\x0c\x45xpExtension\x12@\n\x0enamed_argument\x18\x01 \x01(\x0b\x32&.snowflake.ext.NamedArgumentExpressionH\x00\x12@\n\x13subquery_expression\x18\x02 \x01(\x0b\x32!.snowflake.ext.SubqueryExpressionH\x00\x12\x44\n\x10interval_literal\x18\x03 \x01(\x0b\x32(.snowflake.ext.IntervalLiteralExpressionH\x00\x42\x04\n\x02op\"P\n\x17NamedArgumentExpression\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.Expression\"\xf4\x04\n\x12SubqueryExpression\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x45\n\rsubquery_type\x18\x02 \x01(\x0e\x32..snowflake.ext.SubqueryExpression.SubqueryType\x12Q\n\x11table_arg_options\x18\x03 \x01(\x0b\x32\x31.snowflake.ext.SubqueryExpression.TableArgOptionsH\x00\x88\x01\x01\x12\x35\n\x12in_subquery_values\x18\x04 \x03(\x0b\x32\x19.spark.connect.Expression\x1a\xbb\x01\n\x0fTableArgOptions\x12\x31\n\x0epartition_spec\x18\x01 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x37\n\norder_spec\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrder\x12\"\n\x15with_single_partition\x18\x03 \x01(\x08H\x00\x88\x01\x01\x42\x18\n\x16_with_single_partition\"\x90\x01\n\x0cSubqueryType\x12\x19\n\x15SUBQUERY_TYPE_UNKNOWN\x10\x00\x12\x18\n\x14SUBQUERY_TYPE_SCALAR\x10\x01\x12\x18\n\x14SUBQUERY_TYPE_EXISTS\x10\x02\x12\x1b\n\x17SUBQUERY_TYPE_TABLE_ARG\x10\x03\x12\x14\n\x10SUBQUERY_TYPE_IN\x10\x04\x42\x14\n\x12_table_arg_options\"\x9f\x01\n\x19IntervalLiteralExpression\x12\x32\n\x07literal\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.Literal\x12\x18\n\x0bstart_field\x18\x02 \x01(\x05H\x00\x88\x01\x01\x12\x16\n\tend_field\x18\x03 \x01(\x05H\x01\x88\x01\x01\x42\x0e\n\x0c_start_fieldB\x0c\n\n_end_fieldb\x06proto3')

 _globals = globals()
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@@ -24,13 +24,15 @@ _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'snowflake_expression_ext_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
   DESCRIPTOR._options = None
   _globals['_EXPEXTENSION']._serialized_start=114
-  _globals['_EXPEXTENSION']._serialized_end=
-  _globals['_NAMEDARGUMENTEXPRESSION']._serialized_start=
-  _globals['_NAMEDARGUMENTEXPRESSION']._serialized_end=
-  _globals['_SUBQUERYEXPRESSION']._serialized_start=
-  _globals['_SUBQUERYEXPRESSION']._serialized_end=
-  _globals['_SUBQUERYEXPRESSION_TABLEARGOPTIONS']._serialized_start=
-  _globals['_SUBQUERYEXPRESSION_TABLEARGOPTIONS']._serialized_end=
-  _globals['_SUBQUERYEXPRESSION_SUBQUERYTYPE']._serialized_start=
-  _globals['_SUBQUERYEXPRESSION_SUBQUERYTYPE']._serialized_end=
+  _globals['_EXPEXTENSION']._serialized_end=336
+  _globals['_NAMEDARGUMENTEXPRESSION']._serialized_start=338
+  _globals['_NAMEDARGUMENTEXPRESSION']._serialized_end=418
+  _globals['_SUBQUERYEXPRESSION']._serialized_start=421
+  _globals['_SUBQUERYEXPRESSION']._serialized_end=1049
+  _globals['_SUBQUERYEXPRESSION_TABLEARGOPTIONS']._serialized_start=693
+  _globals['_SUBQUERYEXPRESSION_TABLEARGOPTIONS']._serialized_end=880
+  _globals['_SUBQUERYEXPRESSION_SUBQUERYTYPE']._serialized_start=883
+  _globals['_SUBQUERYEXPRESSION_SUBQUERYTYPE']._serialized_end=1027
+  _globals['_INTERVALLITERALEXPRESSION']._serialized_start=1052
+  _globals['_INTERVALLITERALEXPRESSION']._serialized_end=1211
 # @@protoc_insertion_point(module_scope)
```
snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi
CHANGED
```diff
@@ -9,12 +9,14 @@ from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping
 DESCRIPTOR: _descriptor.FileDescriptor

 class ExpExtension(_message.Message):
-    __slots__ = ("named_argument", "subquery_expression")
+    __slots__ = ("named_argument", "subquery_expression", "interval_literal")
     NAMED_ARGUMENT_FIELD_NUMBER: _ClassVar[int]
     SUBQUERY_EXPRESSION_FIELD_NUMBER: _ClassVar[int]
+    INTERVAL_LITERAL_FIELD_NUMBER: _ClassVar[int]
     named_argument: NamedArgumentExpression
     subquery_expression: SubqueryExpression
-
+    interval_literal: IntervalLiteralExpression
+    def __init__(self, named_argument: _Optional[_Union[NamedArgumentExpression, _Mapping]] = ..., subquery_expression: _Optional[_Union[SubqueryExpression, _Mapping]] = ..., interval_literal: _Optional[_Union[IntervalLiteralExpression, _Mapping]] = ...) -> None: ...

 class NamedArgumentExpression(_message.Message):
     __slots__ = ("key", "value")
@@ -56,3 +58,13 @@ class SubqueryExpression(_message.Message):
     table_arg_options: SubqueryExpression.TableArgOptions
     in_subquery_values: _containers.RepeatedCompositeFieldContainer[_expressions_pb2.Expression]
     def __init__(self, input: _Optional[_Union[_relations_pb2.Relation, _Mapping]] = ..., subquery_type: _Optional[_Union[SubqueryExpression.SubqueryType, str]] = ..., table_arg_options: _Optional[_Union[SubqueryExpression.TableArgOptions, _Mapping]] = ..., in_subquery_values: _Optional[_Iterable[_Union[_expressions_pb2.Expression, _Mapping]]] = ...) -> None: ...
+
+class IntervalLiteralExpression(_message.Message):
+    __slots__ = ("literal", "start_field", "end_field")
+    LITERAL_FIELD_NUMBER: _ClassVar[int]
+    START_FIELD_FIELD_NUMBER: _ClassVar[int]
+    END_FIELD_FIELD_NUMBER: _ClassVar[int]
+    literal: _expressions_pb2.Expression.Literal
+    start_field: int
+    end_field: int
+    def __init__(self, literal: _Optional[_Union[_expressions_pb2.Expression.Literal, _Mapping]] = ..., start_field: _Optional[int] = ..., end_field: _Optional[int] = ...) -> None: ...
```
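The regenerated stubs add an `interval_literal` arm to the `ExpExtension` oneof. A sketch of constructing the new message from these generated classes (the literal value and the start/end field ints are arbitrary examples; what they encode is not documented in this diff):

```python
# Sketch: building the new IntervalLiteralExpression from the generated classes.
# start_field/end_field are proto3 optional ints; the values below are made up.
import pyspark.sql.connect.proto.expressions_pb2 as expressions_pb2
from snowflake.snowpark_connect.proto import snowflake_expression_ext_pb2 as ext_pb2

interval = ext_pb2.IntervalLiteralExpression(
    literal=expressions_pb2.Expression.Literal(string="1-2"),
    start_field=0,
    end_field=1,
)
# ExpExtension wraps its arms in a oneof named "op"; setting interval_literal
# selects that arm.
ext = ext_pb2.ExpExtension(interval_literal=interval)
print(ext.WhichOneof("op"))  # -> "interval_literal"
```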
snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py
CHANGED
```diff
@@ -16,7 +16,7 @@ from pyspark.sql.connect.proto import relations_pb2 as spark_dot_connect_dot_relations__pb2
 from pyspark.sql.connect.proto import expressions_pb2 as spark_dot_connect_dot_expressions__pb2


-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1csnowflake_relation_ext.proto\x12\rsnowflake.ext\x1a\x1dspark/connect/relations.proto\x1a\x1fspark/connect/expressions.proto\"\xe3\x02\n\tExtension\x12(\n\x07rdd_map\x18\x01 \x01(\x0b\x32\x15.snowflake.ext.RddMapH\x00\x12.\n\nrdd_reduce\x18\x02 \x01(\x0b\x32\x18.snowflake.ext.RddReduceH\x00\x12G\n\x17subquery_column_aliases\x18\x03 \x01(\x0b\x32$.snowflake.ext.SubqueryColumnAliasesH\x00\x12\x32\n\x0clateral_join\x18\x04 \x01(\x0b\x32\x1a.snowflake.ext.LateralJoinH\x00\x12J\n\x19udtf_with_table_arguments\x18\x05 \x01(\x0b\x32%.snowflake.ext.UDTFWithTableArgumentsH\x00\x12-\n\taggregate\x18\x06 \x01(\x0b\x32\x18.snowflake.ext.AggregateH\x00\x42\x04\n\x02op\">\n\x06RddMap\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x66unc\x18\x02 \x01(\x0c\"A\n\tRddReduce\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x66unc\x18\x02 \x01(\x0c\"P\n\x15SubqueryColumnAliases\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0f\n\x07\x61liases\x18\x02 \x03(\t\"\\\n\x0bLateralJoin\x12%\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12&\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.Relation\"\x98\x01\n\x16UDTFWithTableArguments\x12\x15\n\rfunction_name\x18\x01 \x01(\t\x12,\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x39\n\x0ftable_arguments\x18\x03 \x03(\x0b\x32 .snowflake.ext.TableArgumentInfo\"`\n\x11TableArgumentInfo\x12/\n\x0etable_argument\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x1a\n\x12table_argument_idx\x18\x02 \x01(\x05\"\
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1csnowflake_relation_ext.proto\x12\rsnowflake.ext\x1a\x1dspark/connect/relations.proto\x1a\x1fspark/connect/expressions.proto\"\xe3\x02\n\tExtension\x12(\n\x07rdd_map\x18\x01 \x01(\x0b\x32\x15.snowflake.ext.RddMapH\x00\x12.\n\nrdd_reduce\x18\x02 \x01(\x0b\x32\x18.snowflake.ext.RddReduceH\x00\x12G\n\x17subquery_column_aliases\x18\x03 \x01(\x0b\x32$.snowflake.ext.SubqueryColumnAliasesH\x00\x12\x32\n\x0clateral_join\x18\x04 \x01(\x0b\x32\x1a.snowflake.ext.LateralJoinH\x00\x12J\n\x19udtf_with_table_arguments\x18\x05 \x01(\x0b\x32%.snowflake.ext.UDTFWithTableArgumentsH\x00\x12-\n\taggregate\x18\x06 \x01(\x0b\x32\x18.snowflake.ext.AggregateH\x00\x42\x04\n\x02op\">\n\x06RddMap\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x66unc\x18\x02 \x01(\x0c\"A\n\tRddReduce\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x66unc\x18\x02 \x01(\x0c\"P\n\x15SubqueryColumnAliases\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0f\n\x07\x61liases\x18\x02 \x03(\t\"\\\n\x0bLateralJoin\x12%\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12&\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.Relation\"\x98\x01\n\x16UDTFWithTableArguments\x12\x15\n\rfunction_name\x18\x01 \x01(\t\x12,\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x39\n\x0ftable_arguments\x18\x03 \x03(\x0b\x32 .snowflake.ext.TableArgumentInfo\"`\n\x11TableArgumentInfo\x12/\n\x0etable_argument\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x1a\n\x12table_argument_idx\x18\x02 \x01(\x05\"\xbf\x06\n\tAggregate\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x36\n\ngroup_type\x18\x02 \x01(\x0e\x32\".snowflake.ext.Aggregate.GroupType\x12\x37\n\x14grouping_expressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x38\n\x15\x61ggregate_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.Expression\x12-\n\x05pivot\x18\x05 \x01(\x0b\x32\x1e.snowflake.ext.Aggregate.Pivot\x12<\n\rgrouping_sets\x18\x06 \x03(\x0b\x32%.snowflake.ext.Aggregate.GroupingSets\x12\x33\n\x10having_condition\x18\x07 \x01(\x0b\x32\x19.spark.connect.Expression\x1a\xd9\x01\n\x05Pivot\x12\x30\n\rpivot_columns\x18\x01 \x03(\x0b\x32\x19.spark.connect.Expression\x12?\n\x0cpivot_values\x18\x02 \x03(\x0b\x32).snowflake.ext.Aggregate.Pivot.PivotValue\x1a]\n\nPivotValue\x12\x31\n\x06values\x18\x01 \x03(\x0b\x32!.spark.connect.Expression.Literal\x12\x12\n\x05\x61lias\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_alias\x1a?\n\x0cGroupingSets\x12/\n\x0cgrouping_set\x18\x01 \x03(\x0b\x32\x19.spark.connect.Expression\"\x9f\x01\n\tGroupType\x12\x1a\n\x16GROUP_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12GROUP_TYPE_GROUPBY\x10\x01\x12\x15\n\x11GROUP_TYPE_ROLLUP\x10\x02\x12\x13\n\x0fGROUP_TYPE_CUBE\x10\x03\x12\x14\n\x10GROUP_TYPE_PIVOT\x10\x04\x12\x1c\n\x18GROUP_TYPE_GROUPING_SETS\x10\x05\x62\x06proto3')

 _globals = globals()
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@@ -38,11 +38,13 @@ if _descriptor._USE_C_DESCRIPTORS == False:
   _globals['_TABLEARGUMENTINFO']._serialized_start=931
   _globals['_TABLEARGUMENTINFO']._serialized_end=1027
   _globals['_AGGREGATE']._serialized_start=1030
-  _globals['_AGGREGATE']._serialized_end=
-  _globals['_AGGREGATE_PIVOT']._serialized_start=
-  _globals['_AGGREGATE_PIVOT']._serialized_end=
-  _globals['
-  _globals['
-  _globals['
-  _globals['
+  _globals['_AGGREGATE']._serialized_end=1861
+  _globals['_AGGREGATE_PIVOT']._serialized_start=1417
+  _globals['_AGGREGATE_PIVOT']._serialized_end=1634
+  _globals['_AGGREGATE_PIVOT_PIVOTVALUE']._serialized_start=1541
+  _globals['_AGGREGATE_PIVOT_PIVOTVALUE']._serialized_end=1634
+  _globals['_AGGREGATE_GROUPINGSETS']._serialized_start=1636
+  _globals['_AGGREGATE_GROUPINGSETS']._serialized_end=1699
+  _globals['_AGGREGATE_GROUPTYPE']._serialized_start=1702
+  _globals['_AGGREGATE_GROUPTYPE']._serialized_end=1861
 # @@protoc_insertion_point(module_scope)
```
snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi
CHANGED
```diff
@@ -91,12 +91,19 @@ class Aggregate(_message.Message):
     GROUP_TYPE_PIVOT: Aggregate.GroupType
     GROUP_TYPE_GROUPING_SETS: Aggregate.GroupType
     class Pivot(_message.Message):
-        __slots__ = ("
-
-
-
-
-
+        __slots__ = ("pivot_columns", "pivot_values")
+        class PivotValue(_message.Message):
+            __slots__ = ("values", "alias")
+            VALUES_FIELD_NUMBER: _ClassVar[int]
+            ALIAS_FIELD_NUMBER: _ClassVar[int]
+            values: _containers.RepeatedCompositeFieldContainer[_expressions_pb2.Expression.Literal]
+            alias: str
+            def __init__(self, values: _Optional[_Iterable[_Union[_expressions_pb2.Expression.Literal, _Mapping]]] = ..., alias: _Optional[str] = ...) -> None: ...
+        PIVOT_COLUMNS_FIELD_NUMBER: _ClassVar[int]
+        PIVOT_VALUES_FIELD_NUMBER: _ClassVar[int]
+        pivot_columns: _containers.RepeatedCompositeFieldContainer[_expressions_pb2.Expression]
+        pivot_values: _containers.RepeatedCompositeFieldContainer[Aggregate.Pivot.PivotValue]
+        def __init__(self, pivot_columns: _Optional[_Iterable[_Union[_expressions_pb2.Expression, _Mapping]]] = ..., pivot_values: _Optional[_Iterable[_Union[Aggregate.Pivot.PivotValue, _Mapping]]] = ...) -> None: ...
    class GroupingSets(_message.Message):
        __slots__ = ("grouping_set",)
        GROUPING_SET_FIELD_NUMBER: _ClassVar[int]
```