snowpark-connect 0.27.0__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
- snowflake/snowpark_connect/client/__init__.py +15 -0
- snowflake/snowpark_connect/client/error_utils.py +30 -0
- snowflake/snowpark_connect/client/exceptions.py +36 -0
- snowflake/snowpark_connect/client/query_results.py +90 -0
- snowflake/snowpark_connect/client/server.py +717 -0
- snowflake/snowpark_connect/client/utils/__init__.py +10 -0
- snowflake/snowpark_connect/client/utils/session.py +85 -0
- snowflake/snowpark_connect/column_name_handler.py +404 -243
- snowflake/snowpark_connect/column_qualifier.py +43 -0
- snowflake/snowpark_connect/config.py +309 -26
- snowflake/snowpark_connect/constants.py +2 -0
- snowflake/snowpark_connect/dataframe_container.py +102 -8
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +172 -23
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/error_utils.py +28 -0
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
- snowflake/snowpark_connect/expression/integral_types_support.py +219 -0
- snowflake/snowpark_connect/expression/literal.py +37 -13
- snowflake/snowpark_connect/expression/map_cast.py +224 -15
- snowflake/snowpark_connect/expression/map_expression.py +80 -27
- snowflake/snowpark_connect/expression/map_extension.py +322 -12
- snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
- snowflake/snowpark_connect/expression/map_udf.py +86 -20
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
- snowflake/snowpark_connect/expression/map_unresolved_function.py +2964 -829
- snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
- snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.13-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.13-0.2.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.13.16.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.13-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/{spark-connect-client-jvm_2.12-3.5.6.jar → spark-connect-client-jvm_2.13-3.5.6.jar} +0 -0
- snowflake/snowpark_connect/includes/jars/{spark-sql_2.12-3.5.6.jar → spark-sql_2.13-3.5.6.jar} +0 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +110 -10
- snowflake/snowpark_connect/relation/map_aggregate.py +239 -256
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
- snowflake/snowpark_connect/relation/map_extension.py +263 -29
- snowflake/snowpark_connect/relation/map_join.py +683 -442
- snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
- snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
- snowflake/snowpark_connect/relation/map_relation.py +48 -19
- snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
- snowflake/snowpark_connect/relation/map_show_string.py +13 -6
- snowflake/snowpark_connect/relation/map_sql.py +1233 -222
- snowflake/snowpark_connect/relation/map_stats.py +48 -9
- snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
- snowflake/snowpark_connect/relation/read/map_read.py +134 -43
- snowflake/snowpark_connect/relation/read/map_read_csv.py +326 -47
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +21 -6
- snowflake/snowpark_connect/relation/read/map_read_json.py +324 -86
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +146 -28
- snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
- snowflake/snowpark_connect/relation/read/map_read_socket.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +86 -6
- snowflake/snowpark_connect/relation/read/map_read_text.py +22 -4
- snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
- snowflake/snowpark_connect/relation/read/reader_config.py +42 -3
- snowflake/snowpark_connect/relation/read/utils.py +50 -5
- snowflake/snowpark_connect/relation/stage_locator.py +91 -55
- snowflake/snowpark_connect/relation/utils.py +128 -5
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +929 -319
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
- snowflake/snowpark_connect/resources_initializer.py +171 -48
- snowflake/snowpark_connect/server.py +528 -473
- snowflake/snowpark_connect/server_common/__init__.py +503 -0
- snowflake/snowpark_connect/snowflake_session.py +65 -0
- snowflake/snowpark_connect/start_server.py +53 -5
- snowflake/snowpark_connect/type_mapping.py +349 -27
- snowflake/snowpark_connect/type_support.py +130 -0
- snowflake/snowpark_connect/typed_column.py +9 -7
- snowflake/snowpark_connect/utils/artifacts.py +9 -8
- snowflake/snowpark_connect/utils/cache.py +49 -27
- snowflake/snowpark_connect/utils/concurrent.py +36 -1
- snowflake/snowpark_connect/utils/context.py +195 -37
- snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
- snowflake/snowpark_connect/utils/identifiers.py +137 -3
- snowflake/snowpark_connect/utils/io_utils.py +57 -1
- snowflake/snowpark_connect/utils/java_stored_procedure.py +151 -0
- snowflake/snowpark_connect/utils/java_udaf_utils.py +321 -0
- snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
- snowflake/snowpark_connect/utils/jvm_udf_utils.py +281 -0
- snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +185 -340
- snowflake/snowpark_connect/utils/sequence.py +21 -0
- snowflake/snowpark_connect/utils/session.py +64 -28
- snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
- snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
- snowflake/snowpark_connect/utils/telemetry.py +192 -40
- snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
- snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
- snowflake/snowpark_connect/utils/udf_cache.py +117 -41
- snowflake/snowpark_connect/utils/udf_helper.py +39 -37
- snowflake/snowpark_connect/utils/udf_utils.py +133 -14
- snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
- snowflake/snowpark_connect/utils/udxf_import_utils.py +9 -2
- snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +6 -2
- snowflake/snowpark_decoder/spark_decoder.py +12 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-submit +14 -4
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/METADATA +16 -7
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/RECORD +139 -168
- snowflake/snowpark_connect/hidden_column.py +0 -39
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/top_level.txt +0 -0
```diff
--- a/snowflake/snowpark_connect/dataframe_container.py
+++ b/snowflake/snowpark_connect/dataframe_container.py
@@ -4,14 +4,40 @@
 
 from __future__ import annotations
 
+from dataclasses import dataclass
 from typing import TYPE_CHECKING, Callable
 
 from snowflake import snowpark
 from snowflake.snowpark.types import StructField, StructType
-from snowflake.snowpark_connect.
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 
 if TYPE_CHECKING:
+    import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
+
     from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+    from snowflake.snowpark_connect.typed_column import TypedColumn
+
+
+@dataclass
+class AggregateMetadata:
+    """
+    Metadata about aggregation for resolving expressions in ORDER BY.
+
+    When a Sort operation follows an Aggregate operation, ORDER BY expressions
+    may reference:
+    1. Grouping columns from the GROUP BY clause
+    2. Aggregate result columns (aliases)
+    3. Expressions on pre-aggregation columns (e.g., year(date) where date existed before GROUP BY)
+
+    This metadata enables hybrid resolution similar to HAVING clause.
+    """
+
+    input_column_map: ColumnNameMap
+    input_dataframe: snowpark.DataFrame
+    grouping_expressions: list[expressions_proto.Expression]
+    aggregate_expressions: list[expressions_proto.Expression]
+    spark_columns: list[str]
+    raw_aggregations: list[tuple[str, TypedColumn]]
 
 
 class DataFrameContainer:
```
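For context, the three reference cases in the `AggregateMetadata` docstring correspond to a client-side Spark pattern like the following. This is a hypothetical illustration (connection URL, table, and column names are invented), not code from the package:

```python
# Hypothetical PySpark snippet showing the ORDER BY cases that
# AggregateMetadata is meant to resolve on the server side.
from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
df = spark.createDataFrame(
    [("east", "2024-01-05", 10.0), ("west", "2024-02-01", 7.5)],
    ["region", "order_date", "amount"],
).withColumn("order_date", F.to_date("order_date"))

result = (
    df.groupBy("region", "order_date")
    .agg(F.sum("amount").alias("total"))
    .orderBy(
        "region",              # 1. grouping column from the GROUP BY clause
        F.col("total"),        # 2. aggregate result column (alias)
        F.year("order_date"),  # 3. expression on a pre-aggregation column
    )
)
```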
```diff
@@ -30,6 +56,9 @@ class DataFrameContainer:
         alias: str | None = None,
         cached_schema_getter: Callable[[], StructType] | None = None,
         partition_hint: int | None = None,
+        can_be_cached: bool = True,
+        can_be_materialized: bool = True,
+        aggregate_metadata: AggregateMetadata | None = None,
     ) -> None:
         """
         Initialize a new DataFrameContainer.
```
```diff
@@ -41,12 +70,16 @@ class DataFrameContainer:
             alias: Optional alias for the DataFrame
             cached_schema_getter: Optional function to get cached schema
             partition_hint: Optional partition count from repartition() operations
+            aggregate_metadata: Optional metadata about aggregation for ORDER BY resolution
         """
         self._dataframe = dataframe
         self._column_map = self._create_default_column_map(column_map)
         self._table_name = table_name
         self._alias = alias
         self._partition_hint = partition_hint
+        self._can_be_cached = can_be_cached
+        self._can_be_materialized = can_be_materialized
+        self._aggregate_metadata = aggregate_metadata
 
         if cached_schema_getter is not None:
             self._apply_cached_schema_getter(cached_schema_getter)
```
```diff
@@ -59,13 +92,16 @@ class DataFrameContainer:
         snowpark_column_names: list[str],
         snowpark_column_types: list | None = None,
         column_metadata: dict | None = None,
-        column_qualifiers: list[
+        column_qualifiers: list[set[ColumnQualifier]] | None = None,
         parent_column_name_map: ColumnNameMap | None = None,
-        hidden_columns: set[HiddenColumn] | None = None,
         table_name: str | None = None,
         alias: str | None = None,
         cached_schema_getter: Callable[[], StructType] | None = None,
         partition_hint: int | None = None,
+        equivalent_snowpark_names: list[set[str]] | None = None,
+        column_is_hidden: list[bool] | None = None,
+        can_be_cached: bool = True,
+        aggregate_metadata: AggregateMetadata | None = None,
     ) -> DataFrameContainer:
         """
         Create a new container with complete column mapping configuration.
```
```diff
@@ -78,11 +114,14 @@ class DataFrameContainer:
             column_metadata: Optional metadata dictionary
             column_qualifiers: Optional column qualifiers
             parent_column_name_map: Optional parent column name map
-            hidden_columns: Optional list of hidden column names
             table_name: Optional table name
             alias: Optional alias
             cached_schema_getter: Optional function to get cached schema
             partition_hint: Optional partition count from repartition() operations
+            equivalent_snowpark_names: list of sets with old snowpark names that can be resolved with an existing column
+            column_is_hidden: Optional list of booleans indicating whether each column is hidden
+            can_be_cached: Optional boolean indicating if the dataframe can be cached
+            aggregate_metadata: Optional metadata about aggregation for ORDER BY resolution
 
         Returns:
             A new DataFrameContainer instance
```
```diff
@@ -101,7 +140,8 @@ class DataFrameContainer:
             column_metadata,
             column_qualifiers,
             parent_column_name_map,
-            hidden_columns,
+            equivalent_snowpark_names,
+            column_is_hidden,
         )
 
         # Determine the schema getter to use
```
```diff
@@ -129,8 +169,25 @@ class DataFrameContainer:
             alias=alias,
             cached_schema_getter=final_schema_getter,
             partition_hint=partition_hint,
+            can_be_cached=can_be_cached,
+            aggregate_metadata=aggregate_metadata,
         )
 
+    @property
+    def can_be_cached(self) -> bool:
+        """Indicate if the DataFrame can be cached in df_cache"""
+        return self._can_be_cached
+
+    @property
+    def can_be_materialized(self) -> bool:
+        """Indicate if the DataFrame can be materialized in df_cache"""
+        return self._can_be_materialized
+
+    def without_materialization(self):
+        """Prevent the DataFrame from being materialized in df_cache"""
+        self._can_be_materialized = False
+        return self
+
     @property
     def dataframe(self) -> snowpark.DataFrame:
         """Get the underlying Snowpark DataFrame."""
```
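A brief sketch of how the new caching flags compose, using only the API added above; the helper function itself is invented for illustration and is not part of the package:

```python
from snowflake.snowpark_connect.dataframe_container import DataFrameContainer

def mark_transient(container: DataFrameContainer) -> DataFrameContainer:
    """Illustrative helper: opt a result out of df_cache materialization.

    Plain cache eligibility (can_be_cached) is a separate flag that is
    fixed at construction time.
    """
    if container.can_be_materialized:
        # without_materialization() flips the flag and returns self,
        # so the call chains naturally.
        container = container.without_materialization()
    return container
```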
```diff
@@ -224,9 +281,10 @@ class DataFrameContainer:
         spark_column_names: list[str],
         snowpark_column_names: list[str],
         column_metadata: dict | None = None,
-        column_qualifiers: list[
+        column_qualifiers: list[set[ColumnQualifier]] | None = None,
         parent_column_name_map: ColumnNameMap | None = None,
-        hidden_columns: set[HiddenColumn] | None = None,
+        equivalent_snowpark_names: list[set[str]] | None = None,
+        column_is_hidden: list[bool] | None = None,
     ) -> ColumnNameMap:
         """Create a ColumnNameMap with the provided configuration."""
         from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
```
```diff
@@ -237,7 +295,8 @@ class DataFrameContainer:
             column_metadata=column_metadata,
             column_qualifiers=column_qualifiers,
             parent_column_name_map=parent_column_name_map,
-            hidden_columns=hidden_columns,
+            equivalent_snowpark_names=equivalent_snowpark_names,
+            column_is_hidden=column_is_hidden,
         )
 
     @staticmethod
```
```diff
@@ -262,3 +321,38 @@ class DataFrameContainer:
                 )
             ]
         )
+
+    def without_hidden_columns(self) -> DataFrameContainer:
+        from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+
+        if not any(c.is_hidden for c in self._column_map.columns):
+            return self
+
+        hidden_column_names = [
+            c.snowpark_name for c in self._column_map.columns if c.is_hidden
+        ]
+        visible_columns = [c for c in self._column_map.columns if not c.is_hidden]
+
+        filtered_df = self._dataframe.drop(hidden_column_names)
+        filtered_column_map = ColumnNameMap(
+            spark_column_names=[c.spark_name for c in visible_columns],
+            snowpark_column_names=[c.snowpark_name for c in visible_columns],
+            column_metadata=self._column_map.column_metadata,
+            column_qualifiers=[c.qualifiers for c in visible_columns],
+            parent_column_name_map=self._column_map._parent_column_name_map,
+        )
+
+        return DataFrameContainer(
+            dataframe=filtered_df,
+            column_map=filtered_column_map,
+            table_name=self._table_name,
+            alias=self._alias,
+            cached_schema_getter=lambda: StructType(
+                [
+                    field
+                    for field in self._dataframe.schema.fields
+                    if field.name not in hidden_column_names
+                ]
+            ),
+            partition_hint=self._partition_hint,
+        )
```
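The method above replaces the removed `HiddenColumn` machinery (`hidden_column.py` is deleted in this release); hidden columns are now tracked as a per-column flag on the `ColumnNameMap`. A hedged usage sketch, with an invented helper name:

```python
from snowflake.snowpark_connect.dataframe_container import DataFrameContainer

def finalize_for_client(container: DataFrameContainer) -> DataFrameContainer:
    """Illustrative helper (not in the package): strip internal columns
    before a result is handed back to the Spark client.

    without_hidden_columns() drops the hidden Snowpark columns from the
    underlying DataFrame and rebuilds the column map from the visible
    ones; it returns self when nothing is hidden, so the call is safe
    unconditionally.
    """
    return container.without_hidden_columns()
```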
```diff
--- a/snowflake/snowpark_connect/date_time_format_mapping.py
+++ b/snowflake/snowpark_connect/date_time_format_mapping.py
@@ -24,6 +24,8 @@
 from pyspark.errors.exceptions.base import DateTimeException
 
 from snowflake.snowpark.types import DataType, StringType
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 
 
 # TODO: There are more patterns where spark may throw an error.
```
```diff
@@ -229,9 +231,11 @@ def convert_spark_format_to_snowflake(
     timestamp_input_type: DataType | None = None,
 ):
     if spark_format in {"Y", "w", "W"}:
-        raise DateTimeException(
+        exception = DateTimeException(
             f"Fail to recognize '{spark_format}' pattern in the DateTimeFormatter."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+        raise exception
     snowflake_format = ""
     i = 0
     n = len(spark_format)
```
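The same raise-and-tag rewrite is applied to every validation branch in this function, as the hunks below show. As a usage sketch, assuming `spark_format` is the first positional parameter and the translated Snowflake format string is the return value (the full signature is not visible in this diff):

```python
from pyspark.errors.exceptions.base import DateTimeException

from snowflake.snowpark_connect.date_time_format_mapping import (
    convert_spark_format_to_snowflake,
)

# A supported pattern translates into Snowflake's format language.
snowflake_fmt = convert_spark_format_to_snowflake("yyyy-MM-dd")

# Week-based-year patterns ("Y", "w", "W") are rejected up front; after
# this change the DateTimeException also carries the custom error code
# ErrorCodes.INVALID_FUNCTION_ARGUMENT (3004) for the gRPC response.
try:
    convert_spark_format_to_snowflake("Y")
except DateTimeException as exc:
    print(exc)
```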
```diff
@@ -299,28 +303,46 @@ def convert_spark_format_to_snowflake(
                 # Spark's 'a' would be at most 1 times
                 is_valid_a_pattern = spark_format[i : i + 2] != char * 2
                 if not is_valid_a_pattern:
-                    raise DateTimeException(
+                    exception = DateTimeException(
                         f"Fail to recognize '{spark_format}' pattern in the DateTimeFormatter"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
             case "h" | "K" | "k" | "H" | "m" | "s" | "d":
                 # Spark's characters would be at most 2 times
                 is_valid_2_patterns = spark_format[i : i + 3] != char * 3
                 if not is_valid_2_patterns:
-                    raise DateTimeException(
+                    exception = DateTimeException(
                         f"Fail to recognize '{spark_format}' pattern in the DateTimeFormatter"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
             case "D":
                 # Spark's 'D'' would be at most 3 times
                 is_valid_D_patterns = spark_format[i : i + 4] != char * 4
                 if not is_valid_D_patterns:
-                    raise DateTimeException(
+                    exception = DateTimeException(
                         f"Fail to recognize '{spark_format}' pattern in the DateTimeFormatter"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
             case "V":
                 # Spark's 'V' for Zone ID requires 'VV'. A single 'V' is invalid.
                 is_valid_vv_pattern = spark_format[i : i + 2] == "VV"
                 if not is_valid_vv_pattern:
-                    raise DateTimeException("Pattern letter count must be 2: V")
+                    exception = DateTimeException(
+                        "Pattern letter count must be 2: V"
+                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
             case "O":
                 # Spark's 'O' would be either 1 or 4.
                 is_valid_o_or_oooo_pattern = spark_format[i : i + 2] != "OO" or (
@@ -328,28 +350,52 @@ def convert_spark_format_to_snowflake(
                     and spark_format[i : i + 5] != "OOOOO"
                 )
                 if not is_valid_o_or_oooo_pattern:
-                    raise DateTimeException(
+                    exception = DateTimeException(
                         "Pattern letter count must be 1 or 4: O"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
             case "q" | "Q" | "z" | "E":
                 # Spark's characters would be at most 4 times
                 is_valid_4_patterns = spark_format[i : i + 5] != char * 5
                 if not is_valid_4_patterns:
-                    raise DateTimeException(f"Too many pattern letters: {char}")
+                    exception = DateTimeException(
+                        f"Too many pattern letters: {char}"
+                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
             case "x" | "X" | "Z":
                 # Spark's 'x' or 'X' or 'z' or 'Z' would be at most 5 times
                 is_valid_xz_pattern = spark_format[i : i + 6] != char * 6
                 if not is_valid_xz_pattern:
-                    raise DateTimeException(f"Too many pattern letters: {char}")
+                    exception = DateTimeException(
+                        f"Too many pattern letters: {char}"
+                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
             case "y":
                 # Spark's 'y' would be at most 6 times
                 is_valid_y_pattern = spark_format[i : i + 7] != char * 7
                 if not is_valid_y_pattern:
-                    raise DateTimeException(
+                    exception = DateTimeException(
                         f"Fail to recognize '{spark_format}' pattern in the DateTimeFormatter"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
             case "C" | "I":
-                raise DateTimeException(f"Unknown pattern letter: {char}")
+                exception = DateTimeException(f"Unknown pattern letter: {char}")
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                )
+                raise exception
 
         if (
             spark_format[i : i + 5] in {"M" * 5, "L" * 5}
```
```diff
@@ -362,9 +408,13 @@ def convert_spark_format_to_snowflake(
             or spark_format[i : i + 3] in {"kkk", "KKK"}
             or spark_format[i : i + 10] == "SSSSSSSSSS"
         ):
-            raise DateTimeException(
+            exception = DateTimeException(
                 f"Fail to recognize '{spark_format}' pattern in the DateTimeFormatter."
             )
+            attach_custom_error_code(
+                exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+            )
+            raise exception
 
         matched_pattern = False
 
```
```diff
@@ -375,7 +425,11 @@ def convert_spark_format_to_snowflake(
                 spark_key
             ]
             if isinstance(snowflake_equivalent, _UnsupportedSparkFormatPattern):
-                raise DateTimeException(snowflake_equivalent.message)
+                exception = DateTimeException(snowflake_equivalent.message)
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
             if snowflake_equivalent is not None:
                 snowflake_format += snowflake_equivalent
                 i += len(spark_key)
```
```diff
@@ -389,7 +443,11 @@ def convert_spark_format_to_snowflake(
             isinstance(timestamp_input_type, StringType)
             and char not in snowflake_time_format_separator
         ):
-            raise DateTimeException(f"Illegal pattern character: {char}")
+            exception = DateTimeException(f"Illegal pattern character: {char}")
+            attach_custom_error_code(
+                exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+            )
+            raise exception
 
         snowflake_format += f'"{char}"'
         i += 1
```
```diff
--- /dev/null
+++ b/snowflake/snowpark_connect/error/error_codes.py
@@ -0,0 +1,50 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+"""
+Error code constants for Snowpark Connect.
+
+This module defines custom error codes that can be attached to exceptions
+and included in gRPC error responses.
+"""
+
+
+class ErrorCodes:
+    """Constants for Snowpark Connect custom error codes."""
+
+    # 1000-1999: Startup related errors
+    MISSING_DATABASE = 1001
+    MISSING_SCHEMA = 1002
+    RESOURCE_INITIALIZATION_FAILED = 1003
+    TCP_PORT_ALREADY_IN_USE = 1004
+    INVALID_SPARK_CONNECT_URL = 1005
+    INVALID_STARTUP_INPUT = 1006
+    INVALID_STARTUP_OPERATION = 1007
+    STARTUP_CONNECTION_FAILED = 1008
+
+    # 2000-2999: Configuration related errors
+    INVALID_CONFIG_VALUE = 2001
+    CONFIG_CHANGE_NOT_ALLOWED = 2002
+    CONFIG_NOT_ENABLED = 2003
+
+    # 3000-3999: User code errors
+    INVALID_SQL_SYNTAX = 3001
+    TYPE_MISMATCH = 3002
+    INVALID_CAST = 3003
+    INVALID_FUNCTION_ARGUMENT = 3004
+    ARRAY_INDEX_OUT_OF_BOUNDS = 3005
+    DIVISION_BY_ZERO = 3006
+    INVALID_INPUT = 3007
+    INVALID_OPERATION = 3008
+    INSUFFICIENT_INPUT = 3009
+
+    # 4000-4999: What we don't support
+    UNSUPPORTED_OPERATION = 4001
+    UNSUPPORTED_TYPE = 4002
+
+    # 5000-5999: Internal errors
+    INTERNAL_ERROR = 5001
+    TABLE_NOT_FOUND = 5002
+    COLUMN_NOT_FOUND = 5003
+    AMBIGUOUS_COLUMN_NAME = 5004
```