snowpark-connect 0.28.1__py3-none-any.whl → 0.30.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
- snowflake/snowpark_connect/client.py +65 -0
- snowflake/snowpark_connect/column_name_handler.py +6 -0
- snowflake/snowpark_connect/config.py +33 -5
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +21 -19
- snowflake/snowpark_connect/expression/map_extension.py +277 -1
- snowflake/snowpark_connect/expression/map_sql_expression.py +107 -2
- snowflake/snowpark_connect/expression/map_unresolved_function.py +425 -269
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
- snowflake/snowpark_connect/relation/io_utils.py +21 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +9 -4
- snowflake/snowpark_connect/relation/map_extension.py +21 -4
- snowflake/snowpark_connect/relation/map_join.py +8 -0
- snowflake/snowpark_connect/relation/map_map_partitions.py +7 -8
- snowflake/snowpark_connect/relation/map_relation.py +1 -3
- snowflake/snowpark_connect/relation/map_row_ops.py +116 -15
- snowflake/snowpark_connect/relation/map_show_string.py +14 -6
- snowflake/snowpark_connect/relation/map_sql.py +39 -5
- snowflake/snowpark_connect/relation/map_stats.py +1 -1
- snowflake/snowpark_connect/relation/read/map_read.py +22 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +119 -29
- snowflake/snowpark_connect/relation/read/map_read_json.py +57 -36
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -1
- snowflake/snowpark_connect/relation/read/map_read_text.py +6 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +159 -0
- snowflake/snowpark_connect/relation/stage_locator.py +85 -53
- snowflake/snowpark_connect/relation/write/map_write.py +67 -4
- snowflake/snowpark_connect/server.py +29 -16
- snowflake/snowpark_connect/type_mapping.py +75 -3
- snowflake/snowpark_connect/utils/context.py +0 -14
- snowflake/snowpark_connect/utils/describe_query_cache.py +6 -3
- snowflake/snowpark_connect/utils/io_utils.py +36 -0
- snowflake/snowpark_connect/utils/session.py +4 -0
- snowflake/snowpark_connect/utils/telemetry.py +30 -5
- snowflake/snowpark_connect/utils/udf_cache.py +37 -7
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/METADATA +3 -2
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/RECORD +47 -45
- {snowpark_connect-0.28.1.data → snowpark_connect-0.30.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.28.1.data → snowpark_connect-0.30.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.28.1.data → snowpark_connect-0.30.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/analyze_plan/map_tree_string.py

@@ -5,6 +5,9 @@
 import pyspark.sql.connect.proto.base_pb2 as proto_base
 
 from snowflake.snowpark_connect.relation.map_relation import map_relation
+from snowflake.snowpark_connect.relation.read.metadata_utils import (
+    filter_metadata_columns,
+)
 from snowflake.snowpark_connect.type_mapping import (
     SNOWPARK_TYPE_NAME_TO_PYSPARK_TYPE_NAME,
 )
@@ -16,12 +19,13 @@ def map_tree_string(
     # TODO: tracking the difference with pyspark in SNOW-1853347
     tree_string = request.tree_string
     snowpark_df_container = map_relation(tree_string.plan.root)
-
-
+    filtered_container = filter_metadata_columns(snowpark_df_container)
+    display_df = filtered_container.dataframe
+    filtered_column_mapping = filtered_container.column_map.snowpark_to_spark_map()
 
-    snowpark_tree_string =
+    snowpark_tree_string = display_df._format_schema(
         level=tree_string.level if tree_string.HasField("level") else None,
-        translate_columns=
+        translate_columns=filtered_column_mapping,
         translate_types=SNOWPARK_TYPE_NAME_TO_PYSPARK_TYPE_NAME,
     )
     # workaround for the capitalization of nullable boolean value.
snowflake/snowpark_connect/client.py

@@ -0,0 +1,65 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+import zlib
+
+from pyspark.sql import DataFrame, SparkSession
+
+SQL_PASS_THROUGH_MARKER = "PRIVATE-SNOWFLAKE-SQL"
+
+
+def calculate_checksum(data: str) -> str:
+    checksum = zlib.crc32(data.encode("utf-8"))
+    return format(checksum, "08X")
+
+
+class SnowflakeSession:
+    """
+    Provides a wrapper around SparkSession to enable Snowflake SQL pass-through functionality.
+    Also provides helper methods to switch to different database, schema, role, warehouse, etc.
+    """
+
+    def __init__(self, spark_session: SparkSession) -> None:
+        self.spark_session = spark_session
+
+    def sql(self, sql_stmt: str) -> DataFrame:
+        """
+        Execute Snowflake specific SQL directly against Snowflake.
+        """
+        checksum = calculate_checksum(sql_stmt)
+        return self.spark_session.sql(
+            f"{SQL_PASS_THROUGH_MARKER} {checksum} {sql_stmt}"
+        )
+
+    def use_database(self, database: str, preserve_case: bool = False) -> DataFrame:
+        """
+        Switch to the database specified by `database`.
+        """
+        if preserve_case:
+            database = f'"{database}"'
+        return self.sql(f"USE DATABASE {database}")
+
+    def use_schema(self, schema: str, preserve_case: bool = False) -> DataFrame:
+        """
+        Switch to the schema specified by `schema`.
+        """
+        if preserve_case:
+            schema = f'"{schema}"'
+        return self.sql(f"USE SCHEMA {schema}")
+
+    def use_role(self, role: str, preserve_case: bool = False) -> DataFrame:
+        """
+        Switch to the role specified by `role`.
+        """
+        if preserve_case:
+            role = f'"{role}"'
+        return self.sql(f"USE ROLE {role}")
+
+    def use_warehouse(self, warehouse: str, preserve_case: bool = False) -> DataFrame:
+        """
+        Switch to the warehouse specified by `warehouse`.
+        """
+        if preserve_case:
+            warehouse = f'"{warehouse}"'
+        return self.sql(f"USE WAREHOUSE {warehouse}")
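For context, a minimal usage sketch of the new pass-through wrapper. The connection URL, warehouse name, and query are placeholders, and the import path is inferred from the file location above; this is illustrative, not part of the release:

from pyspark.sql import SparkSession

from snowflake.snowpark_connect.client import SnowflakeSession

# Placeholder Spark Connect endpoint.
spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
sf = SnowflakeSession(spark)
sf.use_warehouse("MY_WH")                       # sends: USE WAREHOUSE MY_WH
df = sf.sql("SHOW PARAMETERS LIKE 'TIMEZONE'")  # statement is prefixed with the marker and a CRC32 checksum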
snowflake/snowpark_connect/column_name_handler.py

@@ -75,7 +75,13 @@ def make_column_names_snowpark_compatible(
     In this case the function call should be `make_column_names_snowpark_compatible(['a', 'b'], 5, 2)`,
     to avoid naming conflicts between the new columns and the old columns.
     """
+    from snowflake.snowpark_connect.relation.read.metadata_utils import (
+        METADATA_FILENAME_COLUMN,
+    )
+
     return [
+        # Skip METADATA$FILENAME - preserve original name without quoting
+        name if name == METADATA_FILENAME_COLUMN else
         # Use `-` in the name to force df.column to return double-quoted names
        quote_name(f"{unquote_if_quoted(name)}-{plan_id:08x}-{i + offset}")
         for i, name in enumerate(names)
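The suffix scheme can be checked in isolation; this worked example mirrors the docstring's `(['a', 'b'], 5, 2)` call (metadata-column special-casing aside):

# plan_id is rendered as 8 hex digits; the counter starts at `offset`.
plan_id, offset = 5, 2
names = ["a", "b"]
suffixed = [f"{name}-{plan_id:08x}-{i + offset}" for i, name in enumerate(names)]
assert suffixed == ["a-00000005-2", "b-00000005-3"]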
snowflake/snowpark_connect/config.py

@@ -263,17 +263,24 @@ SESSION_CONFIG_KEY_WHITELIST = {
     "snowpark.connect.sql.identifiers.auto-uppercase",
     "snowpark.connect.udtf.compatibility_mode",
     "snowpark.connect.views.duplicate_column_names_handling_mode",
-    "enable_snowflake_extension_behavior",
+    "snowpark.connect.enable_snowflake_extension_behavior",
+    "spark.hadoop.fs.s3a.server-side-encryption.key",
+    "spark.hadoop.fs.s3a.assumed.role.arn",
+    "snowpark.connect.describe_cache_ttl_seconds",
 }
-AZURE_SAS_KEY = re.compile(
+AZURE_ACCOUNT_KEY = re.compile(
     r"^fs\.azure\.sas\.[^\.]+\.[^\.]+\.blob\.core\.windows\.net$"
 )
+AZURE_SAS_KEY = re.compile(
+    r"^fs\.azure\.sas\.fixed\.token\.[^\.]+\.dfs\.core\.windows\.net$"
+)
 
 
 def valid_session_config_key(key: str):
     return (
         key in SESSION_CONFIG_KEY_WHITELIST  # AWS session keys
         or AZURE_SAS_KEY.match(key)  # Azure session keys
+        or AZURE_ACCOUNT_KEY.match(key)  # Azure account keys
     )
 
 
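The two Azure patterns accept different key shapes. A quick self-contained check; the account and container names are made up:

import re

AZURE_ACCOUNT_KEY = re.compile(
    r"^fs\.azure\.sas\.[^\.]+\.[^\.]+\.blob\.core\.windows\.net$"
)
AZURE_SAS_KEY = re.compile(
    r"^fs\.azure\.sas\.fixed\.token\.[^\.]+\.dfs\.core\.windows\.net$"
)

# Container-scoped SAS key on a Blob endpoint matches the account pattern.
assert AZURE_ACCOUNT_KEY.match("fs.azure.sas.mycontainer.myaccount.blob.core.windows.net")
# Fixed-token SAS key on a Data Lake (dfs) endpoint matches the SAS pattern.
assert AZURE_SAS_KEY.match("fs.azure.sas.fixed.token.myaccount.dfs.core.windows.net")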
@@ -288,7 +295,8 @@ class SessionConfig:
         "snowpark.connect.views.duplicate_column_names_handling_mode": "rename",
         "spark.sql.execution.pythonUDTF.arrow.enabled": "false",
         "spark.sql.tvf.allowMultipleTableArguments.enabled": "true",
-        "enable_snowflake_extension_behavior": "false",
+        "snowpark.connect.enable_snowflake_extension_behavior": "false",
+        "snowpark.connect.describe_cache_ttl_seconds": "300",
     }
 
     def __init__(self) -> None:
@@ -578,7 +586,10 @@ def set_snowflake_parameters(
             cte_enabled = str_to_bool(value)
             snowpark_session.cte_optimization_enabled = cte_enabled
             logger.info(f"Updated snowpark session CTE optimization: {cte_enabled}")
-
+        case "snowpark.connect.structured_types.fix":
+            # TODO: SNOW-2367714 Remove this once the fix is automatically enabled in Snowpark
+            snowpark.context._enable_fix_2360274 = str_to_bool(value)
+            logger.info(f"Updated snowpark session structured types fix: {value}")
         case _:
             pass
 
@@ -598,6 +609,21 @@ def get_cte_optimization_enabled() -> bool:
     return get_boolean_session_config_param("snowpark.connect.cte.optimization_enabled")
 
 
+def get_describe_cache_ttl_seconds() -> int:
+    """Get the describe query cache TTL from session config, with a default fallback."""
+    session_config: SessionConfig = sessions_config[get_session_id()]
+    default_ttl: str = SessionConfig.default_session_config[
+        "snowpark.connect.describe_cache_ttl_seconds"
+    ]
+    try:
+        ttl_str = session_config.get(
+            "snowpark.connect.describe_cache_ttl_seconds", default_ttl
+        )
+        return int(ttl_str)
+    except ValueError:  # fallback to default ttl
+        return int(default_ttl)
+
+
 def auto_uppercase_column_identifiers() -> bool:
     session_config = sessions_config[get_session_id()]
     return session_config[
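The fallback behavior of the new TTL getter, sketched with a plain dict standing in for SessionConfig (a hypothetical stand-in, not the real class):

DEFAULT_TTL = "300"  # mirrors the default added to SessionConfig above

def ttl_seconds(config: dict) -> int:
    try:
        return int(config.get("snowpark.connect.describe_cache_ttl_seconds", DEFAULT_TTL))
    except ValueError:  # non-numeric value falls back to the default
        return int(DEFAULT_TTL)

assert ttl_seconds({}) == 300
assert ttl_seconds({"snowpark.connect.describe_cache_ttl_seconds": "60"}) == 60
assert ttl_seconds({"snowpark.connect.describe_cache_ttl_seconds": "oops"}) == 300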
@@ -701,7 +727,9 @@ def check_table_supports_operation(table_identifier: str, operation: str) -> bool:
     session_id = get_session_id()
     session_config = sessions_config[session_id]
     enable_extensions = str_to_bool(
-        session_config.get(
+        session_config.get(
+            "snowpark.connect.enable_snowflake_extension_behavior", "false"
+        )
     )
 
     if enable_extensions:
snowflake/snowpark_connect/execute_plan/map_execution_root.py

@@ -26,6 +26,9 @@ from snowflake.snowpark_connect.execute_plan.utils import (
     pandas_to_arrow_batches_bytes,
 )
 from snowflake.snowpark_connect.relation.map_relation import map_relation
+from snowflake.snowpark_connect.relation.read.metadata_utils import (
+    filter_metadata_columns,
+)
 from snowflake.snowpark_connect.type_mapping import (
     map_snowpark_types_to_pyarrow_types,
     snowpark_to_proto_type,
@@ -92,14 +95,19 @@ def map_execution_root(
 ) -> Iterator[proto_base.ExecutePlanResponse | QueryResult]:
     result: DataFrameContainer | pandas.DataFrame = map_relation(request.plan.root)
     if isinstance(result, pandas.DataFrame):
-
+        pandas_df = result
+        data_bytes = pandas_to_arrow_batches_bytes(pandas_df)
+        row_count = len(pandas_df)
+        schema = None
+        yield _build_execute_plan_response(row_count, data_bytes, schema, request)
     else:
-
-
-
-
-
-
+        filtered_result = filter_metadata_columns(result)
+        filtered_result_df = filtered_result.dataframe
+        snowpark_schema = filtered_result_df.schema
+        schema = snowpark_to_proto_type(
+            snowpark_schema, filtered_result.column_map, filtered_result_df
+        )
+        spark_columns = filtered_result.column_map.get_spark_columns()
         if tcm.TCM_MODE:
             # TCM result handling:
             # - small result (only one batch): just return the executePlanResponse
@@ -108,22 +116,22 @@ def map_execution_root(
             is_large_result = False
             second_batch = False
             first_arrow_table = None
-            with
-                for arrow_table in to_arrow_batch_iter(
+            with filtered_result_df.session.query_history() as qh:
+                for arrow_table in to_arrow_batch_iter(filtered_result_df):
                     if second_batch:
                         is_large_result = True
                         break
                     first_arrow_table = arrow_table
                     second_batch = True
                 queries_cnt = len(
-
+                    filtered_result_df._plan.execution_queries[PlanQueryType.QUERIES]
                 )
                 # get query uuid from the last query; this may not be the last queries in query history because snowpark
                 # may run some post action queries, e.g., drop temp table.
                 query_id = qh.queries[queries_cnt - 1].query_id
                 if first_arrow_table is None:
                     # empty arrow batch iterator
-                    pandas_df =
+                    pandas_df = filtered_result_df.to_pandas()
                     data_bytes = pandas_to_arrow_batches_bytes(pandas_df)
                     yield _build_execute_plan_response(0, data_bytes, schema, request)
                 elif not tcm.TCM_RETURN_QUERY_ID_FOR_SMALL_RESULT and not is_large_result:
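The loop above peeks at most two batches to decide between the small- and large-result paths. The same idea in isolation:

# Sketch of the large-result detection: consume at most two batches.
def peek_first_batch(batches):
    first, is_large = None, False
    for batch in batches:
        if first is not None:
            is_large = True  # a second batch exists, so stream the rest later
            break
        first = batch
    return first, is_large

assert peek_first_batch(iter([b"one"])) == (b"one", False)
assert peek_first_batch(iter([b"one", b"two"])) == (b"one", True)
assert peek_first_batch(iter([])) == (None, False)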
@@ -150,7 +158,7 @@ def map_execution_root(
                 spark_schema.SerializeToString(),
             )
         else:
-            arrow_table_iter = to_arrow_batch_iter(
+            arrow_table_iter = to_arrow_batch_iter(filtered_result_df)
             batch_count = 0
             for arrow_table in arrow_table_iter:
                 if arrow_table.num_rows > 0:
@@ -166,12 +174,6 @@ def map_execution_root(
 
             # Empty result needs special processing
             if batch_count == 0:
-                pandas_df =
+                pandas_df = filtered_result_df.to_pandas()
                 data_bytes = pandas_to_arrow_batches_bytes(pandas_df)
                 yield _build_execute_plan_response(0, data_bytes, schema, request)
-            else:
-                pandas_df = result_df
-                data_bytes = pandas_to_arrow_batches_bytes(pandas_df)
-                row_count = len(pandas_df)
-                schema = None
-                yield _build_execute_plan_response(row_count, data_bytes, schema, request)
snowflake/snowpark_connect/expression/map_extension.py

@@ -6,7 +6,12 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
 
 import snowflake.snowpark.functions as snowpark_fn
 import snowflake.snowpark_connect.proto.snowflake_expression_ext_pb2 as snowflake_proto
-from snowflake.snowpark.types import
+from snowflake.snowpark.types import (
+    BooleanType,
+    DayTimeIntervalType,
+    StringType,
+    YearMonthIntervalType,
+)
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
@@ -18,6 +23,30 @@ from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
 )
 
+# Formatting constants for interval display
+_TWO_DIGIT_FORMAT = (
+    "{:02d}"  # Zero-padded 2-digit format for hours, minutes, and whole seconds
+)
+_THREE_DIGIT_FORMAT = (
+    "{:03d}"  # Zero-padded 3-digit format for hours/minutes and whole seconds
+)
+_SECONDS_PRECISION_FORMAT = (
+    "{:09.6f}"  # 6 decimal places with leading zeros for seconds
+)
+
+
+def _format_time_component(value: int, is_negative: bool = False) -> str:
+    """Format a time component (hours, minutes, whole seconds) with proper zero-padding."""
+    if is_negative:
+        return _THREE_DIGIT_FORMAT.format(value)
+    else:
+        return _TWO_DIGIT_FORMAT.format(value)
+
+
+def _format_seconds_precise(seconds: float) -> str:
+    """Format seconds with full precision, stripping trailing zeros."""
+    return _SECONDS_PRECISION_FORMAT.format(seconds).rstrip("0").rstrip(".")
+
 
 def map_extension(
     exp: expressions_proto.Expression,
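The three format strings behave as follows (plain Python, no package imports needed):

# What the format constants above produce.
assert "{:02d}".format(7) == "07"    # _TWO_DIGIT_FORMAT
assert "{:03d}".format(-7) == "-07"  # _THREE_DIGIT_FORMAT reserves a slot for the sign
# _format_seconds_precise: fixed-width seconds, then trailing zeros stripped
assert "{:09.6f}".format(1.5).rstrip("0").rstrip(".") == "01.5"
assert "{:09.6f}".format(4.0).rstrip("0").rstrip(".") == "04"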
@@ -54,6 +83,56 @@ def map_extension(
             )
             return [name], typed_col
 
+        case "interval_literal":
+            interval_ext = extension.interval_literal
+            literal = interval_ext.literal
+            start_field = (
+                interval_ext.start_field
+                if interval_ext.HasField("start_field")
+                else None
+            )
+            end_field = (
+                interval_ext.end_field if interval_ext.HasField("end_field") else None
+            )
+
+            # Format interval with proper context-aware formatting
+            if literal.HasField("year_month_interval"):
+                total_months = literal.year_month_interval
+                lit_value, lit_name = _format_year_month_interval(
+                    total_months, start_field, end_field
+                )
+                if start_field is not None and end_field is not None:
+                    interval_data_type = YearMonthIntervalType(start_field, end_field)
+                else:
+                    interval_data_type = YearMonthIntervalType()
+
+                # Create column using SQL expression with context-aware formatting
+                col = snowpark_fn.sql_expr(lit_value)
+
+            elif literal.HasField("day_time_interval"):
+                total_microseconds = literal.day_time_interval
+                lit_value, lit_name = _format_day_time_interval(
+                    total_microseconds, start_field, end_field
+                )
+                if start_field is not None and end_field is not None:
+                    interval_data_type = DayTimeIntervalType(start_field, end_field)
+                else:
+                    interval_data_type = DayTimeIntervalType()
+
+                # Create column using SQL expression to get proper interval type (same as year-month)
+                col = snowpark_fn.sql_expr(lit_value)
+
+            else:
+                # Fallback - shouldn't happen
+                lit_value = str(literal)
+                lit_name = str(literal)
+                interval_data_type = StringType()
+                col = snowpark_fn.lit(lit_value)
+
+            typed_col = TypedColumn(col, lambda: [interval_data_type])
+
+            return [lit_name], typed_col
+
         case "subquery_expression":
             from snowflake.snowpark_connect.dataframe_container import (
                 DataFrameContainer,
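On the Spark side, interval literals like the following would presumably exercise this new case (illustrative only; the endpoint is a placeholder):

from pyspark.sql import SparkSession

# Placeholder Spark Connect endpoint.
spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
spark.sql("SELECT INTERVAL '1-2' YEAR TO MONTH").show()
spark.sql("SELECT INTERVAL '1 02:03:04.5' DAY TO SECOND").show()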
@@ -113,3 +192,200 @@ def map_extension(
 
         case other:
             raise SnowparkConnectNotImplementedError(f"Unexpected extension {other}")
+
+
+def _format_year_month_interval(
+    total_months: int, start_field: int | None, end_field: int | None
+) -> tuple[str, str]:
+    """Format year-month interval with context-aware precision."""
+
+    # Calculate years and months from absolute value
+    is_negative = total_months < 0
+    abs_months = abs(total_months)
+    years = abs_months // 12
+    months = abs_months % 12
+
+    # Determine interval type
+    is_year_only = (
+        start_field == YearMonthIntervalType.YEAR
+        and end_field == YearMonthIntervalType.YEAR
+    )
+    is_month_only = (
+        start_field == YearMonthIntervalType.MONTH
+        and end_field == YearMonthIntervalType.MONTH
+    )
+
+    # Format based on type and sign
+    if is_year_only:
+        sign = "-" if is_negative else ""
+        str_value = f"INTERVAL '{sign}{years}' YEAR"
+    elif is_month_only:
+        str_value = f"INTERVAL '{total_months}' MONTH"  # Keep original sign
+    else:  # YEAR TO MONTH (default)
+        if is_negative:
+            str_value = f"INTERVAL '-{years}-{months}' YEAR TO MONTH"
+        else:
+            str_value = f"INTERVAL '{years}-{months}' YEAR TO MONTH"
+
+    return str_value, str_value
+
+
+def _format_day_time_interval(
+    total_microseconds: int, start_field: int | None, end_field: int | None
+) -> tuple[str, str]:
+    """Format day-time interval with context-aware precision."""
+    total_seconds = total_microseconds / 1_000_000
+
+    # Handle negative intervals correctly
+    is_negative = total_seconds < 0
+
+    # Work with integer microseconds to preserve precision
+    abs_total_microseconds = abs(total_microseconds)
+
+    # Convert to components using integer arithmetic
+    days = int(abs_total_microseconds // (86400 * 1_000_000))
+    remaining_microseconds = abs_total_microseconds % (86400 * 1_000_000)
+    hours = int(remaining_microseconds // (3600 * 1_000_000))
+    remaining_microseconds = remaining_microseconds % (3600 * 1_000_000)
+    minutes = int(remaining_microseconds // (60 * 1_000_000))
+    remaining_microseconds = remaining_microseconds % (60 * 1_000_000)
+
+    # Convert final microseconds to seconds with full precision
+    seconds = remaining_microseconds / 1_000_000
+
+    # Apply negative sign back to days if needed
+    if is_negative:
+        days = -days
+
+    # Format based on the specific start/end field context
+    if (
+        start_field == DayTimeIntervalType.DAY and end_field == DayTimeIntervalType.DAY
+    ):  # DAY only
+        str_value = f"INTERVAL '{days}' DAY"
+    elif (
+        start_field == DayTimeIntervalType.HOUR
+        and end_field == DayTimeIntervalType.HOUR
+    ):  # HOUR only
+        # For HOUR-only intervals, calculate total hours (don't break down into days)
+        total_hours = int(abs(total_microseconds) // (3600 * 1_000_000))
+        if total_microseconds < 0:
+            total_hours = -total_hours
+        if total_hours >= 0:
+            str_value = f"INTERVAL '{_TWO_DIGIT_FORMAT.format(total_hours)}' HOUR"
+        else:
+            str_value = f"INTERVAL '{_THREE_DIGIT_FORMAT.format(total_hours)}' HOUR"
+    elif (
+        start_field == DayTimeIntervalType.MINUTE
+        and end_field == DayTimeIntervalType.MINUTE
+    ):  # MINUTE only
+        # For MINUTE-only intervals, calculate total minutes (don't break down into hours/days)
+        total_minutes = int(abs(total_microseconds) // (60 * 1_000_000))
+        if total_microseconds < 0:
+            total_minutes = -total_minutes
+        if total_minutes >= 0:
+            str_value = f"INTERVAL '{_TWO_DIGIT_FORMAT.format(total_minutes)}' MINUTE"
+        else:
+            str_value = f"INTERVAL '{_THREE_DIGIT_FORMAT.format(total_minutes)}' MINUTE"
+    elif (
+        start_field == DayTimeIntervalType.SECOND
+        and end_field == DayTimeIntervalType.SECOND
+    ):  # SECOND only
+        # For SECOND-only intervals, use total seconds (don't break down into minutes/hours/days)
+        total_seconds_precise = total_microseconds / 1_000_000
+        if total_seconds_precise == int(total_seconds_precise):
+            if total_seconds_precise >= 0:
+                str_value = f"INTERVAL '{_TWO_DIGIT_FORMAT.format(int(total_seconds_precise))}' SECOND"
+            else:
+                str_value = f"INTERVAL '{_THREE_DIGIT_FORMAT.format(int(total_seconds_precise))}' SECOND"
+        else:
+            seconds_str = _format_seconds_precise(total_seconds_precise)
+            str_value = f"INTERVAL '{seconds_str}' SECOND"
+    elif (
+        start_field == DayTimeIntervalType.MINUTE
+        and end_field == DayTimeIntervalType.SECOND
+    ):  # MINUTE TO SECOND
+        # For MINUTE TO SECOND intervals, calculate total minutes and remaining seconds
+        total_minutes = int(abs_total_microseconds // (60 * 1_000_000))
+        remaining_microseconds_for_minutes = abs_total_microseconds % (60 * 1_000_000)
+        remaining_seconds_for_minutes = remaining_microseconds_for_minutes / 1_000_000
+
+        # Format seconds appropriately (whole or fractional)
+        if remaining_seconds_for_minutes == int(remaining_seconds_for_minutes):
+            seconds_str = _TWO_DIGIT_FORMAT.format(int(remaining_seconds_for_minutes))
+        else:
+            seconds_str = _format_seconds_precise(remaining_seconds_for_minutes)
+
+        # Apply sign and format
+        if is_negative:
+            str_value = f"INTERVAL '-{_TWO_DIGIT_FORMAT.format(total_minutes)}:{seconds_str}' MINUTE TO SECOND"
+        else:
+            str_value = f"INTERVAL '{_TWO_DIGIT_FORMAT.format(total_minutes)}:{seconds_str}' MINUTE TO SECOND"
+    elif (
+        start_field == DayTimeIntervalType.HOUR
+        and end_field == DayTimeIntervalType.MINUTE
+    ):  # HOUR TO MINUTE
+        str_value = f"INTERVAL '{_TWO_DIGIT_FORMAT.format(hours)}:{_TWO_DIGIT_FORMAT.format(minutes)}' HOUR TO MINUTE"
+    elif (
+        start_field == DayTimeIntervalType.HOUR
+        and end_field == DayTimeIntervalType.SECOND
+    ):  # HOUR TO SECOND
+        if seconds == int(seconds):
+            str_value = f"INTERVAL '{_format_time_component(hours)}:{_format_time_component(minutes)}:{_format_time_component(int(seconds))}' HOUR TO SECOND"
+        else:
+            seconds_str = _format_seconds_precise(seconds)
+            str_value = f"INTERVAL '{_format_time_component(hours)}:{_format_time_component(minutes)}:{seconds_str}' HOUR TO SECOND"
+    elif (
+        start_field == DayTimeIntervalType.DAY and end_field == DayTimeIntervalType.HOUR
+    ):  # DAY TO HOUR
+        str_value = f"INTERVAL '{days} {_format_time_component(hours)}' DAY TO HOUR"
+    elif (
+        start_field == DayTimeIntervalType.DAY
+        and end_field == DayTimeIntervalType.MINUTE
+    ):  # DAY TO MINUTE
+        str_value = f"INTERVAL '{days} {_format_time_component(hours)}:{_format_time_component(minutes)}' DAY TO MINUTE"
+    elif (
+        start_field == DayTimeIntervalType.DAY
+        and end_field == DayTimeIntervalType.SECOND
+    ):  # DAY TO SECOND
+        if seconds == int(seconds):
+            str_value = f"INTERVAL '{days} {_format_time_component(hours)}:{_format_time_component(minutes)}:{_format_time_component(int(seconds))}' DAY TO SECOND"
+        else:
+            seconds_str = _format_seconds_precise(seconds)
+            str_value = f"INTERVAL '{days} {_format_time_component(hours)}:{_format_time_component(minutes)}:{seconds_str}' DAY TO SECOND"
+    else:
+        # Fallback - use smart formatting like the original literal.py logic
+        if days > 0:
+            if hours == 0 and minutes == 0 and seconds == 0:
+                str_value = f"INTERVAL '{int(days)}' DAY"
+            else:
+                if seconds == int(seconds):
+                    str_value = f"INTERVAL '{days} {_format_time_component(hours)}:{_format_time_component(minutes)}:{_format_time_component(int(seconds))}' DAY TO SECOND"
+                else:
+                    seconds_str = _format_seconds_precise(seconds)
+                    str_value = f"INTERVAL '{days} {_format_time_component(hours)}:{_format_time_component(minutes)}:{seconds_str}' DAY TO SECOND"
+        elif hours > 0:
+            if minutes == 0 and seconds == 0:
+                str_value = f"INTERVAL '{_format_time_component(hours)}' HOUR"
+            else:
+                if seconds == int(seconds):
+                    str_value = f"INTERVAL '{_format_time_component(hours)}:{_format_time_component(minutes)}:{_format_time_component(int(seconds))}' HOUR TO SECOND"
+                else:
+                    seconds_str = _format_seconds_precise(seconds)
+                    str_value = f"INTERVAL '{_format_time_component(hours)}:{_format_time_component(minutes)}:{seconds_str}' HOUR TO SECOND"
+        elif minutes > 0:
+            if seconds == 0:
+                str_value = f"INTERVAL '{_format_time_component(minutes)}' MINUTE"
+            else:
+                if seconds == int(seconds):
+                    str_value = f"INTERVAL '{_format_time_component(minutes)}:{_format_time_component(int(seconds))}' MINUTE TO SECOND"
+                else:
+                    seconds_str = _format_seconds_precise(seconds)
+                    str_value = f"INTERVAL '{_format_time_component(minutes)}:{seconds_str}' MINUTE TO SECOND"
+        else:
+            if seconds == int(seconds):
+                str_value = f"INTERVAL '{_format_time_component(int(seconds))}' SECOND"
+            else:
+                seconds_str = _format_seconds_precise(seconds)
+                str_value = f"INTERVAL '{seconds_str}' SECOND"
+
+    return str_value, str_value