snowpark-connect 0.29.0__py3-none-any.whl → 0.30.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
- snowflake/snowpark_connect/client.py +65 -0
- snowflake/snowpark_connect/column_name_handler.py +6 -0
- snowflake/snowpark_connect/config.py +22 -3
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +21 -19
- snowflake/snowpark_connect/expression/map_extension.py +277 -1
- snowflake/snowpark_connect/expression/map_sql_expression.py +107 -2
- snowflake/snowpark_connect/expression/map_unresolved_function.py +253 -59
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
- snowflake/snowpark_connect/relation/map_column_ops.py +9 -4
- snowflake/snowpark_connect/relation/map_join.py +8 -0
- snowflake/snowpark_connect/relation/map_row_ops.py +116 -15
- snowflake/snowpark_connect/relation/map_show_string.py +14 -6
- snowflake/snowpark_connect/relation/map_sql.py +39 -5
- snowflake/snowpark_connect/relation/map_stats.py +1 -1
- snowflake/snowpark_connect/relation/read/map_read_csv.py +17 -6
- snowflake/snowpark_connect/relation/read/map_read_json.py +12 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +159 -0
- snowflake/snowpark_connect/relation/write/map_write.py +29 -0
- snowflake/snowpark_connect/server.py +11 -3
- snowflake/snowpark_connect/type_mapping.py +75 -3
- snowflake/snowpark_connect/utils/describe_query_cache.py +6 -3
- snowflake/snowpark_connect/utils/session.py +1 -0
- snowflake/snowpark_connect/utils/telemetry.py +30 -5
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.29.0.dist-info → snowpark_connect-0.30.0.dist-info}/METADATA +1 -1
- {snowpark_connect-0.29.0.dist-info → snowpark_connect-0.30.0.dist-info}/RECORD +37 -35
- {snowpark_connect-0.29.0.data → snowpark_connect-0.30.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.29.0.data → snowpark_connect-0.30.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.29.0.data → snowpark_connect-0.30.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.29.0.dist-info → snowpark_connect-0.30.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.29.0.dist-info → snowpark_connect-0.30.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.29.0.dist-info → snowpark_connect-0.30.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.29.0.dist-info → snowpark_connect-0.30.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.29.0.dist-info → snowpark_connect-0.30.0.dist-info}/top_level.txt +0 -0

snowflake/snowpark_connect/relation/read/metadata_utils.py (new file)
@@ -0,0 +1,159 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+"""
+Utilities for handling internal metadata columns in file-based DataFrames.
+"""
+
+import os
+
+import pandas
+from pyspark.errors.exceptions.base import AnalysisException
+
+from snowflake import snowpark
+from snowflake.snowpark.column import METADATA_FILENAME
+from snowflake.snowpark.functions import col
+from snowflake.snowpark.types import StructField
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+
+# Constant for the metadata filename column name
+METADATA_FILENAME_COLUMN = "METADATA$FILENAME"
+
+
+def add_filename_metadata_to_reader(
+    reader: snowpark.DataFrameReader,
+    options: dict | None = None,
+) -> snowpark.DataFrameReader:
+    """
+    Add filename metadata to a DataFrameReader based on configuration.
+
+    Args:
+        reader: Snowpark DataFrameReader instance
+        options: Dictionary of options to check for metadata configuration
+
+    Returns:
+        DataFrameReader with filename metadata enabled if configured, otherwise unchanged
+    """
+    # NOTE: SNOWPARK_POPULATE_FILE_METADATA_DEFAULT is an internal environment variable
+    # used only for CI testing to verify no metadata columns leak in regular file operations.
+    # This environment variable should NOT be exposed to end users. Users should only use snowpark.populateFileMetadata
+    # to enable metadata population.
+    metadata_default = os.environ.get(
+        "SNOWPARK_POPULATE_FILE_METADATA_DEFAULT", "false"
+    )
+
+    populate_metadata = (
+        options.get("snowpark.populateFileMetadata", metadata_default)
+        if options
+        else metadata_default
+    ).lower() == "true"
+
+    if populate_metadata:
+        return reader.with_metadata(METADATA_FILENAME)
+    else:
+        return reader
+
+
+def get_non_metadata_fields(schema_fields: list[StructField]) -> list[StructField]:
+    """
+    Filter out METADATA$FILENAME fields from a list of schema fields.
+
+    Args:
+        schema_fields: List of StructField objects from a DataFrame schema
+
+    Returns:
+        List of StructField objects excluding METADATA$FILENAME
+    """
+    return [field for field in schema_fields if field.name != METADATA_FILENAME_COLUMN]
+
+
+def get_non_metadata_column_names(schema_fields: list[StructField]) -> list[str]:
+    """
+    Get column names from schema fields, excluding METADATA$FILENAME.
+
+    Args:
+        schema_fields: List of StructField objects from a DataFrame schema
+
+    Returns:
+        List of column names (strings) excluding METADATA$FILENAME
+    """
+    return [
+        field.name for field in schema_fields if field.name != METADATA_FILENAME_COLUMN
+    ]
+
+
+def filter_metadata_column_name(column_names: list[str]) -> list[str]:
+    """
+    Get column names from column_names, excluding METADATA$FILENAME.
+
+    Returns:
+        List of column names (strings) excluding METADATA$FILENAME
+    """
+    return [
+        col_name for col_name in column_names if col_name != METADATA_FILENAME_COLUMN
+    ]
+
+
+def filter_metadata_columns(
+    result_container: DataFrameContainer | pandas.DataFrame | None,
+) -> DataFrameContainer | pandas.DataFrame | None:
+    """
+    Filter METADATA$FILENAME from DataFrame container for execution and write operations.
+
+    Args:
+        result_container: DataFrameContainer or pandas DataFrame to filter
+
+    Returns:
+        Filtered container (callers can access dataframe via container.dataframe)
+    """
+    # Handle pandas DataFrame case - return as-is
+    if isinstance(result_container, pandas.DataFrame):
+        return result_container
+
+    if result_container is None:
+        return None
+
+    result_df = result_container.dataframe
+    if not isinstance(result_df, snowpark.DataFrame):
+        return result_container
+
+    df_columns = result_container.column_map.get_snowpark_columns()
+    has_metadata_filename = any(name == METADATA_FILENAME_COLUMN for name in df_columns)
+
+    if not has_metadata_filename:
+        return result_container
+
+    non_metadata_columns = filter_metadata_column_name(df_columns)
+
+    if len(non_metadata_columns) == 0:
+        # DataFrame contains only metadata columns (METADATA$FILENAME), no actual data columns remaining.
+        # We don't have a way to return an empty dataframe.
+        raise AnalysisException(
+            "[DATAFRAME_MISSING_DATA_COLUMNS] Cannot perform operation on DataFrame that contains no data columns."
+        )
+
+    filtered_df = result_df.select([col(name) for name in non_metadata_columns])
+
+    original_spark_columns = result_container.column_map.get_spark_columns()
+    original_snowpark_columns = result_container.column_map.get_snowpark_columns()
+
+    filtered_spark_columns = []
+    filtered_snowpark_columns = []
+
+    for i, colname in enumerate(df_columns):
+        if colname != METADATA_FILENAME_COLUMN:
+            filtered_spark_columns.append(original_spark_columns[i])
+            filtered_snowpark_columns.append(original_snowpark_columns[i])
+
+    new_container = DataFrameContainer.create_with_column_mapping(
+        dataframe=filtered_df,
+        spark_column_names=filtered_spark_columns,
+        snowpark_column_names=filtered_snowpark_columns,
+        column_metadata=result_container.column_map.column_metadata,
+        table_name=result_container.table_name,
+        alias=result_container.alias,
+        partition_hint=result_container.partition_hint,
+    )
+
+    return new_container
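
A minimal usage sketch of these helpers (illustrative only, not part of the release; the Snowpark `session`, stage path, and column names below are placeholders):

from snowflake.snowpark_connect.relation.read.metadata_utils import (
    METADATA_FILENAME_COLUMN,
    add_filename_metadata_to_reader,
    filter_metadata_column_name,
)

# Opt in to filename metadata the way the file readers do, by forwarding the
# read options; "snowpark.populateFileMetadata" is the switch surfaced here.
reader = add_filename_metadata_to_reader(
    session.read, options={"snowpark.populateFileMetadata": "true"}
)
df = reader.csv("@my_stage/input/")  # placeholder stage path

# Drop the internal column name before exposing columns to Spark-facing code.
user_columns = filter_metadata_column_name(["ID", "NAME", METADATA_FILENAME_COLUMN])
# -> ["ID", "NAME"]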

snowflake/snowpark_connect/relation/write/map_write.py
@@ -40,6 +40,9 @@ from snowflake.snowpark_connect.relation.io_utils import (
     supported_compressions_for_format,
 )
 from snowflake.snowpark_connect.relation.map_relation import map_relation
+from snowflake.snowpark_connect.relation.read.metadata_utils import (
+    filter_metadata_columns,
+)
 from snowflake.snowpark_connect.relation.read.reader_config import CsvWriterConfig
 from snowflake.snowpark_connect.relation.stage_locator import get_paths_from_stage
 from snowflake.snowpark_connect.relation.utils import (
@@ -129,6 +132,19 @@ def map_write(request: proto_base.ExecutePlanRequest):
 
     result = map_relation(write_op.input)
     input_df: snowpark.DataFrame = handle_column_names(result, write_op.source)
+
+    # Create updated container with transformed dataframe, then filter METADATA$FILENAME columns
+    # Update the container to use the transformed dataframe from handle_column_names
+    updated_result = DataFrameContainer(
+        dataframe=input_df,
+        column_map=result.column_map,
+        table_name=result.table_name,
+        alias=result.alias,
+        partition_hint=result.partition_hint,
+    )
+    updated_result = filter_metadata_columns(updated_result)
+    input_df = updated_result.dataframe
+
     session: snowpark.Session = get_or_create_snowpark_session()
 
     # Snowflake saveAsTable doesn't support format
@@ -537,6 +553,19 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
     snowpark_table_name = _spark_to_snowflake(write_op.table_name)
     result = map_relation(write_op.input)
     input_df: snowpark.DataFrame = handle_column_names(result, "table")
+
+    # Create updated container with transformed dataframe, then filter METADATA$FILENAME columns
+    # Update the container to use the transformed dataframe from handle_column_names
+    updated_result = DataFrameContainer(
+        dataframe=input_df,
+        column_map=result.column_map,
+        table_name=result.table_name,
+        alias=result.alias,
+        partition_hint=result.partition_hint,
+    )
+    updated_result = filter_metadata_columns(updated_result)
+    input_df = updated_result.dataframe
+
     session: snowpark.Session = get_or_create_snowpark_session()
 
     if write_op.table_name is None or write_op.table_name == "":

snowflake/snowpark_connect/server.py
@@ -232,12 +232,20 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
         match request.WhichOneof("analyze"):
             case "schema":
                 result = map_relation(request.schema.plan.root)
-
-
+
+                from snowflake.snowpark_connect.relation.read.metadata_utils import (
+                    filter_metadata_columns,
+                )
+
+                filtered_result = filter_metadata_columns(result)
+                filtered_df = filtered_result.dataframe
+
                 schema = proto_base.AnalyzePlanResponse.Schema(
                     schema=types_proto.DataType(
                         **snowpark_to_proto_type(
-
+                            filtered_df.schema,
+                            filtered_result.column_map,
+                            filtered_df,
                         )
                     )
                 )
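
Taken together with the write-path changes above, the end-to-end effect can be sketched at the PySpark level roughly as follows (hedged illustration; the Spark session, stage path, and table name are placeholders):

# Read with filename metadata enabled via the new option (placeholder path).
df = (
    spark.read.option("snowpark.populateFileMetadata", "true")
    .csv("@my_stage/input/")
)

# The schema returned from plan analysis is filtered, so the internal
# METADATA$FILENAME column is not reported here.
df.printSchema()

# The write path rebuilds the DataFrame container and filters METADATA$FILENAME
# before saving, so the internal column is not written to the target table.
df.write.mode("overwrite").saveAsTable("MY_TABLE")  # placeholder table name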

snowflake/snowpark_connect/type_mapping.py
@@ -30,6 +30,10 @@ from snowflake.snowpark_connect.date_time_format_mapping import (
     convert_spark_format_to_snowflake,
 )
 from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
+from snowflake.snowpark_connect.expression.map_sql_expression import (
+    _INTERVAL_DAYTIME_PATTERN_RE,
+    _INTERVAL_YEARMONTH_PATTERN_RE,
+)
 from snowflake.snowpark_connect.utils.context import get_is_evaluating_sql
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import (
@@ -274,6 +278,18 @@ def snowpark_to_proto_type(
         case snowpark.types.VariantType:
             # For now we are returning a string type for variant types.
             return {"string": types_proto.DataType.String()}
+        case snowpark.types.YearMonthIntervalType:
+            return {
+                "year_month_interval": types_proto.DataType.YearMonthInterval(
+                    start_field=data_type.start_field, end_field=data_type.end_field
+                )
+            }
+        case snowpark.types.DayTimeIntervalType:
+            return {
+                "day_time_interval": types_proto.DataType.DayTimeInterval(
+                    start_field=data_type.start_field, end_field=data_type.end_field
+                )
+            }
         case _:
             raise SnowparkConnectNotImplementedError(
                 f"Unsupported snowpark data type: {data_type}"
@@ -328,6 +344,24 @@ def cast_to_match_snowpark_type(
             return str(content)
         case snowpark.types.TimestampType:
             return str(content)
+        case snowpark.types.YearMonthIntervalType:
+            if isinstance(content, (int, float)):
+                total_months = int(content)
+                years = total_months // 12
+                months = total_months % 12
+                return f"INTERVAL '{years}-{months}' YEAR TO MONTH"
+            elif isinstance(content, str) and content.startswith(("+", "-")):
+                # Handle Snowflake's native interval format (e.g., "+11-08" or "-2-3")
+                # Convert to Spark's format: "INTERVAL 'Y-M' YEAR TO MONTH"
+                sign = content[0]
+                interval_part = content[1:]  # Remove sign
+                if sign == "-":
+                    return f"INTERVAL '-{interval_part}' YEAR TO MONTH"
+                else:
+                    return f"INTERVAL '{interval_part}' YEAR TO MONTH"
+            return str(content)
+        case snowpark.types.DayTimeIntervalType:
+            return str(content)
         case _:
             raise SnowparkConnectNotImplementedError(
                 f"Unsupported snowpark data type in casting: {data_type}"
@@ -411,6 +445,18 @@ def proto_to_snowpark_type(
             # For UDT types, return the underlying SQL type
             logger.debug("Returning underlying sql type for udt")
             return proto_to_snowpark_type(data_type.udt.sql_type)
+        case "year_month_interval":
+            # Preserve start_field and end_field from protobuf
+            return snowpark.types.YearMonthIntervalType(
+                start_field=data_type.year_month_interval.start_field,
+                end_field=data_type.year_month_interval.end_field,
+            )
+        case "day_time_interval":
+            # Preserve start_field and end_field from protobuf
+            return snowpark.types.DayTimeIntervalType(
+                start_field=data_type.day_time_interval.start_field,
+                end_field=data_type.day_time_interval.end_field,
+            )
         case _:
             return map_simple_types(data_type.WhichOneof("kind"))
 
@@ -523,6 +569,12 @@ def map_snowpark_types_to_pyarrow_types(
             return pa.timestamp(unit, tz=tz)
         case snowpark.types.VariantType:
             return pa.string()
+        case snowpark.types.YearMonthIntervalType:
+            # Return string type so formatted intervals are preserved in display
+            return pa.string()
+        case snowpark.types.DayTimeIntervalType:
+            # Return string type so formatted intervals are preserved in display
+            return pa.string()
         case _:
             raise SnowparkConnectNotImplementedError(
                 f"Unsupported snowpark data type: {snowpark_type}"
@@ -676,6 +728,14 @@ def map_pyspark_types_to_snowpark_types(
         return snowpark.types.TimestampType()
     if isinstance(type_to_map, pyspark.sql.types.TimestampNTZType):
        return snowpark.types.TimestampType(timezone=TimestampTimeZone.NTZ)
+    if isinstance(type_to_map, pyspark.sql.types.YearMonthIntervalType):
+        return snowpark.types.YearMonthIntervalType(
+            type_to_map.startField, type_to_map.endField
+        )
+    if isinstance(type_to_map, pyspark.sql.types.DayTimeIntervalType):
+        return snowpark.types.DayTimeIntervalType(
+            type_to_map.startField, type_to_map.endField
+        )
     raise SnowparkConnectNotImplementedError(
         f"Unsupported spark data type: {type_to_map}"
     )
@@ -743,6 +803,14 @@ def map_snowpark_to_pyspark_types(
         if type_to_map.tz == snowpark.types.TimestampTimeZone.NTZ:
             return pyspark.sql.types.TimestampNTZType()
         return pyspark.sql.types.TimestampType()
+    if isinstance(type_to_map, snowpark.types.YearMonthIntervalType):
+        return pyspark.sql.types.YearMonthIntervalType(
+            type_to_map.start_field, type_to_map.end_field
+        )
+    if isinstance(type_to_map, snowpark.types.DayTimeIntervalType):
+        return pyspark.sql.types.DayTimeIntervalType(
+            type_to_map.start_field, type_to_map.end_field
+        )
     raise SnowparkConnectNotImplementedError(f"Unsupported data type: {type_to_map}")
 
 
@@ -785,10 +853,14 @@ def map_simple_types(simple_type: str) -> snowpark.types.DataType:
             return snowpark.types.TimestampType(snowpark.types.TimestampTimeZone.NTZ)
         case "timestamp_ltz":
            return snowpark.types.TimestampType(snowpark.types.TimestampTimeZone.LTZ)
+        case "year_month_interval":
+            return snowpark.types.YearMonthIntervalType()
         case "day_time_interval":
-
-
-            return snowpark.types.
+            return snowpark.types.DayTimeIntervalType()
+        case type_name if _INTERVAL_YEARMONTH_PATTERN_RE.match(type_name):
+            return snowpark.types.YearMonthIntervalType()
+        case type_name if _INTERVAL_DAYTIME_PATTERN_RE.match(type_name):
+            return snowpark.types.DayTimeIntervalType()
         case _:
             if simple_type.startswith("decimal"):
                 precision = int(simple_type.split("(")[1].split(",")[0])
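
As a worked example of the year-month conversion added to cast_to_match_snowpark_type (restated standalone here, not imported from the package):

def format_year_month(total_months: int) -> str:
    # Same arithmetic as the YearMonthIntervalType branch above.
    years = total_months // 12
    months = total_months % 12
    return f"INTERVAL '{years}-{months}' YEAR TO MONTH"

print(format_year_month(140))  # INTERVAL '11-8' YEAR TO MONTH
print(format_year_month(7))    # INTERVAL '0-7' YEAR TO MONTH

# A Snowflake-native string such as "+11-08" keeps its digits; only the sign is
# handled, yielding "INTERVAL '11-08' YEAR TO MONTH".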

snowflake/snowpark_connect/utils/describe_query_cache.py
@@ -16,7 +16,6 @@ from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import telemetry
 
-DESCRIBE_CACHE_TTL_SECONDS = 15
 USE_DESCRIBE_QUERY_CACHE = True
 
 DDL_DETECTION_PATTERN = re.compile(r"\s*(CREATE|ALTER|DROP)\b", re.IGNORECASE)
@@ -51,6 +50,8 @@ class DescribeQueryCache:
         return sql_query
 
     def get(self, sql_query: str) -> list[ResultMetadataV2] | None:
+        from snowflake.snowpark_connect.config import get_describe_cache_ttl_seconds
+
         telemetry.report_describe_query_cache_lookup()
 
         cache_key = self._get_cache_key(sql_query)
@@ -59,7 +60,9 @@ class DescribeQueryCache:
 
             if key in self._cache:
                 result, timestamp = self._cache[key]
-
+
+                expired_by = current_time - (timestamp + get_describe_cache_ttl_seconds())
+                if expired_by < 0:
                     logger.debug(
                         f"Returning query result from cache for query: {sql_query[:20]}"
                     )
@@ -92,7 +95,7 @@ class DescribeQueryCache:
                 telemetry.report_describe_query_cache_hit()
                 return result
             else:
-                telemetry.report_describe_query_cache_expired()
+                telemetry.report_describe_query_cache_expired(expired_by)
                 del self._cache[key]
                 return None
 
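
The cache TTL is now read from config at lookup time; the expiry arithmetic can be sketched as follows (illustrative values; get_describe_cache_ttl_seconds() is assumed to return the configured TTL):

import time

ttl = 15                         # assumed result of get_describe_cache_ttl_seconds()
timestamp = time.time() - 20     # entry cached 20 seconds ago
expired_by = time.time() - (timestamp + ttl)

if expired_by < 0:
    print("still fresh: serve from cache")
else:
    # roughly 5 seconds past the TTL; this is the value passed to
    # telemetry.report_describe_query_cache_expired(expired_by)
    print(f"expired {expired_by:.1f}s ago")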

snowflake/snowpark_connect/utils/session.py
@@ -120,6 +120,7 @@ def configure_snowpark_session(session: snowpark.Session):
         "PYTHON_SNOWPARK_USE_SCOPED_TEMP_OBJECTS": "false",  # this is required for creating udfs from sproc
         "ENABLE_STRUCTURED_TYPES_IN_SNOWPARK_CONNECT_RESPONSE": "true",
         "QUERY_TAG": f"'{query_tag}'",
+        "FEATURE_INTERVAL_TYPES": "enabled",
     }
 
     session.sql(

snowflake/snowpark_connect/utils/telemetry.py
@@ -15,6 +15,7 @@ from enum import Enum, unique
 from typing import Dict
 
 import google.protobuf.message
+import pyspark.sql.connect.proto.base_pb2 as proto_base
 
 from snowflake.connector.cursor import SnowflakeCursor
 from snowflake.connector.telemetry import (
@@ -290,10 +291,7 @@ class Telemetry:
 
         self._request_summary.set(summary)
 
-
-        summary["query_plan"] = _protobuf_to_json_with_redaction(
-            request.plan, REDACTED_PLAN_SUFFIXES
-        )
+        _set_query_plan(request, summary)
 
     def _not_in_request(self):
         # we don't want to add things to the summary if it's not initialized
@@ -454,7 +452,7 @@ class Telemetry:
         summary["describe_cache_hits"] += 1
 
     @safe
-    def report_describe_query_cache_expired(self):
+    def report_describe_query_cache_expired(self, expired_by: float):
         """Report a describe query cache hit."""
         if self._not_in_request():
             return
@@ -466,6 +464,11 @@ class Telemetry:
 
         summary["describe_cache_expired"] += 1
 
+        if "describe_cache_expired_by" not in summary:
+            summary["describe_cache_expired_by"] = []
+
+        summary["describe_cache_expired_by"].append(expired_by)
+
     @safe
     def report_describe_query_cache_clear(self, query_prefix: str):
         """Report a describe query cache clear."""
@@ -697,6 +700,28 @@ def _protobuf_to_json_with_redaction(
     )
 
 
+def _set_query_plan(request: google.protobuf.message.Message, summary: dict) -> None:
+    if isinstance(request, proto_base.ExecutePlanRequest):
+        # ExecutePlanRequest has plan at top level
+        if hasattr(request, "plan"):
+            summary["query_plan"] = (
+                _protobuf_to_json_with_redaction(request.plan, REDACTED_PLAN_SUFFIXES),
+            )
+
+    elif isinstance(request, proto_base.AnalyzePlanRequest):
+        # AnalyzePlanRequest has plan under oneof analyze
+        analyze_type = request.WhichOneof("analyze")
+        if not analyze_type:
+            return
+
+        summary["analyze_type"] = analyze_type
+        analyze_field = getattr(request, analyze_type)
+        if hasattr(analyze_field, "plan"):
+            summary["query_plan"] = _protobuf_to_json_with_redaction(
+                analyze_field.plan, REDACTED_PLAN_SUFFIXES
+            )
+
+
 # global telemetry client
 telemetry = Telemetry(is_enabled="SNOWPARK_CONNECT_DISABLE_TELEMETRY" not in os.environ)
 