snowpark-connect 0.31.0__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.

Potentially problematic release.

Files changed (38)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/column_name_handler.py +73 -100
  3. snowflake/snowpark_connect/column_qualifier.py +47 -0
  4. snowflake/snowpark_connect/dataframe_container.py +3 -2
  5. snowflake/snowpark_connect/execute_plan/map_execution_command.py +4 -2
  6. snowflake/snowpark_connect/expression/map_expression.py +5 -4
  7. snowflake/snowpark_connect/expression/map_extension.py +12 -6
  8. snowflake/snowpark_connect/expression/map_sql_expression.py +38 -3
  9. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +5 -5
  10. snowflake/snowpark_connect/expression/map_unresolved_function.py +869 -107
  11. snowflake/snowpark_connect/expression/map_unresolved_star.py +9 -7
  12. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +4 -1
  13. snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
  14. snowflake/snowpark_connect/relation/map_column_ops.py +4 -3
  15. snowflake/snowpark_connect/relation/map_extension.py +10 -9
  16. snowflake/snowpark_connect/relation/map_join.py +5 -2
  17. snowflake/snowpark_connect/relation/map_sql.py +33 -1
  18. snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
  19. snowflake/snowpark_connect/relation/read/map_read_table.py +6 -3
  20. snowflake/snowpark_connect/relation/write/map_write.py +29 -14
  21. snowflake/snowpark_connect/server.py +1 -2
  22. snowflake/snowpark_connect/type_mapping.py +36 -3
  23. snowflake/snowpark_connect/typed_column.py +8 -6
  24. snowflake/snowpark_connect/utils/session.py +19 -3
  25. snowflake/snowpark_connect/version.py +1 -1
  26. snowflake/snowpark_decoder/dp_session.py +1 -1
  27. {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +5 -2
  28. {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +36 -37
  29. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
  30. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
  31. {snowpark_connect-0.31.0.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
  32. {snowpark_connect-0.31.0.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
  33. {snowpark_connect-0.31.0.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
  34. {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
  35. {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
  36. {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
  37. {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
  38. {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
@@ -11,6 +11,7 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
  )
  from snowflake.snowpark.types import StructType
  from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+ from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
  from snowflake.snowpark_connect.error.error_codes import ErrorCodes
  from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.expression.typer import ExpressionTyper
@@ -28,7 +29,7 @@ def check_struct_and_get_field_datatype(field_name, schema):
  else:
  return None
  else:
- None
+ return None


  def map_unresolved_star(
@@ -55,16 +56,17 @@ def map_unresolved_star(
  return spark_names, typed_column

  # scenario where it is expanding * to mulitple columns
- spark_names = []
- snowpark_names = []
- qualifiers = []
+ spark_names: list[str] = []
+ snowpark_names: list[str] = []
+ qualifiers: list[set[ColumnQualifier]] = []

+ target_qualifier = ColumnQualifier(tuple(name_parts[:-1]))
  (
  spark_names,
  snowpark_names,
  qualifiers,
  ) = column_mapping.get_spark_and_snowpark_columns_with_qualifier_for_qualifier(
- name_parts[:-1]
+ target_qualifier
  )

  if len(spark_names) == 0:
@@ -75,7 +77,7 @@ def map_unresolved_star(
  snowpark_names,
  qualifiers,
  ) = column_mapping_for_outer_df.get_spark_and_snowpark_columns_with_qualifier_for_qualifier(
- name_parts[:-1]
+ target_qualifier
  )
  if len(spark_names) > 0:
  break
@@ -141,7 +143,7 @@ def map_unresolved_star(
  final_sql_expr,
  lambda final_sql_expr=final_sql_expr: typer.type(final_sql_expr),
  )
- typed_column.set_multi_col_qualifiers([[] for _ in spark_names])
+ typed_column.set_multi_col_qualifiers([set() for _ in spark_names])
  return spark_names, typed_column
  else:
  result_exp = snowpark_fn.sql_expr(
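Note: the new snowflake/snowpark_connect/column_qualifier.py module (+47 lines) is not expanded in this diff. Based only on how it is used in these hunks — constructed from a tuple of name parts, hashable enough to live in set[ColumnQualifier], and exposing a no_qualifier() factory — a minimal hypothetical sketch might look like the following; the shipped implementation may differ:

    # Hypothetical sketch inferred from usage in this diff; not the actual module contents.
    from dataclasses import dataclass


    @dataclass(frozen=True)  # frozen => hashable, so instances can be stored in sets
    class ColumnQualifier:
        parts: tuple[str, ...] = ()  # e.g. ("db", "table") or ("alias",)

        @classmethod
        def no_qualifier(cls) -> "ColumnQualifier":
            return cls(())

The move from list[str] qualifiers to set[ColumnQualifier] appears to let a single column carry several qualifiers at once (for example a table name and a subquery alias) instead of one name-part list.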
@@ -19,6 +19,7 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
  )
  from snowflake.snowpark.functions import lit
  from snowflake.snowpark.types import BooleanType, StringType
+ from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
  from snowflake.snowpark_connect.config import (
  auto_uppercase_non_column_identifiers,
  global_config,
@@ -743,7 +744,9 @@ class SnowflakeCatalog(AbstractSparkCatalog):
  sp_schema = proto_to_snowpark_type(schema)
  columns = [c.name for c in schema.struct.fields]
  table_name_parts = split_fully_qualified_spark_name(tableName)
- qualifiers = [table_name_parts for _ in columns]
+ qualifiers: list[set[ColumnQualifier]] = [
+ {ColumnQualifier(tuple(table_name_parts))} for _ in columns
+ ]
  column_types = [f.datatype for f in sp_schema.fields]
  return DataFrameContainer.create_with_column_mapping(
  dataframe=session.createDataFrame([], sp_schema),
@@ -16,6 +16,7 @@ from snowflake.snowpark.types import DataType
  from snowflake.snowpark_connect.column_name_handler import (
  make_column_names_snowpark_compatible,
  )
+ from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
  from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
  from snowflake.snowpark_connect.expression.map_expression import (
@@ -200,7 +201,9 @@ def map_pivot_aggregate(
  dataframe=result.select(*column_selectors),
  spark_column_names=reordered_spark_names,
  snowpark_column_names=reordered_snowpark_names,
- column_qualifiers=[[]] * len(reordered_spark_names),
+ column_qualifiers=[
+ {ColumnQualifier.no_qualifier()} for _ in reordered_spark_names
+ ],
  parent_column_name_map=input_container.column_map,
  snowpark_column_types=reordered_types,
  )
@@ -349,7 +352,7 @@ class _ColumnMetadata:
  spark_name: str
  snowpark_name: str
  data_type: DataType
- qualifiers: list[str]
+ qualifiers: set[ColumnQualifier]


  @dataclass(frozen=True)
@@ -385,7 +388,7 @@ class _Columns:
  col.spark_name for col in self.grouping_columns + self.aggregation_columns
  ]

- def get_qualifiers(self) -> list[list[str]]:
+ def get_qualifiers(self) -> list[set[ColumnQualifier]]:
  return [
  col.qualifiers for col in self.grouping_columns + self.aggregation_columns
  ]
@@ -429,7 +432,7 @@ def map_aggregate_helper(
  new_name,
  None if skip_alias else alias,
  None if pivot else snowpark_column.typ,
- snowpark_column.get_qualifiers(),
+ qualifiers=snowpark_column.get_qualifiers(),
  )
  )

@@ -469,7 +472,7 @@ def map_aggregate_helper(
  new_name,
  None if skip_alias else alias,
  agg_col_typ,
- [],
+ qualifiers={ColumnQualifier.no_qualifier()},
  )
  )

@@ -29,6 +29,7 @@ from snowflake.snowpark.column import Column
  from snowflake.snowpark.table_function import _ExplodeFunctionCall
  from snowflake.snowpark.types import DataType, StructField, StructType, _NumericType
  from snowflake.snowpark_connect.column_name_handler import (
+ ColumnQualifier,
  make_column_names_snowpark_compatible,
  )
  from snowflake.snowpark_connect.config import global_config
@@ -1014,7 +1015,7 @@ def map_unpivot(
  column_project = []
  column_reverse_project = []
  snowpark_columns = []
- qualifiers = []
+ qualifiers: list[set[ColumnQualifier]] = []
  for c in input_container.column_map.get_snowpark_columns():
  c_name = snowpark_functions_col(c, input_container.column_map).get_name()
  if c_name in unpivot_col_names:
@@ -1042,7 +1043,7 @@ def map_unpivot(
  )
  snowpark_columns.append(c)
  qualifiers.append(
- input_container.column_map.get_qualifier_for_spark_column(c)
+ input_container.column_map.get_qualifiers_for_spark_column(c)
  )

  # Without the case when postprocessing, the result Spark dataframe is:
@@ -1087,7 +1088,7 @@ def map_unpivot(
  snowpark_functions_col(snowpark_value_column_name, input_container.column_map)
  )
  snowpark_columns.append(snowpark_value_column_name)
- qualifiers.extend([[]] * 2)
+ qualifiers.extend([set() for _ in range(2)])

  result = (
  input_df.select(*column_project)
@@ -15,6 +15,7 @@ from snowflake.snowpark_connect.column_name_handler import (
  ColumnNameMap,
  make_column_names_snowpark_compatible,
  )
+ from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
  from snowflake.snowpark_connect.config import get_boolean_session_config_param
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
  from snowflake.snowpark_connect.error.error_codes import ErrorCodes
@@ -178,7 +179,7 @@ def get_udtf_project(relation: relation_proto.Relation) -> bool:

  def handle_udtf_with_table_arguments(
  udtf_info: snowflake_proto.UDTFWithTableArguments,
- ) -> snowpark.DataFrame:
+ ) -> DataFrameContainer:
  """
  Handle UDTF with one or more table arguments using Snowpark's join_table_function.
  For multiple table arguments, this creates a Cartesian product of all input tables.
@@ -286,7 +287,7 @@ def handle_lateral_join_with_udtf(
  left_result: DataFrameContainer,
  udtf_relation: relation_proto.Relation,
  udtf_info: tuple[snowpark.udtf.UserDefinedTableFunction, list],
- ) -> snowpark.DataFrame:
+ ) -> DataFrameContainer:
  """
  Handle lateral join with UDTF on the right side using join_table_function.
  """
@@ -319,7 +320,7 @@ def handle_lateral_join_with_udtf(

  def map_aggregate(
  aggregate: snowflake_proto.Aggregate, plan_id: int
- ) -> snowpark.DataFrame:
+ ) -> DataFrameContainer:
  input_container = map_relation(aggregate.input)
  input_df: snowpark.DataFrame = input_container.dataframe

@@ -363,7 +364,7 @@ def map_aggregate(
  return new_names[0], snowpark_column

  raw_groupings: list[tuple[str, TypedColumn]] = []
- raw_aggregations: list[tuple[str, TypedColumn, list[str]]] = []
+ raw_aggregations: list[tuple[str, TypedColumn, set[ColumnQualifier]]] = []

  if not is_group_by_all:
  raw_groupings = [_map_column(exp) for exp in aggregate.grouping_expressions]
@@ -401,11 +402,11 @@ def map_aggregate(
  col = _map_column(exp)
  if exp.WhichOneof("expr_type") == "unresolved_attribute":
  spark_name = col[0]
- qualifiers = input_container.column_map.get_qualifier_for_spark_column(
- spark_name
- )
+ qualifiers: set[
+ ColumnQualifier
+ ] = input_container.column_map.get_qualifiers_for_spark_column(spark_name)
  else:
- qualifiers = []
+ qualifiers = set()

  raw_aggregations.append((col[0], col[1], qualifiers))

@@ -438,7 +439,7 @@ def map_aggregate(
  spark_columns: list[str] = []
  snowpark_columns: list[str] = []
  snowpark_column_types: list[snowpark_types.DataType] = []
- all_qualifiers: list[list[str]] = []
+ all_qualifiers: list[set[ColumnQualifier]] = []

  # Use grouping columns directly without aliases
  groupings = [col.col for _, col in raw_groupings]
@@ -10,6 +10,7 @@ from pyspark.errors import AnalysisException
  import snowflake.snowpark.functions as snowpark_fn
  from snowflake import snowpark
  from snowflake.snowpark_connect.column_name_handler import JoinColumnNameMap
+ from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
  from snowflake.snowpark_connect.config import global_config
  from snowflake.snowpark_connect.constants import COLUMN_METADATA_COLLISION_KEY
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
@@ -267,8 +268,10 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
  ] # this is to make sure we only remove the column once
  ]

- qualifiers = list(left_container.column_map.get_qualifiers()) + [
- right_container.column_map.get_qualifier_for_spark_column(spark_col)
+ qualifiers: list[set[ColumnQualifier]] = list(
+ left_container.column_map.get_qualifiers()
+ ) + [
+ {right_container.column_map.get_qualifier_for_spark_column(spark_col)}
  for i, spark_col in enumerate(
  right_container.column_map.get_spark_columns()
  )
@@ -1343,6 +1343,33 @@ def map_sql_to_pandas_df(
  )
  SNOWFLAKE_CATALOG.refreshTable(table_name_unquoted)

+ return pandas.DataFrame({"": [""]}), ""
+ case "RepairTable":
+ # No-Op. Snowflake doesn't have explicit partitions to repair.
+ table_relation = logical_plan.child()
+ db_and_table_name = as_java_list(table_relation.multipartIdentifier())
+ multi_part_len = len(db_and_table_name)
+
+ if multi_part_len == 1:
+ table_name = db_and_table_name[0]
+ db_name = None
+ full_table_name = table_name
+ else:
+ db_name = db_and_table_name[0]
+ table_name = db_and_table_name[1]
+ full_table_name = db_name + "." + table_name
+
+ df = SNOWFLAKE_CATALOG.tableExists(table_name, db_name)
+
+ table_exist = df.iloc[0, 0]
+
+ if not table_exist:
+ exception = AnalysisException(
+ f"[TABLE_OR_VIEW_NOT_FOUND] Table not found `{full_table_name}`."
+ )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception
+
  return pandas.DataFrame({"": [""]}), ""
  case _:
  execute_logical_plan(logical_plan)
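Note: RepairTable is the logical plan Spark produces for MSCK REPAIR TABLE (and ALTER TABLE ... RECOVER PARTITIONS). A hedged usage sketch with a hypothetical table name; the statement now succeeds as a no-op when the table exists and raises otherwise:

    # Assumes `spark` is a Snowpark Connect Spark session and db1.tbl1 exists.
    spark.sql("MSCK REPAIR TABLE db1.tbl1")         # no-op: Snowflake has no partitions to repair
    spark.sql("MSCK REPAIR TABLE db1.no_such_tbl")  # raises AnalysisException ([TABLE_OR_VIEW_NOT_FOUND])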
@@ -1483,7 +1510,12 @@ def map_sql(
  snowpark_connect_sql_passthrough, sql_stmt = is_valid_passthrough_sql(rel.sql.query)

  if not snowpark_connect_sql_passthrough:
- logical_plan = sql_parser().parseQuery(sql_stmt)
+ # Changed from parseQuery to parsePlan as Spark parseQuery() call generating wrong logical plan for
+ # query like this: SELECT cast('3.4' as decimal(38, 18)) UNION SELECT 'foo'
+ # As such other place in this file we use parsePlan.
+ # Main difference between parsePlan() and parseQuery() is, parsePlan() can be called for any SQL statement, while
+ # parseQuery() can only be called for query statements.
+ logical_plan = sql_parser().parsePlan(sql_stmt)

  parsed_pos_args = parse_pos_args(logical_plan, rel.sql.pos_args)
  set_sql_args(rel.sql.args, parsed_pos_args)
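Note: the statement called out in the new comment can be reproduced directly through the SQL entry point; a hedged sketch, assuming `spark` is a Snowpark Connect Spark session:

    # Previously mis-parsed via parseQuery(); parsePlan() should now produce the intended UNION plan.
    df = spark.sql("SELECT cast('3.4' as decimal(38, 18)) UNION SELECT 'foo'")
    df.show()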
@@ -4,6 +4,7 @@

  import pyspark.sql.connect.proto.relations_pb2 as relation_proto

+ from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
  from snowflake.snowpark_connect.relation.map_relation import map_relation

@@ -18,7 +19,9 @@ def map_alias(
  # we set reuse_parsed_plan=False because we need new expr_id for the attributes (output columns) in aliased snowpark dataframe
  # reuse_parsed_plan will lead to ambiguous column name for operations like joining two dataframes that are aliased from the same dataframe
  input_container = map_relation(rel.subquery_alias.input, reuse_parsed_plan=False)
- qualifiers = [[alias]] * len(input_container.column_map.columns)
+ qualifiers = [
+ {ColumnQualifier((alias,))} for _ in input_container.column_map.columns
+ ]

  return DataFrameContainer.create_with_column_mapping(
  dataframe=input_container.dataframe,
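Note: the per-column {ColumnQualifier((alias,))} set is what qualified references against a subquery alias resolve through; a hedged usage sketch with hypothetical column and alias names:

    # Assumes `spark` is a Snowpark Connect Spark session.
    df = spark.createDataFrame([(1, "a")], ["id", "val"])
    aliased = df.alias("t")
    aliased.select("t.*").show()   # star expansion resolves via the ("t",) qualifier
    aliased.select("t.id").show()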
@@ -16,6 +16,7 @@ from snowflake.snowpark_connect.column_name_handler import (
  ColumnNameMap,
  make_column_names_snowpark_compatible,
  )
+ from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
  from snowflake.snowpark_connect.config import auto_uppercase_non_column_identifiers
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
  from snowflake.snowpark_connect.error.error_codes import ErrorCodes
@@ -58,7 +59,7 @@ def post_process_df(
  spark_column_names=true_names,
  snowpark_column_names=snowpark_column_names,
  snowpark_column_types=[f.datatype for f in df.schema.fields],
- column_qualifiers=[name_parts] * len(true_names)
+ column_qualifiers=[{ColumnQualifier(tuple(name_parts))} for _ in true_names]
  if source_table_name
  else None,
  )
@@ -94,8 +95,10 @@ def _get_temporary_view(
  spark_column_names=temp_view.column_map.get_spark_columns(),
  snowpark_column_names=snowpark_column_names,
  column_metadata=temp_view.column_map.column_metadata,
- column_qualifiers=[split_fully_qualified_spark_name(table_name)]
- * len(temp_view.column_map.get_spark_columns()),
+ column_qualifiers=[
+ {ColumnQualifier(tuple(split_fully_qualified_spark_name(table_name)))}
+ for _ in range(len(temp_view.column_map.get_spark_columns()))
+ ],
  parent_column_name_map=temp_view.column_map.get_parent_column_name_map(),
  )

@@ -16,7 +16,7 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
  unquote_if_quoted,
  )
  from snowflake.snowpark.exceptions import SnowparkSQLException
- from snowflake.snowpark.functions import col, lit, object_construct, sql_expr
+ from snowflake.snowpark.functions import col, lit, object_construct, sql_expr, when
  from snowflake.snowpark.types import (
  ArrayType,
  DataType,
@@ -1083,20 +1083,35 @@ def rewrite_df(input_df: snowpark.DataFrame, source: str) -> snowpark.DataFrame:
  json: construct the dataframe to 1 column in json format
  1. Append columns which represents the column name
  2. Use object_construct to aggregate the dataframe into 1 column
-
+ csv:
+ Use "" to replace empty string
  """
- if source != "json":
- return input_df
- rand_salt = random_string(10, "_")
- rewritten_df = input_df.with_columns(
- [co + rand_salt for co in input_df.columns],
- [lit(unquote_if_quoted(co)) for co in input_df.columns],
- )
- construct_key_values = []
- for co in input_df.columns:
- construct_key_values.append(col(co + rand_salt))
- construct_key_values.append(col(co))
- return rewritten_df.select(object_construct(*construct_key_values))
+ match source:
+ case "json":
+ rand_salt = random_string(10, "_")
+ rewritten_df = input_df.with_columns(
+ [co + rand_salt for co in input_df.columns],
+ [lit(unquote_if_quoted(co)) for co in input_df.columns],
+ )
+ construct_key_values = []
+ for co in input_df.columns:
+ construct_key_values.append(col(co + rand_salt))
+ construct_key_values.append(col(co))
+ return rewritten_df.select(object_construct(*construct_key_values))
+ case "csv":
+ new_cols = []
+ for co in input_df.columns:
+ if isinstance(input_df.schema[co].datatype, StringType):
+ new_col = col(co)
+ new_col = when(
+ new_col.isNotNull() & (new_col == ""), lit('""')
+ ).otherwise(new_col)
+ new_cols.append(new_col.alias(co))
+ else:
+ new_cols.append(col(co))
+ return input_df.select(new_cols)
+ case _:
+ return input_df


  def handle_column_names(
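Note: the new csv branch rewrites empty strings in string-typed columns to a literal "" before the write, presumably so they stay distinguishable from NULL in the emitted file. A hedged sketch of the write path it targets (the stage path is hypothetical):

    # Assumes `spark` is a Snowpark Connect Spark session and the stage path exists.
    df = spark.createDataFrame([("x", ""), ("y", None)], ["a", "b"])
    df.write.format("csv").mode("overwrite").save("@my_stage/csv_out")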
@@ -158,9 +158,8 @@ def _handle_exception(context, e: Exception):
  logger.error("Error: %s - %s", type(e).__name__, str(e))

  telemetry.report_request_failure(e)
-
  if tcm.TCM_MODE:
- # TODO: SNOW-2009834 gracefully return error back in TCM
+ # spark decoder will catch the error and return it to GS gracefully
  attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
  raise e

@@ -582,11 +582,14 @@ def map_snowpark_types_to_pyarrow_types(
  attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
  raise exception
  case snowpark.types.TimestampType:
- unit = pa_type.unit
- tz = pa_type.tz
+ # Check if pa_type has unit attribute (it should be a timestamp type)
+ unit = pa_type.unit if hasattr(pa_type, "unit") else "us"
+ tz = pa_type.tz if hasattr(pa_type, "tz") else None
+
+ # Spark truncates nanosecond precision to microseconds
  if unit == "ns":
- # Spark truncates nanosecond precision to microseconds
  unit = "us"
+
  return pa.timestamp(unit, tz=tz)
  case snowpark.types.VariantType:
  return pa.string()
@@ -670,6 +673,9 @@ def map_pyarrow_to_snowpark_types(pa_type: pa.DataType) -> snowpark.types.DataTy
  return snowpark.types.TimestampType()
  elif pa.types.is_null(pa_type):
  return snowpark.types.NullType()
+ elif pa.types.is_duration(pa_type):
+ # Map PyArrow duration[us] to DayTimeIntervalType
+ return snowpark.types.DayTimeIntervalType()
  else:
  exception = SnowparkConnectNotImplementedError(
  f"Unsupported PyArrow data type: {pa_type}"
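Note: a small sketch of the new duration branch, assuming pyarrow is imported as pa as elsewhere in this module:

    import pyarrow as pa

    pa.types.is_duration(pa.duration("us"))  # True -> now mapped to snowpark DayTimeIntervalType()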
@@ -892,6 +898,33 @@ def map_simple_types(simple_type: str) -> snowpark.types.DataType:
  return snowpark.types.YearMonthIntervalType()
  case type_name if _INTERVAL_DAYTIME_PATTERN_RE.match(type_name):
  return snowpark.types.DayTimeIntervalType()
+ # Year-Month interval cases
+ case "interval year":
+ return snowpark.types.YearMonthIntervalType(0)
+ case "interval month":
+ return snowpark.types.YearMonthIntervalType(1)
+ case "interval year to month":
+ return snowpark.types.YearMonthIntervalType(0, 1)
+ case "interval day":
+ return snowpark.types.DayTimeIntervalType(0)
+ case "interval hour":
+ return snowpark.types.DayTimeIntervalType(1)
+ case "interval minute":
+ return snowpark.types.DayTimeIntervalType(2)
+ case "interval second":
+ return snowpark.types.DayTimeIntervalType(3)
+ case "interval day to hour":
+ return snowpark.types.DayTimeIntervalType(0, 1)
+ case "interval day to minute":
+ return snowpark.types.DayTimeIntervalType(0, 2)
+ case "interval day to second":
+ return snowpark.types.DayTimeIntervalType(0, 3)
+ case "interval hour to minute":
+ return snowpark.types.DayTimeIntervalType(1, 2)
+ case "interval hour to second":
+ return snowpark.types.DayTimeIntervalType(1, 3)
+ case "interval minute to second":
+ return snowpark.types.DayTimeIntervalType(2, 3)
  case _:
  if simple_type.startswith("decimal"):
  precision = int(simple_type.split("(")[1].split(",")[0])
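Note: for reference, a few concrete mappings produced by the new cases (the field codes follow Spark's interval types: YEAR=0, MONTH=1 for year-month, and DAY=0, HOUR=1, MINUTE=2, SECOND=3 for day-time):

    map_simple_types("interval year to month")   # -> YearMonthIntervalType(0, 1)
    map_simple_types("interval day to second")   # -> DayTimeIntervalType(0, 3)
    map_simple_types("interval hour to minute")  # -> DayTimeIntervalType(1, 2)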
@@ -8,6 +8,7 @@ from functools import cached_property
  import snowflake.snowpark.functions as snowpark_fn
  from snowflake import snowpark
  from snowflake.snowpark.column import Column
+ from snowflake.snowpark_connect.column_qualifier import ColumnQualifier

  _EMPTY_COLUMN = Column("")

@@ -44,11 +45,11 @@ class TypedColumn:
  def alias(self, alias_name: str):
  return TypedColumn(self.col.alias(alias_name), self._type_resolver)

- def set_qualifiers(self, qualifiers: list[str]):
+ def set_qualifiers(self, qualifiers: set[ColumnQualifier]) -> None:
  self.qualifiers = qualifiers

- def get_qualifiers(self) -> list[str]:
- return getattr(self, "qualifiers", [])
+ def get_qualifiers(self) -> set[ColumnQualifier]:
+ return getattr(self, "qualifiers", {ColumnQualifier.no_qualifier()})

  def set_catalog_database_info(self, catalog_database_info: dict[str, str]) -> None:
  self._catalog_database_info = catalog_database_info
@@ -63,12 +64,13 @@ class TypedColumn:
  def get_database(self) -> str | None:
  return self._catalog_database_info.get("database")

- def set_multi_col_qualifiers(self, qualifiers: list[list[str]]):
+ def set_multi_col_qualifiers(self, qualifiers: list[set[ColumnQualifier]]) -> None:
  self.multi_col_qualifiers = qualifiers

- def get_multi_col_qualifiers(self, num_columns) -> list[list[str]]:
+ def get_multi_col_qualifiers(self, num_columns) -> list[set[ColumnQualifier]]:
  if not hasattr(self, "multi_col_qualifiers"):
+
+ return [{ColumnQualifier.no_qualifier()} for i in range(num_columns)]
  assert (
  len(self.multi_col_qualifiers) == num_columns
  ), f"Expected {num_columns} multi-column qualifiers, got {len(self.multi_col_qualifiers)}"
@@ -23,6 +23,13 @@ from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
  from snowflake.snowpark_connect.utils.telemetry import telemetry
  from snowflake.snowpark_connect.utils.udf_cache import init_builtin_udf_cache

+ SKIP_SESSION_CONFIGURATION = False
+
+
+ def skip_session_configuration(skip: bool):
+ global SKIP_SESSION_CONFIGURATION
+ SKIP_SESSION_CONFIGURATION = skip
+

  # Suppress experimental warnings from snowflake.snowpark logger
  def _filter_experimental_warnings(record):
@@ -57,6 +64,8 @@ def configure_snowpark_session(session: snowpark.Session):
  global_config,
  )

+ global SKIP_SESSION_CONFIGURATION
+
  logger.info(f"Configuring session {session}")

  telemetry.initialize(session)
@@ -124,9 +133,16 @@ def configure_snowpark_session(session: snowpark.Session):
  "QUERY_TAG": f"'{query_tag}'",
  }

- session.sql(
- f"ALTER SESSION SET {', '.join([f'{k} = {v}' for k, v in session_params.items()])}"
- ).collect()
+ # SNOW-2245971: Stored procedures inside Native Apps run as Execute As Owner and hence cannot set session params.
+ if not SKIP_SESSION_CONFIGURATION:
+ session.sql(
+ f"ALTER SESSION SET {', '.join([f'{k} = {v}' for k, v in session_params.items()])}"
+ ).collect()
+ else:
+ session_param_names = ", ".join(session_params.keys())
+ logger.info(
+ f"Skipping Snowpark Connect session configuration as requested. Please make sure following session parameters are set correctly: {session_param_names}"
+ )

  # Instrument the snowpark session to use a cache for describe queries.
  instrument_session_for_describe_cache(session)
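Note: a hedged usage sketch of the new toggle for owner's-rights contexts such as the Native App stored procedures referenced by SNOW-2245971, where ALTER SESSION is not permitted; the caller then owns responsibility for the listed session parameters:

    from snowflake.snowpark_connect.utils.session import (
        configure_snowpark_session,
        skip_session_configuration,
    )

    skip_session_configuration(True)      # don't attempt ALTER SESSION
    configure_snowpark_session(session)   # `session` is assumed to be an existing snowpark.Session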
@@ -2,4 +2,4 @@
  #
  # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
  #
- VERSION = (0,31,0)
+ VERSION = (0,32,0)
@@ -107,5 +107,5 @@ class DataframeProcessorSession:
  )
  return proto2str(dp_res_proto)
  except Exception:
- # TODO: SNOW-1857056 error handling
+ # raise the error to GS
  raise
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: snowpark-connect
- Version: 0.31.0
+ Version: 0.32.0
  Summary: Snowpark Connect for Spark
  Author: Snowflake, Inc
  License: Apache License, Version 2.0
@@ -13,7 +13,7 @@ Requires-Dist: certifi>=2025.1.31
  Requires-Dist: cloudpickle
  Requires-Dist: fsspec[http]
  Requires-Dist: jpype1
- Requires-Dist: protobuf<5.0,>=4.25.3
+ Requires-Dist: protobuf<6.32.0,>=4.25.3
  Requires-Dist: s3fs>=2025.3.0
  Requires-Dist: snowflake.core<2,>=1.0.5
  Requires-Dist: snowflake-snowpark-python[pandas]<1.41.0,==1.40.0
@@ -29,11 +29,14 @@ Requires-Dist: grpcio-status<1.63,>=1.56.0
  Requires-Dist: googleapis-common-protos>=1.56.4
  Requires-Dist: numpy<2,>=1.15
  Requires-Dist: gcsfs>=2025.9.0
+ Provides-Extra: jdk
+ Requires-Dist: jdk4py==17.0.9.2; extra == "jdk"
  Dynamic: author
  Dynamic: description
  Dynamic: description-content-type
  Dynamic: license
  Dynamic: license-file
+ Dynamic: provides-extra
  Dynamic: requires-dist
  Dynamic: requires-python
  Dynamic: summary
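Note: the new optional dependency group can be requested at install time with standard pip extras syntax, e.g. pip install "snowpark-connect[jdk]", which additionally pulls in jdk4py==17.0.9.2.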