snowpark-connect 0.20.2__py3-none-any.whl → 0.22.1__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (84)
  1. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +3 -2
  2. snowflake/snowpark_connect/column_name_handler.py +6 -65
  3. snowflake/snowpark_connect/config.py +47 -17
  4. snowflake/snowpark_connect/dataframe_container.py +242 -0
  5. snowflake/snowpark_connect/error/error_utils.py +25 -0
  6. snowflake/snowpark_connect/execute_plan/map_execution_command.py +13 -23
  7. snowflake/snowpark_connect/execute_plan/map_execution_root.py +9 -5
  8. snowflake/snowpark_connect/expression/map_extension.py +2 -1
  9. snowflake/snowpark_connect/expression/map_udf.py +4 -4
  10. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +8 -7
  11. snowflake/snowpark_connect/expression/map_unresolved_function.py +481 -170
  12. snowflake/snowpark_connect/expression/map_unresolved_star.py +8 -8
  13. snowflake/snowpark_connect/expression/map_update_fields.py +1 -1
  14. snowflake/snowpark_connect/expression/typer.py +6 -6
  15. snowflake/snowpark_connect/proto/control_pb2.py +17 -16
  16. snowflake/snowpark_connect/proto/control_pb2.pyi +17 -17
  17. snowflake/snowpark_connect/proto/control_pb2_grpc.py +12 -63
  18. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +15 -14
  19. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +19 -14
  20. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
  21. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +27 -26
  22. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +74 -68
  23. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
  24. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +5 -5
  25. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +25 -17
  26. snowflake/snowpark_connect/relation/map_aggregate.py +170 -61
  27. snowflake/snowpark_connect/relation/map_catalog.py +2 -2
  28. snowflake/snowpark_connect/relation/map_column_ops.py +227 -145
  29. snowflake/snowpark_connect/relation/map_crosstab.py +25 -6
  30. snowflake/snowpark_connect/relation/map_extension.py +81 -56
  31. snowflake/snowpark_connect/relation/map_join.py +72 -63
  32. snowflake/snowpark_connect/relation/map_local_relation.py +35 -20
  33. snowflake/snowpark_connect/relation/map_map_partitions.py +24 -17
  34. snowflake/snowpark_connect/relation/map_relation.py +22 -16
  35. snowflake/snowpark_connect/relation/map_row_ops.py +232 -146
  36. snowflake/snowpark_connect/relation/map_sample_by.py +15 -8
  37. snowflake/snowpark_connect/relation/map_show_string.py +42 -5
  38. snowflake/snowpark_connect/relation/map_sql.py +141 -237
  39. snowflake/snowpark_connect/relation/map_stats.py +88 -39
  40. snowflake/snowpark_connect/relation/map_subquery_alias.py +13 -14
  41. snowflake/snowpark_connect/relation/map_udtf.py +10 -13
  42. snowflake/snowpark_connect/relation/read/map_read.py +8 -3
  43. snowflake/snowpark_connect/relation/read/map_read_csv.py +7 -7
  44. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +7 -7
  45. snowflake/snowpark_connect/relation/read/map_read_json.py +19 -8
  46. snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -7
  47. snowflake/snowpark_connect/relation/read/map_read_socket.py +7 -3
  48. snowflake/snowpark_connect/relation/read/map_read_table.py +25 -16
  49. snowflake/snowpark_connect/relation/read/map_read_text.py +7 -7
  50. snowflake/snowpark_connect/relation/read/reader_config.py +1 -0
  51. snowflake/snowpark_connect/relation/utils.py +11 -5
  52. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +15 -12
  53. snowflake/snowpark_connect/relation/write/map_write.py +259 -56
  54. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +3 -2
  55. snowflake/snowpark_connect/server.py +43 -4
  56. snowflake/snowpark_connect/type_mapping.py +6 -23
  57. snowflake/snowpark_connect/utils/cache.py +27 -22
  58. snowflake/snowpark_connect/utils/context.py +33 -17
  59. snowflake/snowpark_connect/utils/describe_query_cache.py +2 -9
  60. snowflake/snowpark_connect/utils/{attribute_handling.py → identifiers.py} +47 -0
  61. snowflake/snowpark_connect/utils/session.py +41 -38
  62. snowflake/snowpark_connect/utils/telemetry.py +214 -63
  63. snowflake/snowpark_connect/utils/udxf_import_utils.py +14 -0
  64. snowflake/snowpark_connect/version.py +1 -1
  65. snowflake/snowpark_decoder/__init__.py +0 -0
  66. snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.py +36 -0
  67. snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.pyi +156 -0
  68. snowflake/snowpark_decoder/dp_session.py +111 -0
  69. snowflake/snowpark_decoder/spark_decoder.py +76 -0
  70. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/METADATA +6 -4
  71. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/RECORD +83 -69
  72. snowpark_connect-0.22.1.dist-info/licenses/LICENSE-binary +568 -0
  73. snowpark_connect-0.22.1.dist-info/licenses/NOTICE-binary +1533 -0
  74. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/top_level.txt +1 -0
  75. spark/__init__.py +0 -0
  76. spark/connect/__init__.py +0 -0
  77. spark/connect/envelope_pb2.py +31 -0
  78. spark/connect/envelope_pb2.pyi +46 -0
  79. snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
  80. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-connect +0 -0
  81. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-session +0 -0
  82. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-submit +0 -0
  83. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/WHEEL +0 -0
  84. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/licenses/LICENSE.txt +0 -0
@@ -1,21 +1,13 @@
1
1
  #
2
2
  # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
3
3
  #
4
-
5
4
  from collections import Counter
6
5
 
7
6
  import pyspark.sql.connect.proto.base_pb2 as proto_base
8
7
  import pyspark.sql.connect.proto.relations_pb2 as relation_proto
9
8
 
10
- from snowflake.snowpark._internal.analyzer.analyzer_utils import (
11
- quote_name_without_upper_casing,
12
- )
13
9
  from snowflake.snowpark_connect.column_name_handler import ColumnNames
14
- from snowflake.snowpark_connect.config import (
15
- auto_uppercase_ddl,
16
- global_config,
17
- sessions_config,
18
- )
10
+ from snowflake.snowpark_connect.config import global_config, sessions_config
19
11
  from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
20
12
  from snowflake.snowpark_connect.execute_plan.utils import pandas_to_arrow_batches_bytes
21
13
  from snowflake.snowpark_connect.expression import map_udf
@@ -24,24 +16,23 @@ from snowflake.snowpark_connect.relation.map_relation import map_relation
24
16
  from snowflake.snowpark_connect.relation.map_sql import map_sql_to_pandas_df
25
17
  from snowflake.snowpark_connect.relation.write.map_write import map_write, map_write_v2
26
18
  from snowflake.snowpark_connect.utils.context import get_session_id
19
+ from snowflake.snowpark_connect.utils.identifiers import (
20
+ spark_to_sf_single_id,
21
+ spark_to_sf_single_id_with_unquoting,
22
+ )
27
23
  from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
28
24
  from snowflake.snowpark_connect.utils.telemetry import (
29
25
  SnowparkConnectNotImplementedError,
30
26
  )
31
27
 
32
28
 
33
- def _spark_to_snowflake_single_id(name: str) -> str:
34
- name = quote_name_without_upper_casing(name)
35
- return name.upper() if auto_uppercase_ddl() else name
36
-
37
-
38
29
  def _create_column_rename_map(
39
30
  columns: list[ColumnNames], rename_duplicated: bool
40
31
  ) -> dict:
41
32
  if rename_duplicated is False:
42
33
  # if we are not renaming duplicated columns, we can just return the original names
43
34
  return {
44
- col.snowpark_name: _spark_to_snowflake_single_id(col.spark_name)
35
+ col.snowpark_name: spark_to_sf_single_id(col.spark_name, is_column=True)
45
36
  for col in columns
46
37
  }
47
38
 
@@ -64,7 +55,7 @@ def _create_column_rename_map(
64
55
 
65
56
  if len(renamed_cols) == 0:
66
57
  return {
67
- col.snowpark_name: _spark_to_snowflake_single_id(col.spark_name)
58
+ col.snowpark_name: spark_to_sf_single_id(col.spark_name, is_column=True)
68
59
  for col in not_renamed_cols
69
60
  }
70
61
 
@@ -95,12 +86,9 @@ def map_execution_command(
95
86
  match request.plan.command.WhichOneof("command_type"):
96
87
  case "create_dataframe_view":
97
88
  req = request.plan.command.create_dataframe_view
98
- input_df = map_relation(req.input)
99
- # Use real column names when writing to sf.
100
- assert hasattr(
101
- input_df, "_column_map"
102
- ), "input_df does not have the _column_map attribute"
103
- column_map = input_df._column_map
89
+ input_df_container = map_relation(req.input)
90
+ input_df = input_df_container.dataframe
91
+ column_map = input_df_container.column_map
104
92
 
105
93
  session_config = sessions_config[get_session_id()]
106
94
  duplicate_column_names_handling_mode = session_config[
@@ -133,7 +121,9 @@ def map_execution_command(
133
121
  view_name = [global_config.spark_sql_globalTempDatabase, req.name]
134
122
  else:
135
123
  view_name = [req.name]
136
- view_name = [_spark_to_snowflake_single_id(part) for part in view_name]
124
+ view_name = [
125
+ spark_to_sf_single_id_with_unquoting(part) for part in view_name
126
+ ]
137
127
 
138
128
  if req.replace:
139
129
  input_df.create_or_replace_temp_view(view_name)
@@ -20,6 +20,7 @@ from snowflake.snowpark._internal.utils import (
20
20
  create_or_update_statement_params_with_query_tag,
21
21
  )
22
22
  from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
23
+ from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
23
24
  from snowflake.snowpark_connect.execute_plan.utils import (
24
25
  arrow_table_to_arrow_bytes,
25
26
  pandas_to_arrow_batches_bytes,
@@ -89,13 +90,16 @@ def to_arrow_batch_iter(result_df: snowpark.DataFrame) -> Iterator[Table]:
89
90
  def map_execution_root(
90
91
  request: proto_base.ExecutePlanRequest,
91
92
  ) -> Iterator[proto_base.ExecutePlanResponse | QueryResult]:
92
- result_df: snowpark.DataFrame | pandas.DataFrame = map_relation(request.plan.root)
93
+ result: DataFrameContainer | pandas.DataFrame = map_relation(request.plan.root)
94
+ if isinstance(result, pandas.DataFrame):
95
+ result_df = result
96
+ else:
97
+ result_df = result.dataframe
98
+
93
99
  if isinstance(result_df, snowpark.DataFrame):
94
100
  snowpark_schema = result_df.schema
95
- schema = snowpark_to_proto_type(
96
- snowpark_schema, result_df._column_map, result_df
97
- )
98
- spark_columns = result_df._column_map.get_spark_columns()
101
+ schema = snowpark_to_proto_type(snowpark_schema, result.column_map, result_df)
102
+ spark_columns = result.column_map.get_spark_columns()
99
103
  if tcm.TCM_MODE:
100
104
  # TCM result handling:
101
105
  # - small result (only one batch): just return the executePlanResponse
@@ -58,7 +58,8 @@ def map_extension(
58
58
  from snowflake.snowpark_connect.relation.map_relation import map_relation
59
59
 
60
60
  with push_evaluating_sql_scope():
61
- df = map_relation(extension.subquery_expression.input)
61
+ df_container = map_relation(extension.subquery_expression.input)
62
+ df = df_container.dataframe
62
63
 
63
64
  queries = df.queries["queries"]
64
65
  if len(queries) != 1:
@@ -13,10 +13,7 @@ from snowflake.snowpark_connect.config import global_config
13
13
  from snowflake.snowpark_connect.expression.typer import ExpressionTyper
14
14
  from snowflake.snowpark_connect.type_mapping import proto_to_snowpark_type
15
15
  from snowflake.snowpark_connect.typed_column import TypedColumn
16
- from snowflake.snowpark_connect.utils.session import (
17
- get_or_create_snowpark_session,
18
- get_python_udxf_import_files,
19
- )
16
+ from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
20
17
  from snowflake.snowpark_connect.utils.udf_helper import (
21
18
  SnowparkUDF,
22
19
  gen_input_types,
@@ -28,6 +25,9 @@ from snowflake.snowpark_connect.utils.udf_helper import (
28
25
  from snowflake.snowpark_connect.utils.udf_utils import (
29
26
  ProcessCommonInlineUserDefinedFunction,
30
27
  )
28
+ from snowflake.snowpark_connect.utils.udxf_import_utils import (
29
+ get_python_udxf_import_files,
30
+ )
31
31
 
32
32
 
33
33
  def process_udf_return_type(
@@ -17,15 +17,15 @@ from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
17
17
  from snowflake.snowpark_connect.config import global_config
18
18
  from snowflake.snowpark_connect.expression.typer import ExpressionTyper
19
19
  from snowflake.snowpark_connect.typed_column import TypedColumn
20
- from snowflake.snowpark_connect.utils.attribute_handling import (
21
- split_fully_qualified_spark_name,
22
- )
23
20
  from snowflake.snowpark_connect.utils.context import (
24
21
  get_is_evaluating_sql,
25
22
  get_outer_dataframes,
26
23
  get_plan_id_map,
27
24
  resolve_lca_alias,
28
25
  )
26
+ from snowflake.snowpark_connect.utils.identifiers import (
27
+ split_fully_qualified_spark_name,
28
+ )
29
29
 
30
30
  SPARK_QUOTED = re.compile("^(`.*`)$", re.DOTALL)
31
31
 
@@ -46,11 +46,12 @@ def map_unresolved_attribute(
46
46
 
47
47
  if has_plan_id:
48
48
  plan_id = exp.unresolved_attribute.plan_id
49
- target_df = get_plan_id_map(plan_id)
49
+ target_df_container = get_plan_id_map(plan_id)
50
+ target_df = target_df_container.dataframe
50
51
  assert (
51
52
  target_df is not None
52
53
  ), f"resolving an attribute of a unresolved dataframe {plan_id}"
53
- column_mapping = target_df._column_map
54
+ column_mapping = target_df_container.column_map
54
55
  typer = ExpressionTyper(target_df)
55
56
 
56
57
  def get_col(snowpark_name):
@@ -146,8 +147,8 @@ def map_unresolved_attribute(
146
147
  name_parts[0], allow_non_exists=True
147
148
  )
148
149
  if snowpark_name is None:
149
- for outer_df in get_outer_dataframes():
150
- snowpark_name = outer_df._column_map.get_snowpark_column_name_from_spark_column_name(
150
+ for outer_df_container in get_outer_dataframes():
151
+ snowpark_name = outer_df_container.column_map.get_snowpark_column_name_from_spark_column_name(
151
152
  name_parts[0], allow_non_exists=True
152
153
  )
153
154
  if snowpark_name is not None: