snowpark-connect 0.20.2__py3-none-any.whl → 0.22.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (84)
  1. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +3 -2
  2. snowflake/snowpark_connect/column_name_handler.py +6 -65
  3. snowflake/snowpark_connect/config.py +47 -17
  4. snowflake/snowpark_connect/dataframe_container.py +242 -0
  5. snowflake/snowpark_connect/error/error_utils.py +25 -0
  6. snowflake/snowpark_connect/execute_plan/map_execution_command.py +13 -23
  7. snowflake/snowpark_connect/execute_plan/map_execution_root.py +9 -5
  8. snowflake/snowpark_connect/expression/map_extension.py +2 -1
  9. snowflake/snowpark_connect/expression/map_udf.py +4 -4
  10. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +8 -7
  11. snowflake/snowpark_connect/expression/map_unresolved_function.py +481 -170
  12. snowflake/snowpark_connect/expression/map_unresolved_star.py +8 -8
  13. snowflake/snowpark_connect/expression/map_update_fields.py +1 -1
  14. snowflake/snowpark_connect/expression/typer.py +6 -6
  15. snowflake/snowpark_connect/proto/control_pb2.py +17 -16
  16. snowflake/snowpark_connect/proto/control_pb2.pyi +17 -17
  17. snowflake/snowpark_connect/proto/control_pb2_grpc.py +12 -63
  18. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +15 -14
  19. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +19 -14
  20. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
  21. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +27 -26
  22. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +74 -68
  23. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
  24. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +5 -5
  25. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +25 -17
  26. snowflake/snowpark_connect/relation/map_aggregate.py +170 -61
  27. snowflake/snowpark_connect/relation/map_catalog.py +2 -2
  28. snowflake/snowpark_connect/relation/map_column_ops.py +227 -145
  29. snowflake/snowpark_connect/relation/map_crosstab.py +25 -6
  30. snowflake/snowpark_connect/relation/map_extension.py +81 -56
  31. snowflake/snowpark_connect/relation/map_join.py +72 -63
  32. snowflake/snowpark_connect/relation/map_local_relation.py +35 -20
  33. snowflake/snowpark_connect/relation/map_map_partitions.py +24 -17
  34. snowflake/snowpark_connect/relation/map_relation.py +22 -16
  35. snowflake/snowpark_connect/relation/map_row_ops.py +232 -146
  36. snowflake/snowpark_connect/relation/map_sample_by.py +15 -8
  37. snowflake/snowpark_connect/relation/map_show_string.py +42 -5
  38. snowflake/snowpark_connect/relation/map_sql.py +141 -237
  39. snowflake/snowpark_connect/relation/map_stats.py +88 -39
  40. snowflake/snowpark_connect/relation/map_subquery_alias.py +13 -14
  41. snowflake/snowpark_connect/relation/map_udtf.py +10 -13
  42. snowflake/snowpark_connect/relation/read/map_read.py +8 -3
  43. snowflake/snowpark_connect/relation/read/map_read_csv.py +7 -7
  44. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +7 -7
  45. snowflake/snowpark_connect/relation/read/map_read_json.py +19 -8
  46. snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -7
  47. snowflake/snowpark_connect/relation/read/map_read_socket.py +7 -3
  48. snowflake/snowpark_connect/relation/read/map_read_table.py +25 -16
  49. snowflake/snowpark_connect/relation/read/map_read_text.py +7 -7
  50. snowflake/snowpark_connect/relation/read/reader_config.py +1 -0
  51. snowflake/snowpark_connect/relation/utils.py +11 -5
  52. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +15 -12
  53. snowflake/snowpark_connect/relation/write/map_write.py +259 -56
  54. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +3 -2
  55. snowflake/snowpark_connect/server.py +43 -4
  56. snowflake/snowpark_connect/type_mapping.py +6 -23
  57. snowflake/snowpark_connect/utils/cache.py +27 -22
  58. snowflake/snowpark_connect/utils/context.py +33 -17
  59. snowflake/snowpark_connect/utils/describe_query_cache.py +2 -9
  60. snowflake/snowpark_connect/utils/{attribute_handling.py → identifiers.py} +47 -0
  61. snowflake/snowpark_connect/utils/session.py +41 -38
  62. snowflake/snowpark_connect/utils/telemetry.py +214 -63
  63. snowflake/snowpark_connect/utils/udxf_import_utils.py +14 -0
  64. snowflake/snowpark_connect/version.py +1 -1
  65. snowflake/snowpark_decoder/__init__.py +0 -0
  66. snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.py +36 -0
  67. snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.pyi +156 -0
  68. snowflake/snowpark_decoder/dp_session.py +111 -0
  69. snowflake/snowpark_decoder/spark_decoder.py +76 -0
  70. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/METADATA +6 -4
  71. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/RECORD +83 -69
  72. snowpark_connect-0.22.1.dist-info/licenses/LICENSE-binary +568 -0
  73. snowpark_connect-0.22.1.dist-info/licenses/NOTICE-binary +1533 -0
  74. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/top_level.txt +1 -0
  75. spark/__init__.py +0 -0
  76. spark/connect/__init__.py +0 -0
  77. spark/connect/envelope_pb2.py +31 -0
  78. spark/connect/envelope_pb2.pyi +46 -0
  79. snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
  80. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-connect +0 -0
  81. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-session +0 -0
  82. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-submit +0 -0
  83. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/WHEEL +0 -0
  84. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/licenses/LICENSE.txt +0 -0
@@ -11,12 +11,13 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     unquote_if_quoted,
 )
 from snowflake.snowpark.exceptions import SnowparkSQLException
-from snowflake.snowpark_connect.column_name_handler import with_column_map
-from snowflake.snowpark_connect.config import auto_uppercase_dml
+from snowflake.snowpark_connect.column_name_handler import ALREADY_QUOTED
+from snowflake.snowpark_connect.config import auto_uppercase_non_column_identifiers
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.read.utils import (
     rename_columns_as_snowflake_standard,
 )
-from snowflake.snowpark_connect.utils.attribute_handling import (
+from snowflake.snowpark_connect.utils.identifiers import (
     split_fully_qualified_spark_name,
 )
 from snowflake.snowpark_connect.utils.session import _get_current_snowpark_session
@@ -27,9 +28,16 @@ from snowflake.snowpark_connect.utils.telemetry import (
 
 def post_process_df(
     df: snowpark.DataFrame, plan_id: int, source_table_name: str = None
-) -> snowpark.DataFrame:
+) -> DataFrameContainer:
+    def _lower_or_unquote(string):
+        return (
+            string[1:-1].replace('""', '"')
+            if ALREADY_QUOTED.match(string)
+            else string.lower()
+        )
+
     try:
-        true_names = list(map(lambda x: unquote_if_quoted(x).lower(), df.columns))
+        true_names = list(map(lambda x: _lower_or_unquote(x), df.columns))
         renamed_df, snowpark_column_names = rename_columns_as_snowflake_standard(
             df, plan_id
         )
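Editor's note: the new _lower_or_unquote helper changes column-name normalization — identifiers that are already double-quoted keep their case and only lose the surrounding quotes (with "" unescaped), while all other names are lower-cased as before. A minimal standalone sketch of that behavior follows; the ALREADY_QUOTED pattern below is an assumption, since its real definition lives in column_name_handler.py and is not part of this diff.

import re

# Assumed shape of ALREADY_QUOTED: a Snowflake-style identifier wrapped in
# double quotes, with "" as the escape for an embedded quote.
ALREADY_QUOTED = re.compile(r'^"(?:[^"]|"")*"$')

def _lower_or_unquote(string: str) -> str:
    # Quoted identifiers keep their case; only the quotes and "" escapes are removed.
    # Everything else is folded to lower case, matching Spark's case-insensitive names.
    return (
        string[1:-1].replace('""', '"')
        if ALREADY_QUOTED.match(string)
        else string.lower()
    )

print(_lower_or_unquote('"MyCol"'))  # MyCol  (the old unquote-then-lower path gave mycol)
print(_lower_or_unquote('"a""b"'))   # a"b
print(_lower_or_unquote("COL_A"))    # col_a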
@@ -44,11 +52,11 @@ def post_process_df(
            if current_schema:
                name_parts = [unquote_if_quoted(current_schema)] + name_parts
 
-        return with_column_map(
-            renamed_df,
-            true_names,
-            snowpark_column_names,
-            [f.datatype for f in df.schema.fields],
+        return DataFrameContainer.create_with_column_mapping(
+            dataframe=renamed_df,
+            spark_column_names=true_names,
+            snowpark_column_names=snowpark_column_names,
+            snowpark_column_types=[f.datatype for f in df.schema.fields],
             column_qualifiers=[name_parts] * len(true_names)
             if source_table_name
             else None,
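Editor's note: dataframe_container.py is new in this release (+242 lines) but its body is not part of these hunks. Judging only from the call sites — create_with_column_mapping(...), container.dataframe, container.column_map — a hypothetical sketch of the interface it appears to expose could look like the following; the names, defaults, and internal representation of the column map are assumptions, not the package's actual implementation.

from dataclasses import dataclass
from typing import Any, Optional, Sequence

@dataclass
class DataFrameContainer:
    # Wraps a snowpark.DataFrame together with its Spark<->Snowpark column mapping,
    # instead of monkey-patching attributes such as _column_map onto the DataFrame.
    dataframe: Any
    column_map: Any = None

    @classmethod
    def create_with_column_mapping(
        cls,
        dataframe: Any,
        spark_column_names: Sequence[str],
        snowpark_column_names: Sequence[str],
        snowpark_column_types: Sequence[Any],
        column_qualifiers: Optional[Sequence[Sequence[str]]] = None,
    ) -> "DataFrameContainer":
        # The real implementation presumably builds a ColumnNameMap; a plain dict
        # stands in for it here purely for illustration.
        column_map = {
            "spark": list(spark_column_names),
            "snowpark": list(snowpark_column_names),
            "types": list(snowpark_column_types),
            "qualifiers": column_qualifiers,
        }
        return cls(dataframe=dataframe, column_map=column_map)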
@@ -66,19 +74,18 @@ def post_process_df(
 
 def get_table_from_name(
     table_name: str, session: snowpark.Session, plan_id: int
-) -> snowpark.DataFrame:
+) -> DataFrameContainer:
+    """Get table from name returning a container."""
     snowpark_name = ".".join(
         quote_name_without_upper_casing(part)
         for part in split_fully_qualified_spark_name(table_name)
     )
 
-    if auto_uppercase_dml():
+    if auto_uppercase_non_column_identifiers():
         snowpark_name = snowpark_name.upper()
 
     df = session.read.table(snowpark_name)
-    post_processed_df = post_process_df(df, plan_id, table_name)
-    post_processed_df._table_name = table_name
-    return post_processed_df
+    return post_process_df(df, plan_id, table_name)
 
 
 def get_table_from_query(
@@ -88,7 +95,9 @@ def get_table_from_query(
     return post_process_df(df, plan_id)
 
 
-def map_read_table(rel: relation_proto.Relation) -> snowpark.DataFrame:
+def map_read_table(
+    rel: relation_proto.Relation,
+) -> DataFrameContainer:
     """
     Read a table into a Snowpark DataFrame.
     """
@@ -7,7 +7,7 @@ import typing
 import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 
 from snowflake import snowpark
-from snowflake.snowpark_connect.column_name_handler import with_column_map
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.read.utils import (
     get_spark_column_names_from_snowpark_columns,
     rename_columns_as_snowflake_standard,
@@ -71,7 +71,7 @@ def map_read_text(
     schema: snowpark.types.StructType | None,
     session: snowpark.Session,
     paths: list[str],
-) -> snowpark.DataFrame:
+) -> DataFrameContainer:
     """
     Read a TEXT file into a Snowpark DataFrame.
     """
@@ -98,9 +98,9 @@ def map_read_text(
     renamed_df, snowpark_column_names = rename_columns_as_snowflake_standard(
         df, rel.common.plan_id
     )
-    return with_column_map(
-        renamed_df,
-        spark_column_names,
-        snowpark_column_names,
-        [f.datatype for f in df.schema.fields],
+    return DataFrameContainer.create_with_column_mapping(
+        dataframe=renamed_df,
+        spark_column_names=spark_column_names,
+        snowpark_column_names=snowpark_column_names,
+        snowpark_column_types=[f.datatype for f in df.schema.fields],
     )
@@ -346,6 +346,7 @@ class JsonReaderConfig(ReaderWriterConfig):
            "compression",
            # "ignoreNullFields",
            "rowsToInferSchema",
+            # "inferTimestamp",
        },
        boolean_config_list=[
            "multiLine",
@@ -32,6 +32,7 @@ from snowflake.snowpark_connect.column_name_handler import (
     ColumnNameMap,
     make_column_names_snowpark_compatible,
 )
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.map_relation import map_relation
 
 TYPE_MAP_FOR_TO_SCHEMA = {
@@ -91,7 +92,9 @@ TYPE_MAP_FOR_TO_SCHEMA = {
 
 
 def get_df_with_partition_row_number(
-    df: snowpark.DataFrame, plan_id: int | None, row_number_column_name: str
+    container: DataFrameContainer,
+    plan_id: int | None,
+    row_number_column_name: str,
 ) -> snowpark.DataFrame:
     """
     Add a row number for each row in each partition for the given df, where
@@ -106,21 +109,24 @@ def get_df_with_partition_row_number(
     |  c|  4|        |  c|  4|           0|
     +---+---+        +---+---+------------+
     """
+    df = container.dataframe
+    column_map = container.column_map
+
     row_number_snowpark_column_name = make_column_names_snowpark_compatible(
-        [row_number_column_name], plan_id, len(df._column_map.get_spark_columns())
+        [row_number_column_name], plan_id, len(column_map.get_spark_columns())
     )[0]
     row_number_snowpark_column = (
         snowpark_fn.row_number()
         .over(
             snowpark.window.Window.partition_by(
-                *df._column_map.get_snowpark_columns()
+                *column_map.get_snowpark_columns()
             ).order_by(snowpark_fn.lit(1))
         )
         .alias(row_number_snowpark_column_name)
     )
 
     df_with_partition_number = df.select(
-        *df._column_map.get_snowpark_columns(), row_number_snowpark_column
+        *column_map.get_snowpark_columns(), row_number_snowpark_column
     )
     return df_with_partition_number
 
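Editor's note: the refactored get_df_with_partition_row_number builds a row_number() window partitioned by every existing column and ordered by a constant, so each row simply receives an index within its group of identical rows. A minimal standalone Snowpark sketch of the same pattern, assuming a configured session (the connection parameters are placeholders):

from snowflake.snowpark import Session, Window
from snowflake.snowpark.functions import lit, row_number

connection_parameters = {}  # fill in account/user/password etc. for a real run
session = Session.builder.configs(connection_parameters).create()

df = session.create_dataframe(
    [("a", 1), ("a", 1), ("b", 3), ("c", 4)], schema=["k", "v"]
)

# Partition by all columns and order by a constant: the ordering is irrelevant,
# only a distinct per-partition row index is needed.
row_num = (
    row_number()
    .over(Window.partition_by(df["k"], df["v"]).order_by(lit(1)))
    .alias("partition_row_number")
)

df.select(df["k"], df["v"], row_num).show()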
@@ -197,7 +203,7 @@ def get_semantic_string(rel: relation_proto.Relation) -> str:
     """
     queries = [
         query
-        for query_list in map_relation(rel)._plan.execution_queries.values()
+        for query_list in map_relation(rel).dataframe._plan.execution_queries.values()
         for query in query_list
     ]
 
@@ -10,6 +10,7 @@ import snowflake.snowpark
 from snowflake import snowpark
 from snowflake.snowpark import DataFrameWriter
 from snowflake.snowpark.dataframe import DataFrame
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.read import jdbc_read_dbapi
 from snowflake.snowpark_connect.relation.read.jdbc_read_dbapi import JdbcDialect
 from snowflake.snowpark_connect.relation.read.utils import Connection
@@ -36,7 +37,7 @@ class JdbcDataFrameWriter(DataFrameWriter):
 
     def jdbc_write_dbapi(
         self,
-        input_df: DataFrame,
+        container: DataFrameContainer,
         create_connection: Callable[[dict[str, str]], "Connection"],
         close_connection: Callable[[Connection], None],
         table: str,
@@ -46,6 +47,7 @@ class JdbcDataFrameWriter(DataFrameWriter):
         Write a Snowpark Dataframe data into table of a JDBC datasource.
         """
 
+        input_df = container.dataframe
         conn = create_connection(self.jdbc_options)
         try:
             url = self.jdbc_options.get("url", None)
@@ -53,32 +55,32 @@ class JdbcDataFrameWriter(DataFrameWriter):
 
             table_exist = self._does_table_exist(conn, table)
             insert_query = self._generate_insert_query(
-                input_df,
+                container,
                 table,
             )
 
             match write_mode:
                 case "append":
                     if not table_exist:
-                        self._create_table(conn, table, input_df, jdbc_dialect)
+                        self._create_table(conn, table, container, jdbc_dialect)
                 case "errorifexists":
                     if table_exist:
                         raise ValueError(
                             "table is already exist and write mode is ERROR_IF_EXISTS"
                         )
                     else:
-                        self._create_table(conn, table, input_df, jdbc_dialect)
+                        self._create_table(conn, table, container, jdbc_dialect)
                 case "overwrite":
                     if table_exist:
                         self._drop_table(conn, table)
-                    self._create_table(conn, table, input_df, jdbc_dialect)
+                    self._create_table(conn, table, container, jdbc_dialect)
                 case "ignore":
                     if table_exist:
                         # With Ignore write mode, if table already exists, the save operation is expected
                         # to not save the contents of the DataFrame and to not change the existing data.
                         return
                     else:
-                        self._create_table(conn, table, input_df, jdbc_dialect)
+                        self._create_table(conn, table, container, jdbc_dialect)
                 case _:
                     raise ValueError(f"Invalid write mode value{write_mode}")
 
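Editor's note: the match statement above implements Spark's standard save modes (append, errorifexists, overwrite, ignore) for the JDBC write path. For context, a hedged client-side sketch of how a mode is chosen through the ordinary PySpark writer; the JDBC URL and table name are placeholders and a suitable JDBC driver must be available at run time.

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
spark_df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "name"])

(
    spark_df.write.format("jdbc")
    .option("url", "jdbc:postgresql://host:5432/db")  # placeholder URL
    .option("dbtable", "target_table")                # placeholder table
    .mode("append")  # append | overwrite | ignore | errorifexists
    .save()
)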
@@ -92,14 +94,14 @@ class JdbcDataFrameWriter(DataFrameWriter):
         finally:
             close_connection(conn)
 
-    def _generate_insert_query(self, input_df: DataFrame, table: str) -> str:
+    def _generate_insert_query(self, container: DataFrameContainer, table: str) -> str:
         """
         Generates INSERT statement with placeholders.
-        :param input_df: Snowpark dataframe to save
+        :param container: Snowpark dataframe container
         :param table: JDBC datasource table name
         :return: INSERT SQL statement
         """
-        true_names = input_df._column_map.get_spark_columns()
+        true_names = container.column_map.get_spark_columns()
         # quote each column name to match PySpark's case-sensitive column naming behavior.
         quoted_column_names = ",".join([f'"{col}"' for col in true_names])
         place_holders = ",".join(["?"] * len(true_names))
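Editor's note: a worked example of the two expressions in _generate_insert_query, using hypothetical column names. The surrounding INSERT template is not included in this hunk, so the assembled statement in the final comment is an assumption.

true_names = ["id", "Name"]  # hypothetical Spark column names from the column map

quoted_column_names = ",".join([f'"{col}"' for col in true_names])
place_holders = ",".join(["?"] * len(true_names))

print(quoted_column_names)  # "id","Name"
print(place_holders)        # ?,?

# Presumably assembled into something like:
#   INSERT INTO <table> ("id","Name") VALUES (?,?)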
@@ -145,7 +147,7 @@ class JdbcDataFrameWriter(DataFrameWriter):
         self,
         conn: Connection,
         table: str,
-        input_df: DataFrame,
+        container,
         jdbc_dialect: JdbcDialect,
     ) -> None:
         """
@@ -154,14 +156,15 @@ class JdbcDataFrameWriter(DataFrameWriter):
 
         :param conn: A Python DBAPI connection over JDBC connection
         :param table: DBC datasource table name
-        :param input_df: Snowpark dataframe to save
+        :param container: Snowpark dataframe container
         :param jdbc_dialect: JDBC specific dialect
         :return: None
         """
+        input_df = container.dataframe
         columns_str = ""
         fields = input_df.schema.fields
         total_columns = len(fields)
-        column_map = input_df._column_map
+        column_map = container.column_map
 
         column_index = 0
         for field in fields: