snowpark-connect 0.20.2__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of snowpark-connect has been flagged as possibly problematic.
Files changed (67)
  1. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +3 -2
  2. snowflake/snowpark_connect/column_name_handler.py +6 -65
  3. snowflake/snowpark_connect/config.py +28 -14
  4. snowflake/snowpark_connect/dataframe_container.py +242 -0
  5. snowflake/snowpark_connect/execute_plan/map_execution_command.py +13 -23
  6. snowflake/snowpark_connect/execute_plan/map_execution_root.py +9 -5
  7. snowflake/snowpark_connect/expression/map_extension.py +2 -1
  8. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +8 -7
  9. snowflake/snowpark_connect/expression/map_unresolved_function.py +279 -43
  10. snowflake/snowpark_connect/expression/map_unresolved_star.py +8 -8
  11. snowflake/snowpark_connect/expression/map_update_fields.py +1 -1
  12. snowflake/snowpark_connect/expression/typer.py +6 -6
  13. snowflake/snowpark_connect/proto/control_pb2.py +17 -16
  14. snowflake/snowpark_connect/proto/control_pb2.pyi +17 -17
  15. snowflake/snowpark_connect/proto/control_pb2_grpc.py +12 -63
  16. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +15 -14
  17. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +19 -14
  18. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +27 -26
  19. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +74 -68
  20. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +5 -5
  21. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +25 -17
  22. snowflake/snowpark_connect/relation/map_aggregate.py +72 -47
  23. snowflake/snowpark_connect/relation/map_catalog.py +2 -2
  24. snowflake/snowpark_connect/relation/map_column_ops.py +207 -144
  25. snowflake/snowpark_connect/relation/map_crosstab.py +25 -6
  26. snowflake/snowpark_connect/relation/map_extension.py +81 -56
  27. snowflake/snowpark_connect/relation/map_join.py +72 -63
  28. snowflake/snowpark_connect/relation/map_local_relation.py +35 -20
  29. snowflake/snowpark_connect/relation/map_map_partitions.py +21 -16
  30. snowflake/snowpark_connect/relation/map_relation.py +22 -16
  31. snowflake/snowpark_connect/relation/map_row_ops.py +232 -146
  32. snowflake/snowpark_connect/relation/map_sample_by.py +15 -8
  33. snowflake/snowpark_connect/relation/map_show_string.py +42 -5
  34. snowflake/snowpark_connect/relation/map_sql.py +155 -78
  35. snowflake/snowpark_connect/relation/map_stats.py +88 -39
  36. snowflake/snowpark_connect/relation/map_subquery_alias.py +13 -14
  37. snowflake/snowpark_connect/relation/map_udtf.py +6 -9
  38. snowflake/snowpark_connect/relation/read/map_read.py +8 -3
  39. snowflake/snowpark_connect/relation/read/map_read_csv.py +7 -7
  40. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +7 -7
  41. snowflake/snowpark_connect/relation/read/map_read_json.py +7 -7
  42. snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -7
  43. snowflake/snowpark_connect/relation/read/map_read_socket.py +7 -3
  44. snowflake/snowpark_connect/relation/read/map_read_table.py +25 -16
  45. snowflake/snowpark_connect/relation/read/map_read_text.py +7 -7
  46. snowflake/snowpark_connect/relation/utils.py +11 -5
  47. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +15 -12
  48. snowflake/snowpark_connect/relation/write/map_write.py +199 -40
  49. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +3 -2
  50. snowflake/snowpark_connect/server.py +34 -4
  51. snowflake/snowpark_connect/type_mapping.py +2 -23
  52. snowflake/snowpark_connect/utils/cache.py +27 -22
  53. snowflake/snowpark_connect/utils/context.py +33 -17
  54. snowflake/snowpark_connect/utils/{attribute_handling.py → identifiers.py} +47 -0
  55. snowflake/snowpark_connect/utils/session.py +41 -34
  56. snowflake/snowpark_connect/utils/telemetry.py +1 -2
  57. snowflake/snowpark_connect/version.py +1 -1
  58. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/METADATA +5 -3
  59. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/RECORD +67 -64
  60. snowpark_connect-0.21.0.dist-info/licenses/LICENSE-binary +568 -0
  61. snowpark_connect-0.21.0.dist-info/licenses/NOTICE-binary +1533 -0
  62. {snowpark_connect-0.20.2.data → snowpark_connect-0.21.0.data}/scripts/snowpark-connect +0 -0
  63. {snowpark_connect-0.20.2.data → snowpark_connect-0.21.0.data}/scripts/snowpark-session +0 -0
  64. {snowpark_connect-0.20.2.data → snowpark_connect-0.21.0.data}/scripts/snowpark-submit +0 -0
  65. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/WHEEL +0 -0
  66. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/licenses/LICENSE.txt +0 -0
  67. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/top_level.txt +0 -0
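The recurring change across the hunks below is that plan-mapping and catalog functions stop returning a bare snowpark.DataFrame (with a column map monkey-patched onto df._column_map) and instead return the new DataFrameContainer introduced in dataframe_container.py. Only the call sites are visible in this diff, so the following is a minimal, hypothetical sketch of the container's shape inferred from those call sites: the dataframe/column_map attributes and the keyword-only factory parameters are taken from the hunks, everything else (including the placeholder column-map construction) is an assumption, and the real module is 242 lines.

# Hypothetical sketch of the container added in dataframe_container.py, reconstructed
# purely from the call sites visible in the hunks below; the real implementation
# certainly carries more behaviour than this.
from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Optional

from snowflake import snowpark
from snowflake.snowpark.types import DataType


@dataclass
class DataFrameContainer:
    dataframe: snowpark.DataFrame  # the wrapped Snowpark DataFrame
    column_map: Any                # Spark <-> Snowpark column-name mapping

    @classmethod
    def create_with_column_mapping(
        cls,
        *,
        dataframe: snowpark.DataFrame,
        spark_column_names: list[str],
        snowpark_column_names: list[str],
        snowpark_column_types: Optional[list[DataType]] = None,
        column_qualifiers: Optional[list[list[str]]] = None,
        parent_column_name_map: Any = None,
    ) -> DataFrameContainer:
        # Placeholder: the real factory presumably builds the same column-map object
        # that column_name_handler.with_column_map() used to attach to df._column_map.
        column_map = dict(zip(spark_column_names, snowpark_column_names))
        return cls(dataframe=dataframe, column_map=column_map)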
snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py:

@@ -10,7 +10,6 @@ import pyspark.sql.connect.proto.common_pb2 as common_proto
 import pyspark.sql.connect.proto.types_pb2 as types_proto
 from snowflake.core.exceptions import NotFoundError
 
-from snowflake import snowpark
 from snowflake.snowpark import functions
 from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     quote_name_without_upper_casing,
@@ -18,8 +17,11 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
 )
 from snowflake.snowpark.functions import lit
 from snowflake.snowpark.types import BooleanType, StringType
-from snowflake.snowpark_connect import column_name_handler
-from snowflake.snowpark_connect.config import auto_uppercase_ddl, global_config
+from snowflake.snowpark_connect.config import (
+    auto_uppercase_non_column_identifiers,
+    global_config,
+)
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.catalogs.abstract_spark_catalog import (
     AbstractSparkCatalog,
     _get_current_snowflake_schema,
@@ -27,7 +29,7 @@ from snowflake.snowpark_connect.relation.catalogs.abstract_spark_catalog import
     _process_multi_layer_identifier,
 )
 from snowflake.snowpark_connect.type_mapping import proto_to_snowpark_type
-from snowflake.snowpark_connect.utils.attribute_handling import (
+from snowflake.snowpark_connect.utils.identifiers import (
     split_fully_qualified_spark_name,
 )
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
@@ -40,7 +42,7 @@ from snowflake.snowpark_connect.utils.udf_cache import cached_udf
 def _normalize_identifier(identifier: str | None) -> str | None:
     if identifier is None:
         return None
-    return identifier.upper() if auto_uppercase_ddl() else identifier
+    return identifier.upper() if auto_uppercase_non_column_identifiers() else identifier
 
 
 def sf_quote(name: str | None) -> str | None:
@@ -345,7 +347,7 @@ class SnowflakeCatalog(AbstractSparkCatalog):
     def dropGlobalTempView(
         self,
         spark_view_name: str,
-    ) -> snowpark.DataFrame:
+    ) -> DataFrameContainer:
         session = get_or_create_snowpark_session()
         schema = global_config.spark_sql_globalTempDatabase
         result_df = session.sql(
@@ -358,14 +360,17 @@ class SnowflakeCatalog(AbstractSparkCatalog):
             )
         )
         columns = ["value"]
-        return column_name_handler.with_column_map(
-            result_df, columns, columns, [BooleanType()]
+        return DataFrameContainer.create_with_column_mapping(
+            dataframe=result_df,
+            spark_column_names=columns,
+            snowpark_column_names=columns,
+            snowpark_column_types=[BooleanType()],
         )
 
     def dropTempView(
         self,
         spark_view_name: str,
-    ) -> snowpark.DataFrame:
+    ) -> DataFrameContainer:
         """Drop the current temporary view."""
         session = get_or_create_snowpark_session()
         result = session.sql(
@@ -377,8 +382,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         )
         result_df = session.createDataFrame([(view_was_dropped,)], schema=["value"])
         columns = ["value"]
-        return column_name_handler.with_column_map(
-            result_df, columns, columns, [BooleanType()]
+        return DataFrameContainer.create_with_column_mapping(
+            dataframe=result_df,
+            spark_column_names=columns,
+            snowpark_column_names=columns,
+            snowpark_column_types=[BooleanType()],
        )
 
     def createTable(
@@ -389,7 +397,7 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         schema: types_proto.DataType,
         description: str,
         **options: typing.Any,
-    ) -> snowpark.DataFrame:
+    ) -> DataFrameContainer:
         """Create either an external, or a managed table.
 
         If path is supplied in which the data for this table exists. When path is specified, an external table is
@@ -422,11 +430,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         table_name_parts = split_fully_qualified_spark_name(tableName)
         qualifiers = [table_name_parts for _ in columns]
         column_types = [f.datatype for f in sp_schema.fields]
-        return column_name_handler.with_column_map(
-            session.createDataFrame([], sp_schema),
-            columns,
-            columns,
-            column_types,
+        return DataFrameContainer.create_with_column_mapping(
+            dataframe=session.createDataFrame([], sp_schema),
+            spark_column_names=columns,
+            snowpark_column_names=columns,
+            snowpark_column_types=column_types,
             column_qualifiers=qualifiers,
         )
 
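The catalog methods above now hand back the container rather than a raw DataFrame. How callers consume it is visible in the map_aggregate.py hunks that follow; as a quick illustrative usage (the function and variable names here are made up, only the dataframe/column_map attributes come from the diff):

# Illustrative only: unwrapping the container now returned by the 0.21.0 catalog API.
from snowflake.snowpark_connect.dataframe_container import DataFrameContainer


def show_drop_result(container: DataFrameContainer) -> None:
    # e.g. container = SnowflakeCatalog().dropTempView("my_view")
    snowpark_df = container.dataframe  # the underlying snowpark.DataFrame
    name_map = container.column_map    # Spark <-> Snowpark column-name mapping
    print(name_map)
    snowpark_df.show()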
snowflake/snowpark_connect/relation/map_aggregate.py:

@@ -11,8 +11,8 @@ from snowflake import snowpark
 from snowflake.snowpark.types import DataType
 from snowflake.snowpark_connect.column_name_handler import (
     make_column_names_snowpark_compatible,
-    with_column_map,
 )
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
 from snowflake.snowpark_connect.expression.map_expression import (
     map_single_column_expression,
@@ -20,103 +20,124 @@ from snowflake.snowpark_connect.expression.map_expression import (
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.relation.map_relation import map_relation
 from snowflake.snowpark_connect.typed_column import TypedColumn
-from snowflake.snowpark_connect.utils.context import temporary_pivot_expression
+from snowflake.snowpark_connect.utils.context import (
+    set_current_grouping_columns,
+    temporary_pivot_expression,
+)
 
 
-def map_group_by_aggregate(rel: relation_proto.Relation) -> snowpark.DataFrame:
+def map_group_by_aggregate(
+    rel: relation_proto.Relation,
+) -> DataFrameContainer:
     """
     Groups the DataFrame using the specified columns.
 
     Aggregations come in as expressions, which are mapped to `snowpark.Column`
     objects.
     """
-    input_df, columns = map_aggregate_helper(rel)
+    input_df_container, columns = map_aggregate_helper(rel)
+    input_df_actual = input_df_container.dataframe
+
     if len(columns.grouping_expressions()) == 0:
-        result = input_df.agg(*columns.aggregation_expressions())
+        result = input_df_actual.agg(*columns.aggregation_expressions())
     else:
-        result = input_df.group_by(*columns.grouping_expressions()).agg(
+        result = input_df_actual.group_by(*columns.grouping_expressions()).agg(
             *columns.aggregation_expressions()
         )
-    return with_column_map(
-        result,
-        columns.spark_names(),
-        columns.snowpark_names(),
-        columns.data_types(),
+    return DataFrameContainer.create_with_column_mapping(
+        dataframe=result,
+        spark_column_names=columns.spark_names(),
+        snowpark_column_names=columns.snowpark_names(),
+        snowpark_column_types=columns.data_types(),
         column_qualifiers=columns.get_qualifiers(),
-        parent_column_name_map=input_df._column_map,
+        parent_column_name_map=input_df_container.column_map,
     )
 
 
-def map_rollup_aggregate(rel: relation_proto.Relation) -> snowpark.DataFrame:
+def map_rollup_aggregate(
+    rel: relation_proto.Relation,
+) -> DataFrameContainer:
     """
     Create a multidimensional rollup for the current DataFrame using the specified columns.
 
     Aggregations come in as expressions, which are mapped to `snowpark.Column`
     objects.
     """
-    input_df, columns = map_aggregate_helper(rel)
+    input_container, columns = map_aggregate_helper(rel)
+    input_df_actual = input_container.dataframe
+
     if len(columns.grouping_expressions()) == 0:
-        result = input_df.agg(*columns.aggregation_expressions())
+        result = input_df_actual.agg(*columns.aggregation_expressions())
     else:
-        result = input_df.rollup(*columns.grouping_expressions()).agg(
+        result = input_df_actual.rollup(*columns.grouping_expressions()).agg(
             *columns.aggregation_expressions()
         )
-    return with_column_map(
-        result,
-        columns.spark_names(),
-        columns.snowpark_names(),
-        columns.data_types(),
+    return DataFrameContainer.create_with_column_mapping(
+        dataframe=result,
+        spark_column_names=columns.spark_names(),
+        snowpark_column_names=columns.snowpark_names(),
+        snowpark_column_types=columns.data_types(),
         column_qualifiers=columns.get_qualifiers(),
-        parent_column_name_map=input_df._column_map,
+        parent_column_name_map=input_container.column_map,
     )
 
 
-def map_cube_aggregate(rel: relation_proto.Relation) -> snowpark.DataFrame:
+def map_cube_aggregate(
+    rel: relation_proto.Relation,
+) -> DataFrameContainer:
     """
     Create a multidimensional cube for the current DataFrame using the specified columns.
 
     Aggregations come in as expressions, which are mapped to `snowpark.Column`
     objects.
     """
-    input_df, columns = map_aggregate_helper(rel)
+    input_container, columns = map_aggregate_helper(rel)
+    input_df_actual = input_container.dataframe
+
     if len(columns.grouping_expressions()) == 0:
-        result = input_df.agg(*columns.aggregation_expressions())
+        result = input_df_actual.agg(*columns.aggregation_expressions())
     else:
-        result = input_df.cube(*columns.grouping_expressions()).agg(
+        result = input_df_actual.cube(*columns.grouping_expressions()).agg(
            *columns.aggregation_expressions()
         )
-    return with_column_map(
-        result,
-        columns.spark_names(),
-        columns.snowpark_names(),
-        columns.data_types(),
+    return DataFrameContainer.create_with_column_mapping(
+        dataframe=result,
+        spark_column_names=columns.spark_names(),
+        snowpark_column_names=columns.snowpark_names(),
+        snowpark_column_types=columns.data_types(),
         column_qualifiers=columns.get_qualifiers(),
-        parent_column_name_map=input_df._column_map,
+        parent_column_name_map=input_container.column_map,
     )
 
 
-def map_pivot_aggregate(rel: relation_proto.Relation) -> snowpark.DataFrame:
+def map_pivot_aggregate(
+    rel: relation_proto.Relation,
+) -> DataFrameContainer:
     """
     Pivots a column of the current DataFrame and performs the specified aggregation.
 
     There are 2 versions of the pivot function: one that requires the caller to specify the list of the distinct values
     to pivot on and one that does not.
     """
-    input_df, columns = map_aggregate_helper(rel, pivot=True, skip_alias=True)
+    input_container, columns = map_aggregate_helper(rel, pivot=True, skip_alias=True)
+    input_df_actual = input_container.dataframe
+
     pivot_column = map_single_column_expression(
-        rel.aggregate.pivot.col, input_df._column_map, ExpressionTyper(input_df)
+        rel.aggregate.pivot.col,
+        input_container.column_map,
+        ExpressionTyper(input_df_actual),
     )
     pivot_values = [
         get_literal_field_and_name(lit)[0] for lit in rel.aggregate.pivot.values
     ]
 
     if len(columns.grouping_expressions()) == 0:
-        result = input_df.pivot(
+        result = input_df_actual.pivot(
             pivot_column[1].col, pivot_values if pivot_values else None
         ).agg(*columns.aggregation_expressions())
     else:
         result = (
-            input_df.group_by(*columns.grouping_expressions())
+            input_df_actual.group_by(*columns.grouping_expressions())
             .pivot(pivot_column[1].col, pivot_values if pivot_values else None)
             .agg(*columns.aggregation_expressions())
         )
@@ -124,7 +145,7 @@ def map_pivot_aggregate(rel: relation_proto.Relation) -> snowpark.DataFrame:
     spark_columns = []
     for col in [string_parser(s) for s in result.columns]:
         spark_col = (
-            input_df._column_map.get_spark_column_name_from_snowpark_column_name(
+            input_container.column_map.get_spark_column_name_from_snowpark_column_name(
                 col, allow_non_exists=True
             )
         )
@@ -135,15 +156,15 @@ def map_pivot_aggregate(rel: relation_proto.Relation) -> snowpark.DataFrame:
             spark_columns.append(col)
 
     agg_name_list = [c.spark_name for c in columns.grouping_columns]
-    return with_column_map(
-        result,
-        agg_name_list + spark_columns[len(agg_name_list) :],
-        result.columns,
+    return DataFrameContainer.create_with_column_mapping(
+        dataframe=result,
+        spark_column_names=agg_name_list + spark_columns[len(agg_name_list) :],
+        snowpark_column_names=result.columns,
         column_qualifiers=(
             columns.get_qualifiers()[: len(agg_name_list)]
             + [[]] * (len(spark_columns) - len(agg_name_list))
         ),
-        parent_column_name_map=input_df._column_map,
+        parent_column_name_map=input_container.column_map,
     )
 
 
@@ -246,7 +267,8 @@ class _Columns:
 def map_aggregate_helper(
     rel: relation_proto.Relation, pivot: bool = False, skip_alias: bool = False
 ):
-    input_df = map_relation(rel.aggregate.input)
+    input_container = map_relation(rel.aggregate.input)
+    input_df = input_container.dataframe
     grouping_expressions = rel.aggregate.grouping_expressions
     expressions = rel.aggregate.aggregate_expressions
     groupings: list[_ColumnMetadata] = []
@@ -258,7 +280,7 @@ def map_aggregate_helper(
     with temporary_pivot_expression(pivot):
         for exp in grouping_expressions:
             new_name, snowpark_column = map_single_column_expression(
-                exp, input_df._column_map, typer
+                exp, input_container.column_map, typer
             )
             alias = make_column_names_snowpark_compatible(
                 [new_name], rel.common.plan_id, len(groupings)
@@ -275,9 +297,12 @@ def map_aggregate_helper(
                 )
             )
 
+        grouping_cols = [g.spark_name for g in groupings]
+        set_current_grouping_columns(grouping_cols)
+
         for exp in expressions:
             new_name, snowpark_column = map_single_column_expression(
-                exp, input_df._column_map, typer
+                exp, input_container.column_map, typer
             )
             alias = make_column_names_snowpark_compatible(
                 [new_name], rel.common.plan_id, len(groupings) + len(aggregations)
@@ -313,7 +338,7 @@ def map_aggregate_helper(
         )
 
     return (
-        input_df,
+        input_container,
        _Columns(
            grouping_columns=groupings,
            aggregation_columns=aggregations,
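map_aggregate_helper now publishes the grouping-column names through set_current_grouping_columns, imported from utils/context.py; that module's diff is not included above, so the following is only a plausible sketch, assuming a contextvar-based helper (names beyond set_current_grouping_columns are invented here):

# Plausible sketch only: utils/context.py is not part of the hunks shown, so this is an
# assumption about the helper's shape. A contextvar is one common way to expose "the
# grouping columns of the aggregate currently being mapped" to expression-mapping code.
from __future__ import annotations

import contextvars

_current_grouping_columns: contextvars.ContextVar[list[str] | None] = contextvars.ContextVar(
    "current_grouping_columns", default=None
)


def set_current_grouping_columns(columns: list[str]) -> None:
    # Remember the Spark names of the grouping columns so later expression mapping
    # (e.g. grouping()/grouping_id()-style functions) can resolve against them.
    _current_grouping_columns.set(list(columns))


def get_current_grouping_columns() -> list[str] | None:
    return _current_grouping_columns.get()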
snowflake/snowpark_connect/relation/map_catalog.py:

@@ -7,7 +7,7 @@ import re
 import pandas
 import pyspark.sql.connect.proto.catalog_pb2 as catalog_proto
 
-from snowflake import snowpark
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.catalogs import CATALOGS
 from snowflake.snowpark_connect.relation.catalogs.utils import (
     CURRENT_CATALOG_NAME,
@@ -22,7 +22,7 @@ from snowflake.snowpark_connect.utils.telemetry import (
 
 def map_catalog(
     rel: catalog_proto.Catalog,
-) -> pandas.DataFrame | snowpark.DataFrame:
+) -> DataFrameContainer | pandas.DataFrame:
     match rel.WhichOneof("cat_type"):
         # Database related APIs
         case "current_database":
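With this hunk, map_catalog can return either the new container or a pandas.DataFrame depending on the catalog command, so callers have to branch on the type. A hypothetical sketch of such a dispatch (the real handling lives in the execute-plan layer, which is not shown in this diff; the helper name is invented):

# Hypothetical sketch: handling map_catalog's widened return type at a call site.
from __future__ import annotations

import pandas

from snowflake.snowpark_connect.dataframe_container import DataFrameContainer


def as_pandas(result: DataFrameContainer | pandas.DataFrame) -> pandas.DataFrame:
    if isinstance(result, DataFrameContainer):
        # Materialize the wrapped snowpark.DataFrame; the column map is dropped here.
        return result.dataframe.to_pandas()
    return result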