snowpark-connect 0.20.2__py3-none-any.whl → 0.22.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


Files changed (84)
  1. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +3 -2
  2. snowflake/snowpark_connect/column_name_handler.py +6 -65
  3. snowflake/snowpark_connect/config.py +47 -17
  4. snowflake/snowpark_connect/dataframe_container.py +242 -0
  5. snowflake/snowpark_connect/error/error_utils.py +25 -0
  6. snowflake/snowpark_connect/execute_plan/map_execution_command.py +13 -23
  7. snowflake/snowpark_connect/execute_plan/map_execution_root.py +9 -5
  8. snowflake/snowpark_connect/expression/map_extension.py +2 -1
  9. snowflake/snowpark_connect/expression/map_udf.py +4 -4
  10. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +8 -7
  11. snowflake/snowpark_connect/expression/map_unresolved_function.py +481 -170
  12. snowflake/snowpark_connect/expression/map_unresolved_star.py +8 -8
  13. snowflake/snowpark_connect/expression/map_update_fields.py +1 -1
  14. snowflake/snowpark_connect/expression/typer.py +6 -6
  15. snowflake/snowpark_connect/proto/control_pb2.py +17 -16
  16. snowflake/snowpark_connect/proto/control_pb2.pyi +17 -17
  17. snowflake/snowpark_connect/proto/control_pb2_grpc.py +12 -63
  18. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +15 -14
  19. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +19 -14
  20. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
  21. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +27 -26
  22. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +74 -68
  23. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
  24. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +5 -5
  25. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +25 -17
  26. snowflake/snowpark_connect/relation/map_aggregate.py +170 -61
  27. snowflake/snowpark_connect/relation/map_catalog.py +2 -2
  28. snowflake/snowpark_connect/relation/map_column_ops.py +227 -145
  29. snowflake/snowpark_connect/relation/map_crosstab.py +25 -6
  30. snowflake/snowpark_connect/relation/map_extension.py +81 -56
  31. snowflake/snowpark_connect/relation/map_join.py +72 -63
  32. snowflake/snowpark_connect/relation/map_local_relation.py +35 -20
  33. snowflake/snowpark_connect/relation/map_map_partitions.py +24 -17
  34. snowflake/snowpark_connect/relation/map_relation.py +22 -16
  35. snowflake/snowpark_connect/relation/map_row_ops.py +232 -146
  36. snowflake/snowpark_connect/relation/map_sample_by.py +15 -8
  37. snowflake/snowpark_connect/relation/map_show_string.py +42 -5
  38. snowflake/snowpark_connect/relation/map_sql.py +141 -237
  39. snowflake/snowpark_connect/relation/map_stats.py +88 -39
  40. snowflake/snowpark_connect/relation/map_subquery_alias.py +13 -14
  41. snowflake/snowpark_connect/relation/map_udtf.py +10 -13
  42. snowflake/snowpark_connect/relation/read/map_read.py +8 -3
  43. snowflake/snowpark_connect/relation/read/map_read_csv.py +7 -7
  44. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +7 -7
  45. snowflake/snowpark_connect/relation/read/map_read_json.py +19 -8
  46. snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -7
  47. snowflake/snowpark_connect/relation/read/map_read_socket.py +7 -3
  48. snowflake/snowpark_connect/relation/read/map_read_table.py +25 -16
  49. snowflake/snowpark_connect/relation/read/map_read_text.py +7 -7
  50. snowflake/snowpark_connect/relation/read/reader_config.py +1 -0
  51. snowflake/snowpark_connect/relation/utils.py +11 -5
  52. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +15 -12
  53. snowflake/snowpark_connect/relation/write/map_write.py +259 -56
  54. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +3 -2
  55. snowflake/snowpark_connect/server.py +43 -4
  56. snowflake/snowpark_connect/type_mapping.py +6 -23
  57. snowflake/snowpark_connect/utils/cache.py +27 -22
  58. snowflake/snowpark_connect/utils/context.py +33 -17
  59. snowflake/snowpark_connect/utils/describe_query_cache.py +2 -9
  60. snowflake/snowpark_connect/utils/{attribute_handling.py → identifiers.py} +47 -0
  61. snowflake/snowpark_connect/utils/session.py +41 -38
  62. snowflake/snowpark_connect/utils/telemetry.py +214 -63
  63. snowflake/snowpark_connect/utils/udxf_import_utils.py +14 -0
  64. snowflake/snowpark_connect/version.py +1 -1
  65. snowflake/snowpark_decoder/__init__.py +0 -0
  66. snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.py +36 -0
  67. snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.pyi +156 -0
  68. snowflake/snowpark_decoder/dp_session.py +111 -0
  69. snowflake/snowpark_decoder/spark_decoder.py +76 -0
  70. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/METADATA +6 -4
  71. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/RECORD +83 -69
  72. snowpark_connect-0.22.1.dist-info/licenses/LICENSE-binary +568 -0
  73. snowpark_connect-0.22.1.dist-info/licenses/NOTICE-binary +1533 -0
  74. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/top_level.txt +1 -0
  75. spark/__init__.py +0 -0
  76. spark/connect/__init__.py +0 -0
  77. spark/connect/envelope_pb2.py +31 -0
  78. spark/connect/envelope_pb2.pyi +46 -0
  79. snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
  80. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-connect +0 -0
  81. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-session +0 -0
  82. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-submit +0 -0
  83. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/WHEEL +0 -0
  84. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/licenses/LICENSE.txt +0 -0
snowflake/snowpark_connect/relation/map_sample_by.py

@@ -6,7 +6,7 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
 import pyspark.sql.connect.proto.relations_pb2 as relation_proto

 from snowflake import snowpark
-from snowflake.snowpark_connect.column_name_handler import set_schema_getter
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
 from snowflake.snowpark_connect.expression.map_expression import (
     map_single_column_expression,
@@ -15,21 +15,28 @@ from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.relation.map_relation import map_relation


-def map_sample_by(rel: relation_proto.Relation) -> snowpark.DataFrame:
+def map_sample_by(
+    rel: relation_proto.Relation,
+) -> DataFrameContainer:
     """
     Sample by an expression on the input DataFrame.
     """
-    input_df: snowpark.DataFrame = map_relation(rel.sample_by.input)
+    input_container = map_relation(rel.sample_by.input)
+    input_df = input_container.dataframe
+
     exp: expressions_proto.Expression = rel.sample_by.col
     _, col_expr = map_single_column_expression(
-        exp, input_df._column_map, ExpressionTyper(input_df)
+        exp, input_container.column_map, ExpressionTyper(input_df)
     )
     fractions = {
         get_literal_field_and_name(frac.stratum)[0]: frac.fraction
         for frac in rel.sample_by.fractions
     }
     result: snowpark.DataFrame = input_df.sampleBy(col_expr.col, fractions)
-    result._column_map = input_df._column_map
-    result._table_name = input_df._table_name
-    set_schema_getter(result, lambda: input_df.schema)
-    return result
+    return DataFrameContainer(
+        result,
+        column_map=input_container.column_map,
+        table_name=input_container.table_name,
+        alias=input_container.alias,
+        cached_schema_getter=lambda: input_df.schema,
+    )
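
The hunks above and below follow the same refactoring pattern: instead of stashing metadata on the snowpark.DataFrame itself (_column_map, _table_name, set_schema_getter), each mapper now returns a DataFrameContainer that carries the dataframe together with its column map, table name, alias, and a cached schema getter. The new dataframe_container.py (+242 lines) is not shown in this diff; the snippet below is only a hypothetical sketch of its interface, inferred from the constructor arguments and attribute accesses visible here, not the shipped class.

    from dataclasses import dataclass
    from typing import Any, Callable, Optional

    from snowflake import snowpark

    @dataclass
    class DataFrameContainerSketch:            # hypothetical stand-in, not the shipped class
        dataframe: snowpark.DataFrame          # the wrapped Snowpark DataFrame
        column_map: Optional[Any] = None       # Spark <-> Snowpark column-name mapping
        table_name: Optional[str] = None
        alias: Optional[str] = None
        cached_schema_getter: Optional[Callable[[], Any]] = None  # lazy schema supplier
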
snowflake/snowpark_connect/relation/map_show_string.py

@@ -2,11 +2,17 @@
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #

+import copy
+
 import pandas
 import pyspark.sql.connect.proto.relations_pb2 as relation_proto

 from snowflake import snowpark
 from snowflake.snowpark._internal.analyzer import analyzer_utils
+from snowflake.snowpark.functions import col
+from snowflake.snowpark.types import DateType, StringType, StructField, StructType
+from snowflake.snowpark_connect.column_name_handler import set_schema_getter
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.map_relation import map_relation


@@ -18,12 +24,15 @@ def map_show_string(rel: relation_proto.Relation) -> pandas.DataFrame:
     message creates a string. The client expects this string to be packed into an Arrow
     Buffer object as a single cell.
     """
-    input_df: snowpark.DataFrame = map_relation(rel.show_string.input)
+    input_df_container: DataFrameContainer = map_relation(rel.show_string.input)
+    raw_input_df = input_df_container.dataframe
+    input_df = _handle_datetype_columns(raw_input_df)
+
     show_string = input_df._show_string_spark(
         num_rows=rel.show_string.num_rows,
         truncate=rel.show_string.truncate,
         vertical=rel.show_string.vertical,
-        _spark_column_names=input_df._column_map.get_spark_columns(),
+        _spark_column_names=input_df_container.column_map.get_spark_columns(),
     )
     return pandas.DataFrame({"show_string": [show_string]})

@@ -32,13 +41,15 @@ def map_repr_html(rel: relation_proto.Relation) -> pandas.DataFrame:
     """
     Generate the html string representation of the input dataframe.
     """
-    input_df: snowpark.DataFrame = map_relation(rel.html_string.input)
+    input_df_container: DataFrameContainer = map_relation(rel.html_string.input)
+    input_df = input_df_container.dataframe
+
     input_panda = input_df.toPandas()
     input_panda.rename(
         columns={
             analyzer_utils.unquote_if_quoted(
-                input_df._column_map.get_snowpark_columns()[i]
-            ): input_df._column_map.get_spark_columns()[i]
+                input_df_container.column_map.get_snowpark_columns()[i]
+            ): input_df_container.column_map.get_spark_columns()[i]
             for i in range(len(input_panda.columns))
         },
         inplace=True,
@@ -48,3 +59,29 @@ def map_repr_html(rel: relation_proto.Relation) -> pandas.DataFrame:
         max_rows=rel.html_string.num_rows,
     )
     return pandas.DataFrame({"html_string": [html_string]})
+
+
+def _handle_datetype_columns(input_df: snowpark.DataFrame) -> snowpark.DataFrame:
+    """
+    Map DateType columns to strings so that dates out of range for datetime.datetime can still be shown.
+    """
+    new_column_mapping = []
+    new_fields = []
+    transformation_required = False
+    for field in input_df.schema:
+        if isinstance(field.datatype, DateType):
+            transformation_required = True
+            new_column_mapping.append(col(field.name).cast(StringType()))
+            new_fields.append(StructField(field.name, StringType()))
+        else:
+            new_column_mapping.append(col(field.name))
+            new_fields.append(field)
+
+    if not transformation_required:
+        return input_df
+
+    transformed_df = input_df.select(new_column_mapping)
+    set_schema_getter(transformed_df, lambda: StructType(new_fields))
+    transformed_df._column_map = copy.deepcopy(input_df._column_map)
+
+    return transformed_df
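
The new _handle_datetype_columns helper casts DateType columns to strings before rendering show() output. Its docstring attributes this to dates that fall outside the range of Python's datetime module; the snippet below only illustrates that Python-side limit and is not part of the package.

    import datetime

    # Python's datetime types cover years 1 through 9999 only.
    print(datetime.MINYEAR, datetime.MAXYEAR)   # 1 9999

    try:
        datetime.date(10000, 1, 1)
    except ValueError as exc:
        print(exc)                              # year 10000 is out of range

    # Casting such columns to strings on the server side lets show() render
    # rows whose dates cannot be represented as Python date objects.
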