snowpark-connect 0.27.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
  3. snowflake/snowpark_connect/client/__init__.py +15 -0
  4. snowflake/snowpark_connect/client/error_utils.py +30 -0
  5. snowflake/snowpark_connect/client/exceptions.py +36 -0
  6. snowflake/snowpark_connect/client/query_results.py +90 -0
  7. snowflake/snowpark_connect/client/server.py +680 -0
  8. snowflake/snowpark_connect/client/utils/__init__.py +10 -0
  9. snowflake/snowpark_connect/client/utils/session.py +85 -0
  10. snowflake/snowpark_connect/column_name_handler.py +404 -243
  11. snowflake/snowpark_connect/column_qualifier.py +43 -0
  12. snowflake/snowpark_connect/config.py +237 -23
  13. snowflake/snowpark_connect/constants.py +2 -0
  14. snowflake/snowpark_connect/dataframe_container.py +102 -8
  15. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  16. snowflake/snowpark_connect/error/error_codes.py +50 -0
  17. snowflake/snowpark_connect/error/error_utils.py +172 -23
  18. snowflake/snowpark_connect/error/exceptions.py +13 -4
  19. snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
  20. snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
  21. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  22. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  23. snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
  24. snowflake/snowpark_connect/expression/literal.py +37 -13
  25. snowflake/snowpark_connect/expression/map_cast.py +123 -5
  26. snowflake/snowpark_connect/expression/map_expression.py +80 -27
  27. snowflake/snowpark_connect/expression/map_extension.py +322 -12
  28. snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
  29. snowflake/snowpark_connect/expression/map_udf.py +85 -20
  30. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
  31. snowflake/snowpark_connect/expression/map_unresolved_function.py +2748 -746
  32. snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
  33. snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
  34. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  35. snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
  36. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
  37. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
  38. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
  39. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
  40. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
  41. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
  42. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  43. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
  44. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  45. snowflake/snowpark_connect/relation/io_utils.py +110 -10
  46. snowflake/snowpark_connect/relation/map_aggregate.py +196 -255
  47. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  48. snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
  49. snowflake/snowpark_connect/relation/map_extension.py +263 -29
  50. snowflake/snowpark_connect/relation/map_join.py +683 -442
  51. snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
  52. snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
  53. snowflake/snowpark_connect/relation/map_relation.py +48 -19
  54. snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
  55. snowflake/snowpark_connect/relation/map_show_string.py +13 -6
  56. snowflake/snowpark_connect/relation/map_sql.py +1233 -222
  57. snowflake/snowpark_connect/relation/map_stats.py +48 -9
  58. snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
  59. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  60. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
  61. snowflake/snowpark_connect/relation/read/map_read.py +134 -43
  62. snowflake/snowpark_connect/relation/read/map_read_csv.py +255 -45
  63. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  64. snowflake/snowpark_connect/relation/read/map_read_json.py +320 -85
  65. snowflake/snowpark_connect/relation/read/map_read_parquet.py +142 -27
  66. snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
  67. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  68. snowflake/snowpark_connect/relation/read/map_read_table.py +82 -5
  69. snowflake/snowpark_connect/relation/read/map_read_text.py +18 -3
  70. snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
  71. snowflake/snowpark_connect/relation/read/reader_config.py +36 -3
  72. snowflake/snowpark_connect/relation/read/utils.py +50 -5
  73. snowflake/snowpark_connect/relation/stage_locator.py +91 -55
  74. snowflake/snowpark_connect/relation/utils.py +128 -5
  75. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  76. snowflake/snowpark_connect/relation/write/map_write.py +929 -319
  77. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  78. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  79. snowflake/snowpark_connect/resources_initializer.py +110 -48
  80. snowflake/snowpark_connect/server.py +546 -456
  81. snowflake/snowpark_connect/server_common/__init__.py +500 -0
  82. snowflake/snowpark_connect/snowflake_session.py +65 -0
  83. snowflake/snowpark_connect/start_server.py +53 -5
  84. snowflake/snowpark_connect/type_mapping.py +349 -27
  85. snowflake/snowpark_connect/typed_column.py +9 -7
  86. snowflake/snowpark_connect/utils/artifacts.py +9 -8
  87. snowflake/snowpark_connect/utils/cache.py +49 -27
  88. snowflake/snowpark_connect/utils/concurrent.py +36 -1
  89. snowflake/snowpark_connect/utils/context.py +187 -37
  90. snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
  91. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  92. snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
  93. snowflake/snowpark_connect/utils/identifiers.py +137 -3
  94. snowflake/snowpark_connect/utils/io_utils.py +57 -1
  95. snowflake/snowpark_connect/utils/java_stored_procedure.py +125 -0
  96. snowflake/snowpark_connect/utils/java_udaf_utils.py +303 -0
  97. snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
  98. snowflake/snowpark_connect/utils/jvm_udf_utils.py +248 -0
  99. snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
  100. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  101. snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
  102. snowflake/snowpark_connect/utils/profiling.py +25 -8
  103. snowflake/snowpark_connect/utils/scala_udf_utils.py +101 -332
  104. snowflake/snowpark_connect/utils/sequence.py +21 -0
  105. snowflake/snowpark_connect/utils/session.py +64 -28
  106. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
  107. snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
  108. snowflake/snowpark_connect/utils/telemetry.py +163 -22
  109. snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
  110. snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
  111. snowflake/snowpark_connect/utils/udf_cache.py +117 -41
  112. snowflake/snowpark_connect/utils/udf_helper.py +39 -37
  113. snowflake/snowpark_connect/utils/udf_utils.py +133 -14
  114. snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
  115. snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
  116. snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
  117. snowflake/snowpark_connect/version.py +1 -1
  118. snowflake/snowpark_decoder/dp_session.py +6 -2
  119. snowflake/snowpark_decoder/spark_decoder.py +12 -0
  120. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-submit +2 -2
  121. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/METADATA +14 -7
  122. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/RECORD +129 -167
  123. snowflake/snowpark_connect/hidden_column.py +0 -39
  124. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  125. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  126. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  127. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  128. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  129. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  130. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  131. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  132. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  133. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  134. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  135. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  136. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  137. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  138. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  139. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  140. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  141. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  142. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  143. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  144. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  145. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  146. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  147. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  148. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  149. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  150. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  151. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  152. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  153. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  154. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  155. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  156. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  157. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  158. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  159. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  160. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  161. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  162. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  163. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  164. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  165. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  166. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  167. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  168. snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
  169. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  170. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  171. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  172. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  173. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  174. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  175. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  176. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  177. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  178. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  179. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  180. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  181. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  182. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  183. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  184. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  185. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  186. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-connect +0 -0
  187. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-session +0 -0
  188. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/WHEEL +0 -0
  189. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE-binary +0 -0
  190. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE.txt +0 -0
  191. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/NOTICE-binary +0 -0
  192. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/top_level.txt +0 -0
@@ -9,8 +9,11 @@ import snowflake.snowpark.functions as snowpark_fn
  from snowflake.snowpark._internal.analyzer.analyzer_utils import (
      quote_name_without_upper_casing,
  )
- from snowflake.snowpark.types import StructType
+ from snowflake.snowpark.types import StringType, StructType, VariantType
  from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+ from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.expression.typer import ExpressionTyper
  from snowflake.snowpark_connect.typed_column import TypedColumn
  from snowflake.snowpark_connect.utils.context import get_outer_dataframes
@@ -26,7 +29,7 @@ def check_struct_and_get_field_datatype(field_name, schema):
          else:
              return None
      else:
-         None
+         return None


  def map_unresolved_star(
@@ -34,7 +37,6 @@ def map_unresolved_star(
      column_mapping: ColumnNameMap,
      typer: ExpressionTyper,
  ) -> tuple[list[str], TypedColumn]:
-
      if exp.unresolved_star.HasField("unparsed_target"):
          unparsed_target = exp.unresolved_star.unparsed_target
          name_parts = split_fully_qualified_spark_name(unparsed_target)
@@ -54,16 +56,17 @@ def map_unresolved_star(
              return spark_names, typed_column

          # scenario where it is expanding * to mulitple columns
-         spark_names = []
-         snowpark_names = []
-         qualifiers = []
+         spark_names: list[str] = []
+         snowpark_names: list[str] = []
+         qualifiers: list[set[ColumnQualifier]] = []

+         target_qualifier = ColumnQualifier(tuple(name_parts[:-1]))
          (
              spark_names,
              snowpark_names,
              qualifiers,
          ) = column_mapping.get_spark_and_snowpark_columns_with_qualifier_for_qualifier(
-             name_parts[:-1]
+             target_qualifier
          )

          if len(spark_names) == 0:
@@ -74,7 +77,7 @@ def map_unresolved_star(
                  snowpark_names,
                  qualifiers,
              ) = column_mapping_for_outer_df.get_spark_and_snowpark_columns_with_qualifier_for_qualifier(
-                 name_parts[:-1]
+                 target_qualifier
              )
              if len(spark_names) > 0:
                  break
@@ -103,7 +106,7 @@ def map_unresolved_star(
              prefix_candidate_str = f"{prefix_candidate_str}.{name_parts[i]}"
          prefix_candidate = (
              column_mapping.get_snowpark_column_name_from_spark_column_name(
-                 prefix_candidate_str, allow_non_exists=True, is_qualified=(i > 0)
+                 prefix_candidate_str, allow_non_exists=True
              )
          )
          if prefix_candidate is None:
@@ -140,29 +143,86 @@ def map_unresolved_star(
              final_sql_expr,
              lambda final_sql_expr=final_sql_expr: typer.type(final_sql_expr),
          )
-         typed_column.set_multi_col_qualifiers([[] for _ in spark_names])
+         typed_column.set_multi_col_qualifiers([set() for _ in spark_names])
          return spark_names, typed_column
      else:
-         result_exp = snowpark_fn.sql_expr(
-             ", ".join(column_mapping.get_snowpark_columns())
-         )
+         snowpark_columns = column_mapping.get_snowpark_columns()
+         result_exp = snowpark_fn.sql_expr(", ".join(snowpark_columns))
          spark_names = column_mapping.get_spark_columns()
          typed_column = TypedColumn(
-             result_exp, lambda: [f.datatype for f in typer.df.schema]
+             result_exp,
+             lambda: [f.datatype for f in typer.df.schema if f.name in snowpark_columns],
          )
          typed_column.set_multi_col_qualifiers(column_mapping.get_qualifiers())
          return spark_names, typed_column

-     raise AnalysisException(
+     exception = AnalysisException(
          f"[UNRESOLVED_STAR] The unresolved star expression {exp} is not supported."
      )
+     attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+     raise exception
+
+
+ def map_unresolved_star_as_single_column(
+     exp: expressions_proto.Expression,
+     column_mapping: ColumnNameMap,
+     typer: ExpressionTyper,
+ ) -> tuple[str, TypedColumn]:
+     """
+     Similar to map_unresolved_star but returns a single tuple containing
+     a combined spark column name and a TypedColumn representing a struct instead of many columns.
+     If star resolves to single column, it works the same.
+     """
+     if exp.unresolved_star.HasField("unparsed_target"):
+         names, tc = map_unresolved_star(exp, column_mapping, typer)
+         assert len(names) == 1, "Expected single column"
+         return names[0], tc
+     else:
+         snowpark_columns = column_mapping.get_snowpark_columns()
+         spark_names = column_mapping.get_spark_columns()
+
+         if len(spark_names) == 1:
+             names, tc = map_unresolved_star(exp, column_mapping, typer)
+             return names[0], tc
+
+         fields_cols = [
+             (
+                 spark_name,
+                 TypedColumn(
+                     snowpark_fn.sql_expr(snowpark_name),
+                     lambda snowpark_name=snowpark_name: typer.type(
+                         snowpark_fn.sql_expr(snowpark_name)
+                     ),
+                 ),
+             )
+             for spark_name, snowpark_name in zip(spark_names, snowpark_columns)
+         ]
+
+         result_exp = snowpark_fn.object_construct_keep_null(
+             *[
+                 name_with_col
+                 for name, typed_col in fields_cols
+                 for name_with_col in (
+                     snowpark_fn.lit(name),
+                     typed_col.column(to_semi_structure=True),
+                 )
+             ]
+         ).cast(VariantType())
+
+         combined_spark_name = "value"
+         typed_column = TypedColumn(
+             result_exp,
+             lambda: [VariantType()],
+         )
+         typed_column.set_multi_col_qualifiers([set() for _ in spark_names])
+         return combined_spark_name, typed_column


  def map_unresolved_star_struct(
      exp: expressions_proto.Expression,
      column_mapping: ColumnNameMap,
      typer: ExpressionTyper,
- ) -> tuple[list[str], list]:
+ ) -> tuple[list[str], list[TypedColumn]]:
      unparsed_target = exp.unresolved_star.unparsed_target
      name_parts = split_fully_qualified_spark_name(unparsed_target)

@@ -170,7 +230,7 @@ def map_unresolved_star_struct(
          len(name_parts) > 1 and name_parts[-1] == "*"
      ), f"Unable to parse unparsed_target {unparsed_target}"

-     expanded_args = []
+     expanded_args: list[TypedColumn] = []
      for i in range(0, len(name_parts) - 1):
          if i == 0:
              prefix_candidate_str = name_parts[i]
@@ -181,7 +241,7 @@ def map_unresolved_star_struct(
              prefix_candidate_str = f"{prefix_candidate_str}.{name_parts[i]}"
          prefix_candidate = (
              column_mapping.get_snowpark_column_name_from_spark_column_name(
-                 prefix_candidate_str, allow_non_exists=True, is_qualified=(i > 0)
+                 prefix_candidate_str, allow_non_exists=True
              )
          )
          if prefix_candidate is None:
@@ -207,13 +267,17 @@ def map_unresolved_star_struct(
          if prefix_candidate is None:
              continue

-         spark_names = candidate_leaf_field.names
+         spark_names: list[str] = candidate_leaf_field.names
          prefix_candidate = ":".join(fields)

-         for spark_name in spark_names:
-             expanded_args.append(snowpark_fn.lit(spark_name))
-             field_snowpark_name = f"{prefix_candidate}:{spark_name}"
+         for struct_field in candidate_leaf_field.fields:
+             lit_col = snowpark_fn.lit(struct_field.name)
+             expanded_args.append(TypedColumn(lit_col, lambda: [StringType()]))
+             field_snowpark_name = f"{prefix_candidate}:{struct_field.name}"
              field_col = snowpark_fn.sql_expr(field_snowpark_name)
-             expanded_args.append(field_col)
+             field_type = struct_field.datatype
+             expanded_args.append(
+                 TypedColumn(field_col, lambda field_type=field_type: [field_type])
+             )

      return spark_names, expanded_args
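For orientation, the new map_unresolved_star_as_single_column packs every expanded column into a single VARIANT value via OBJECT_CONSTRUCT_KEEP_NULL. A minimal Snowpark sketch of that underlying pattern, not taken from the package itself (the session, data, and column names are invented):

import snowflake.snowpark.functions as snowpark_fn
from snowflake.snowpark import Session
from snowflake.snowpark.types import VariantType

# Hypothetical session and data; assumes a configured Snowflake connection.
session = Session.builder.getOrCreate()
df = session.create_dataframe([(1, "a"), (2, None)], schema=["id", "name"])

# Interleave literal field names with the column values, then cast to VARIANT,
# which is how a struct-like single "value" column can be represented in Snowflake.
packed = df.select(
    snowpark_fn.object_construct_keep_null(
        snowpark_fn.lit("id"), snowpark_fn.col("id"),
        snowpark_fn.lit("name"), snowpark_fn.col("name"),
    ).cast(VariantType()).alias("value")
)
packed.show()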
@@ -6,8 +6,17 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
  from pyspark.errors.exceptions.base import AnalysisException

  import snowflake.snowpark.functions as snowpark_fn
- from snowflake.snowpark.types import DataType, StringType, StructField, StructType
+ from snowflake.snowpark.types import (
+     DataType,
+     MapType,
+     StringType,
+     StructField,
+     StructType,
+     VariantType,
+ )
  from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.expression.typer import ExpressionTyper
  from snowflake.snowpark_connect.typed_column import TypedColumn
  from snowflake.snowpark_connect.utils.identifiers import (
@@ -39,9 +48,11 @@ def update_field_in_schema(
                      field.name, updated_subschema, field.nullable, _is_column=False
                  )
              else:
-                 raise AnalysisException(
+                 exception = AnalysisException(
                      message=f"[FIELD_NOT_FOUND] No such struct field `{field_str}` in `{field}`"
                  )
+                 attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                 raise exception
              field_updated = True
          else:
              new_field = field  # leave unchanged
@@ -59,9 +70,11 @@ def update_field_in_schema(
          # if the value type is None that means we want to drop the field and spark does not throw an error if the field does not exists
          # but if the value type is not None, it means we should add or update this field which has already been covered above
          # if we reach this code, it means the field should have existed
-         raise AnalysisException(
+         exception = AnalysisException(
              message=f"[FIELD_NOT_FOUND] No such struct field `{field_str}`"
          )
+         attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+         raise exception
      return StructType(new_fields)


@@ -99,13 +112,16 @@ def map_update_fields(
      )

      if not isinstance(struct_typed_column.typ, StructType):
-         raise AnalysisException(
+         exception = AnalysisException(
              f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "update_fields({struct_name}, ...)" due to data type mismatch: Parameter 1 requires the "STRUCT" type'
          )
+         attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+         raise exception

      final_schema = struct_typed_column.typ
      value_column_list = []
-     input_types_to_the_udf = [struct_typed_column.typ]
+     # Snowflake UDFs don't support StructType/MapType, convert to VariantType
+     input_types_to_the_udf = [VariantType()]
      update_operation_strs = []
      array_of_named_parts = []
      for field_expression, value_expression in zip(field_expressions, value_expressions):
@@ -129,7 +145,11 @@ def map_update_fields(
          )
          update_operation_strs.append(f"WithField({value_spark_name})")
          value_column_list.append(value_typed_column.col)
-         input_types_to_the_udf.append(value_typed_column.typ)
+         # Convert StructType/MapType to VariantType for Snowflake UDFs (ArrayType is supported)
+         if isinstance(value_typed_column.typ, (StructType, MapType)):
+             input_types_to_the_udf.append(VariantType())
+         else:
+             input_types_to_the_udf.append(value_typed_column.typ)

          array_of_named_parts.append(name_parts)

@@ -137,28 +157,60 @@ def map_update_fields(
      final_name = f"update_fields({struct_name}, {update_operations_str})"

      if len(final_schema.fields) == 0:
-         raise AnalysisException(
+         exception = AnalysisException(
              f'[DATATYPE_MISMATCH.CANNOT_DROP_ALL_FIELDS] Cannot resolve "{final_name}" due to data type mismatch: Cannot drop all fields in struct.'
          )
+         attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+         raise exception

+     # Snowflake UDFs don't support StructType, so we use VariantType
+     # The result will be automatically cast back to the struct type
      @snowpark_fn.udf(
          input_types=input_types_to_the_udf,
-         return_type=final_schema,
+         return_type=VariantType(),
      )
      def _update(dictionary, *array_of_value):
+         if dictionary is None:
+             return None
+
+         # Recursively copy to create mutable dict from Snowflake's VARIANT objects
+         def make_mutable_copy(obj):
+             if obj is None:
+                 return None
+             elif isinstance(obj, dict):
+                 return {k: make_mutable_copy(v) for k, v in obj.items()}
+             elif isinstance(obj, (list, tuple)):
+                 return [make_mutable_copy(item) for item in obj]
+             else:
+                 return obj
+
+         result = make_mutable_copy(dictionary)
+
          for fields_array, value in zip(array_of_named_parts, array_of_value):
-             current = dictionary
+             current = result
              for k in fields_array[:-1]:
                  current = current.get(k)
-             if value == "_SNOWPARK_CONNECT_UPDATE_FIELD_DROP_":
-                 current.pop(fields_array[-1], None)
-             else:
-                 current[fields_array[-1]] = value
-         return dictionary
+                 if current is None:
+                     break

-     final_exp = _update(
-         struct_typed_column.col,
-         *value_column_list,
-     )
+             if current is not None and isinstance(current, dict):
+                 if value == "_SNOWPARK_CONNECT_UPDATE_FIELD_DROP_":
+                     current.pop(fields_array[-1], None)
+                 else:
+                     current[fields_array[-1]] = value
+
+         return result
+
+     # Cast inputs to VARIANT (Snowflake UDFs don't support complex types directly)
+     struct_as_variant = struct_typed_column.col.cast(VariantType())
+     variant_value_list = [
+         col.cast(VariantType()) if isinstance(udf_type, VariantType) else col
+         for col, udf_type in zip(value_column_list, input_types_to_the_udf[1:])
+     ]
+
+     udf_result = _update(struct_as_variant, *variant_value_list)
+
+     # Cast the VariantType result back to the target StructType
+     final_exp = udf_result.cast(final_schema)

      return [final_name], TypedColumn(final_exp, lambda: typer.type(final_exp))
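The rewritten _update UDF above works around Snowpark Python UDFs not accepting StructType/MapType parameters or return values: inputs are cast to VARIANT, the UDF manipulates plain Python dicts, and the VARIANT result is cast back to the target struct schema. A self-contained sketch of that round trip, under the same assumptions but with invented column, field, and function names:

import snowflake.snowpark.functions as snowpark_fn
from snowflake.snowpark import Session
from snowflake.snowpark.types import (
    IntegerType,
    StringType,
    StructField,
    StructType,
    VariantType,
)

session = Session.builder.getOrCreate()  # assumes a configured Snowflake connection
df = session.sql("select object_construct('a', 1, 'b', 'x') as s")

target_schema = StructType(
    [StructField("a", IntegerType()), StructField("b", StringType())]
)

# VARIANT in, VARIANT out: inside the UDF the value is just a Python dict.
@snowpark_fn.udf(input_types=[VariantType()], return_type=VariantType())
def bump_a(obj):
    if obj is None:
        return None
    out = dict(obj)
    out["a"] = out.get("a", 0) + 1
    return out

# Cast the struct-ish column to VARIANT before the call, and cast the result back.
result = df.select(bump_a(df["s"].cast(VariantType())).cast(target_schema).alias("s2"))

The package's own UDF does a recursive copy instead of this shallow one because, per its comment, nested values arriving from Snowflake's VARIANT objects need to be rebuilt as mutable dicts before in-place updates.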
@@ -6,7 +6,11 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto

  from snowflake import snowpark
  from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
- from snowflake.snowpark_connect.error.error_utils import SparkException
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import (
+     SparkException,
+     attach_custom_error_code,
+ )
  from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
  from snowflake.snowpark_connect.expression.typer import ExpressionTyper
  from snowflake.snowpark_connect.typed_column import TypedColumn
@@ -29,6 +33,8 @@ SPARK_RANKING_FUNCTIONS = frozenset(
      ]
  )

+ RANGE_BASED_WINDOW_FRAME_ONLY_SNOWFLAKE_FUNCTIONS = frozenset(["percent_rank"])
+
  CAPITAL_FUNCTION_NAMES = frozenset(["rank()", "dense_rank()", "percent_rank()"])


@@ -128,6 +134,11 @@ def map_window_function(
          case expressions_proto.Expression.Window.WindowFrame.FrameType.FRAME_TYPE_ROW:
              frame_name.append("ROWS BETWEEN")
              frame_type_func_string = "rows_between"
+             if proto_func_name in RANGE_BASED_WINDOW_FRAME_ONLY_SNOWFLAKE_FUNCTIONS:
+                 # Seems like Snowflake and Spark have different understanding of some functions. For those,
+                 # Spark only allows rows_between while Snowflake only allows range_between. To be compatible
+                 # with Spark, we have to use range_between here.
+                 frame_type_func_string = "range_between"
              lower_name, lower = parse_frame_boundary(
                  exp.window.frame_spec.lower, is_upper=False
              )
@@ -138,9 +149,11 @@ def map_window_function(
                  lower != snowpark.Window.UNBOUNDED_PRECEDING
                  or upper != snowpark.Window.CURRENT_ROW
              ):
-                 raise SparkException.invalid_ranking_function_window_frame(
+                 exception = SparkException.invalid_ranking_function_window_frame(
                      window_frame=f"specifiedwindowframe(RowFrame, {lower_name}, {upper_name})"
                  )
+                 attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                 raise exception

              is_unbounded = (
                  lower == snowpark.Window.UNBOUNDED_PRECEDING
@@ -165,9 +178,11 @@ def map_window_function(
                  orders = orders[:1]

              if proto_func_name in SPARK_RANKING_FUNCTIONS:
-                 raise SparkException.invalid_ranking_function_window_frame(
+                 exception = SparkException.invalid_ranking_function_window_frame(
                      window_frame=f"specifiedwindowframe(RangeFrame, {lower_name}, {upper_name})"
                  )
+                 attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                 raise exception

              is_unbounded = (
                  lower == snowpark.Window.UNBOUNDED_PRECEDING
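The percent_rank special case above swaps Spark's ROWS frame for a RANGE frame because, per the inline comment, Snowflake only accepts PERCENT_RANK over a range-based window frame. A rough Snowpark illustration of the frame that ends up being generated (session, table, and column names are invented):

import snowflake.snowpark.functions as snowpark_fn
from snowflake.snowpark import Session, Window

session = Session.builder.getOrCreate()  # assumes a configured Snowflake connection
df = session.create_dataframe(
    [("eng", 100), ("eng", 200), ("sales", 150)], schema=["dept", "salary"]
)

# Where Spark would emit ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW,
# the Snowflake side uses range_between for percent_rank.
win = (
    Window.partition_by("dept")
    .order_by("salary")
    .range_between(Window.UNBOUNDED_PRECEDING, Window.CURRENT_ROW)
)
df.select("dept", "salary", snowpark_fn.percent_rank().over(win).alias("pr")).show()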
@@ -49,7 +49,7 @@ def main(infile: IO, outfile: IO) -> None:
      )

      spark_connect_session = SparkSession.builder.remote(connect_url).getOrCreate()
-     spark_connect_session._client._session_id = session_id # type: ignore[attr-defined]
+     spark_connect_session._client.session_id = session_id # type: ignore[attr-defined]

      # TODO(SPARK-44460): Pass credentials.
      # TODO(SPARK-44461): Enable Process Isolation
@@ -57,7 +57,7 @@
      )

      spark_connect_session = SparkSession.builder.remote(connect_url).getOrCreate()
-     spark_connect_session._client._session_id = session_id # type: ignore[attr-defined]
+     spark_connect_session._client.session_id = session_id # type: ignore[attr-defined]

      # TODO(SPARK-44460): Pass credentials.
      # TODO(SPARK-44461): Enable Process Isolation
@@ -16,7 +16,7 @@ from pyspark.sql.connect.proto import expressions_pb2 as spark_dot_connect_dot_e
  from pyspark.sql.connect.proto import relations_pb2 as spark_dot_connect_dot_relations__pb2


- DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1esnowflake_expression_ext.proto\x12\rsnowflake.ext\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\"\x98\x01\n\x0c\x45xpExtension\x12@\n\x0enamed_argument\x18\x01 \x01(\x0b\x32&.snowflake.ext.NamedArgumentExpressionH\x00\x12@\n\x13subquery_expression\x18\x02 \x01(\x0b\x32!.snowflake.ext.SubqueryExpressionH\x00\x42\x04\n\x02op\"P\n\x17NamedArgumentExpression\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.Expression\"\xf4\x04\n\x12SubqueryExpression\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x45\n\rsubquery_type\x18\x02 \x01(\x0e\x32..snowflake.ext.SubqueryExpression.SubqueryType\x12Q\n\x11table_arg_options\x18\x03 \x01(\x0b\x32\x31.snowflake.ext.SubqueryExpression.TableArgOptionsH\x00\x88\x01\x01\x12\x35\n\x12in_subquery_values\x18\x04 \x03(\x0b\x32\x19.spark.connect.Expression\x1a\xbb\x01\n\x0fTableArgOptions\x12\x31\n\x0epartition_spec\x18\x01 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x37\n\norder_spec\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrder\x12\"\n\x15with_single_partition\x18\x03 \x01(\x08H\x00\x88\x01\x01\x42\x18\n\x16_with_single_partition\"\x90\x01\n\x0cSubqueryType\x12\x19\n\x15SUBQUERY_TYPE_UNKNOWN\x10\x00\x12\x18\n\x14SUBQUERY_TYPE_SCALAR\x10\x01\x12\x18\n\x14SUBQUERY_TYPE_EXISTS\x10\x02\x12\x1b\n\x17SUBQUERY_TYPE_TABLE_ARG\x10\x03\x12\x14\n\x10SUBQUERY_TYPE_IN\x10\x04\x42\x14\n\x12_table_arg_optionsb\x06proto3')
+ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1esnowflake_expression_ext.proto\x12\rsnowflake.ext\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\"\xde\x01\n\x0c\x45xpExtension\x12@\n\x0enamed_argument\x18\x01 \x01(\x0b\x32&.snowflake.ext.NamedArgumentExpressionH\x00\x12@\n\x13subquery_expression\x18\x02 \x01(\x0b\x32!.snowflake.ext.SubqueryExpressionH\x00\x12\x44\n\x10interval_literal\x18\x03 \x01(\x0b\x32(.snowflake.ext.IntervalLiteralExpressionH\x00\x42\x04\n\x02op\"P\n\x17NamedArgumentExpression\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.Expression\"\xf4\x04\n\x12SubqueryExpression\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x45\n\rsubquery_type\x18\x02 \x01(\x0e\x32..snowflake.ext.SubqueryExpression.SubqueryType\x12Q\n\x11table_arg_options\x18\x03 \x01(\x0b\x32\x31.snowflake.ext.SubqueryExpression.TableArgOptionsH\x00\x88\x01\x01\x12\x35\n\x12in_subquery_values\x18\x04 \x03(\x0b\x32\x19.spark.connect.Expression\x1a\xbb\x01\n\x0fTableArgOptions\x12\x31\n\x0epartition_spec\x18\x01 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x37\n\norder_spec\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrder\x12\"\n\x15with_single_partition\x18\x03 \x01(\x08H\x00\x88\x01\x01\x42\x18\n\x16_with_single_partition\"\x90\x01\n\x0cSubqueryType\x12\x19\n\x15SUBQUERY_TYPE_UNKNOWN\x10\x00\x12\x18\n\x14SUBQUERY_TYPE_SCALAR\x10\x01\x12\x18\n\x14SUBQUERY_TYPE_EXISTS\x10\x02\x12\x1b\n\x17SUBQUERY_TYPE_TABLE_ARG\x10\x03\x12\x14\n\x10SUBQUERY_TYPE_IN\x10\x04\x42\x14\n\x12_table_arg_options\"\x9f\x01\n\x19IntervalLiteralExpression\x12\x32\n\x07literal\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.Literal\x12\x18\n\x0bstart_field\x18\x02 \x01(\x05H\x00\x88\x01\x01\x12\x16\n\tend_field\x18\x03 \x01(\x05H\x01\x88\x01\x01\x42\x0e\n\x0c_start_fieldB\x0c\n\n_end_fieldb\x06proto3')

  _globals = globals()
  _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@@ -24,13 +24,15 @@ _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'snowflake_expression_ext_pb
  if _descriptor._USE_C_DESCRIPTORS == False:
    DESCRIPTOR._options = None
    _globals['_EXPEXTENSION']._serialized_start=114
-   _globals['_EXPEXTENSION']._serialized_end=266
-   _globals['_NAMEDARGUMENTEXPRESSION']._serialized_start=268
-   _globals['_NAMEDARGUMENTEXPRESSION']._serialized_end=348
-   _globals['_SUBQUERYEXPRESSION']._serialized_start=351
-   _globals['_SUBQUERYEXPRESSION']._serialized_end=979
-   _globals['_SUBQUERYEXPRESSION_TABLEARGOPTIONS']._serialized_start=623
-   _globals['_SUBQUERYEXPRESSION_TABLEARGOPTIONS']._serialized_end=810
-   _globals['_SUBQUERYEXPRESSION_SUBQUERYTYPE']._serialized_start=813
-   _globals['_SUBQUERYEXPRESSION_SUBQUERYTYPE']._serialized_end=957
+   _globals['_EXPEXTENSION']._serialized_end=336
+   _globals['_NAMEDARGUMENTEXPRESSION']._serialized_start=338
+   _globals['_NAMEDARGUMENTEXPRESSION']._serialized_end=418
+   _globals['_SUBQUERYEXPRESSION']._serialized_start=421
+   _globals['_SUBQUERYEXPRESSION']._serialized_end=1049
+   _globals['_SUBQUERYEXPRESSION_TABLEARGOPTIONS']._serialized_start=693
+   _globals['_SUBQUERYEXPRESSION_TABLEARGOPTIONS']._serialized_end=880
+   _globals['_SUBQUERYEXPRESSION_SUBQUERYTYPE']._serialized_start=883
+   _globals['_SUBQUERYEXPRESSION_SUBQUERYTYPE']._serialized_end=1027
+   _globals['_INTERVALLITERALEXPRESSION']._serialized_start=1052
+   _globals['_INTERVALLITERALEXPRESSION']._serialized_end=1211
  # @@protoc_insertion_point(module_scope)
@@ -9,12 +9,14 @@ from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Map
  DESCRIPTOR: _descriptor.FileDescriptor

  class ExpExtension(_message.Message):
-     __slots__ = ("named_argument", "subquery_expression")
+     __slots__ = ("named_argument", "subquery_expression", "interval_literal")
      NAMED_ARGUMENT_FIELD_NUMBER: _ClassVar[int]
      SUBQUERY_EXPRESSION_FIELD_NUMBER: _ClassVar[int]
+     INTERVAL_LITERAL_FIELD_NUMBER: _ClassVar[int]
      named_argument: NamedArgumentExpression
      subquery_expression: SubqueryExpression
-     def __init__(self, named_argument: _Optional[_Union[NamedArgumentExpression, _Mapping]] = ..., subquery_expression: _Optional[_Union[SubqueryExpression, _Mapping]] = ...) -> None: ...
+     interval_literal: IntervalLiteralExpression
+     def __init__(self, named_argument: _Optional[_Union[NamedArgumentExpression, _Mapping]] = ..., subquery_expression: _Optional[_Union[SubqueryExpression, _Mapping]] = ..., interval_literal: _Optional[_Union[IntervalLiteralExpression, _Mapping]] = ...) -> None: ...

  class NamedArgumentExpression(_message.Message):
      __slots__ = ("key", "value")
@@ -56,3 +58,13 @@ class SubqueryExpression(_message.Message):
      table_arg_options: SubqueryExpression.TableArgOptions
      in_subquery_values: _containers.RepeatedCompositeFieldContainer[_expressions_pb2.Expression]
      def __init__(self, input: _Optional[_Union[_relations_pb2.Relation, _Mapping]] = ..., subquery_type: _Optional[_Union[SubqueryExpression.SubqueryType, str]] = ..., table_arg_options: _Optional[_Union[SubqueryExpression.TableArgOptions, _Mapping]] = ..., in_subquery_values: _Optional[_Iterable[_Union[_expressions_pb2.Expression, _Mapping]]] = ...) -> None: ...
+
+ class IntervalLiteralExpression(_message.Message):
+     __slots__ = ("literal", "start_field", "end_field")
+     LITERAL_FIELD_NUMBER: _ClassVar[int]
+     START_FIELD_FIELD_NUMBER: _ClassVar[int]
+     END_FIELD_FIELD_NUMBER: _ClassVar[int]
+     literal: _expressions_pb2.Expression.Literal
+     start_field: int
+     end_field: int
+     def __init__(self, literal: _Optional[_Union[_expressions_pb2.Expression.Literal, _Mapping]] = ..., start_field: _Optional[int] = ..., end_field: _Optional[int] = ...) -> None: ...
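The new IntervalLiteralExpression wraps an ordinary Spark Connect literal plus optional start/end field markers. A small sketch of constructing it through the generated classes (the literal value and field index are illustrative only; their semantics are not defined by the stub above):

import pyspark.sql.connect.proto.expressions_pb2 as expressions_pb2
from snowflake.snowpark_connect.proto import snowflake_expression_ext_pb2 as ext_pb2

# e.g. something like INTERVAL '3' DAY carried through the extension message
interval = ext_pb2.IntervalLiteralExpression(
    literal=expressions_pb2.Expression.Literal(integer=3),
    start_field=0,  # illustrative value; the stub only declares plain ints here
)
ext = ext_pb2.ExpExtension(interval_literal=interval)
print(ext)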
@@ -16,7 +16,7 @@ from pyspark.sql.connect.proto import relations_pb2 as spark_dot_connect_dot_rel
  from pyspark.sql.connect.proto import expressions_pb2 as spark_dot_connect_dot_expressions__pb2


- DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1csnowflake_relation_ext.proto\x12\rsnowflake.ext\x1a\x1dspark/connect/relations.proto\x1a\x1fspark/connect/expressions.proto\"\xe3\x02\n\tExtension\x12(\n\x07rdd_map\x18\x01 \x01(\x0b\x32\x15.snowflake.ext.RddMapH\x00\x12.\n\nrdd_reduce\x18\x02 \x01(\x0b\x32\x18.snowflake.ext.RddReduceH\x00\x12G\n\x17subquery_column_aliases\x18\x03 \x01(\x0b\x32$.snowflake.ext.SubqueryColumnAliasesH\x00\x12\x32\n\x0clateral_join\x18\x04 \x01(\x0b\x32\x1a.snowflake.ext.LateralJoinH\x00\x12J\n\x19udtf_with_table_arguments\x18\x05 \x01(\x0b\x32%.snowflake.ext.UDTFWithTableArgumentsH\x00\x12-\n\taggregate\x18\x06 \x01(\x0b\x32\x18.snowflake.ext.AggregateH\x00\x42\x04\n\x02op\">\n\x06RddMap\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x66unc\x18\x02 \x01(\x0c\"A\n\tRddReduce\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x66unc\x18\x02 \x01(\x0c\"P\n\x15SubqueryColumnAliases\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0f\n\x07\x61liases\x18\x02 \x03(\t\"\\\n\x0bLateralJoin\x12%\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12&\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.Relation\"\x98\x01\n\x16UDTFWithTableArguments\x12\x15\n\rfunction_name\x18\x01 \x01(\t\x12,\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x39\n\x0ftable_arguments\x18\x03 \x03(\x0b\x32 .snowflake.ext.TableArgumentInfo\"`\n\x11TableArgumentInfo\x12/\n\x0etable_argument\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x1a\n\x12table_argument_idx\x18\x02 \x01(\x05\"\xc7\x05\n\tAggregate\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x36\n\ngroup_type\x18\x02 \x01(\x0e\x32\".snowflake.ext.Aggregate.GroupType\x12\x37\n\x14grouping_expressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x38\n\x15\x61ggregate_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.Expression\x12-\n\x05pivot\x18\x05 \x01(\x0b\x32\x1e.snowflake.ext.Aggregate.Pivot\x12<\n\rgrouping_sets\x18\x06 \x03(\x0b\x32%.snowflake.ext.Aggregate.GroupingSets\x12\x33\n\x10having_condition\x18\x07 \x01(\x0b\x32\x19.spark.connect.Expression\x1a\x62\n\x05Pivot\x12&\n\x03\x63ol\x18\x01 \x01(\x0b\x32\x19.spark.connect.Expression\x12\x31\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.Literal\x1a?\n\x0cGroupingSets\x12/\n\x0cgrouping_set\x18\x01 \x03(\x0b\x32\x19.spark.connect.Expression\"\x9f\x01\n\tGroupType\x12\x1a\n\x16GROUP_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12GROUP_TYPE_GROUPBY\x10\x01\x12\x15\n\x11GROUP_TYPE_ROLLUP\x10\x02\x12\x13\n\x0fGROUP_TYPE_CUBE\x10\x03\x12\x14\n\x10GROUP_TYPE_PIVOT\x10\x04\x12\x1c\n\x18GROUP_TYPE_GROUPING_SETS\x10\x05\x62\x06proto3')
+ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1csnowflake_relation_ext.proto\x12\rsnowflake.ext\x1a\x1dspark/connect/relations.proto\x1a\x1fspark/connect/expressions.proto\"\xe3\x02\n\tExtension\x12(\n\x07rdd_map\x18\x01 \x01(\x0b\x32\x15.snowflake.ext.RddMapH\x00\x12.\n\nrdd_reduce\x18\x02 \x01(\x0b\x32\x18.snowflake.ext.RddReduceH\x00\x12G\n\x17subquery_column_aliases\x18\x03 \x01(\x0b\x32$.snowflake.ext.SubqueryColumnAliasesH\x00\x12\x32\n\x0clateral_join\x18\x04 \x01(\x0b\x32\x1a.snowflake.ext.LateralJoinH\x00\x12J\n\x19udtf_with_table_arguments\x18\x05 \x01(\x0b\x32%.snowflake.ext.UDTFWithTableArgumentsH\x00\x12-\n\taggregate\x18\x06 \x01(\x0b\x32\x18.snowflake.ext.AggregateH\x00\x42\x04\n\x02op\">\n\x06RddMap\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x66unc\x18\x02 \x01(\x0c\"A\n\tRddReduce\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0c\n\x04\x66unc\x18\x02 \x01(\x0c\"P\n\x15SubqueryColumnAliases\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x0f\n\x07\x61liases\x18\x02 \x03(\t\"\\\n\x0bLateralJoin\x12%\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12&\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.Relation\"\x98\x01\n\x16UDTFWithTableArguments\x12\x15\n\rfunction_name\x18\x01 \x01(\t\x12,\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x39\n\x0ftable_arguments\x18\x03 \x03(\x0b\x32 .snowflake.ext.TableArgumentInfo\"`\n\x11TableArgumentInfo\x12/\n\x0etable_argument\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x1a\n\x12table_argument_idx\x18\x02 \x01(\x05\"\xbf\x06\n\tAggregate\x12&\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.Relation\x12\x36\n\ngroup_type\x18\x02 \x01(\x0e\x32\".snowflake.ext.Aggregate.GroupType\x12\x37\n\x14grouping_expressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.Expression\x12\x38\n\x15\x61ggregate_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.Expression\x12-\n\x05pivot\x18\x05 \x01(\x0b\x32\x1e.snowflake.ext.Aggregate.Pivot\x12<\n\rgrouping_sets\x18\x06 \x03(\x0b\x32%.snowflake.ext.Aggregate.GroupingSets\x12\x33\n\x10having_condition\x18\x07 \x01(\x0b\x32\x19.spark.connect.Expression\x1a\xd9\x01\n\x05Pivot\x12\x30\n\rpivot_columns\x18\x01 \x03(\x0b\x32\x19.spark.connect.Expression\x12?\n\x0cpivot_values\x18\x02 \x03(\x0b\x32).snowflake.ext.Aggregate.Pivot.PivotValue\x1a]\n\nPivotValue\x12\x31\n\x06values\x18\x01 \x03(\x0b\x32!.spark.connect.Expression.Literal\x12\x12\n\x05\x61lias\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_alias\x1a?\n\x0cGroupingSets\x12/\n\x0cgrouping_set\x18\x01 \x03(\x0b\x32\x19.spark.connect.Expression\"\x9f\x01\n\tGroupType\x12\x1a\n\x16GROUP_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12GROUP_TYPE_GROUPBY\x10\x01\x12\x15\n\x11GROUP_TYPE_ROLLUP\x10\x02\x12\x13\n\x0fGROUP_TYPE_CUBE\x10\x03\x12\x14\n\x10GROUP_TYPE_PIVOT\x10\x04\x12\x1c\n\x18GROUP_TYPE_GROUPING_SETS\x10\x05\x62\x06proto3')

  _globals = globals()
  _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@@ -38,11 +38,13 @@ if _descriptor._USE_C_DESCRIPTORS == False:
    _globals['_TABLEARGUMENTINFO']._serialized_start=931
    _globals['_TABLEARGUMENTINFO']._serialized_end=1027
    _globals['_AGGREGATE']._serialized_start=1030
-   _globals['_AGGREGATE']._serialized_end=1741
-   _globals['_AGGREGATE_PIVOT']._serialized_start=1416
-   _globals['_AGGREGATE_PIVOT']._serialized_end=1514
-   _globals['_AGGREGATE_GROUPINGSETS']._serialized_start=1516
-   _globals['_AGGREGATE_GROUPINGSETS']._serialized_end=1579
-   _globals['_AGGREGATE_GROUPTYPE']._serialized_start=1582
-   _globals['_AGGREGATE_GROUPTYPE']._serialized_end=1741
+   _globals['_AGGREGATE']._serialized_end=1861
+   _globals['_AGGREGATE_PIVOT']._serialized_start=1417
+   _globals['_AGGREGATE_PIVOT']._serialized_end=1634
+   _globals['_AGGREGATE_PIVOT_PIVOTVALUE']._serialized_start=1541
+   _globals['_AGGREGATE_PIVOT_PIVOTVALUE']._serialized_end=1634
+   _globals['_AGGREGATE_GROUPINGSETS']._serialized_start=1636
+   _globals['_AGGREGATE_GROUPINGSETS']._serialized_end=1699
+   _globals['_AGGREGATE_GROUPTYPE']._serialized_start=1702
+   _globals['_AGGREGATE_GROUPTYPE']._serialized_end=1861
  # @@protoc_insertion_point(module_scope)
@@ -91,12 +91,19 @@ class Aggregate(_message.Message):
      GROUP_TYPE_PIVOT: Aggregate.GroupType
      GROUP_TYPE_GROUPING_SETS: Aggregate.GroupType
      class Pivot(_message.Message):
-         __slots__ = ("col", "values")
-         COL_FIELD_NUMBER: _ClassVar[int]
-         VALUES_FIELD_NUMBER: _ClassVar[int]
-         col: _expressions_pb2.Expression
-         values: _containers.RepeatedCompositeFieldContainer[_expressions_pb2.Expression.Literal]
-         def __init__(self, col: _Optional[_Union[_expressions_pb2.Expression, _Mapping]] = ..., values: _Optional[_Iterable[_Union[_expressions_pb2.Expression.Literal, _Mapping]]] = ...) -> None: ...
+         __slots__ = ("pivot_columns", "pivot_values")
+         class PivotValue(_message.Message):
+             __slots__ = ("values", "alias")
+             VALUES_FIELD_NUMBER: _ClassVar[int]
+             ALIAS_FIELD_NUMBER: _ClassVar[int]
+             values: _containers.RepeatedCompositeFieldContainer[_expressions_pb2.Expression.Literal]
+             alias: str
+             def __init__(self, values: _Optional[_Iterable[_Union[_expressions_pb2.Expression.Literal, _Mapping]]] = ..., alias: _Optional[str] = ...) -> None: ...
+         PIVOT_COLUMNS_FIELD_NUMBER: _ClassVar[int]
+         PIVOT_VALUES_FIELD_NUMBER: _ClassVar[int]
+         pivot_columns: _containers.RepeatedCompositeFieldContainer[_expressions_pb2.Expression]
+         pivot_values: _containers.RepeatedCompositeFieldContainer[Aggregate.Pivot.PivotValue]
+         def __init__(self, pivot_columns: _Optional[_Iterable[_Union[_expressions_pb2.Expression, _Mapping]]] = ..., pivot_values: _Optional[_Iterable[_Union[Aggregate.Pivot.PivotValue, _Mapping]]] = ...) -> None: ...
      class GroupingSets(_message.Message):
          __slots__ = ("grouping_set",)
          GROUPING_SET_FIELD_NUMBER: _ClassVar[int]
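The reworked Pivot message now carries a list of pivot columns and aliased value tuples instead of a single column/values pair. A hedged sketch of building one with the generated classes (the identifiers and values below are invented for illustration):

import pyspark.sql.connect.proto.expressions_pb2 as expressions_pb2
from snowflake.snowpark_connect.proto import snowflake_relation_ext_pb2 as rel_ext_pb2

pivot = rel_ext_pb2.Aggregate.Pivot(
    pivot_columns=[
        expressions_pb2.Expression(
            unresolved_attribute=expressions_pb2.Expression.UnresolvedAttribute(
                unparsed_identifier="quarter"
            )
        )
    ],
    pivot_values=[
        rel_ext_pb2.Aggregate.Pivot.PivotValue(
            values=[expressions_pb2.Expression.Literal(string="Q1")],
            alias="first_quarter",
        )
    ],
)
print(pivot)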