snowpark-connect 0.27.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
  3. snowflake/snowpark_connect/client/__init__.py +15 -0
  4. snowflake/snowpark_connect/client/error_utils.py +30 -0
  5. snowflake/snowpark_connect/client/exceptions.py +36 -0
  6. snowflake/snowpark_connect/client/query_results.py +90 -0
  7. snowflake/snowpark_connect/client/server.py +717 -0
  8. snowflake/snowpark_connect/client/utils/__init__.py +10 -0
  9. snowflake/snowpark_connect/client/utils/session.py +85 -0
  10. snowflake/snowpark_connect/column_name_handler.py +404 -243
  11. snowflake/snowpark_connect/column_qualifier.py +43 -0
  12. snowflake/snowpark_connect/config.py +309 -26
  13. snowflake/snowpark_connect/constants.py +2 -0
  14. snowflake/snowpark_connect/dataframe_container.py +102 -8
  15. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  16. snowflake/snowpark_connect/error/error_codes.py +50 -0
  17. snowflake/snowpark_connect/error/error_utils.py +172 -23
  18. snowflake/snowpark_connect/error/exceptions.py +13 -4
  19. snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
  20. snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
  21. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  22. snowflake/snowpark_connect/expression/error_utils.py +28 -0
  23. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  24. snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
  25. snowflake/snowpark_connect/expression/integral_types_support.py +219 -0
  26. snowflake/snowpark_connect/expression/literal.py +37 -13
  27. snowflake/snowpark_connect/expression/map_cast.py +224 -15
  28. snowflake/snowpark_connect/expression/map_expression.py +80 -27
  29. snowflake/snowpark_connect/expression/map_extension.py +322 -12
  30. snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
  31. snowflake/snowpark_connect/expression/map_udf.py +86 -20
  32. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
  33. snowflake/snowpark_connect/expression/map_unresolved_function.py +2964 -829
  34. snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
  35. snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
  36. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  37. snowflake/snowpark_connect/includes/jars/json4s-ast_2.13-3.7.0-M11.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
  39. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.13-0.2.0.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/scala-reflect-2.13.16.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.13-3.5.6.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/{spark-connect-client-jvm_2.12-3.5.6.jar → spark-connect-client-jvm_2.13-3.5.6.jar} +0 -0
  43. snowflake/snowpark_connect/includes/jars/{spark-sql_2.12-3.5.6.jar → spark-sql_2.13-3.5.6.jar} +0 -0
  44. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
  45. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
  46. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
  47. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
  48. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
  49. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
  50. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  51. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
  52. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  53. snowflake/snowpark_connect/relation/io_utils.py +110 -10
  54. snowflake/snowpark_connect/relation/map_aggregate.py +239 -256
  55. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  56. snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
  57. snowflake/snowpark_connect/relation/map_extension.py +263 -29
  58. snowflake/snowpark_connect/relation/map_join.py +683 -442
  59. snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
  60. snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
  61. snowflake/snowpark_connect/relation/map_relation.py +48 -19
  62. snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
  63. snowflake/snowpark_connect/relation/map_show_string.py +13 -6
  64. snowflake/snowpark_connect/relation/map_sql.py +1233 -222
  65. snowflake/snowpark_connect/relation/map_stats.py +48 -9
  66. snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
  67. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  68. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
  69. snowflake/snowpark_connect/relation/read/map_read.py +134 -43
  70. snowflake/snowpark_connect/relation/read/map_read_csv.py +326 -47
  71. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +21 -6
  72. snowflake/snowpark_connect/relation/read/map_read_json.py +324 -86
  73. snowflake/snowpark_connect/relation/read/map_read_parquet.py +146 -28
  74. snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
  75. snowflake/snowpark_connect/relation/read/map_read_socket.py +15 -3
  76. snowflake/snowpark_connect/relation/read/map_read_table.py +86 -6
  77. snowflake/snowpark_connect/relation/read/map_read_text.py +22 -4
  78. snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
  79. snowflake/snowpark_connect/relation/read/reader_config.py +42 -3
  80. snowflake/snowpark_connect/relation/read/utils.py +50 -5
  81. snowflake/snowpark_connect/relation/stage_locator.py +91 -55
  82. snowflake/snowpark_connect/relation/utils.py +128 -5
  83. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  84. snowflake/snowpark_connect/relation/write/map_write.py +929 -319
  85. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  86. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  87. snowflake/snowpark_connect/resources_initializer.py +171 -48
  88. snowflake/snowpark_connect/server.py +528 -473
  89. snowflake/snowpark_connect/server_common/__init__.py +503 -0
  90. snowflake/snowpark_connect/snowflake_session.py +65 -0
  91. snowflake/snowpark_connect/start_server.py +53 -5
  92. snowflake/snowpark_connect/type_mapping.py +349 -27
  93. snowflake/snowpark_connect/type_support.py +130 -0
  94. snowflake/snowpark_connect/typed_column.py +9 -7
  95. snowflake/snowpark_connect/utils/artifacts.py +9 -8
  96. snowflake/snowpark_connect/utils/cache.py +49 -27
  97. snowflake/snowpark_connect/utils/concurrent.py +36 -1
  98. snowflake/snowpark_connect/utils/context.py +195 -37
  99. snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
  100. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  101. snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
  102. snowflake/snowpark_connect/utils/identifiers.py +137 -3
  103. snowflake/snowpark_connect/utils/io_utils.py +57 -1
  104. snowflake/snowpark_connect/utils/java_stored_procedure.py +151 -0
  105. snowflake/snowpark_connect/utils/java_udaf_utils.py +321 -0
  106. snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
  107. snowflake/snowpark_connect/utils/jvm_udf_utils.py +281 -0
  108. snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
  109. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  110. snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
  111. snowflake/snowpark_connect/utils/profiling.py +25 -8
  112. snowflake/snowpark_connect/utils/scala_udf_utils.py +185 -340
  113. snowflake/snowpark_connect/utils/sequence.py +21 -0
  114. snowflake/snowpark_connect/utils/session.py +64 -28
  115. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
  116. snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
  117. snowflake/snowpark_connect/utils/telemetry.py +192 -40
  118. snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
  119. snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
  120. snowflake/snowpark_connect/utils/udf_cache.py +117 -41
  121. snowflake/snowpark_connect/utils/udf_helper.py +39 -37
  122. snowflake/snowpark_connect/utils/udf_utils.py +133 -14
  123. snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
  124. snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
  125. snowflake/snowpark_connect/utils/udxf_import_utils.py +9 -2
  126. snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
  127. snowflake/snowpark_connect/version.py +1 -1
  128. snowflake/snowpark_decoder/dp_session.py +6 -2
  129. snowflake/snowpark_decoder/spark_decoder.py +12 -0
  130. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-submit +14 -4
  131. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/METADATA +16 -7
  132. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/RECORD +139 -168
  133. snowflake/snowpark_connect/hidden_column.py +0 -39
  134. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  135. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  136. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  137. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  138. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  139. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  140. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  141. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  142. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  143. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  144. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  145. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  146. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  147. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  148. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  149. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  150. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  151. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  152. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  153. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  154. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  155. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  156. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  157. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  158. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  159. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  160. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  161. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  162. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  163. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  164. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  165. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  166. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  167. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  168. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  169. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  170. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  171. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  172. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  173. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  174. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  175. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  176. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  177. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  178. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  179. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  180. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  181. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  182. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  183. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  184. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  185. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  186. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  187. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  188. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  189. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  190. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  191. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  192. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  193. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  194. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-connect +0 -0
  195. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-session +0 -0
  196. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/WHEEL +0 -0
  197. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE-binary +0 -0
  198. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
  199. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/NOTICE-binary +0 -0
  200. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/execute_plan/map_execution_root.py

@@ -21,11 +21,16 @@ from snowflake.snowpark._internal.utils import (
 )
 from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.execute_plan.utils import (
     arrow_table_to_arrow_bytes,
     pandas_to_arrow_batches_bytes,
 )
 from snowflake.snowpark_connect.relation.map_relation import map_relation
+from snowflake.snowpark_connect.relation.read.metadata_utils import (
+    without_internal_columns,
+)
 from snowflake.snowpark_connect.type_mapping import (
     map_snowpark_types_to_pyarrow_types,
     snowpark_to_proto_type,
@@ -53,7 +58,9 @@ def sproc_connector_fetch_arrow_batches_fix(self) -> Iterator[Table]:
     if self._prefetch_hook is not None:
         self._prefetch_hook()
     if self._query_result_format != "arrow":
-        raise NotSupportedError
+        exception = NotSupportedError()
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
     return self._result_set._fetch_arrow_batches()


@@ -92,14 +99,19 @@ def map_execution_root(
 ) -> Iterator[proto_base.ExecutePlanResponse | QueryResult]:
     result: DataFrameContainer | pandas.DataFrame = map_relation(request.plan.root)
     if isinstance(result, pandas.DataFrame):
-        result_df = result
+        pandas_df = result
+        data_bytes = pandas_to_arrow_batches_bytes(pandas_df)
+        row_count = len(pandas_df)
+        schema = None
+        yield _build_execute_plan_response(row_count, data_bytes, schema, request)
     else:
-        result_df = result.dataframe
-
-    if isinstance(result_df, snowpark.DataFrame):
-        snowpark_schema = result_df.schema
-        schema = snowpark_to_proto_type(snowpark_schema, result.column_map, result_df)
-        spark_columns = result.column_map.get_spark_columns()
+        filtered_result = without_internal_columns(result)
+        filtered_result_df = filtered_result.dataframe
+        snowpark_schema = filtered_result_df.schema
+        schema = snowpark_to_proto_type(
+            snowpark_schema, filtered_result.column_map, filtered_result_df
+        )
+        spark_columns = filtered_result.column_map.get_spark_columns()
         if tcm.TCM_MODE:
             # TCM result handling:
             # - small result (only one batch): just return the executePlanResponse
@@ -108,22 +120,22 @@ def map_execution_root(
             is_large_result = False
             second_batch = False
             first_arrow_table = None
-            with result_df.session.query_history() as qh:
-                for arrow_table in to_arrow_batch_iter(result_df):
+            with filtered_result_df.session.query_history() as qh:
+                for arrow_table in to_arrow_batch_iter(filtered_result_df):
                     if second_batch:
                         is_large_result = True
                         break
                     first_arrow_table = arrow_table
                     second_batch = True
                 queries_cnt = len(
-                    result_df._plan.execution_queries[PlanQueryType.QUERIES]
+                    filtered_result_df._plan.execution_queries[PlanQueryType.QUERIES]
                 )
             # get query uuid from the last query; this may not be the last queries in query history because snowpark
             # may run some post action queries, e.g., drop temp table.
             query_id = qh.queries[queries_cnt - 1].query_id
             if first_arrow_table is None:
                 # empty arrow batch iterator
-                pandas_df = result_df.to_pandas()
+                pandas_df = filtered_result_df.to_pandas()
                 data_bytes = pandas_to_arrow_batches_bytes(pandas_df)
                 yield _build_execute_plan_response(0, data_bytes, schema, request)
             elif not tcm.TCM_RETURN_QUERY_ID_FOR_SMALL_RESULT and not is_large_result:
@@ -150,7 +162,7 @@ def map_execution_root(
                     spark_schema.SerializeToString(),
                 )
         else:
-            arrow_table_iter = to_arrow_batch_iter(result_df)
+            arrow_table_iter = to_arrow_batch_iter(filtered_result_df)
            batch_count = 0
            for arrow_table in arrow_table_iter:
                if arrow_table.num_rows > 0:
@@ -166,12 +178,6 @@ def map_execution_root(

            # Empty result needs special processing
            if batch_count == 0:
-                pandas_df = result_df.to_pandas()
+                pandas_df = filtered_result_df.to_pandas()
                data_bytes = pandas_to_arrow_batches_bytes(pandas_df)
                yield _build_execute_plan_response(0, data_bytes, schema, request)
-    else:
-        pandas_df = result_df
-        data_bytes = pandas_to_arrow_batches_bytes(pandas_df)
-        row_count = len(pandas_df)
-        schema = None
-        yield _build_execute_plan_response(row_count, data_bytes, schema, request)
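Note: the rewritten map_execution_root now answers pandas results immediately with a single Arrow-encoded response and strips internal metadata columns from Snowpark DataFrames before deriving the proto schema. The sketch below (not the package's implementation; the function name is hypothetical) shows roughly what converting a pandas DataFrame into Arrow IPC stream bytes involves, which is the job pandas_to_arrow_batches_bytes is used for here.

    import io

    import pandas as pd
    import pyarrow as pa


    def pandas_to_arrow_stream_bytes(df: pd.DataFrame) -> bytes:
        # Hypothetical helper: encode a pandas DataFrame as an Arrow IPC stream,
        # the kind of payload an ExecutePlanResponse arrow batch carries.
        table = pa.Table.from_pandas(df)
        sink = io.BytesIO()
        with pa.ipc.new_stream(sink, table.schema) as writer:
            writer.write_table(table)
        return sink.getvalue()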
snowflake/snowpark_connect/execute_plan/utils.py

@@ -8,6 +8,8 @@ import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 from pyspark.sql.pandas.types import _dedup_names

 from snowflake.snowpark import types as sf_types
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.type_mapping import map_snowpark_types_to_pyarrow_types
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
@@ -88,9 +90,11 @@ def is_streaming(rel: relation_proto.Relation) -> bool:
         case "html_string":
             return is_streaming(rel.html_string.input)
         case "cached_remote_relation":
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Cached remote relation not implemented"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         case "common_inline_user_defined_table_function":
             return is_streaming(rel.common_inline_user_defined_table_function.input)
         case "fill_na":
snowflake/snowpark_connect/expression/error_utils.py (new file)

@@ -0,0 +1,28 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+import snowflake.snowpark.functions as snowpark_fn
+from snowflake.snowpark.column import Column
+from snowflake.snowpark.types import DataType, StringType
+
+
+def raise_error_helper(return_type: DataType, error_class=None):
+    error_class_str = (
+        f":{error_class.__name__}"
+        if error_class and hasattr(error_class, "__name__")
+        else ""
+    )
+
+    def _raise_fn(*msgs: Column) -> Column:
+        return snowpark_fn.cast(
+            snowpark_fn.abs(
+                snowpark_fn.concat(
+                    snowpark_fn.lit(f"[snowpark-connect-exception{error_class_str}]"),
+                    *(msg.try_cast(StringType()) for msg in msgs),
+                )
+            ).cast(StringType()),
+            return_type,
+        )
+
+    return _raise_fn
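Note: raise_error_helper returns a column-building function that embeds a tagged error message into the expression; the ANSI overflow checks in integral_types_support.py below call it to surface a client-side exception. A hedged usage sketch, mirroring that pattern (the column name and bound are illustrative, not from the package):

    from pyspark.errors.exceptions.base import ArithmeticException

    import snowflake.snowpark.functions as snowpark_fn
    from snowflake.snowpark.types import IntegerType, StringType

    # Illustrative only: error out when a value cannot fit into an INT.
    raise_error = raise_error_helper(IntegerType(), ArithmeticException)
    checked = snowpark_fn.when(
        snowpark_fn.col("V") > snowpark_fn.lit(2147483647),
        raise_error(
            snowpark_fn.lit("[CAST_OVERFLOW] The value "),
            snowpark_fn.col("V").cast(StringType()),
            snowpark_fn.lit(" cannot be cast to INT due to an overflow."),
        ),
    ).otherwise(snowpark_fn.col("V").cast(IntegerType()))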
snowflake/snowpark_connect/expression/function_defaults.py

@@ -7,6 +7,9 @@ from typing import Any
 import pyspark.sql.connect.proto.expressions_pb2 as expressions_pb2
 import pyspark.sql.connect.proto.types_pb2 as types_pb2

+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+

 @dataclass(frozen=True)
 class DefaultParameter:
@@ -154,7 +157,9 @@ def _create_literal_expression(value: Any) -> expressions_pb2.Expression:
         null_type.null.SetInParent()
         expr.literal.null.CopyFrom(null_type)
     else:
-        raise ValueError(f"Unsupported literal type: {value}")
+        exception = ValueError(f"Unsupported literal type: {value}")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+        raise exception

     return expr

@@ -189,11 +194,13 @@ def inject_function_defaults(

     # Check if any required params are missing.
     if missing_arg_count > len(defaults):
-        raise ValueError(
+        exception = ValueError(
             f"Function '{function_name}' is missing required arguments. "
             f"Expected {total_args} args, got {current_arg_count}, "
             f"but only {len(defaults)} defaults are defined."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+        raise exception

     defaults_to_append = defaults[-missing_arg_count:]
     injected = False
snowflake/snowpark_connect/expression/hybrid_column_map.py

@@ -18,6 +18,9 @@ from snowflake import snowpark
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
+from snowflake.snowpark_connect.utils.identifiers import (
+    split_fully_qualified_spark_name,
+)


 class HybridColumnMap:
@@ -119,9 +122,11 @@ class HybridColumnMap:
         # Handle column references
         if expr_type == "unresolved_attribute":
             column_name = exp.unresolved_attribute.unparsed_identifier
+            name_parts = split_fully_qualified_spark_name(column_name)
+            alias_column_name = name_parts[0]

             # Check if it's an alias to an existing aggregate expression
-            if column_name in self.aggregate_aliases:
+            if alias_column_name in self.aggregate_aliases:
                 # Use the aggregated context to get the alias
                 return map_expression(
                     exp, self.aggregated_column_map, self.aggregated_typer
@@ -148,14 +153,15 @@ class HybridColumnMap:
                 exp, self.aggregated_column_map, self.aggregated_typer
             )

-        # For other expression types, try aggregated context first (likely references to computed values)
         try:
+            # 1. Evaluate the expression using the input grouping columns. i.e input_df.
+            # If not found, use the aggregate alias.
+            return map_expression(exp, self.input_column_map, self.input_typer)
+        except Exception:
+            # Fall back to input context
             return map_expression(
                 exp, self.aggregated_column_map, self.aggregated_typer
             )
-        except Exception:
-            # Fall back to input context
-            return map_expression(exp, self.input_column_map, self.input_typer)


 def create_hybrid_column_map_for_having(
@@ -190,3 +196,45 @@ def create_hybrid_column_map_for_having(
         grouping_expressions=grouping_expressions,
         aggregate_aliases=aggregate_aliases,
     )
+
+
+def create_hybrid_column_map_for_order_by(
+    aggregate_metadata,  # AggregateMetadata type
+    aggregated_df: snowpark.DataFrame,
+    aggregated_column_map: ColumnNameMap,
+) -> HybridColumnMap:
+    """
+    Create a HybridColumnMap instance for ORDER BY clause resolution after aggregation.
+
+    This is similar to HAVING clause resolution - ORDER BY can reference:
+    1. Grouping columns (e.g., year, a)
+    2. Aggregate aliases (e.g., cnt)
+    3. Expressions on grouping columns (e.g., year(date) where date is pre-aggregation)
+
+    Args:
+        aggregate_metadata: Metadata from the aggregate operation
+        aggregated_df: The DataFrame after aggregation
+        aggregated_column_map: Column mapping for the aggregated DataFrame
+
+    Returns:
+        HybridColumnMap for resolving ORDER BY expressions
+    """
+    # Create typers for both contexts
+    input_typer = ExpressionTyper(aggregate_metadata.input_dataframe)
+    aggregated_typer = ExpressionTyper(aggregated_df)
+
+    # Build alias mapping from spark column names to aggregate expressions
+    aggregate_aliases = {}
+    for i, (spark_name, _) in enumerate(aggregate_metadata.raw_aggregations):
+        if i < len(aggregate_metadata.aggregate_expressions):
+            aggregate_aliases[spark_name] = aggregate_metadata.aggregate_expressions[i]
+
+    return HybridColumnMap(
+        input_column_map=aggregate_metadata.input_column_map,
+        input_typer=input_typer,
+        aggregated_column_map=aggregated_column_map,
+        aggregated_typer=aggregated_typer,
+        aggregate_expressions=aggregate_metadata.aggregate_expressions,
+        grouping_expressions=aggregate_metadata.grouping_expressions,
+        aggregate_aliases=aggregate_aliases,
+    )
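Note: create_hybrid_column_map_for_order_by lets ORDER BY resolve names against either the pre-aggregation input or the aggregated output, mirroring the HAVING path. For intuition, a query like the following needs both contexts: "cnt" only exists after aggregation, while year(order_date) must be evaluated against the input columns. This is illustrative PySpark, not code from the package; the orders DataFrame and order_date column are assumed.

    import pyspark.sql.functions as F

    # "cnt" resolves via the aggregate alias map; year(order_date) resolves
    # against the pre-aggregation input DataFrame.
    result = (
        orders.groupBy(F.year("order_date").alias("year"))
        .agg(F.count("*").alias("cnt"))
        .orderBy(F.col("cnt").desc())
    )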
snowflake/snowpark_connect/expression/integral_types_support.py (new file)

@@ -0,0 +1,219 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+from pyspark.errors.exceptions.base import ArithmeticException
+
+import snowflake.snowpark.functions as snowpark_fn
+from snowflake.snowpark.column import Column
+from snowflake.snowpark.types import (
+    ByteType,
+    DataType,
+    IntegerType,
+    LongType,
+    ShortType,
+    StringType,
+)
+from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.expression.error_utils import raise_error_helper
+
+
+def get_integral_type_bounds(typ: DataType) -> tuple[int, int]:
+    if isinstance(typ, ByteType):
+        return (-128, 127)
+    elif isinstance(typ, ShortType):
+        return (-32768, 32767)
+    elif isinstance(typ, IntegerType):
+        return (-2147483648, 2147483647)
+    elif isinstance(typ, LongType):
+        return (-9223372036854775808, 9223372036854775807)
+    else:
+        raise ValueError(f"Unsupported integral type: {typ}")
+
+
+def apply_integral_overflow(col: Column, to_type: DataType) -> Column:
+    if not global_config.snowpark_connect_handleIntegralOverflow:
+        return col.cast(to_type)
+
+    min_val, max_val = get_integral_type_bounds(to_type)
+    range_size = max_val - min_val + 1
+
+    offset_value = col - snowpark_fn.lit(min_val)
+    wrapped_offset = snowpark_fn.function("MOD")(
+        offset_value, snowpark_fn.lit(range_size)
+    )
+
+    wrapped_offset = snowpark_fn.when(
+        wrapped_offset < 0, wrapped_offset + snowpark_fn.lit(range_size)
+    ).otherwise(wrapped_offset)
+
+    wrapped_result = wrapped_offset + snowpark_fn.lit(min_val)
+
+    return snowpark_fn.when(
+        (col >= snowpark_fn.lit(min_val)) & (col <= snowpark_fn.lit(max_val)),
+        col.cast(to_type),
+    ).otherwise(wrapped_result.cast(to_type))
+
+
+def apply_fractional_to_integral_cast(col: Column, to_type: DataType) -> Column:
+    if not global_config.snowpark_connect_handleIntegralOverflow:
+        return col.cast(to_type)
+
+    min_val, max_val = get_integral_type_bounds(to_type)
+
+    clamped = (
+        snowpark_fn.when(col > snowpark_fn.lit(max_val), snowpark_fn.lit(max_val))
+        .when(col < snowpark_fn.lit(min_val), snowpark_fn.lit(min_val))
+        .otherwise(col)
+    )
+
+    return clamped.cast(to_type)
+
+
+def apply_integral_overflow_with_ansi_check(
+    col: Column, to_type: DataType, ansi_enabled: bool
+) -> Column:
+    if not global_config.snowpark_connect_handleIntegralOverflow:
+        return col.cast(to_type)
+
+    if not ansi_enabled:
+        return apply_integral_overflow(col, to_type)
+
+    min_val, max_val = get_integral_type_bounds(to_type)
+    type_name = to_type.typeName().upper()
+
+    raise_error = raise_error_helper(to_type, ArithmeticException)
+
+    return snowpark_fn.when(
+        (col < snowpark_fn.lit(min_val)) | (col > snowpark_fn.lit(max_val)),
+        raise_error(
+            snowpark_fn.lit("[CAST_OVERFLOW] The value "),
+            col.cast(StringType()),
+            snowpark_fn.lit(
+                f" of the type BIGINT cannot be cast to {type_name} due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead."
+            ),
+        ),
+    ).otherwise(col.cast(to_type))
+
+
+def apply_fractional_to_integral_cast_with_ansi_check(
+    col: Column, to_type: DataType, ansi_enabled: bool
+) -> Column:
+    if not global_config.snowpark_connect_handleIntegralOverflow:
+        return col.cast(to_type)
+
+    if not ansi_enabled:
+        return apply_fractional_to_integral_cast(col, to_type)
+
+    min_val, max_val = get_integral_type_bounds(to_type)
+    type_name = to_type.typeName().upper()
+
+    raise_error = raise_error_helper(to_type, ArithmeticException)
+
+    return snowpark_fn.when(
+        (col < snowpark_fn.lit(min_val)) | (col > snowpark_fn.lit(max_val)),
+        raise_error(
+            snowpark_fn.lit("[CAST_OVERFLOW] The value "),
+            col.cast(StringType()),
+            snowpark_fn.lit(
+                f" of the type DOUBLE cannot be cast to {type_name} "
+                f"due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead."
+            ),
+        ),
+    ).otherwise(col.cast(to_type))
+
+
+def apply_arithmetic_overflow_with_ansi_check(
+    result_col: Column, result_type: DataType, ansi_enabled: bool, operation_name: str
+) -> Column:
+    if not global_config.snowpark_connect_handleIntegralOverflow:
+        return result_col.cast(result_type)
+
+    if not ansi_enabled:
+        return apply_integral_overflow(result_col, result_type)
+
+    min_val, max_val = get_integral_type_bounds(result_type)
+
+    raise_error = raise_error_helper(result_type, ArithmeticException)
+
+    return snowpark_fn.when(
+        (result_col < snowpark_fn.lit(min_val))
+        | (result_col > snowpark_fn.lit(max_val)),
+        raise_error(
+            snowpark_fn.lit(
+                f"[ARITHMETIC_OVERFLOW] {operation_name} overflow. "
+                f"Use 'try_{operation_name.lower()}' to tolerate overflow and return NULL instead. "
+                f'If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
+            ),
+        ),
+    ).otherwise(result_col.cast(result_type))
+
+
+def apply_unary_overflow(value_col: Column, result_type: DataType) -> Column:
+    if not global_config.snowpark_connect_handleIntegralOverflow:
+        return (value_col * snowpark_fn.lit(-1)).cast(result_type)
+
+    min_val, _ = get_integral_type_bounds(result_type)
+    return snowpark_fn.when(
+        value_col == snowpark_fn.lit(min_val),
+        snowpark_fn.lit(min_val).cast(result_type),
+    ).otherwise((value_col * snowpark_fn.lit(-1)).cast(result_type))
+
+
+def apply_unary_overflow_with_ansi_check(
+    value_col: Column, result_type: DataType, ansi_enabled: bool, operation_name: str
+) -> Column:
+    if not global_config.snowpark_connect_handleIntegralOverflow:
+        return (value_col * snowpark_fn.lit(-1)).cast(result_type)
+
+    if not ansi_enabled:
+        return apply_unary_overflow(value_col, result_type)
+
+    min_val, _ = get_integral_type_bounds(result_type)
+
+    raise_error = raise_error_helper(result_type, ArithmeticException)
+
+    return snowpark_fn.when(
+        value_col == snowpark_fn.lit(min_val),
+        raise_error(
+            snowpark_fn.lit(
+                f"[ARITHMETIC_OVERFLOW] {operation_name} overflow. "
+                f'If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
+            ),
+        ),
+    ).otherwise((value_col * snowpark_fn.lit(-1)).cast(result_type))
+
+
+def apply_abs_overflow(value_col: Column, result_type: DataType) -> Column:
+    if not global_config.snowpark_connect_handleIntegralOverflow:
+        return snowpark_fn.abs(value_col).cast(result_type)
+
+    min_val, _ = get_integral_type_bounds(result_type)
+    return snowpark_fn.when(
+        value_col == snowpark_fn.lit(min_val),
+        snowpark_fn.lit(min_val).cast(result_type),
+    ).otherwise(snowpark_fn.abs(value_col).cast(result_type))
+
+
+def apply_abs_overflow_with_ansi_check(
+    value_col: Column, result_type: DataType, ansi_enabled: bool
+) -> Column:
+    if not global_config.snowpark_connect_handleIntegralOverflow:
+        return snowpark_fn.abs(value_col).cast(result_type)
+
+    if not ansi_enabled:
+        return apply_abs_overflow(value_col, result_type)
+
+    min_val, _ = get_integral_type_bounds(result_type)
+
+    raise_error = raise_error_helper(result_type, ArithmeticException)
+
+    return snowpark_fn.when(
+        value_col == snowpark_fn.lit(min_val),
+        raise_error(
+            snowpark_fn.lit(
+                "[ARITHMETIC_OVERFLOW] abs overflow. "
+                'If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
+            ),
+        ),
+    ).otherwise(snowpark_fn.abs(value_col).cast(result_type))
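Note: the wrap-around in apply_integral_overflow is plain two's-complement arithmetic: shift the value into the non-negative range, reduce it modulo the type's range, then shift back. A worked example for casting 300 to ByteType (bounds -128..127, range 256), evaluated in plain Python rather than as Snowpark column expressions:

    # Same arithmetic as apply_integral_overflow, on plain ints:
    min_val, max_val = -128, 127
    range_size = max_val - min_val + 1   # 256
    value = 300
    offset = value - min_val             # 428
    wrapped = offset % range_size        # 172
    result = wrapped + min_val           # 44, i.e. 300 wraps to 44 as a BYTE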
snowflake/snowpark_connect/expression/literal.py

@@ -10,7 +10,8 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
 from tzlocal import get_localzone

 from snowflake.snowpark_connect.config import global_config
-from snowflake.snowpark_connect.utils.context import get_is_evaluating_sql
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
 )
@@ -53,20 +54,21 @@ def get_literal_field_and_name(literal: expressions_proto.Expression.Literal):
                microseconds = literal.timestamp
            else:
                microseconds = literal.timestamp_ntz
-            lit_dt = datetime.datetime.fromtimestamp(
-                microseconds // 1_000_000
-            ) + datetime.timedelta(microseconds=microseconds % 1_000_000)
-            tz_dt = datetime.datetime.fromtimestamp(
-                microseconds // 1_000_000, tz=local_tz
+
+            dt_utc = datetime.datetime.fromtimestamp(
+                microseconds // 1_000_000, tz=datetime.timezone.utc
             ) + datetime.timedelta(microseconds=microseconds % 1_000_000)
+
            if t == "timestamp_ntz":
-                lit_dt = lit_dt.astimezone(datetime.timezone.utc)
-                tz_dt = tz_dt.astimezone(datetime.timezone.utc)
-            elif not get_is_evaluating_sql():
+                # For timestamp_ntz, display in UTC
+                lit_dt = dt_utc.replace(tzinfo=None)
+                tz_dt = dt_utc
+            else:
+                # For timestamp_ltz, always display in session timezone
                config_tz = global_config.spark_sql_session_timeZone
-                config_tz = ZoneInfo(config_tz) if config_tz else local_tz
-                tz_dt = tz_dt.astimezone(config_tz)
-                lit_dt = lit_dt.astimezone(local_tz)
+                display_tz = ZoneInfo(config_tz) if config_tz else local_tz
+                tz_dt = dt_utc.astimezone(display_tz)
+                lit_dt = tz_dt.replace(tzinfo=None)

            def _format_timestamp(dt) -> str:
                without_micros = f"{dt.year:04d}-{dt.month:02d}-{dt.day:02d} {dt.hour:02d}:{dt.minute:02d}:{dt.second:02d}"
@@ -97,7 +99,29 @@ def get_literal_field_and_name(literal: expressions_proto.Expression.Literal):
                *(get_literal_field_and_name(e) for e in literal.array.elements)
            )
            return array_values, f"ARRAY({', '.join(element_names)})"
+        case "struct":
+            struct_key_names = [
+                field.name for field in literal.struct.struct_type.struct.fields
+            ]
+            struct_values = [
+                get_literal_field_and_name(el)[0] for el in literal.struct.elements
+            ]
+
+            struct_dict = dict(zip(struct_key_names, struct_values))
+
+            struct_elements = [
+                item for pair in zip(struct_key_names, struct_values) for item in pair
+            ]
+
+            return (
+                struct_dict,
+                f"OBJECT_CONSTRUCT_KEEP_NULL({', '.join(str(x) for x in struct_elements)})",
+            )
        case "null" | None:
            return None, "NULL"
        case other:
-            raise SnowparkConnectNotImplementedError(f"Other Literal Type {other}")
+            exception = SnowparkConnectNotImplementedError(
+                f"Other Literal Type {other}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
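Note: the new "struct" branch returns the literal's value as a Python dict and names the resulting column after Snowflake's OBJECT_CONSTRUCT_KEEP_NULL, with keys and values interleaved. A small illustration of the name it produces (the keys and values here are made up):

    struct_key_names = ["a", "b"]
    struct_values = [1, "x"]
    # Interleave keys and values exactly as the new code does:
    struct_elements = [
        item for pair in zip(struct_key_names, struct_values) for item in pair
    ]
    name = f"OBJECT_CONSTRUCT_KEEP_NULL({', '.join(str(x) for x in struct_elements)})"
    # name == "OBJECT_CONSTRUCT_KEEP_NULL(a, 1, b, x)"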