snowpark-connect 0.27.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (200)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
  3. snowflake/snowpark_connect/client/__init__.py +15 -0
  4. snowflake/snowpark_connect/client/error_utils.py +30 -0
  5. snowflake/snowpark_connect/client/exceptions.py +36 -0
  6. snowflake/snowpark_connect/client/query_results.py +90 -0
  7. snowflake/snowpark_connect/client/server.py +717 -0
  8. snowflake/snowpark_connect/client/utils/__init__.py +10 -0
  9. snowflake/snowpark_connect/client/utils/session.py +85 -0
  10. snowflake/snowpark_connect/column_name_handler.py +404 -243
  11. snowflake/snowpark_connect/column_qualifier.py +43 -0
  12. snowflake/snowpark_connect/config.py +309 -26
  13. snowflake/snowpark_connect/constants.py +2 -0
  14. snowflake/snowpark_connect/dataframe_container.py +102 -8
  15. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  16. snowflake/snowpark_connect/error/error_codes.py +50 -0
  17. snowflake/snowpark_connect/error/error_utils.py +172 -23
  18. snowflake/snowpark_connect/error/exceptions.py +13 -4
  19. snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
  20. snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
  21. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  22. snowflake/snowpark_connect/expression/error_utils.py +28 -0
  23. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  24. snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
  25. snowflake/snowpark_connect/expression/integral_types_support.py +219 -0
  26. snowflake/snowpark_connect/expression/literal.py +37 -13
  27. snowflake/snowpark_connect/expression/map_cast.py +224 -15
  28. snowflake/snowpark_connect/expression/map_expression.py +80 -27
  29. snowflake/snowpark_connect/expression/map_extension.py +322 -12
  30. snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
  31. snowflake/snowpark_connect/expression/map_udf.py +86 -20
  32. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
  33. snowflake/snowpark_connect/expression/map_unresolved_function.py +2964 -829
  34. snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
  35. snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
  36. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  37. snowflake/snowpark_connect/includes/jars/json4s-ast_2.13-3.7.0-M11.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
  39. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.13-0.2.0.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/scala-reflect-2.13.16.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.13-3.5.6.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/{spark-connect-client-jvm_2.12-3.5.6.jar → spark-connect-client-jvm_2.13-3.5.6.jar} +0 -0
  43. snowflake/snowpark_connect/includes/jars/{spark-sql_2.12-3.5.6.jar → spark-sql_2.13-3.5.6.jar} +0 -0
  44. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
  45. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
  46. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
  47. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
  48. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
  49. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
  50. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  51. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
  52. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  53. snowflake/snowpark_connect/relation/io_utils.py +110 -10
  54. snowflake/snowpark_connect/relation/map_aggregate.py +239 -256
  55. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  56. snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
  57. snowflake/snowpark_connect/relation/map_extension.py +263 -29
  58. snowflake/snowpark_connect/relation/map_join.py +683 -442
  59. snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
  60. snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
  61. snowflake/snowpark_connect/relation/map_relation.py +48 -19
  62. snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
  63. snowflake/snowpark_connect/relation/map_show_string.py +13 -6
  64. snowflake/snowpark_connect/relation/map_sql.py +1233 -222
  65. snowflake/snowpark_connect/relation/map_stats.py +48 -9
  66. snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
  67. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  68. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
  69. snowflake/snowpark_connect/relation/read/map_read.py +134 -43
  70. snowflake/snowpark_connect/relation/read/map_read_csv.py +326 -47
  71. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +21 -6
  72. snowflake/snowpark_connect/relation/read/map_read_json.py +324 -86
  73. snowflake/snowpark_connect/relation/read/map_read_parquet.py +146 -28
  74. snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
  75. snowflake/snowpark_connect/relation/read/map_read_socket.py +15 -3
  76. snowflake/snowpark_connect/relation/read/map_read_table.py +86 -6
  77. snowflake/snowpark_connect/relation/read/map_read_text.py +22 -4
  78. snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
  79. snowflake/snowpark_connect/relation/read/reader_config.py +42 -3
  80. snowflake/snowpark_connect/relation/read/utils.py +50 -5
  81. snowflake/snowpark_connect/relation/stage_locator.py +91 -55
  82. snowflake/snowpark_connect/relation/utils.py +128 -5
  83. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  84. snowflake/snowpark_connect/relation/write/map_write.py +929 -319
  85. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  86. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  87. snowflake/snowpark_connect/resources_initializer.py +171 -48
  88. snowflake/snowpark_connect/server.py +528 -473
  89. snowflake/snowpark_connect/server_common/__init__.py +503 -0
  90. snowflake/snowpark_connect/snowflake_session.py +65 -0
  91. snowflake/snowpark_connect/start_server.py +53 -5
  92. snowflake/snowpark_connect/type_mapping.py +349 -27
  93. snowflake/snowpark_connect/type_support.py +130 -0
  94. snowflake/snowpark_connect/typed_column.py +9 -7
  95. snowflake/snowpark_connect/utils/artifacts.py +9 -8
  96. snowflake/snowpark_connect/utils/cache.py +49 -27
  97. snowflake/snowpark_connect/utils/concurrent.py +36 -1
  98. snowflake/snowpark_connect/utils/context.py +195 -37
  99. snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
  100. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  101. snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
  102. snowflake/snowpark_connect/utils/identifiers.py +137 -3
  103. snowflake/snowpark_connect/utils/io_utils.py +57 -1
  104. snowflake/snowpark_connect/utils/java_stored_procedure.py +151 -0
  105. snowflake/snowpark_connect/utils/java_udaf_utils.py +321 -0
  106. snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
  107. snowflake/snowpark_connect/utils/jvm_udf_utils.py +281 -0
  108. snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
  109. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  110. snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
  111. snowflake/snowpark_connect/utils/profiling.py +25 -8
  112. snowflake/snowpark_connect/utils/scala_udf_utils.py +185 -340
  113. snowflake/snowpark_connect/utils/sequence.py +21 -0
  114. snowflake/snowpark_connect/utils/session.py +64 -28
  115. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
  116. snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
  117. snowflake/snowpark_connect/utils/telemetry.py +192 -40
  118. snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
  119. snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
  120. snowflake/snowpark_connect/utils/udf_cache.py +117 -41
  121. snowflake/snowpark_connect/utils/udf_helper.py +39 -37
  122. snowflake/snowpark_connect/utils/udf_utils.py +133 -14
  123. snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
  124. snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
  125. snowflake/snowpark_connect/utils/udxf_import_utils.py +9 -2
  126. snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
  127. snowflake/snowpark_connect/version.py +1 -1
  128. snowflake/snowpark_decoder/dp_session.py +6 -2
  129. snowflake/snowpark_decoder/spark_decoder.py +12 -0
  130. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-submit +14 -4
  131. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/METADATA +16 -7
  132. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/RECORD +139 -168
  133. snowflake/snowpark_connect/hidden_column.py +0 -39
  134. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  135. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  136. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  137. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  138. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  139. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  140. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  141. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  142. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  143. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  144. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  145. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  146. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  147. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  148. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  149. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  150. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  151. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  152. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  153. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  154. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  155. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  156. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  157. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  158. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  159. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  160. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  161. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  162. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  163. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  164. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  165. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  166. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  167. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  168. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  169. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  170. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  171. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  172. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  173. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  174. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  175. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  176. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  177. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  178. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  179. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  180. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  181. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  182. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  183. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  184. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  185. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  186. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  187. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  188. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  189. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  190. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  191. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  192. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  193. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  194. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-connect +0 -0
  195. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-session +0 -0
  196. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/WHEEL +0 -0
  197. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE-binary +0 -0
  198. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
  199. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/NOTICE-binary +0 -0
  200. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/expression/map_cast.py
@@ -6,17 +6,22 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
 import pyspark.sql.connect.proto.types_pb2 as types_proto
 from pyspark.errors.exceptions.base import (
     AnalysisException,
+    ArithmeticException,
+    IllegalArgumentException,
     NumberFormatException,
     SparkRuntimeException,
 )

 import snowflake.snowpark.functions as snowpark_fn
+from snowflake.snowpark.column import Column
 from snowflake.snowpark.types import (
     BinaryType,
     BooleanType,
     DataType,
     DateType,
+    DecimalType,
     DoubleType,
+    FloatType,
     IntegerType,
     LongType,
     MapType,
@@ -25,12 +30,22 @@ from snowflake.snowpark.types import (
     StructType,
     TimestampTimeZone,
     TimestampType,
+    YearMonthIntervalType,
     _FractionalType,
     _IntegralType,
     _NumericType,
 )
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+from snowflake.snowpark_connect.expression.error_utils import raise_error_helper
+from snowflake.snowpark_connect.expression.integral_types_support import (
+    apply_fractional_to_integral_cast,
+    apply_fractional_to_integral_cast_with_ansi_check,
+    apply_integral_overflow_with_ansi_check,
+    get_integral_type_bounds,
+)
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.type_mapping import (
     map_type_string_to_snowpark_type,
@@ -49,7 +64,7 @@ SYMBOL_FUNCTIONS = {"<", ">", "<=", ">=", "!=", "+", "-", "*", "/", "%", "div"}
 CAST_FUNCTIONS = {
     "boolean": types_proto.DataType(boolean=types_proto.DataType.Boolean()),
     "int": types_proto.DataType(integer=types_proto.DataType.Integer()),
-    "smallint": types_proto.DataType(integer=types_proto.DataType.Integer()),
+    "smallint": types_proto.DataType(short=types_proto.DataType.Short()),
     "bigint": types_proto.DataType(long=types_proto.DataType.Long()),
     "tinyint": types_proto.DataType(byte=types_proto.DataType.Byte()),
     "float": types_proto.DataType(float=types_proto.DataType.Float()),
@@ -87,7 +102,9 @@ def map_cast(
             to_type = map_type_string_to_snowpark_type(exp.cast.type_str)
             to_type_str = exp.cast.type_str.upper()
         case _:
-            raise ValueError("No type to cast to")
+            exception = ValueError("No type to cast to")
+            attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+            raise exception

     from_exp = exp.cast.expr
     new_name, typed_column = map_single_column_expression(
@@ -230,6 +247,11 @@ def map_cast(
         case (_, BooleanType()) if isinstance(from_type, _NumericType):
             result_exp = col.cast(LongType()).cast(to_type)

+        case (_IntegralType(), _IntegralType()):
+            result_exp = apply_integral_overflow_with_ansi_check(
+                col, to_type, spark_sql_ansi_enabled
+            )
+
         # binary
         case (StringType(), BinaryType()):
             result_exp = snowpark_fn.to_binary(col, "UTF-8")
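The new `(_IntegralType(), _IntegralType())` case routes through the shared overflow helper. For reference, the Spark semantics it has to reproduce (illustrative only; assumes the `spark` session from the earlier sketch):

```python
# ANSI off: integral-to-integral overflow follows Java narrowing casts,
# e.g. 300 keeps its low byte (0x2C = 44) when cast to TINYINT.
spark.conf.set("spark.sql.ansi.enabled", "false")
spark.sql("SELECT CAST(300 AS TINYINT)").collect()      # -> 44

# ANSI on: the same cast raises an ArithmeticException [CAST_OVERFLOW],
# while TRY_CAST returns NULL instead of failing.
spark.conf.set("spark.sql.ansi.enabled", "true")
spark.sql("SELECT TRY_CAST(300 AS TINYINT)").collect()  # -> None
```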
@@ -265,6 +287,44 @@ def map_cast(
             result_exp = snowpark_fn.to_varchar(col, "UTF-8")

         # numeric
+        case (_, _) if isinstance(from_type, (FloatType, DoubleType)) and isinstance(
+            to_type, _IntegralType
+        ):
+            truncated = (
+                snowpark_fn.when(
+                    col == snowpark_fn.lit(float("nan")), snowpark_fn.lit(0)
+                )
+                .when(col < 0, snowpark_fn.ceil(col))
+                .otherwise(snowpark_fn.floor(col))
+            )
+
+            if spark_sql_ansi_enabled:
+                result_exp = apply_fractional_to_integral_cast_with_ansi_check(
+                    truncated, to_type, True
+                )
+            else:
+                target_min, target_max = get_integral_type_bounds(to_type)
+                result_exp = (
+                    snowpark_fn.when(
+                        truncated > snowpark_fn.lit(target_max),
+                        snowpark_fn.lit(target_max),
+                    )
+                    .when(
+                        truncated < snowpark_fn.lit(target_min),
+                        snowpark_fn.lit(target_min),
+                    )
+                    .otherwise(truncated.cast(to_type))
+                )
+        case (_, _) if isinstance(from_type, DecimalType) and isinstance(
+            to_type, _IntegralType
+        ):
+            result_exp = snowpark_fn.when(col < 0, snowpark_fn.ceil(col)).otherwise(
+                snowpark_fn.floor(col)
+            )
+            result_exp = result_exp.cast(to_type)
+            result_exp = apply_integral_overflow_with_ansi_check(
+                result_exp, to_type, spark_sql_ansi_enabled
+            )
         case (_, _) if isinstance(from_type, _FractionalType) and isinstance(
             to_type, _IntegralType
         ):
@@ -275,16 +335,49 @@ def map_cast(
                 .when(col < 0, snowpark_fn.ceil(col))
                 .otherwise(snowpark_fn.floor(col))
             )
-            result_exp = result_exp.cast(to_type)
+            result_exp = apply_fractional_to_integral_cast(result_exp, to_type)
         case (StringType(), _) if (isinstance(to_type, _IntegralType)):
             if spark_sql_ansi_enabled:
-                result_exp = snowpark_fn.cast(col, DoubleType())
+                double_val = snowpark_fn.cast(col, DoubleType())
+
+                target_min, target_max = get_integral_type_bounds(to_type)
+                raise_error = raise_error_helper(to_type, NumberFormatException)
+                to_type_name = to_type.__class__.__name__.upper().replace("TYPE", "")
+
+                truncated = snowpark_fn.when(
+                    double_val < 0, snowpark_fn.ceil(double_val)
+                ).otherwise(snowpark_fn.floor(double_val))
+
+                result_exp = snowpark_fn.when(
+                    (truncated < snowpark_fn.lit(target_min))
+                    | (truncated > snowpark_fn.lit(target_max)),
+                    raise_error(
+                        snowpark_fn.lit("[CAST_INVALID_INPUT] The value '"),
+                        col,
+                        snowpark_fn.lit(
+                            f'\' of the type "STRING" cannot be cast to "{to_type_name}" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
+                        ),
+                    ),
+                ).otherwise(truncated.cast(to_type))
             else:
-                result_exp = snowpark_fn.try_cast(col, DoubleType())
-                result_exp = snowpark_fn.when(
-                    result_exp < 0, snowpark_fn.ceil(result_exp)
-                ).otherwise(snowpark_fn.floor(result_exp))
-                result_exp = result_exp.cast(to_type)
+                double_val = snowpark_fn.try_cast(col, DoubleType())
+
+                truncated = snowpark_fn.when(
+                    double_val < 0, snowpark_fn.ceil(double_val)
+                ).otherwise(snowpark_fn.floor(double_val))
+
+                target_min, target_max = get_integral_type_bounds(to_type)
+                result_exp = (
+                    snowpark_fn.when(
+                        double_val.isNull(), snowpark_fn.lit(None).cast(to_type)
+                    )
+                    .when(
+                        (truncated < snowpark_fn.lit(target_min))
+                        | (truncated > snowpark_fn.lit(target_max)),
+                        snowpark_fn.lit(None).cast(to_type),
+                    )
+                    .otherwise(truncated.cast(to_type))
+                )
         # https://docs.snowflake.com/en/sql-reference/functions/try_cast Only works on certain types (mostly non-structured ones)
         case (StringType(), _) if isinstance(to_type, _NumericType) or isinstance(
             to_type, StringType
@@ -299,10 +392,16 @@ def map_cast(
                 result_exp = snowpark_fn.cast(col, to_type)
             else:
                 result_exp = snowpark_fn.try_cast(col, to_type)
+        case (StringType(), YearMonthIntervalType()):
+            result_exp = _cast_string_to_year_month_interval(col, to_type)
+        case (YearMonthIntervalType(), StringType()):
+            result_exp = _cast_year_month_interval_to_string(col, from_type)
         case (StringType(), _):
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"""[DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION] Cannot resolve "{col_name}" due to data type mismatch: cannot cast "{snowpark_to_proto_type(from_type, column_mapping)}" to "{exp.cast.type_str.upper()}".;"""
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+            raise exception
         case _:
             result_exp = snowpark_fn.cast(col, to_type)

@@ -317,9 +416,11 @@ def sanity_check(
    """

    if isinstance(from_type, LongType) and isinstance(to_type, BinaryType):
-        raise NumberFormatException(
+        exception = NumberFormatException(
            f"""[DATATYPE_MISMATCH.CAST_WITH_CONF_SUGGESTION] Cannot resolve "CAST({value} AS BINARY)" due to data type mismatch: cannot cast "BIGINT" to "BINARY" with ANSI mode on."""
        )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+        raise exception

    if (
        from_type_cast
@@ -329,9 +430,11 @@ def sanity_check(
        if value is not None:
            value = value.strip().lower()
        if value not in {"t", "true", "f", "false", "y", "yes", "n", "no", "0", "1"}:
-            raise SparkRuntimeException(
+            exception = SparkRuntimeException(
                f"""[CAST_INVALID_INPUT] The value '{value}' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error."""
            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+            raise exception

    raise_cast_failure_exception = False
    if isinstance(to_type, _IntegralType):
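The accepted literal set mirrors Spark's string-to-boolean rules: input is trimmed and lower-cased, then matched against the ten literals above. For example (`spark` as above, ANSI on):

```python
spark.conf.set("spark.sql.ansi.enabled", "true")
spark.sql("SELECT CAST(' YES ' AS BOOLEAN)").collect()  # -> True (trimmed, case-insensitive)
spark.sql("SELECT CAST('0' AS BOOLEAN)").collect()      # -> False
# Anything outside the accepted set raises
# SparkRuntimeException [CAST_INVALID_INPUT]:
# spark.sql("SELECT CAST('maybe' AS BOOLEAN)").collect()
```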
@@ -351,6 +454,112 @@ def sanity_check(
    except Exception:
        raise_cast_failure_exception = True
    if raise_cast_failure_exception:
-        raise NumberFormatException(
-            """[CAST_INVALID_INPUT] Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary setting "spark.sql.ansi.enabled" to "false" may bypass this error."""
-        )
+        if not isinstance(from_type, StringType) and isinstance(to_type, _IntegralType):
+            from_type_name = from_type.__class__.__name__.upper().replace("TYPE", "")
+            to_type_name = to_type.__class__.__name__.upper().replace("TYPE", "")
+            value_suffix = "L" if isinstance(from_type, LongType) else ""
+            exception = ArithmeticException(
+                f"""[CAST_OVERFLOW] The value {value}{value_suffix} of the type "{from_type_name}" cannot be cast to "{to_type_name}" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error."""
+            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+        else:
+            exception = NumberFormatException(
+                """[CAST_INVALID_INPUT] Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary setting "spark.sql.ansi.enabled" to "false" may bypass this error."""
+            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+        raise exception
+
+
+def _cast_string_to_year_month_interval(col: Column, to_type: YearMonthIntervalType):
+    """
+    Cast string to year-month interval type.
+    Supports different interval formats:
+    1. '[+|-]y-m' format (e.g., '1-2', '-1-6', '+2-3') - YEAR TO MONTH
+    2. '[+|-]y' format (e.g., '1', '-2', '+3') - YEAR
+    3. '[+|-]m' format (e.g., '12', '-6', '+15') - MONTH
+    4. 'INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH' format - extract the y-m part
+    5. 'INTERVAL [+|-]'[+|-]y' YEAR' format - extract the y part
+    6. 'INTERVAL [+|-]'[+|-]m' MONTH' format - extract the m part
+    """
+    # Extract values from different formats
+    value = snowpark_fn.regexp_extract(col, "'([^']+)'", 1)
+    years = snowpark_fn.regexp_extract(col, "^[+-]?\\d+", 0)
+    months = snowpark_fn.regexp_extract(col, "-(\\d+)$", 1)
+    raise_error = raise_error_helper(to_type, IllegalArgumentException)
+
+    # For MONTH-only intervals, treat the input as months
+    if (
+        to_type.start_field == YearMonthIntervalType.MONTH
+        and to_type.end_field == YearMonthIntervalType.MONTH
+    ):
+        months = years
+        years = snowpark_fn.lit(0)
+
+    # Define overflow limits based on Snowflake's INTERVAL limits
+    # Maximum year-month interval is 178956970-7 (positive) and -178956970-8 (negative)
+    max_years = snowpark_fn.lit(178956970)
+    max_months_positive = snowpark_fn.lit(7)
+    max_months_negative = snowpark_fn.lit(8)
+
+    return snowpark_fn.when(
+        col.like("INTERVAL % YEAR TO MONTH")
+        | col.like("INTERVAL % YEAR")
+        | col.like("INTERVAL % MONTH"),
+        value.cast(to_type),
+    ).when(
+        col.rlike("^[+-]?\\d+(-\\d+)?$"),
+        snowpark_fn.when(
+            # Check for overflow conditions
+            ((years >= max_years) & (months > max_months_positive))
+            | (years > max_years)
+            | ((years <= -max_years) & (months > max_months_negative))
+            | (years < -max_years),
+            raise_error(snowpark_fn.lit("Error parsing interval year-month string")),
+        ).otherwise(col.cast(to_type)),
+    )
+
+
+def _cast_year_month_interval_to_string(col: Column, from_type: YearMonthIntervalType):
+    """
+    Cast year-month interval to string.
+    Returns format like 'INTERVAL '1-2' YEAR TO MONTH' for year-month intervals.
+    """
+    years = snowpark_fn.date_part("YEAR", col)
+    months = snowpark_fn.date_part("MONTH", col)
+
+    total_months = years * 12 + months
+
+    start_field = from_type.start_field  # YEAR
+    end_field = from_type.end_field  # MONTH
+
+    def _format_interval_udf(
+        total_months: int, start_field: int, end_field: int
+    ) -> str:
+        is_negative = total_months < 0
+        abs_months = abs(total_months)
+        years = abs_months // 12
+        months = abs_months % 12
+
+        is_year_only = start_field == 0 and end_field == 0
+        is_month_only = start_field == 1 and end_field == 1
+
+        if is_year_only:
+            sign = "-" if is_negative else ""
+            return f"INTERVAL '{sign}{years}' YEAR"
+        elif is_month_only:
+            return f"INTERVAL '{total_months}' MONTH"
+        else:  # YEAR TO MONTH
+            if is_negative:
+                return f"INTERVAL '-{years}-{months}' YEAR TO MONTH"
+            else:
+                return f"INTERVAL '{years}-{months}' YEAR TO MONTH"
+
+    format_udf = cached_udf(
+        _format_interval_udf,
+        input_types=[IntegerType(), IntegerType(), IntegerType()],
+        return_type=StringType(),
+    )
+
+    return format_udf(
+        total_months, snowpark_fn.lit(start_field), snowpark_fn.lit(end_field)
+    )
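Round-trip examples of the formats the two helpers parse and emit, matching the docstrings above (illustrative, `spark` as above):

```python
spark.sql("SELECT CAST('1-2' AS INTERVAL YEAR TO MONTH)").collect()
# -> INTERVAL '1-2' YEAR TO MONTH (14 months)

spark.sql("SELECT CAST('-6' AS INTERVAL MONTH)").collect()
# -> INTERVAL '-6' MONTH

spark.sql("SELECT CAST(INTERVAL '1-2' YEAR TO MONTH AS STRING)").collect()
# -> "INTERVAL '1-2' YEAR TO MONTH"
```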
snowflake/snowpark_connect/expression/map_expression.py
@@ -14,6 +14,8 @@ from snowflake.snowpark import Session
 from snowflake.snowpark._internal.analyzer.expression import UnresolvedAttribute
 from snowflake.snowpark.types import TimestampTimeZone, TimestampType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression import (
     map_extension,
     map_udf,
@@ -38,6 +40,7 @@ from snowflake.snowpark_connect.utils.context import (
     get_current_lambda_params,
     is_function_argument_being_resolved,
     is_lambda_being_resolved,
+    not_resolving_fun_args,
 )
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
@@ -61,9 +64,11 @@ def map_alias(
         # Multi-column case: handle like explode("map").alias("key", "value")
         col_names, col = map_expression(alias.expr, column_mapping, typer)
         if len(col_names) != len(list(alias.name)):
-            raise ValueError(
+            exception = ValueError(
                 f"Found the unresolved operator: 'Project [{col_names} AS ({', '.join(list(alias.name))})]. Number of aliases ({len(list(alias.name))}) does not match number of columns ({len(col_names)})"
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception
         return list(alias.name), col

     name, col = map_single_column_expression(alias.expr, column_mapping, typer)
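The alias-count check guards multi-column expressions, where one expression expands to several output columns. For instance (`spark` as above):

```python
from pyspark.sql import functions as F

df = spark.createDataFrame([({"a": 1},)], "m map<string,int>")
df.select(F.explode("m").alias("key", "value"))  # OK: explode of a map yields two columns
df.select(F.explode("m").alias("key"))           # raises: one alias supplied for two columns
```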
@@ -136,7 +141,10 @@ def map_expression(
         case "expression_string":
             return map_sql_expr(exp, column_mapping, typer)
         case "extension":
-            return map_extension.map_extension(exp, column_mapping, typer)
+            # Extensions can be passed as function args, and we need to reset the context here.
+            # Matters only for resolving alias expressions in the extensions rel.
+            with not_resolving_fun_args():
+                return map_extension.map_extension(exp, column_mapping, typer)
         case "lambda_function":
             lambda_name, lambda_body = map_single_column_expression(
                 exp.lambda_function.function, column_mapping, typer
@@ -222,41 +230,39 @@ def map_expression(
                     | exp.sort_order.SORT_DIRECTION_ASCENDING
                 ):
                     if exp.sort_order.null_ordering == exp.sort_order.SORT_NULLS_LAST:
-                        return [child_name], snowpark_fn.asc_nulls_last(child_column)
+                        col = snowpark_fn.asc_nulls_last(child_column.col)
                     else:
                         # If nulls are not specified or null_ordering is FIRST in the sort order, Spark defaults to nulls
                         # first in the case of ascending sort order.
-                        return [child_name], snowpark_fn.asc_nulls_first(child_column)
+                        col = snowpark_fn.asc_nulls_first(child_column.col)
                 case exp.sort_order.SORT_DIRECTION_DESCENDING:
                     if exp.sort_order.null_ordering == exp.sort_order.SORT_NULLS_FIRST:
-                        return [child_name], snowpark_fn.desc_nulls_first(child_column)
+                        col = snowpark_fn.desc_nulls_first(child_column.col)
                     else:
                         # If nulls are not specified or null_ordering is LAST in the sort order, Spark defaults to nulls
                         # last in the case of descending sort order.
-                        return [child_name], snowpark_fn.desc_nulls_last(child_column)
+                        col = snowpark_fn.desc_nulls_last(child_column.col)
                 case _:
-                    raise ValueError(
+                    exception = ValueError(
                        f"Invalid sort direction {exp.sort_order.direction}"
                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
+            return [child_name], TypedColumn(col, lambda: typer.type(col))
         case "unresolved_attribute":
             col_name, col = map_att.map_unresolved_attribute(exp, column_mapping, typer)
             # Check if this is a multi-column regex expansion
             matched_cols = getattr(col, "_regex_matched_columns", list())
             if matched_cols:
                 # Create expressions for all matched columns
-                snowpark_cols = []
-                for matched_col in matched_cols:
-                    snowpark_name = (
-                        column_mapping.get_snowpark_column_name_from_spark_column_name(
-                            matched_col
-                        )
-                    )
-                    snowpark_cols.append(snowpark_name)
-
+                snowpark_cols = [c.snowpark_name for c in matched_cols]
+                spark_cols = [c.spark_name for c in matched_cols]
                 # Create a combined expression for all columns
                 col_expr = snowpark_fn.sql_expr(", ".join(snowpark_cols))
                 return (
-                    matched_cols,
+                    spark_cols,
                     TypedColumn(
                         col_expr,
                         lambda: [
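The sort defaults follow Spark: ascending sorts place NULLs first, descending sorts place NULLs last, unless a null ordering is given explicitly. For example (`spark` as above):

```python
df = spark.createDataFrame([(None,), (1,), (2,)], "x int")
df.orderBy(df.x.asc()).collect()             # NULL first (Spark's ascending default)
df.orderBy(df.x.desc()).collect()            # NULL last (Spark's descending default)
df.orderBy(df.x.asc_nulls_last()).collect()  # explicit override
```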
@@ -271,6 +277,36 @@ def map_expression(
                 )
             return [col_name], col
         case "unresolved_function":
+            from snowflake.snowpark_connect.utils.context import (
+                get_is_processing_order_by,
+            )
+
+            is_order_by = get_is_processing_order_by()
+            if is_order_by:
+                # For expressions in an order by clause check if we can reuse already-computed column.
+                if exp.unresolved_function.function_name:
+                    func_name = exp.unresolved_function.function_name
+                    available_columns = column_mapping.get_spark_columns()
+
+                    for col_name in available_columns:
+                        if (
+                            func_name.lower() in col_name.lower()
+                            and "(" in col_name
+                            and ")" in col_name
+                        ):
+                            # This looks like it might be an expression
+                            snowpark_col_name = column_mapping.get_snowpark_column_name_from_spark_column_name(
+                                col_name
+                            )
+                            if snowpark_col_name:
+                                # Optimization applied - reusing already computed column
+                                return [col_name], TypedColumn(
+                                    snowpark_fn.col(snowpark_col_name),
+                                    lambda col_name=snowpark_col_name: typer.type(
+                                        col_name
+                                    ),
+                                )
+
             return map_func.map_unresolved_function(exp, column_mapping, typer)
         case "unresolved_named_lambda_variable":
             # Validate that this lambda variable is in scope
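This heuristic targets ORDER BY clauses that repeat an expression already present in the projection, such as sorting by an aggregate (a sketch of the intended query shape; `df` and `dept` are placeholders):

```python
from pyspark.sql import functions as F

# The aggregate is projected under the Spark name "count(1)"; when the same
# function reappears in ORDER BY, the mapper can reuse that computed column
# instead of re-evaluating the aggregate.
df.groupBy("dept").agg(F.count("*")).orderBy(F.count("*"))
```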
@@ -278,13 +314,28 @@ def map_expression(
             current_params = get_current_lambda_params()

             if current_params and var_name not in current_params:
-                raise AnalysisException(
-                    f"Reference to non-lambda variable '{var_name}' within lambda function. "
-                    f"Lambda functions can only access their own parameters. "
-                    f"Available lambda parameters are: {current_params}. "
-                    f"If '{var_name}' is an outer scope lambda variable from a nested lambda, "
-                    f"that is an unsupported feature in Snowflake SQL."
+                outer_col_name = (
+                    column_mapping.get_snowpark_column_name_from_spark_column_name(
+                        var_name, allow_non_exists=True
+                    )
                 )
+                if outer_col_name:
+                    col = snowpark_fn.col(outer_col_name)
+                    return ["namedlambdavariable()"], TypedColumn(
+                        col, lambda: typer.type(col)
+                    )
+                else:
+                    exception = AnalysisException(
+                        f"Cannot resolve variable '{var_name}' within lambda function. "
+                        f"Lambda functions can access their own parameters and parent dataframe columns. "
+                        f"Current lambda parameters: {current_params}. "
+                        f"If '{var_name}' is an outer scope lambda variable from a nested lambda, "
+                        f"that is an unsupported feature in Snowflake SQL."
+                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception

             col = snowpark_fn.Column(
                 UnresolvedAttribute(exp.unresolved_named_lambda_variable.name_parts[0])
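With this change, a lambda passed to a higher-order function may reference columns of the enclosing DataFrame, not just its own parameters, matching Spark (`spark` as above):

```python
from pyspark.sql import functions as F

df = spark.createDataFrame([(10, [1, 2, 3])], "offset int, arr array<int>")
# `x` is the lambda parameter; `offset` now resolves to the parent DataFrame column.
df.select(F.transform("arr", lambda x: x + F.col("offset")).alias("shifted")).collect()
# -> [Row(shifted=[11, 12, 13])]
```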
@@ -296,10 +347,10 @@ def map_expression(
             p = exp.unresolved_regex.col_name
             pattern_str = p[1:-1] if p.startswith("`") and p.endswith("`") else p

-            mapping = column_mapping.spark_to_snowpark_for_pattern(pattern_str)
+            columns = column_mapping.get_columns_matching_pattern(pattern_str)
             spark_cols, snowpark_cols = (
-                [spark_name for spark_name, _ in mapping],
-                [snowpark_name for _, snowpark_name in mapping],
+                [c.spark_name for c in columns],
+                [c.snowpark_name for c in columns],
             )

             col_expr = snowpark_fn.sql_expr(", ".join(snowpark_cols))
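This pattern path backs `DataFrame.colRegex`, which selects every column whose backquoted name matches a regex (`spark` as above):

```python
df = spark.createDataFrame([(1, 2, 3)], "ab1 int, ab2 int, cd int")
df.select(df.colRegex("`ab.*`")).columns  # -> ['ab1', 'ab2']
```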
@@ -319,6 +370,8 @@ def map_expression(
         case "update_fields":
             return map_update_fields.map_update_fields(exp, column_mapping, typer)
         case _:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Unsupported expression type {expr_type}"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception