snowpark-connect 0.27.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
  3. snowflake/snowpark_connect/client/__init__.py +15 -0
  4. snowflake/snowpark_connect/client/error_utils.py +30 -0
  5. snowflake/snowpark_connect/client/exceptions.py +36 -0
  6. snowflake/snowpark_connect/client/query_results.py +90 -0
  7. snowflake/snowpark_connect/client/server.py +680 -0
  8. snowflake/snowpark_connect/client/utils/__init__.py +10 -0
  9. snowflake/snowpark_connect/client/utils/session.py +85 -0
  10. snowflake/snowpark_connect/column_name_handler.py +404 -243
  11. snowflake/snowpark_connect/column_qualifier.py +43 -0
  12. snowflake/snowpark_connect/config.py +237 -23
  13. snowflake/snowpark_connect/constants.py +2 -0
  14. snowflake/snowpark_connect/dataframe_container.py +102 -8
  15. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  16. snowflake/snowpark_connect/error/error_codes.py +50 -0
  17. snowflake/snowpark_connect/error/error_utils.py +172 -23
  18. snowflake/snowpark_connect/error/exceptions.py +13 -4
  19. snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
  20. snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
  21. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  22. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  23. snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
  24. snowflake/snowpark_connect/expression/literal.py +37 -13
  25. snowflake/snowpark_connect/expression/map_cast.py +123 -5
  26. snowflake/snowpark_connect/expression/map_expression.py +80 -27
  27. snowflake/snowpark_connect/expression/map_extension.py +322 -12
  28. snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
  29. snowflake/snowpark_connect/expression/map_udf.py +85 -20
  30. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
  31. snowflake/snowpark_connect/expression/map_unresolved_function.py +2748 -746
  32. snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
  33. snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
  34. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  35. snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
  36. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
  37. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
  38. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
  39. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
  40. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
  41. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
  42. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  43. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
  44. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  45. snowflake/snowpark_connect/relation/io_utils.py +110 -10
  46. snowflake/snowpark_connect/relation/map_aggregate.py +196 -255
  47. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  48. snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
  49. snowflake/snowpark_connect/relation/map_extension.py +263 -29
  50. snowflake/snowpark_connect/relation/map_join.py +683 -442
  51. snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
  52. snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
  53. snowflake/snowpark_connect/relation/map_relation.py +48 -19
  54. snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
  55. snowflake/snowpark_connect/relation/map_show_string.py +13 -6
  56. snowflake/snowpark_connect/relation/map_sql.py +1233 -222
  57. snowflake/snowpark_connect/relation/map_stats.py +48 -9
  58. snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
  59. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  60. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
  61. snowflake/snowpark_connect/relation/read/map_read.py +134 -43
  62. snowflake/snowpark_connect/relation/read/map_read_csv.py +255 -45
  63. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  64. snowflake/snowpark_connect/relation/read/map_read_json.py +320 -85
  65. snowflake/snowpark_connect/relation/read/map_read_parquet.py +142 -27
  66. snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
  67. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  68. snowflake/snowpark_connect/relation/read/map_read_table.py +82 -5
  69. snowflake/snowpark_connect/relation/read/map_read_text.py +18 -3
  70. snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
  71. snowflake/snowpark_connect/relation/read/reader_config.py +36 -3
  72. snowflake/snowpark_connect/relation/read/utils.py +50 -5
  73. snowflake/snowpark_connect/relation/stage_locator.py +91 -55
  74. snowflake/snowpark_connect/relation/utils.py +128 -5
  75. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  76. snowflake/snowpark_connect/relation/write/map_write.py +929 -319
  77. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  78. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  79. snowflake/snowpark_connect/resources_initializer.py +110 -48
  80. snowflake/snowpark_connect/server.py +546 -456
  81. snowflake/snowpark_connect/server_common/__init__.py +500 -0
  82. snowflake/snowpark_connect/snowflake_session.py +65 -0
  83. snowflake/snowpark_connect/start_server.py +53 -5
  84. snowflake/snowpark_connect/type_mapping.py +349 -27
  85. snowflake/snowpark_connect/typed_column.py +9 -7
  86. snowflake/snowpark_connect/utils/artifacts.py +9 -8
  87. snowflake/snowpark_connect/utils/cache.py +49 -27
  88. snowflake/snowpark_connect/utils/concurrent.py +36 -1
  89. snowflake/snowpark_connect/utils/context.py +187 -37
  90. snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
  91. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  92. snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
  93. snowflake/snowpark_connect/utils/identifiers.py +137 -3
  94. snowflake/snowpark_connect/utils/io_utils.py +57 -1
  95. snowflake/snowpark_connect/utils/java_stored_procedure.py +125 -0
  96. snowflake/snowpark_connect/utils/java_udaf_utils.py +303 -0
  97. snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
  98. snowflake/snowpark_connect/utils/jvm_udf_utils.py +248 -0
  99. snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
  100. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  101. snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
  102. snowflake/snowpark_connect/utils/profiling.py +25 -8
  103. snowflake/snowpark_connect/utils/scala_udf_utils.py +101 -332
  104. snowflake/snowpark_connect/utils/sequence.py +21 -0
  105. snowflake/snowpark_connect/utils/session.py +64 -28
  106. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
  107. snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
  108. snowflake/snowpark_connect/utils/telemetry.py +163 -22
  109. snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
  110. snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
  111. snowflake/snowpark_connect/utils/udf_cache.py +117 -41
  112. snowflake/snowpark_connect/utils/udf_helper.py +39 -37
  113. snowflake/snowpark_connect/utils/udf_utils.py +133 -14
  114. snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
  115. snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
  116. snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
  117. snowflake/snowpark_connect/version.py +1 -1
  118. snowflake/snowpark_decoder/dp_session.py +6 -2
  119. snowflake/snowpark_decoder/spark_decoder.py +12 -0
  120. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-submit +2 -2
  121. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/METADATA +14 -7
  122. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/RECORD +129 -167
  123. snowflake/snowpark_connect/hidden_column.py +0 -39
  124. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  125. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  126. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  127. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  128. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  129. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  130. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  131. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  132. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  133. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  134. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  135. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  136. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  137. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  138. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  139. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  140. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  141. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  142. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  143. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  144. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  145. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  146. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  147. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  148. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  149. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  150. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  151. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  152. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  153. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  154. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  155. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  156. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  157. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  158. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  159. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  160. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  161. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  162. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  163. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  164. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  165. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  166. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  167. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  168. snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
  169. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  170. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  171. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  172. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  173. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  174. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  175. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  176. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  177. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  178. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  179. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  180. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  181. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  182. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  183. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  184. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  185. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  186. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-connect +0 -0
  187. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-session +0 -0
  188. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/WHEEL +0 -0
  189. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE-binary +0 -0
  190. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE.txt +0 -0
  191. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/NOTICE-binary +0 -0
  192. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/top_level.txt +0 -0
@@ -6,11 +6,13 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
6
6
  import pyspark.sql.connect.proto.types_pb2 as types_proto
7
7
  from pyspark.errors.exceptions.base import (
8
8
  AnalysisException,
9
+ IllegalArgumentException,
9
10
  NumberFormatException,
10
11
  SparkRuntimeException,
11
12
  )
12
13
 
13
14
  import snowflake.snowpark.functions as snowpark_fn
15
+ from snowflake.snowpark.column import Column
14
16
  from snowflake.snowpark.types import (
15
17
  BinaryType,
16
18
  BooleanType,
@@ -25,12 +27,15 @@ from snowflake.snowpark.types import (
25
27
  StructType,
26
28
  TimestampTimeZone,
27
29
  TimestampType,
30
+ YearMonthIntervalType,
28
31
  _FractionalType,
29
32
  _IntegralType,
30
33
  _NumericType,
31
34
  )
32
35
  from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
33
36
  from snowflake.snowpark_connect.config import global_config
37
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
38
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
34
39
  from snowflake.snowpark_connect.expression.typer import ExpressionTyper
35
40
  from snowflake.snowpark_connect.type_mapping import (
36
41
  map_type_string_to_snowpark_type,
@@ -87,7 +92,9 @@ def map_cast(
87
92
  to_type = map_type_string_to_snowpark_type(exp.cast.type_str)
88
93
  to_type_str = exp.cast.type_str.upper()
89
94
  case _:
90
- raise ValueError("No type to cast to")
95
+ exception = ValueError("No type to cast to")
96
+ attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
97
+ raise exception
91
98
 
92
99
  from_exp = exp.cast.expr
93
100
  new_name, typed_column = map_single_column_expression(
@@ -299,10 +306,16 @@ def map_cast(
299
306
  result_exp = snowpark_fn.cast(col, to_type)
300
307
  else:
301
308
  result_exp = snowpark_fn.try_cast(col, to_type)
309
+ case (StringType(), YearMonthIntervalType()):
310
+ result_exp = _cast_string_to_year_month_interval(col, to_type)
311
+ case (YearMonthIntervalType(), StringType()):
312
+ result_exp = _cast_year_month_interval_to_string(col, from_type)
302
313
  case (StringType(), _):
303
- raise AnalysisException(
314
+ exception = AnalysisException(
304
315
  f"""[DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION] Cannot resolve "{col_name}" due to data type mismatch: cannot cast "{snowpark_to_proto_type(from_type, column_mapping)}" to "{exp.cast.type_str.upper()}".;"""
305
316
  )
317
+ attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
318
+ raise exception
306
319
  case _:
307
320
  result_exp = snowpark_fn.cast(col, to_type)
308
321
 
@@ -317,9 +330,11 @@ def sanity_check(
317
330
  """
318
331
 
319
332
  if isinstance(from_type, LongType) and isinstance(to_type, BinaryType):
320
- raise NumberFormatException(
333
+ exception = NumberFormatException(
321
334
  f"""[DATATYPE_MISMATCH.CAST_WITH_CONF_SUGGESTION] Cannot resolve "CAST({value} AS BINARY)" due to data type mismatch: cannot cast "BIGINT" to "BINARY" with ANSI mode on."""
322
335
  )
336
+ attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
337
+ raise exception
323
338
 
324
339
  if (
325
340
  from_type_cast
@@ -329,9 +344,11 @@ def sanity_check(
329
344
  if value is not None:
330
345
  value = value.strip().lower()
331
346
  if value not in {"t", "true", "f", "false", "y", "yes", "n", "no", "0", "1"}:
332
- raise SparkRuntimeException(
347
+ exception = SparkRuntimeException(
333
348
  f"""[CAST_INVALID_INPUT] The value '{value}' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error."""
334
349
  )
350
+ attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
351
+ raise exception
335
352
 
336
353
  raise_cast_failure_exception = False
337
354
  if isinstance(to_type, _IntegralType):
@@ -351,6 +368,107 @@ def sanity_check(
351
368
  except Exception:
352
369
  raise_cast_failure_exception = True
353
370
  if raise_cast_failure_exception:
354
- raise NumberFormatException(
371
+ exception = NumberFormatException(
355
372
  """[CAST_INVALID_INPUT] Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary setting "spark.sql.ansi.enabled" to "false" may bypass this error."""
356
373
  )
374
+ attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
375
+ raise exception
376
+
377
+
378
+ def _cast_string_to_year_month_interval(col: Column, to_type: YearMonthIntervalType):
379
+ """
380
+ Cast string to year-month interval type.
381
+ Supports different interval formats:
382
+ 1. '[+|-]y-m' format (e.g., '1-2', '-1-6', '+2-3') - YEAR TO MONTH
383
+ 2. '[+|-]y' format (e.g., '1', '-2', '+3') - YEAR
384
+ 3. '[+|-]m' format (e.g., '12', '-6', '+15') - MONTH
385
+ 4. 'INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH' format - extract the y-m part
386
+ 5. 'INTERVAL [+|-]'[+|-]y' YEAR' format - extract the y part
387
+ 6. 'INTERVAL [+|-]'[+|-]m' MONTH' format - extract the m part
388
+ """
389
+ from snowflake.snowpark_connect.expression.map_unresolved_function import (
390
+ _raise_error_helper,
391
+ )
392
+
393
+ # Extract values from different formats
394
+ value = snowpark_fn.regexp_extract(col, "'([^']+)'", 1)
395
+ years = snowpark_fn.regexp_extract(col, "^[+-]?\\d+", 0)
396
+ months = snowpark_fn.regexp_extract(col, "-(\\d+)$", 1)
397
+ raise_error = _raise_error_helper(to_type, IllegalArgumentException)
398
+
399
+ # For MONTH-only intervals, treat the input as months
400
+ if (
401
+ to_type.start_field == YearMonthIntervalType.MONTH
402
+ and to_type.end_field == YearMonthIntervalType.MONTH
403
+ ):
404
+ months = years
405
+ years = snowpark_fn.lit(0)
406
+
407
+ # Define overflow limits based on Snowflake's INTERVAL limits
408
+ # Maximum year-month interval is 178956970-7 (positive) and -178956970-8 (negative)
409
+ max_years = snowpark_fn.lit(178956970)
410
+ max_months_positive = snowpark_fn.lit(7)
411
+ max_months_negative = snowpark_fn.lit(8)
412
+
413
+ return snowpark_fn.when(
414
+ col.like("INTERVAL % YEAR TO MONTH")
415
+ | col.like("INTERVAL % YEAR")
416
+ | col.like("INTERVAL % MONTH"),
417
+ value.cast(to_type),
418
+ ).when(
419
+ col.rlike("^[+-]?\\d+(-\\d+)?$"),
420
+ snowpark_fn.when(
421
+ # Check for overflow conditions
422
+ ((years >= max_years) & (months > max_months_positive))
423
+ | (years > max_years)
424
+ | ((years <= -max_years) & (months > max_months_negative))
425
+ | (years < -max_years),
426
+ raise_error(snowpark_fn.lit("Error parsing interval year-month string")),
427
+ ).otherwise(col.cast(to_type)),
428
+ )
429
+
430
+
431
+ def _cast_year_month_interval_to_string(col: Column, from_type: YearMonthIntervalType):
432
+ """
433
+ Cast year-month interval to string.
434
+ Returns format like 'INTERVAL '1-2' YEAR TO MONTH' for year-month intervals.
435
+ """
436
+ years = snowpark_fn.date_part("YEAR", col)
437
+ months = snowpark_fn.date_part("MONTH", col)
438
+
439
+ total_months = years * 12 + months
440
+
441
+ start_field = from_type.start_field # YEAR
442
+ end_field = from_type.end_field # MONTH
443
+
444
+ def _format_interval_udf(
445
+ total_months: int, start_field: int, end_field: int
446
+ ) -> str:
447
+ is_negative = total_months < 0
448
+ abs_months = abs(total_months)
449
+ years = abs_months // 12
450
+ months = abs_months % 12
451
+
452
+ is_year_only = start_field == 0 and end_field == 0
453
+ is_month_only = start_field == 1 and end_field == 1
454
+
455
+ if is_year_only:
456
+ sign = "-" if is_negative else ""
457
+ return f"INTERVAL '{sign}{years}' YEAR"
458
+ elif is_month_only:
459
+ return f"INTERVAL '{total_months}' MONTH"
460
+ else: # YEAR TO MONTH
461
+ if is_negative:
462
+ return f"INTERVAL '-{years}-{months}' YEAR TO MONTH"
463
+ else:
464
+ return f"INTERVAL '{years}-{months}' YEAR TO MONTH"
465
+
466
+ format_udf = cached_udf(
467
+ _format_interval_udf,
468
+ input_types=[IntegerType(), IntegerType(), IntegerType()],
469
+ return_type=StringType(),
470
+ )
471
+
472
+ return format_udf(
473
+ total_months, snowpark_fn.lit(start_field), snowpark_fn.lit(end_field)
474
+ )
@@ -14,6 +14,8 @@ from snowflake.snowpark import Session
14
14
  from snowflake.snowpark._internal.analyzer.expression import UnresolvedAttribute
15
15
  from snowflake.snowpark.types import TimestampTimeZone, TimestampType
16
16
  from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
17
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
18
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
17
19
  from snowflake.snowpark_connect.expression import (
18
20
  map_extension,
19
21
  map_udf,
@@ -38,6 +40,7 @@ from snowflake.snowpark_connect.utils.context import (
38
40
  get_current_lambda_params,
39
41
  is_function_argument_being_resolved,
40
42
  is_lambda_being_resolved,
43
+ not_resolving_fun_args,
41
44
  )
42
45
  from snowflake.snowpark_connect.utils.telemetry import (
43
46
  SnowparkConnectNotImplementedError,
@@ -61,9 +64,11 @@ def map_alias(
61
64
  # Multi-column case: handle like explode("map").alias("key", "value")
62
65
  col_names, col = map_expression(alias.expr, column_mapping, typer)
63
66
  if len(col_names) != len(list(alias.name)):
64
- raise ValueError(
67
+ exception = ValueError(
65
68
  f"Found the unresolved operator: 'Project [{col_names} AS ({', '.join(list(alias.name))})]. Number of aliases ({len(list(alias.name))}) does not match number of columns ({len(col_names)})"
66
69
  )
70
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
71
+ raise exception
67
72
  return list(alias.name), col
68
73
 
69
74
  name, col = map_single_column_expression(alias.expr, column_mapping, typer)
@@ -136,7 +141,10 @@ def map_expression(
136
141
  case "expression_string":
137
142
  return map_sql_expr(exp, column_mapping, typer)
138
143
  case "extension":
139
- return map_extension.map_extension(exp, column_mapping, typer)
144
+ # Extensions can be passed as function args, and we need to reset the context here.
145
+ # Matters only for resolving alias expressions in the extensions rel.
146
+ with not_resolving_fun_args():
147
+ return map_extension.map_extension(exp, column_mapping, typer)
140
148
  case "lambda_function":
141
149
  lambda_name, lambda_body = map_single_column_expression(
142
150
  exp.lambda_function.function, column_mapping, typer
@@ -222,41 +230,39 @@ def map_expression(
222
230
  | exp.sort_order.SORT_DIRECTION_ASCENDING
223
231
  ):
224
232
  if exp.sort_order.null_ordering == exp.sort_order.SORT_NULLS_LAST:
225
- return [child_name], snowpark_fn.asc_nulls_last(child_column)
233
+ col = snowpark_fn.asc_nulls_last(child_column.col)
226
234
  else:
227
235
  # If nulls are not specified or null_ordering is FIRST in the sort order, Spark defaults to nulls
228
236
  # first in the case of ascending sort order.
229
- return [child_name], snowpark_fn.asc_nulls_first(child_column)
237
+ col = snowpark_fn.asc_nulls_first(child_column.col)
230
238
  case exp.sort_order.SORT_DIRECTION_DESCENDING:
231
239
  if exp.sort_order.null_ordering == exp.sort_order.SORT_NULLS_FIRST:
232
- return [child_name], snowpark_fn.desc_nulls_first(child_column)
240
+ col = snowpark_fn.desc_nulls_first(child_column.col)
233
241
  else:
234
242
  # If nulls are not specified or null_ordering is LAST in the sort order, Spark defaults to nulls
235
243
  # last in the case of descending sort order.
236
- return [child_name], snowpark_fn.desc_nulls_last(child_column)
244
+ col = snowpark_fn.desc_nulls_last(child_column.col)
237
245
  case _:
238
- raise ValueError(
246
+ exception = ValueError(
239
247
  f"Invalid sort direction {exp.sort_order.direction}"
240
248
  )
249
+ attach_custom_error_code(
250
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
251
+ )
252
+ raise exception
253
+ return [child_name], TypedColumn(col, lambda: typer.type(col))
241
254
  case "unresolved_attribute":
242
255
  col_name, col = map_att.map_unresolved_attribute(exp, column_mapping, typer)
243
256
  # Check if this is a multi-column regex expansion
244
257
  matched_cols = getattr(col, "_regex_matched_columns", list())
245
258
  if matched_cols:
246
259
  # Create expressions for all matched columns
247
- snowpark_cols = []
248
- for matched_col in matched_cols:
249
- snowpark_name = (
250
- column_mapping.get_snowpark_column_name_from_spark_column_name(
251
- matched_col
252
- )
253
- )
254
- snowpark_cols.append(snowpark_name)
255
-
260
+ snowpark_cols = [c.snowpark_name for c in matched_cols]
261
+ spark_cols = [c.spark_name for c in matched_cols]
256
262
  # Create a combined expression for all columns
257
263
  col_expr = snowpark_fn.sql_expr(", ".join(snowpark_cols))
258
264
  return (
259
- matched_cols,
265
+ spark_cols,
260
266
  TypedColumn(
261
267
  col_expr,
262
268
  lambda: [
@@ -271,6 +277,36 @@ def map_expression(
271
277
  )
272
278
  return [col_name], col
273
279
  case "unresolved_function":
280
+ from snowflake.snowpark_connect.utils.context import (
281
+ get_is_processing_order_by,
282
+ )
283
+
284
+ is_order_by = get_is_processing_order_by()
285
+ if is_order_by:
286
+ # For expressions in an order by clause check if we can reuse already-computed column.
287
+ if exp.unresolved_function.function_name:
288
+ func_name = exp.unresolved_function.function_name
289
+ available_columns = column_mapping.get_spark_columns()
290
+
291
+ for col_name in available_columns:
292
+ if (
293
+ func_name.lower() in col_name.lower()
294
+ and "(" in col_name
295
+ and ")" in col_name
296
+ ):
297
+ # This looks like it might be an expression
298
+ snowpark_col_name = column_mapping.get_snowpark_column_name_from_spark_column_name(
299
+ col_name
300
+ )
301
+ if snowpark_col_name:
302
+ # Optimization applied - reusing already computed column
303
+ return [col_name], TypedColumn(
304
+ snowpark_fn.col(snowpark_col_name),
305
+ lambda col_name=snowpark_col_name: typer.type(
306
+ col_name
307
+ ),
308
+ )
309
+
274
310
  return map_func.map_unresolved_function(exp, column_mapping, typer)
275
311
  case "unresolved_named_lambda_variable":
276
312
  # Validate that this lambda variable is in scope
@@ -278,13 +314,28 @@ def map_expression(
278
314
  current_params = get_current_lambda_params()
279
315
 
280
316
  if current_params and var_name not in current_params:
281
- raise AnalysisException(
282
- f"Reference to non-lambda variable '{var_name}' within lambda function. "
283
- f"Lambda functions can only access their own parameters. "
284
- f"Available lambda parameters are: {current_params}. "
285
- f"If '{var_name}' is an outer scope lambda variable from a nested lambda, "
286
- f"that is an unsupported feature in Snowflake SQL."
317
+ outer_col_name = (
318
+ column_mapping.get_snowpark_column_name_from_spark_column_name(
319
+ var_name, allow_non_exists=True
320
+ )
287
321
  )
322
+ if outer_col_name:
323
+ col = snowpark_fn.col(outer_col_name)
324
+ return ["namedlambdavariable()"], TypedColumn(
325
+ col, lambda: typer.type(col)
326
+ )
327
+ else:
328
+ exception = AnalysisException(
329
+ f"Cannot resolve variable '{var_name}' within lambda function. "
330
+ f"Lambda functions can access their own parameters and parent dataframe columns. "
331
+ f"Current lambda parameters: {current_params}. "
332
+ f"If '{var_name}' is an outer scope lambda variable from a nested lambda, "
333
+ f"that is an unsupported feature in Snowflake SQL."
334
+ )
335
+ attach_custom_error_code(
336
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
337
+ )
338
+ raise exception
288
339
 
289
340
  col = snowpark_fn.Column(
290
341
  UnresolvedAttribute(exp.unresolved_named_lambda_variable.name_parts[0])
@@ -296,10 +347,10 @@ def map_expression(
296
347
  p = exp.unresolved_regex.col_name
297
348
  pattern_str = p[1:-1] if p.startswith("`") and p.endswith("`") else p
298
349
 
299
- mapping = column_mapping.spark_to_snowpark_for_pattern(pattern_str)
350
+ columns = column_mapping.get_columns_matching_pattern(pattern_str)
300
351
  spark_cols, snowpark_cols = (
301
- [spark_name for spark_name, _ in mapping],
302
- [snowpark_name for _, snowpark_name in mapping],
352
+ [c.spark_name for c in columns],
353
+ [c.snowpark_name for c in columns],
303
354
  )
304
355
 
305
356
  col_expr = snowpark_fn.sql_expr(", ".join(snowpark_cols))
@@ -319,6 +370,8 @@ def map_expression(
319
370
  case "update_fields":
320
371
  return map_update_fields.map_update_fields(exp, column_mapping, typer)
321
372
  case _:
322
- raise SnowparkConnectNotImplementedError(
373
+ exception = SnowparkConnectNotImplementedError(
323
374
  f"Unsupported expression type {expr_type}"
324
375
  )
376
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
377
+ raise exception