snowpark-connect 0.27.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
  3. snowflake/snowpark_connect/client/__init__.py +15 -0
  4. snowflake/snowpark_connect/client/error_utils.py +30 -0
  5. snowflake/snowpark_connect/client/exceptions.py +36 -0
  6. snowflake/snowpark_connect/client/query_results.py +90 -0
  7. snowflake/snowpark_connect/client/server.py +717 -0
  8. snowflake/snowpark_connect/client/utils/__init__.py +10 -0
  9. snowflake/snowpark_connect/client/utils/session.py +85 -0
  10. snowflake/snowpark_connect/column_name_handler.py +404 -243
  11. snowflake/snowpark_connect/column_qualifier.py +43 -0
  12. snowflake/snowpark_connect/config.py +309 -26
  13. snowflake/snowpark_connect/constants.py +2 -0
  14. snowflake/snowpark_connect/dataframe_container.py +102 -8
  15. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  16. snowflake/snowpark_connect/error/error_codes.py +50 -0
  17. snowflake/snowpark_connect/error/error_utils.py +172 -23
  18. snowflake/snowpark_connect/error/exceptions.py +13 -4
  19. snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
  20. snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
  21. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  22. snowflake/snowpark_connect/expression/error_utils.py +28 -0
  23. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  24. snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
  25. snowflake/snowpark_connect/expression/integral_types_support.py +219 -0
  26. snowflake/snowpark_connect/expression/literal.py +37 -13
  27. snowflake/snowpark_connect/expression/map_cast.py +224 -15
  28. snowflake/snowpark_connect/expression/map_expression.py +80 -27
  29. snowflake/snowpark_connect/expression/map_extension.py +322 -12
  30. snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
  31. snowflake/snowpark_connect/expression/map_udf.py +86 -20
  32. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
  33. snowflake/snowpark_connect/expression/map_unresolved_function.py +2964 -829
  34. snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
  35. snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
  36. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  37. snowflake/snowpark_connect/includes/jars/json4s-ast_2.13-3.7.0-M11.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
  39. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.13-0.2.0.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/scala-reflect-2.13.16.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.13-3.5.6.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/{spark-connect-client-jvm_2.12-3.5.6.jar → spark-connect-client-jvm_2.13-3.5.6.jar} +0 -0
  43. snowflake/snowpark_connect/includes/jars/{spark-sql_2.12-3.5.6.jar → spark-sql_2.13-3.5.6.jar} +0 -0
  44. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
  45. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
  46. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
  47. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
  48. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
  49. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
  50. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  51. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
  52. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  53. snowflake/snowpark_connect/relation/io_utils.py +110 -10
  54. snowflake/snowpark_connect/relation/map_aggregate.py +239 -256
  55. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  56. snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
  57. snowflake/snowpark_connect/relation/map_extension.py +263 -29
  58. snowflake/snowpark_connect/relation/map_join.py +683 -442
  59. snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
  60. snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
  61. snowflake/snowpark_connect/relation/map_relation.py +48 -19
  62. snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
  63. snowflake/snowpark_connect/relation/map_show_string.py +13 -6
  64. snowflake/snowpark_connect/relation/map_sql.py +1233 -222
  65. snowflake/snowpark_connect/relation/map_stats.py +48 -9
  66. snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
  67. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  68. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
  69. snowflake/snowpark_connect/relation/read/map_read.py +134 -43
  70. snowflake/snowpark_connect/relation/read/map_read_csv.py +326 -47
  71. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +21 -6
  72. snowflake/snowpark_connect/relation/read/map_read_json.py +324 -86
  73. snowflake/snowpark_connect/relation/read/map_read_parquet.py +146 -28
  74. snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
  75. snowflake/snowpark_connect/relation/read/map_read_socket.py +15 -3
  76. snowflake/snowpark_connect/relation/read/map_read_table.py +86 -6
  77. snowflake/snowpark_connect/relation/read/map_read_text.py +22 -4
  78. snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
  79. snowflake/snowpark_connect/relation/read/reader_config.py +42 -3
  80. snowflake/snowpark_connect/relation/read/utils.py +50 -5
  81. snowflake/snowpark_connect/relation/stage_locator.py +91 -55
  82. snowflake/snowpark_connect/relation/utils.py +128 -5
  83. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  84. snowflake/snowpark_connect/relation/write/map_write.py +929 -319
  85. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  86. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  87. snowflake/snowpark_connect/resources_initializer.py +171 -48
  88. snowflake/snowpark_connect/server.py +528 -473
  89. snowflake/snowpark_connect/server_common/__init__.py +503 -0
  90. snowflake/snowpark_connect/snowflake_session.py +65 -0
  91. snowflake/snowpark_connect/start_server.py +53 -5
  92. snowflake/snowpark_connect/type_mapping.py +349 -27
  93. snowflake/snowpark_connect/type_support.py +130 -0
  94. snowflake/snowpark_connect/typed_column.py +9 -7
  95. snowflake/snowpark_connect/utils/artifacts.py +9 -8
  96. snowflake/snowpark_connect/utils/cache.py +49 -27
  97. snowflake/snowpark_connect/utils/concurrent.py +36 -1
  98. snowflake/snowpark_connect/utils/context.py +195 -37
  99. snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
  100. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  101. snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
  102. snowflake/snowpark_connect/utils/identifiers.py +137 -3
  103. snowflake/snowpark_connect/utils/io_utils.py +57 -1
  104. snowflake/snowpark_connect/utils/java_stored_procedure.py +151 -0
  105. snowflake/snowpark_connect/utils/java_udaf_utils.py +321 -0
  106. snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
  107. snowflake/snowpark_connect/utils/jvm_udf_utils.py +281 -0
  108. snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
  109. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  110. snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
  111. snowflake/snowpark_connect/utils/profiling.py +25 -8
  112. snowflake/snowpark_connect/utils/scala_udf_utils.py +185 -340
  113. snowflake/snowpark_connect/utils/sequence.py +21 -0
  114. snowflake/snowpark_connect/utils/session.py +64 -28
  115. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
  116. snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
  117. snowflake/snowpark_connect/utils/telemetry.py +192 -40
  118. snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
  119. snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
  120. snowflake/snowpark_connect/utils/udf_cache.py +117 -41
  121. snowflake/snowpark_connect/utils/udf_helper.py +39 -37
  122. snowflake/snowpark_connect/utils/udf_utils.py +133 -14
  123. snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
  124. snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
  125. snowflake/snowpark_connect/utils/udxf_import_utils.py +9 -2
  126. snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
  127. snowflake/snowpark_connect/version.py +1 -1
  128. snowflake/snowpark_decoder/dp_session.py +6 -2
  129. snowflake/snowpark_decoder/spark_decoder.py +12 -0
  130. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-submit +14 -4
  131. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/METADATA +16 -7
  132. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/RECORD +139 -168
  133. snowflake/snowpark_connect/hidden_column.py +0 -39
  134. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  135. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  136. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  137. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  138. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  139. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  140. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  141. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  142. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  143. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  144. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  145. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  146. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  147. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  148. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  149. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  150. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  151. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  152. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  153. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  154. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  155. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  156. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  157. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  158. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  159. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  160. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  161. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  162. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  163. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  164. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  165. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  166. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  167. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  168. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  169. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  170. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  171. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  172. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  173. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  174. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  175. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  176. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  177. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  178. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  179. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  180. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  181. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  182. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  183. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  184. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  185. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  186. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  187. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  188. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  189. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  190. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  191. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  192. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  193. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  194. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-connect +0 -0
  195. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-session +0 -0
  196. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/WHEEL +0 -0
  197. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE-binary +0 -0
  198. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
  199. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/NOTICE-binary +0 -0
  200. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/expression/map_unresolved_attribute.py

@@ -3,21 +3,27 @@
 #
 
 import re
+from typing import Any, Optional
 
 import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
 from pyspark.errors.exceptions.connect import AnalysisException
 
-import snowflake.snowpark.functions as snowpark_fn
+from snowflake.snowpark import Column, functions as snowpark_fn
 from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     quote_name_without_upper_casing,
 )
 from snowflake.snowpark.exceptions import SnowparkSQLException
-from snowflake.snowpark.types import ArrayType, LongType, MapType, StructType
-from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+from snowflake.snowpark.types import ArrayType, DataType, LongType, MapType, StructType
+from snowflake.snowpark_connect.column_name_handler import ColumnNameMap, ColumnNames
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+from snowflake.snowpark_connect.expression.map_sql_expression import NILARY_FUNCTIONS
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import (
+    capture_attribute_name,
     get_current_grouping_columns,
     get_is_evaluating_sql,
     get_outer_dataframes,
@@ -67,6 +73,382 @@ def _get_catalog_database_from_column_map(
     return catalog_database_info
 
 
+def _resolve_struct_field(
+    path: list[str], col: Column, typer: ExpressionTyper
+) -> Column:
+    try:
+        col_type = typer.type(col)[0]
+    except SnowparkSQLException as e:
+        if e.raw_message is not None and "invalid identifier" in e.raw_message:
+            exception = AnalysisException(
+                f'[COLUMN_NOT_FOUND] The column "{path[0]}" does not exist in the target dataframe.'
+            )
+            attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+            raise exception
+        else:
+            raise
+
+    field_path = path[1:]
+    if not global_config.spark_sql_caseSensitive:
+        field_path = _match_path_to_struct(field_path, col_type)
+
+    for field_name in field_path:
+        col = col.getItem(field_name)
+
+    return col
+
+
+def _try_resolve_column_in_scopes(
+    column_name: str,
+    column_mapping: ColumnNameMap,
+    original_snowpark_name: Optional[str] = None,
+) -> tuple[str | None, ColumnNameMap | None, ExpressionTyper | None]:
+    """
+    Try to resolve a column name in current and outer scopes.
+
+    Args:
+        column_name: The column name to resolve
+        column_mapping: The column mapping for the current scope
+        original_snowpark_name: target df snowpark name when we resolve a specific plan id
+
+    Returns:
+        Tuple of (snowpark_name, found_column_map, found_typer) or (None, None, None) if not found
+    """
+    # Try current scope
+    snowpark_name = column_mapping.get_snowpark_column_name_from_spark_column_name(
+        column_name,
+        allow_non_exists=True,
+        original_snowpark_name=original_snowpark_name,
+    )
+    if snowpark_name is not None:
+        return snowpark_name, column_mapping, None
+
+    # Try outer scopes
+    for outer_df in get_outer_dataframes():
+        snowpark_name = (
+            outer_df.column_map.get_snowpark_column_name_from_spark_column_name(
+                column_name,
+                allow_non_exists=True,
+                original_snowpark_name=original_snowpark_name,
+            )
+        )
+        if snowpark_name is not None:
+            return (
+                snowpark_name,
+                outer_df.column_map,
+                ExpressionTyper(outer_df.dataframe),
+            )
+
+    return None, None, None
+
+
+def _find_column_with_qualifier_match(
+    name_parts: list[str],
+    column_mapping: ColumnNameMap,
+) -> tuple[int, str | None, Any]:
+    """
+    Find the column position in name_parts where the prefix matches a qualifier.
+
+    In Spark, table qualifiers have at most 3 parts:
+    - 1 part: table only (e.g., 't1') → ColumnQualifier(('t1',))
+    - 2 parts: database.table (e.g., 'mydb.t5') → ColumnQualifier(('mydb', 't5'))
+    - 3 parts: catalog.database.table (e.g., 'cat.mydb.t5') → ColumnQualifier(('cat', 'mydb', 't5'))
+
+    Examples of how this works (suffix matching):
+    1) Input: "mydb1.t5.t5.i1" with qualifier ('mydb1', 't5')
+       - At i=2: prefix=['mydb1','t5'], matches qualifier suffix ('mydb1', 't5') → Column found!
+       - Remaining ['i1'] is treated as field access
+
+    2) Input: "t5.t5.i1" with qualifier ('mydb1', 't5')
+       - At i=1: prefix=['t5'], matches qualifier suffix ('t5',) → Column found!
+       - Remaining ['i1'] is treated as field access
+
+    3) Input: "cat.mydb.t5.t5.i1" with qualifier ('cat', 'mydb', 't5')
+       - At i=3: prefix=['cat','mydb','t5'], matches qualifier suffix → Column found!
+       - Remaining ['i1'] is treated as field access
+
+    The key insight: if the prefix before a candidate matches the END (suffix) of a qualifier,
+    then that position is the column reference. This allows partial qualification (e.g., just table
+    name instead of full database.table)
+
+    Args:
+        name_parts: The parts of the qualified name (e.g., ['mydb1', 't5', 't5', 'i1'])
+        column_mapping: The column mapping to resolve columns against
+
+    Returns:
+        Tuple of (column_part_index, snowpark_name, found_column_map)
+        Returns (0, None, None) if no valid column found
+
+    Raises:
+        AnalysisException: If a column is found but with invalid qualifier (scope violation)
+    """
+    # Track if we found a column but with wrong qualifier (scope violation)
+    scope_violation = None
+
+    for i in range(len(name_parts)):
+        candidate_column = name_parts[i]
+        snowpark_name, found_column_map, _ = _try_resolve_column_in_scopes(
+            candidate_column, column_mapping
+        )
+
+        if snowpark_name is not None:
+            candidate_qualifiers = found_column_map.get_qualifiers_for_snowpark_column(
+                snowpark_name
+            )
+            prefix_parts = name_parts[:i]
+
+            # Check if this is a valid column reference position
+            # A valid position is where the prefix exactly matches one of the qualifiers
+            is_valid_reference = False
+
+            if i == 0:
+                # No prefix (unqualified access)
+                # Always valid - Spark allows unqualified access to any column
+                # The remaining parts (name_parts[1:]) will be treated as
+                # struct/map/array field access (e.g., "person.address.city" where
+                # person is the column and address.city is the field path)
+                is_valid_reference = True
+            else:
+                # Has prefix - check if it matches the end (suffix) of any qualifier
+                # Spark allows partial qualification, so for qualifier ('mydb1', 't5'):
+                # - Can access as mydb1.t5.t5.i1 (full qualifier match)
+                # - Can access as t5.t5.i1 (suffix match - just table part)
+                # e.g., for "t5.t5.i1", when i=1, prefix=['t5'] matches suffix of ('mydb1', 't5')
+                # If valid, the remaining parts (name_parts[i+1:]) will be treated as
+                # struct/map/array field access (e.g., ['i1'] is a field in column t5)
+                for qual in candidate_qualifiers:
+                    if len(qual.parts) >= len(prefix_parts) and qual.parts[
+                        -len(prefix_parts) :
+                    ] == tuple(prefix_parts):
+                        is_valid_reference = True
+                        break
+
+            if is_valid_reference:
+                # This is the actual column reference
+                return (i, snowpark_name, found_column_map)
+            elif i > 0:
+                # Found column but qualifier doesn't match - this is a scope violation
+                # e.g., SELECT nt1.k where k exists but nt1 is not its qualifier
+                attr_name = ".".join(name_parts)
+                scope_violation = (attr_name, ".".join(prefix_parts))
+
+    # If we detected a scope violation, throw error
+    if scope_violation:
+        attr_name, invalid_qualifier = scope_violation
+        exception = AnalysisException(
+            f'[UNRESOLVED_COLUMN] Column "{attr_name}" cannot be resolved. '
+            f'The table or alias "{invalid_qualifier}" is not in scope or does not exist.'
+        )
+        attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+        raise exception
+
+    # No valid column found
+    return (0, None, None)
+
+
+def _get_quoted_attr_name(name_parts: list[str]) -> str:
+    quoted_attr_name = ".".join(
+        quote_name_without_upper_casing(x) for x in name_parts[:-1]
+    )
+    if len(name_parts) > 1:
+        quoted_attr_name = f"{quoted_attr_name}.{name_parts[-1]}"
+    else:
+        quoted_attr_name = name_parts[0]
+    return quoted_attr_name
+
+
+def _attribute_is_regex(original_attr_name: str) -> bool:
+    return (
+        get_is_evaluating_sql()
+        and global_config.spark_sql_parser_quotedRegexColumnNames
+        and SPARK_QUOTED.match(original_attr_name)
+    )
+
+
+def _get_matching_columns(
+    column_mapping: ColumnNameMap, pattern: str
+) -> list[ColumnNames]:
+    # Match the regex pattern against available columns
+    matched_columns = column_mapping.get_columns_matching_pattern(pattern)
+
+    if not matched_columns:
+        # Get all available column names from the column mapping
+        available_columns = column_mapping.get_spark_columns()
+        # Keep the improved error message for SQL regex patterns
+        # This is only hit for SQL queries like SELECT `(e|f)` FROM table
+        # when spark.sql.parser.quotedRegexColumnNames is enabled
+        exception = AnalysisException(
+            f"No columns match the regex pattern '{pattern}'. "
+            f"Snowflake SQL does not support SELECT statements with no columns. "
+            f"Please ensure your regex pattern matches at least one column. "
+            f"Available columns: {', '.join(available_columns[:10])}{'...' if len(available_columns) > 10 else ''}"
+        )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
+
+    return matched_columns
+
+
+def _resolve_matched_columns(
+    matched_columns: list[ColumnNames],
+    typer: ExpressionTyper,
+):
+    # When multiple columns match, we need to signal that this should expand to multiple columns
+    # Since map_unresolved_attribute can only return one column, we'll use a special marker
+    # to indicate that this is a multi-column regex expansion
+    if len(matched_columns) > 1:
+        # Create a special column name that indicates multi-column expansion
+        # The higher-level logic will need to handle this
+        multi_col_name = "__REGEX_MULTI_COL__"
+        # For now, return the first column but mark it specially
+        first_col = matched_columns[0]
+        snowpark_name = first_col.snowpark_name
+        col = snowpark_fn.col(snowpark_name)
+        qualifiers = first_col.qualifiers
+        typed_col = TypedColumn(col, lambda: typer.type(col))
+        typed_col.set_qualifiers(qualifiers)
+        # Store matched columns info for later use
+        typed_col._regex_matched_columns = matched_columns
+        return multi_col_name, typed_col
+    else:
+        # Single column match - return that column
+        matched_col = matched_columns[0]
+        snowpark_name = matched_col.snowpark_name
+        col = snowpark_fn.col(snowpark_name)
+        qualifiers = matched_col.qualifiers
+        typed_col = TypedColumn(col, lambda: typer.type(col))
+        typed_col.set_qualifiers(qualifiers)
+        return matched_col.spark_name, typed_col
+
+
+def _resolve_attribute_with_original_snowpark_name(
+    path: list[str],
+    current_column_mapping: ColumnNameMap,
+    typer: ExpressionTyper,
+    original_snowpark_name: str,
+) -> TypedColumn:
+    # if the column was found in the target dataframe
+    # we need to find its snowpark name in the current column mapping or any outer scope
+    # it can be the same name or an equivalent after a join rename
+    spark_name = path[0]
+    (
+        matching_snowpark_name,
+        found_column_mapping,
+        found_typer,
+    ) = _try_resolve_column_in_scopes(
+        spark_name,
+        current_column_mapping,
+        original_snowpark_name=original_snowpark_name,
+    )
+
+    if not matching_snowpark_name:
+        # the column doesn't exist in the current dataframe
+        exception = AnalysisException(
+            f'[RESOLVED_REFERENCE_COLUMN_NOT_FOUND] The column "{spark_name}" does not exist in the target dataframe.'
+        )
+        attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+        raise exception
+
+    # we need to use the typer for the dataframe where the column was resolved
+    found_typer = found_typer if found_typer else typer
+
+    col = snowpark_fn.col(matching_snowpark_name)
+    if len(path) > 1:
+        col = _resolve_struct_field(path, col, found_typer)
+        # no qualifiers for struct fields
+        return TypedColumn(col, lambda: found_typer.type(col))
+
+    typed_col = TypedColumn(col, lambda: found_typer.type(col))
+    typed_col.set_qualifiers(
+        found_column_mapping.get_qualifiers_for_snowpark_column(matching_snowpark_name)
+    )
+    return typed_col
+
+
+def _resolve_attribute_regex_with_plan_id(
+    pattern: str,
+    target_df_container: DataFrameContainer,
+    current_column_mapping: ColumnNameMap,
+    typer: ExpressionTyper,
+) -> tuple[str, TypedColumn]:
+    """
+    Resolves all columns matching the given pattern in the target dataframe
+    """
+    target_column_mapping = target_df_container.column_map
+    # find all matching columns
+    matched_columns = _get_matching_columns(target_column_mapping, pattern)
+
+    if len(matched_columns) == 1 and target_column_mapping.has_spark_column(pattern):
+        # if the pattern is just the column name, we resolve the column using its equivalent snowpark name
+        spark_name = matched_columns[0].spark_name
+        snowpark_name = matched_columns[0].snowpark_name
+        return spark_name, _resolve_attribute_with_original_snowpark_name(
+            [spark_name], current_column_mapping, typer, snowpark_name
+        )
+
+    # if the pattern is not an exact match for an existing column, we don't want to use equivalent snowpark names
+    # and we just check if the matched columns exist in the current mapping
+    available_snowpark_columns = current_column_mapping.get_snowpark_columns()
+    matched_columns = [
+        c for c in matched_columns if c.snowpark_name in available_snowpark_columns
+    ]
+    if len(matched_columns) == 0:
+        return "", TypedColumn.empty()
+    return _resolve_matched_columns(matched_columns, typer)
+
+
+def _resolve_attribute_with_plan_id(
+    path: list[str],
+    target_df_container: DataFrameContainer,
+    current_column_mapping: ColumnNameMap,
+    typer: ExpressionTyper,
+) -> tuple[str, TypedColumn]:
+    """
+    Resolves a given spark name with a specific plan_id to the equivalent snowpark column in
+    the target dataframe
+    """
+    target_column_mapping = target_df_container.column_map
+
+    quoted_attr_name = _get_quoted_attr_name(path)
+
+    # Try to resolve the full qualified name first
+    # TODO: implement better mechanism for matching qualified names
+    snowpark_name, found_column_map, _ = _try_resolve_column_in_scopes(
+        quoted_attr_name, target_column_mapping
+    )
+
+    if snowpark_name:
+        # we don't need the qualifiers anymore, since the original snowpark name is enough to disambiguate
+        spark_name = path[-1]
+        path = [spark_name]
+    else:
+        # in some cases the column can be qualified, so we have to match qualifiers as well
+        (
+            column_part_index,
+            snowpark_name,
+            found_column_map,
+        ) = _find_column_with_qualifier_match(path, target_column_mapping)
+        # extract the column name, and remove qualifiers
+        spark_name = path[column_part_index]
+        path = path[column_part_index:]
+
+    if not snowpark_name or found_column_map is not target_column_mapping:
+        # if the column doesn't exist in the plan_id dataframe, we don't need to look further
+        exception = AnalysisException(
+            f'[RESOLVED_REFERENCE_COLUMN_NOT_FOUND] The column "{spark_name}" does not exist in the target dataframe.'
+        )
+        attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+        raise exception
+
+    matching_snowpark_col = _resolve_attribute_with_original_snowpark_name(
+        path, current_column_mapping, typer, snowpark_name
+    )
+
+    # if resolving a struct field, we need to return the field name
+    # that's why this is path[-1] and not spark_name
+    return path[-1], matching_snowpark_col
+
+
 def map_unresolved_attribute(
     exp: expressions_proto.Expression,
     column_mapping: ColumnNameMap,
@@ -74,6 +456,7 @@ def map_unresolved_attribute(
 ) -> tuple[str, TypedColumn]:
     original_attr_name = exp.unresolved_attribute.unparsed_identifier
     name_parts = split_fully_qualified_spark_name(original_attr_name)
+    attribute_is_regex = _attribute_is_regex(original_attr_name)
 
     assert len(name_parts) > 0, f"Unable to parse input attribute: {original_attr_name}"
 
@@ -85,9 +468,11 @@ def map_unresolved_attribute(
         grouping_spark_columns = get_current_grouping_columns()
         if not grouping_spark_columns:
             # grouping__id can only be used with GROUP BY CUBE/ROLLUP/GROUPING SETS
-            raise AnalysisException(
+            exception = AnalysisException(
                 "[MISSING_GROUP_BY] grouping__id can only be used with GROUP BY (CUBE | ROLLUP | GROUPING SETS)"
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+            raise exception
         # Convert to GROUPING_ID() function call with the grouping columns
         # Map Spark column names to Snowpark column names
         snowpark_cols = []
@@ -99,9 +484,11 @@ def map_unresolved_attribute(
                 )
             )
             if not snowpark_name:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"[INTERNAL_ERROR] Cannot find Snowpark column mapping for grouping column '{spark_col_name}'"
                 )
+                attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                raise exception
             snowpark_cols.append(snowpark_fn.col(snowpark_name))
 
         # Call GROUPING_ID with all grouping columns using Snowpark names
@@ -155,176 +542,70 @@ def map_unresolved_attribute(
 
     if is_catalog:
         # This looks like a catalog.database.column.field pattern
-        raise AnalysisException(
+        exception = AnalysisException(
            f"[UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `{original_attr_name}` cannot be resolved. "
            f"Cross-catalog column references are not supported in DataFrame API."
        )
+        attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+        raise exception
 
     attr_name = ".".join(name_parts)
+    capture_attribute_name(attr_name)
 
     has_plan_id = exp.unresolved_attribute.HasField("plan_id")
-    source_qualifiers = None
 
     if has_plan_id:
         plan_id = exp.unresolved_attribute.plan_id
+        # get target dataframe and column mapping
         target_df_container = get_plan_id_map(plan_id)
-        target_df = target_df_container.dataframe
         assert (
-            target_df is not None
+            target_df_container is not None
        ), f"resolving an attribute of a unresolved dataframe {plan_id}"
-
-        # Get the qualifiers for this column from the target DataFrame
-        source_qualifiers = (
-            target_df_container.column_map.get_qualifier_for_spark_column(
-                name_parts[-1]
+        if attribute_is_regex:
+            # we should never get a struct field reference here
+            assert (
+                len(name_parts) == 1
+            ), "resolving struct field for attribute regexp with plan id"
+            return _resolve_attribute_regex_with_plan_id(
+                name_parts[0], target_df_container, column_mapping, typer
             )
-        )
-
-        if hasattr(column_mapping, "hidden_columns"):
-            hidden = column_mapping.hidden_columns
-        else:
-            hidden = None
-
-        column_mapping = target_df_container.column_map
-        column_mapping.hidden_columns = hidden
-        typer = ExpressionTyper(target_df)
-
-    def get_col(snowpark_name, has_hidden=False):
-        return (
-            snowpark_fn.col(snowpark_name)
-            if not has_plan_id or has_hidden
-            else target_df.col(snowpark_name)
+        return _resolve_attribute_with_plan_id(
+            name_parts, target_df_container, column_mapping, typer
         )
 
     # Check if regex column names are enabled and this is a quoted identifier
     # We need to check the original attribute name before split_fully_qualified_spark_name processes it
-    if (
-        get_is_evaluating_sql()
-        and global_config.spark_sql_parser_quotedRegexColumnNames
-        and SPARK_QUOTED.match(original_attr_name)
-    ):
+    if attribute_is_regex:
         # Extract regex pattern by removing backticks
         regex_pattern = original_attr_name[1:-1]  # Remove first and last backtick
+        matched_columns = _get_matching_columns(column_mapping, regex_pattern)
+        return _resolve_matched_columns(matched_columns, typer)
 
-        # Get all available column names from the column mapping
-        available_columns = column_mapping.get_spark_columns()
-
-        # Match the regex pattern against available columns
-        matched_columns = []
-        try:
-            compiled_regex = re.compile(
-                regex_pattern,
-                re.IGNORECASE if not global_config.spark_sql_caseSensitive else 0,
-            )
-            for col_name in available_columns:
-                if compiled_regex.fullmatch(col_name):
-                    matched_columns.append(col_name)
-        except re.error as e:
-            raise AnalysisException(f"Invalid regex pattern '{regex_pattern}': {e}")
-
-        if not matched_columns:
-            # Keep the improved error message for SQL regex patterns
-            # This is only hit for SQL queries like SELECT `(e|f)` FROM table
-            # when spark.sql.parser.quotedRegexColumnNames is enabled
-            raise AnalysisException(
-                f"No columns match the regex pattern '{regex_pattern}'. "
-                f"Snowflake SQL does not support SELECT statements with no columns. "
-                f"Please ensure your regex pattern matches at least one column. "
-                f"Available columns: {', '.join(available_columns[:10])}{'...' if len(available_columns) > 10 else ''}"
-            )
-
-        # When multiple columns match, we need to signal that this should expand to multiple columns
-        # Since map_unresolved_attribute can only return one column, we'll use a special marker
-        # to indicate that this is a multi-column regex expansion
-        if len(matched_columns) > 1:
-            # Create a special column name that indicates multi-column expansion
-            # The higher-level logic will need to handle this
-            multi_col_name = "__REGEX_MULTI_COL__"
-            # For now, return the first column but mark it specially
-            quoted_col_name = matched_columns[0]
-            snowpark_name = (
-                column_mapping.get_snowpark_column_name_from_spark_column_name(
-                    quoted_col_name
-                )
-            )
-            col = get_col(snowpark_name)
-            qualifiers = column_mapping.get_qualifier_for_spark_column(quoted_col_name)
-            typed_col = TypedColumn(col, lambda: typer.type(col))
-            typed_col.set_qualifiers(qualifiers)
-            # Store matched columns info for later use
-            typed_col._regex_matched_columns = matched_columns
-            return (multi_col_name, typed_col)
-        else:
-            # Single column match - return that column
-            quoted_col_name = matched_columns[0]
-            snowpark_name = (
-                column_mapping.get_snowpark_column_name_from_spark_column_name(
-                    quoted_col_name
-                )
-            )
-            col = get_col(snowpark_name)
-            qualifiers = column_mapping.get_qualifier_for_spark_column(quoted_col_name)
-            typed_col = TypedColumn(col, lambda: typer.type(col))
-            typed_col.set_qualifiers(qualifiers)
-            return (matched_columns[0], typed_col)
+    quoted_attr_name = _get_quoted_attr_name(name_parts)
 
-    quoted_attr_name = ".".join(
-        quote_name_without_upper_casing(x) for x in name_parts[:-1]
-    )
-    if len(name_parts) > 1:
-        quoted_attr_name = f"{quoted_attr_name}.{name_parts[-1]}"
-    else:
-        quoted_attr_name = name_parts[0]
-
-    snowpark_name = column_mapping.get_snowpark_column_name_from_spark_column_name(
-        quoted_attr_name,
-        allow_non_exists=True,
-        is_qualified=has_plan_id,
-        source_qualifiers=source_qualifiers if has_plan_id else None,
+    # Try to resolve the full qualified name first
+    snowpark_name, found_column_map, found_typer = _try_resolve_column_in_scopes(
+        quoted_attr_name, column_mapping
     )
 
+    qualifiers = set()
    if snowpark_name is not None:
-        is_hidden = column_mapping.is_hidden_column_reference(
-            quoted_attr_name, source_qualifiers
-        )
-        col = get_col(snowpark_name, is_hidden)
-        qualifiers = column_mapping.get_qualifier_for_spark_column(quoted_attr_name)
+        col = snowpark_fn.col(snowpark_name)
+        qualifiers = found_column_map.get_qualifiers_for_snowpark_column(snowpark_name)
+        typer = found_typer if found_typer else typer
    else:
-        # this means it has to be a struct column with a field name
-        snowpark_name: str | None = None
-        column_part_index: int = 0
-
        # Get catalog/database info from column map if available
        catalog_database_info = _get_catalog_database_from_column_map(
            original_attr_name, column_mapping
        )
 
-        # Try to find the column name in different parts of the name_parts array
-        # For qualified names like "table.column.field", we need to find the column part
-        for i in range(len(name_parts)):
-            candidate_column = name_parts[i]
-            snowpark_name = (
-                column_mapping.get_snowpark_column_name_from_spark_column_name(
-                    candidate_column, allow_non_exists=True
-                )
-            )
-            if snowpark_name is not None:
-                column_part_index = i
-                break
-
-            # Also try in outer dataframes
-            for outer_df in get_outer_dataframes():
-                snowpark_name = (
-                    outer_df.column_map.get_snowpark_column_name_from_spark_column_name(
-                        candidate_column, allow_non_exists=True
-                    )
-                )
-                if snowpark_name is not None:
-                    column_part_index = i
-                    break
-
-            if snowpark_name is not None:
-                break
+        # Find the column by matching qualifiers with the prefix parts
+        # Note: This may raise AnalysisException if a scope violation is detected
+        (
+            column_part_index,
+            snowpark_name,
+            found_column_map,
+        ) = _find_column_with_qualifier_match(name_parts, column_mapping)
 
        if snowpark_name is None:
            # Attempt LCA fallback.
@@ -345,9 +626,9 @@ def map_unresolved_attribute(
                 )
             )
             if snowpark_name is not None:
-                col = get_col(snowpark_name)
-                qualifiers = column_mapping.get_qualifier_for_spark_column(
-                    unqualified_name
+                col = snowpark_fn.col(snowpark_name)
+                qualifiers = column_mapping.get_qualifiers_for_snowpark_column(
+                    snowpark_name
                )
                typed_col = TypedColumn(col, lambda: typer.type(col))
                typed_col.set_qualifiers(qualifiers)
@@ -368,44 +649,38 @@ def map_unresolved_attribute(
             )
             if outer_col_name:
                 # This is an outer scope column being referenced inside a lambda
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Reference to non-lambda variable '{attr_name}' within lambda function. "
                     f"Lambda functions can only access their own parameters. "
                     f"If '{attr_name}' is a table column, it must be passed as an explicit parameter to the enclosing function."
                )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
 
            if has_plan_id:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[RESOLVED_REFERENCE_COLUMN_NOT_FOUND] The column "{attr_name}" does not exist in the target dataframe.'
                )
-            else:
-                # Column does not exist. Pass in dummy column name for lazy error throwing as it could be a built-in function
+                attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                raise exception
+            elif attr_name.lower() in NILARY_FUNCTIONS:
                snowpark_name = attr_name
-
-            col = get_col(snowpark_name)
-            try:
-                col_type = typer.type(col)[0]
-            except SnowparkSQLException as e:
-                if e.raw_message is not None and "invalid identifier" in e.raw_message:
-                    raise AnalysisException(
+            else:
+                exception = AnalysisException(
                     f'[COLUMN_NOT_FOUND] The column "{attr_name}" does not exist in the target dataframe.'
                )
-                else:
-                    raise
-            is_struct = isinstance(col_type, StructType)
-            # for struct columns when accessed, spark use just the leaf field name rather than fully attributed one
-            if is_struct:
-                attr_name = name_parts[-1]
+                attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                raise exception
 
+        col = snowpark_fn.col(snowpark_name)
+        # Check if this is a struct field reference
        # Calculate the field path correctly based on where we found the column
-        path = name_parts[column_part_index + 1 :]
-        if is_struct and not global_config.spark_sql_caseSensitive:
-            path = _match_path_to_struct(path, col_type)
-
-        for field_name in path:
-            col = col.getItem(field_name)
+        path = name_parts[column_part_index:]
 
-        qualifiers = []
+        if len(path) > 1:
+            col = _resolve_struct_field(path, col, typer)
 
    typed_col = TypedColumn(col, lambda: typer.type(col))
    typed_col.set_qualifiers(qualifiers)
@@ -417,10 +692,11 @@ def map_unresolved_attribute(
     if final_catalog_database_info:
         typed_col.set_catalog_database_info(final_catalog_database_info)
 
+    # for struct columns when accessed, spark use just the leaf field name rather than fully attributed one
     return (name_parts[-1], typed_col)
 
 
-def _match_path_to_struct(path: list[str], col_type: StructType) -> list[str]:
+def _match_path_to_struct(path: list[str], col_type: DataType) -> list[str]:
     """Takes a path of names and adjusts them to strictly match the field names in a StructType."""
     adjusted_path = []
     typ = col_type
@@ -438,7 +714,9 @@ def _match_path_to_struct(path: list[str], col_type: StructType) -> list[str]:
             typ = typ.value_type if isinstance(typ, MapType) else typ.element_type
         else:
             # If the type is not a struct, map, or array, we cannot access the field.
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"[INVALID_EXTRACT_BASE_FIELD_TYPE] Can't extract a value from \"{'.'.join(path[:i])}\". Need a complex type [STRUCT, ARRAY, MAP] but got \"{typ}\"."
             )
+            attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+            raise exception
     return adjusted_path
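
Note on the new qualifier resolution: the helper `_find_column_with_qualifier_match` introduced in this version decides which part of a dotted name is the column by checking whether the parts before a candidate match the tail (suffix) of one of that column's table qualifiers, as described in its docstring. The following standalone sketch illustrates only that suffix rule; the helper name and inputs are hypothetical and are not code from the package.

def _prefix_matches_qualifier(prefix_parts: list[str], qualifier: tuple[str, ...]) -> bool:
    # An empty prefix means unqualified access, which is always allowed.
    if not prefix_parts:
        return True
    # Otherwise the prefix must equal the trailing parts of the qualifier,
    # so both partial qualification (table only) and full qualification match.
    return (
        len(qualifier) >= len(prefix_parts)
        and qualifier[-len(prefix_parts):] == tuple(prefix_parts)
    )

# For "t5.t5.i1" against qualifier ('mydb1', 't5'): the prefix ['t5'] matches the
# qualifier's suffix, so the second 't5' is the column and 'i1' is a struct field.
assert _prefix_matches_qualifier(["t5"], ("mydb1", "t5"))
assert _prefix_matches_qualifier(["mydb1", "t5"], ("mydb1", "t5"))
assert not _prefix_matches_qualifier(["otherdb", "t5"], ("mydb1", "t5"))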