snowpark-connect 0.27.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Files changed (192)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
  3. snowflake/snowpark_connect/client/__init__.py +15 -0
  4. snowflake/snowpark_connect/client/error_utils.py +30 -0
  5. snowflake/snowpark_connect/client/exceptions.py +36 -0
  6. snowflake/snowpark_connect/client/query_results.py +90 -0
  7. snowflake/snowpark_connect/client/server.py +680 -0
  8. snowflake/snowpark_connect/client/utils/__init__.py +10 -0
  9. snowflake/snowpark_connect/client/utils/session.py +85 -0
  10. snowflake/snowpark_connect/column_name_handler.py +404 -243
  11. snowflake/snowpark_connect/column_qualifier.py +43 -0
  12. snowflake/snowpark_connect/config.py +237 -23
  13. snowflake/snowpark_connect/constants.py +2 -0
  14. snowflake/snowpark_connect/dataframe_container.py +102 -8
  15. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  16. snowflake/snowpark_connect/error/error_codes.py +50 -0
  17. snowflake/snowpark_connect/error/error_utils.py +172 -23
  18. snowflake/snowpark_connect/error/exceptions.py +13 -4
  19. snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
  20. snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
  21. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  22. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  23. snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
  24. snowflake/snowpark_connect/expression/literal.py +37 -13
  25. snowflake/snowpark_connect/expression/map_cast.py +123 -5
  26. snowflake/snowpark_connect/expression/map_expression.py +80 -27
  27. snowflake/snowpark_connect/expression/map_extension.py +322 -12
  28. snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
  29. snowflake/snowpark_connect/expression/map_udf.py +85 -20
  30. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
  31. snowflake/snowpark_connect/expression/map_unresolved_function.py +2748 -746
  32. snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
  33. snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
  34. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  35. snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
  36. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
  37. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
  38. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
  39. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
  40. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
  41. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
  42. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  43. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
  44. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  45. snowflake/snowpark_connect/relation/io_utils.py +110 -10
  46. snowflake/snowpark_connect/relation/map_aggregate.py +196 -255
  47. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  48. snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
  49. snowflake/snowpark_connect/relation/map_extension.py +263 -29
  50. snowflake/snowpark_connect/relation/map_join.py +683 -442
  51. snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
  52. snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
  53. snowflake/snowpark_connect/relation/map_relation.py +48 -19
  54. snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
  55. snowflake/snowpark_connect/relation/map_show_string.py +13 -6
  56. snowflake/snowpark_connect/relation/map_sql.py +1233 -222
  57. snowflake/snowpark_connect/relation/map_stats.py +48 -9
  58. snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
  59. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  60. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
  61. snowflake/snowpark_connect/relation/read/map_read.py +134 -43
  62. snowflake/snowpark_connect/relation/read/map_read_csv.py +255 -45
  63. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  64. snowflake/snowpark_connect/relation/read/map_read_json.py +320 -85
  65. snowflake/snowpark_connect/relation/read/map_read_parquet.py +142 -27
  66. snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
  67. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  68. snowflake/snowpark_connect/relation/read/map_read_table.py +82 -5
  69. snowflake/snowpark_connect/relation/read/map_read_text.py +18 -3
  70. snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
  71. snowflake/snowpark_connect/relation/read/reader_config.py +36 -3
  72. snowflake/snowpark_connect/relation/read/utils.py +50 -5
  73. snowflake/snowpark_connect/relation/stage_locator.py +91 -55
  74. snowflake/snowpark_connect/relation/utils.py +128 -5
  75. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  76. snowflake/snowpark_connect/relation/write/map_write.py +929 -319
  77. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  78. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  79. snowflake/snowpark_connect/resources_initializer.py +110 -48
  80. snowflake/snowpark_connect/server.py +546 -456
  81. snowflake/snowpark_connect/server_common/__init__.py +500 -0
  82. snowflake/snowpark_connect/snowflake_session.py +65 -0
  83. snowflake/snowpark_connect/start_server.py +53 -5
  84. snowflake/snowpark_connect/type_mapping.py +349 -27
  85. snowflake/snowpark_connect/typed_column.py +9 -7
  86. snowflake/snowpark_connect/utils/artifacts.py +9 -8
  87. snowflake/snowpark_connect/utils/cache.py +49 -27
  88. snowflake/snowpark_connect/utils/concurrent.py +36 -1
  89. snowflake/snowpark_connect/utils/context.py +187 -37
  90. snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
  91. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  92. snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
  93. snowflake/snowpark_connect/utils/identifiers.py +137 -3
  94. snowflake/snowpark_connect/utils/io_utils.py +57 -1
  95. snowflake/snowpark_connect/utils/java_stored_procedure.py +125 -0
  96. snowflake/snowpark_connect/utils/java_udaf_utils.py +303 -0
  97. snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
  98. snowflake/snowpark_connect/utils/jvm_udf_utils.py +248 -0
  99. snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
  100. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  101. snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
  102. snowflake/snowpark_connect/utils/profiling.py +25 -8
  103. snowflake/snowpark_connect/utils/scala_udf_utils.py +101 -332
  104. snowflake/snowpark_connect/utils/sequence.py +21 -0
  105. snowflake/snowpark_connect/utils/session.py +64 -28
  106. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
  107. snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
  108. snowflake/snowpark_connect/utils/telemetry.py +163 -22
  109. snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
  110. snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
  111. snowflake/snowpark_connect/utils/udf_cache.py +117 -41
  112. snowflake/snowpark_connect/utils/udf_helper.py +39 -37
  113. snowflake/snowpark_connect/utils/udf_utils.py +133 -14
  114. snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
  115. snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
  116. snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
  117. snowflake/snowpark_connect/version.py +1 -1
  118. snowflake/snowpark_decoder/dp_session.py +6 -2
  119. snowflake/snowpark_decoder/spark_decoder.py +12 -0
  120. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-submit +2 -2
  121. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/METADATA +14 -7
  122. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/RECORD +129 -167
  123. snowflake/snowpark_connect/hidden_column.py +0 -39
  124. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  125. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  126. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  127. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  128. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  129. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  130. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  131. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  132. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  133. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  134. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  135. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  136. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  137. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  138. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  139. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  140. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  141. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  142. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  143. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  144. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  145. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  146. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  147. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  148. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  149. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  150. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  151. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  152. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  153. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  154. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  155. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  156. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  157. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  158. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  159. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  160. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  161. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  162. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  163. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  164. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  165. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  166. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  167. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  168. snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
  169. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  170. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  171. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  172. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  173. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  174. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  175. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  176. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  177. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  178. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  179. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  180. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  181. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  182. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  183. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  184. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  185. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  186. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-connect +0 -0
  187. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-session +0 -0
  188. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/WHEEL +0 -0
  189. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE-binary +0 -0
  190. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE.txt +0 -0
  191. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/NOTICE-binary +0 -0
  192. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/expression/map_sql_expression.py
@@ -11,13 +11,18 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
 import pyspark.sql.connect.proto.types_pb2 as types_proto
 from google.protobuf.any_pb2 import Any
 from pyspark.errors.exceptions.base import AnalysisException
+from pyspark.sql.connect import functions as pyspark_functions
 
 import snowflake.snowpark_connect.proto.snowflake_expression_ext_pb2 as snowflake_proto
-from snowflake import snowpark
+from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
+from snowflake.snowpark.types import DayTimeIntervalType, YearMonthIntervalType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import (
+    get_jpype_jclass_lock,
     get_sql_named_arg,
     get_sql_plan,
     get_sql_pos_arg,
@@ -32,8 +37,36 @@ from .typer import ExpressionTyper
 
 DECIMAL_RE = re.compile(r"decimal\((\d+), *(\d+)\)")
 
+_INTERVAL_YEARMONTH_PATTERN_RE = re.compile(r"interval (year|month)( to (year|month))?")
+_INTERVAL_DAYTIME_PATTERN_RE = re.compile(
+    r"interval (day|hour|minute|second)( to (day|hour|minute|second))?"
+)
+
+# Interval field mappings using proper constants
+_YEAR_MONTH_FIELD_MAP = {
+    "year": YearMonthIntervalType.YEAR,
+    "month": YearMonthIntervalType.MONTH,
+}
+
+_DAY_TIME_FIELD_MAP = {
+    "day": DayTimeIntervalType.DAY,
+    "hour": DayTimeIntervalType.HOUR,
+    "minute": DayTimeIntervalType.MINUTE,
+    "second": DayTimeIntervalType.SECOND,
+}
+
 _window_specs = ContextVar[dict[str, any]]("_window_specs", default={})
 
+# Functions that can be called without parentheses in Spark SQL. Build up the list as we see more functions.
+NILARY_FUNCTIONS = frozenset(
+    [
+        "current_date",
+        "current_timestamp",
+        "current_user",
+        "user",
+    ]
+)
+
 
 def sql_parser():
     """
@@ -44,26 +77,35 @@ def sql_parser():
     """
 
     ts_type = global_config.spark_sql_timestampType
+    session_tz = global_config.spark_sql_session_timeZone
 
     if ts_type is not None:
         _get_sql_conf().get().setConfString("spark.sql.timestampType", str(ts_type))
 
+    if session_tz is not None:
+        _get_sql_conf().get().setConfString(
+            "spark.sql.session.timeZone", str(session_tz)
+        )
+
     return _get_sql_parser()
 
 
 @cache
 def _get_sql_parser():
-    return jpype.JClass("org.apache.spark.sql.execution.SparkSqlParser")()
+    with get_jpype_jclass_lock():
+        return jpype.JClass("org.apache.spark.sql.execution.SparkSqlParser")()
 
 
 @cache
 def _get_sql_conf():
-    return jpype.JClass("org.apache.spark.sql.internal.SQLConf")
+    with get_jpype_jclass_lock():
+        return jpype.JClass("org.apache.spark.sql.internal.SQLConf")
 
 
 @cache
 def _as_java_list():
-    return jpype.JClass("scala.collection.JavaConverters").seqAsJavaList
+    with get_jpype_jclass_lock():
+        return jpype.JClass("scala.collection.JavaConverters").seqAsJavaList
 
 
 def as_java_list(obj):
@@ -72,7 +114,8 @@ def as_java_list(obj):
 
 @cache
 def _as_java_map():
-    return jpype.JClass("scala.collection.JavaConverters").mapAsJavaMap
+    with get_jpype_jclass_lock():
+        return jpype.JClass("scala.collection.JavaConverters").mapAsJavaMap
 
 
 def as_java_map(obj):
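
Every cached `jpype.JClass` lookup in the two hunks above is now wrapped in `get_jpype_jclass_lock()`. A rough sketch of the resulting pattern, assuming the lock exists to serialize JVM class resolution across threads while `functools.cache` memoizes the resolved class; the lock helper and `_jvm_lookup` below are hypothetical stand-ins:

```python
import threading
from functools import cache

# Hypothetical stand-in for get_jpype_jclass_lock(): one process-wide lock
# guarding JVM class resolution, which is assumed not to be thread-safe.
_jclass_lock = threading.Lock()


def get_jpype_jclass_lock() -> threading.Lock:
    return _jclass_lock


def _jvm_lookup(name: str) -> str:
    return f"<JClass {name}>"  # stands in for jpype.JClass(name)


@cache
def _as_java_list():
    # Only the first caller pays the locked JVM lookup; functools.cache
    # then serves the memoized class to later callers without the lock.
    with get_jpype_jclass_lock():
        return _jvm_lookup("scala.collection.JavaConverters")


assert _as_java_list() is _as_java_list()  # cached, resolved exactly once
```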
@@ -89,6 +132,11 @@ def as_scala_seq(input):
     )
 
 
+@cache
+def _scala_some():
+    return jpype.JClass("scala.Some")
+
+
 def map_sql_expr(
     exp: expressions_proto.Expression,
     column_mapping: ColumnNameMap,
@@ -223,20 +271,52 @@ def apply_filter_clause(
 
 
 def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Expression:
-    from snowflake.snowpark_connect.expression.map_expression import (
-        map_single_column_expression,
-    )
     from snowflake.snowpark_connect.relation.map_sql import map_logical_plan_relation
 
     class_name = str(exp.getClass().getSimpleName())
     match class_name:
         case "AggregateExpression":
-            func_name = as_java_list(exp.children())[0].nodeName()
+            aggregate_func = as_java_list(exp.children())[0]
+            func_name = aggregate_func.nodeName()
             args = [
                 map_logical_plan_expression(e)
-                for e in as_java_list(as_java_list(exp.children())[0].children())
+                for e in list(as_java_list(aggregate_func.children()))
             ]
-            proto = apply_filter_clause(func_name, args, exp)
+
+            # Special handling for percentile_cont and percentile_disc
+            # These functions have a 'reverse' property that indicates sort order
+            # Pass it as a 3rd argument (sort_order expression) without modifying children
+            if func_name.lower() in ("percentile_cont", "percentiledisc"):
+                # percentile_cont/disc should always have exactly 2 children: unresolved attribute and percentile value
+                if len(args) != 2:
+                    exception = AssertionError(
+                        f"{func_name} expected 2 args but got {len(args)}"
+                    )
+                    attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                    raise exception
+
+                reverse = bool(aggregate_func.reverse())
+
+                direction = (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING
+                    if reverse
+                    else expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING
+                )
+
+                sort_order_expr = expressions_proto.Expression(
+                    sort_order=expressions_proto.Expression.SortOrder(
+                        child=args[0],
+                        direction=direction,
+                    )
+                )
+                args.append(sort_order_expr)
+                proto = apply_filter_clause(func_name, [args[0]], exp)
+                # second arg is a literal value and it doesn't make sense to apply filter on it.
+                # also skips filtering on sort_order.
+                proto.unresolved_function.arguments.append(args[1])
+                proto.unresolved_function.arguments.append(sort_order_expr)
+            else:
+                proto = apply_filter_clause(func_name, args, exp)
         case "Alias":
             proto = expressions_proto.Expression(
                 alias=expressions_proto.Expression.Alias(
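
The `AggregateExpression` branch above special-cases `percentile_cont`/`percentile_disc`: the Catalyst node's `reverse` flag is turned into a trailing `SortOrder` argument, and the FILTER clause wrapper is applied to the column argument only. A toy sketch of the resulting argument layout, with plain strings standing in for proto messages:

```python
def build_percentile_args(column: str, percentile: float, reverse: bool) -> list[str]:
    """Mirror the hunk: [column, percentile, sort_order], where only the
    column went through the FILTER-clause wrapper upstream."""
    direction = "DESC" if reverse else "ASC"
    sort_order = f"sort_order({column} {direction})"
    # The percentile value and sort order are appended verbatim: filtering a
    # literal or a sort specification would be meaningless.
    return [column, str(percentile), sort_order]


# percentile_cont(0.5) WITHIN GROUP (ORDER BY price DESC)
assert build_percentile_args("price", 0.5, reverse=True) == [
    "price",
    "0.5",
    "sort_order(price DESC)",
]
```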
@@ -253,7 +333,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                     function_name="when",
                     arguments=[
                         map_logical_plan_expression(e)
-                        for e in as_java_list(exp.children())
+                        for e in list(as_java_list(exp.children()))
                     ],
                 )
             )
@@ -267,7 +347,8 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
             )
         case "Coalesce":
             arguments = [
-                map_logical_plan_expression(e) for e in as_java_list(exp.children())
+                map_logical_plan_expression(e)
+                for e in list(as_java_list(exp.children()))
             ]
 
             proto = expressions_proto.Expression(
@@ -277,6 +358,14 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                 )
             )
         case "CreateNamedStruct":
+            # Both struct() and named_struct() Spark SQL functions produce a CreateNamedStruct
+            # logical plan. We distinguish them by checking exp.prettyName():
+            # - "named_struct" -> explicit named_struct() call, requires name-value pairs
+            # - "struct" -> struct() call, field names are inferred from column expressions
+
+            # Additionally, struct(*) with star expansion is handled as named_struct.
+            # TODO - consider refactoring the impl and handle it in "struct" impl
+
             arg_exprs = [
                 arg
                 for k_v in zip(
@@ -284,17 +373,31 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                 )
                 for arg in k_v
             ]
-            struct_args = [
-                map_logical_plan_expression(e)
-                for e in arg_exprs
-                if e.prettyName() != "NamePlaceholder"
-            ]
-            proto = expressions_proto.Expression(
-                unresolved_function=expressions_proto.Expression.UnresolvedFunction(
-                    function_name="named_struct",
-                    arguments=struct_args,
+
+            if (
+                "unresolvedstar" in [e.prettyName() for e in arg_exprs]
+                or exp.prettyName() == "named_struct"
+            ):
+                struct_args = [
+                    map_logical_plan_expression(e)
+                    for e in arg_exprs
+                    if e.prettyName() != "NamePlaceholder"
+                ]
+                proto = expressions_proto.Expression(
+                    unresolved_function=expressions_proto.Expression.UnresolvedFunction(
+                        function_name="named_struct",
+                        arguments=struct_args,
+                    )
+                )
+            else:
+                arg_exprs = [arg for arg in as_java_list(exp.valExprs())]
+                struct_args = [map_logical_plan_expression(e) for e in arg_exprs]
+                proto = expressions_proto.Expression(
+                    unresolved_function=expressions_proto.Expression.UnresolvedFunction(
+                        function_name="struct",
+                        arguments=struct_args,
+                    )
                 )
-            )
         case "Exists":
             rel_proto = map_logical_plan_relation(exp.plan())
             any_proto = Any()
@@ -308,22 +411,23 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
             )
             proto = expressions_proto.Expression(extension=any_proto)
         case "ExpressionWithUnresolvedIdentifier":
-            plan_id = None
-            identifierExpr = map_logical_plan_expression(exp.identifierExpr())
-            session = snowpark.Session.get_active_session()
-            m = ColumnNameMap([], [], None)
-            expr = map_single_column_expression(
-                identifierExpr, m, ExpressionTyper.dummy_typer(session)
+            from snowflake.snowpark_connect.relation.map_sql import (
+                get_relation_identifier_name,
             )
-            value = session.range(1).select(expr[1].col).collect()[0][0]
 
-            proto = expressions_proto.Expression(
-                unresolved_attribute=expressions_proto.Expression.UnresolvedAttribute(
-                    unparsed_identifier=str(value),
-                    plan_id=plan_id,
-                ),
-            )
-            # TODO: support identifier referencing unresolved function
+            value = unquote_if_quoted(get_relation_identifier_name(exp))
+            if getattr(pyspark_functions, value.lower(), None) is not None:
+                unresolved_function = exp.exprBuilder().apply(
+                    _scala_some()(value).toList()
+                )
+                proto = map_logical_plan_expression(unresolved_function)
+            else:
+                proto = expressions_proto.Expression(
+                    unresolved_attribute=expressions_proto.Expression.UnresolvedAttribute(
+                        unparsed_identifier=str(value),
+                        plan_id=None,
+                    ),
+                )
         case "InSubquery":
             rel_proto = map_logical_plan_relation(exp.query().plan())
             any_proto = Any()
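
The rewritten `ExpressionWithUnresolvedIdentifier` branch resolves the `IDENTIFIER(...)` text first, then probes `pyspark.sql.connect.functions` with `getattr` to decide between a function call and a column reference. A runnable sketch of the same dispatch, with Python's `math` module standing in for the pyspark functions module:

```python
import math


def resolve_identifier(value: str, functions_module=math) -> str:
    # Same getattr probe as the diff, against a stand-in module so the
    # sketch runs anywhere: a known function name becomes a call, anything
    # else is treated as an unresolved column attribute.
    if getattr(functions_module, value.lower(), None) is not None:
        return f"unresolved_function({value.lower()})"
    return f"unresolved_attribute({value})"


assert resolve_identifier("SQRT") == "unresolved_function(sqrt)"
assert resolve_identifier("order_id") == "unresolved_attribute(order_id)"
```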
@@ -334,7 +438,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                     subquery_type=snowflake_proto.SubqueryExpression.SUBQUERY_TYPE_IN,
                     in_subquery_values=[
                         map_logical_plan_expression(value)
-                        for value in as_java_list(exp.values())
+                        for value in list(as_java_list(exp.values()))
                     ],
                 )
             )
@@ -343,7 +447,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
         case "LambdaFunction":
             arguments = [
                 map_logical_plan_expression(arg).unresolved_named_lambda_variable
-                for arg in as_java_list(exp.arguments())
+                for arg in list(as_java_list(exp.arguments()))
             ]
             proto = expressions_proto.Expression(
                 lambda_function=expressions_proto.Expression.LambdaFunction(
@@ -352,19 +456,28 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                 )
             )
         case "Like" | "ILike" | "RLike":
+            arguments = [
+                map_logical_plan_expression(e)
+                for e in list(as_java_list(exp.children()))
+            ]
+            # exp.escapeChar() returns a JPype JChar - convert to string and create a literal
+            if getattr(exp, "escapeChar", None) is not None:
+                escape_char_str = str(exp.escapeChar())
+                escape_literal = expressions_proto.Expression(
+                    literal=expressions_proto.Expression.Literal(string=escape_char_str)
+                )
+                arguments.append(escape_literal)
             proto = expressions_proto.Expression(
                 unresolved_function=expressions_proto.Expression.UnresolvedFunction(
                     function_name=class_name.lower(),
-                    arguments=[
-                        map_logical_plan_expression(e)
-                        for e in as_java_list(exp.children())
-                    ],
+                    arguments=arguments,
                 )
             )
         case "LikeAny" | "NotLikeAny" | "LikeAll" | "NotLikeAll":
-            patterns = as_java_list(exp.patterns())
+            patterns = list(as_java_list(exp.patterns()))
             arguments = [
-                map_logical_plan_expression(e) for e in as_java_list(exp.children())
+                map_logical_plan_expression(e)
+                for e in list(as_java_list(exp.children()))
             ]
             arguments += [map_logical_plan_expression(e) for e in patterns]
             proto = expressions_proto.Expression(
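
The `Like`/`ILike`/`RLike` hunk now preserves a pattern's escape character by appending it as a trailing string-literal argument, converting the JPype `JChar` with `str()` along the way. A toy sketch of the argument assembly, with strings in place of proto messages:

```python
from typing import Optional


def build_like_args(children: list[str], escape_char: Optional[str]) -> list[str]:
    """Mirror the hunk: child expressions first, then the escape character
    appended as a string literal when the node carries one."""
    arguments = list(children)
    if escape_char is not None:
        # str() mirrors the JChar -> str conversion in the diff.
        arguments.append(f"literal({str(escape_char)!r})")
    return arguments


# col LIKE 'a!%' ESCAPE '!'
assert build_like_args(["col", "'a!%'"], "!") == ["col", "'a!%'", "literal('!')"]
```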
@@ -384,8 +497,106 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                 type_value = types_proto.DataType()
             elif type_name == "binary":
                 type_value = bytes(type_value)
-            elif type_name.startswith("interval "):
-                type_name = "day_time_interval"  # TODO
+            elif year_month_match := _INTERVAL_YEARMONTH_PATTERN_RE.match(type_name):
+                # Extract start and end fields for year-month intervals
+                start_field_name = year_month_match.group(1)  # 'year' or 'month'
+                end_field_name = (
+                    year_month_match.group(3)
+                    if year_month_match.group(3)
+                    else start_field_name
+                )
+
+                # Validate field names exist in mapping
+                start_field = _YEAR_MONTH_FIELD_MAP.get(start_field_name)
+                end_field = _YEAR_MONTH_FIELD_MAP.get(end_field_name)
+
+                if start_field is None:
+                    exception = AnalysisException(
+                        f"Invalid year-month interval start field: '{start_field_name}'. Expected 'year' or 'month'."
+                    )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
+                if end_field is None:
+                    exception = AnalysisException(
+                        f"Invalid year-month interval end field: '{end_field_name}'. Expected 'year' or 'month'."
+                    )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
+
+                # Validate field ordering (start_field should be <= end_field)
+                if start_field > end_field:
+                    exception = AnalysisException(
+                        f"Invalid year-month interval: start field '{start_field_name}' must come before or equal to end field '{end_field_name}'."
+                    )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
+
+                # Use extension for year-month intervals to preserve start/end field info
+                literal = expressions_proto.Expression.Literal(
+                    year_month_interval=type_value
+                )
+                any_proto = Any()
+                any_proto.Pack(
+                    snowflake_proto.ExpExtension(
+                        interval_literal=snowflake_proto.IntervalLiteralExpression(
+                            literal=literal,
+                            start_field=start_field,
+                            end_field=end_field,
+                        )
+                    )
+                )
+                return expressions_proto.Expression(extension=any_proto)
+            elif day_time_match := _INTERVAL_DAYTIME_PATTERN_RE.match(type_name):
+                # Extract start and end fields for day-time intervals
+                start_field_name = day_time_match.group(
+                    1
+                )  # 'day', 'hour', 'minute', 'second'
+                end_field_name = (
+                    day_time_match.group(3)
+                    if day_time_match.group(3)
+                    else start_field_name
+                )
+
+                # Validate field names exist in mapping
+                start_field = _DAY_TIME_FIELD_MAP.get(start_field_name)
+                end_field = _DAY_TIME_FIELD_MAP.get(end_field_name)
+
+                if start_field is None:
+                    exception = AnalysisException(
+                        f"Invalid day-time interval start field: '{start_field_name}'. Expected 'day', 'hour', 'minute', or 'second'."
+                    )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
+                if end_field is None:
+                    exception = AnalysisException(
+                        f"Invalid day-time interval end field: '{end_field_name}'. Expected 'day', 'hour', 'minute', or 'second'."
+                    )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
+
+                # Validate field ordering (start_field should be <= end_field)
+                if start_field > end_field:
+                    exception = AnalysisException(
+                        f"Invalid day-time interval: start field '{start_field_name}' must come before or equal to end field '{end_field_name}'."
+                    )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
+
+                # Use extension for day-time intervals to preserve start/end field info
+                literal = expressions_proto.Expression.Literal(
+                    day_time_interval=type_value
+                )
+                any_proto = Any()
+                any_proto.Pack(
+                    snowflake_proto.ExpExtension(
+                        interval_literal=snowflake_proto.IntervalLiteralExpression(
+                            literal=literal,
+                            start_field=start_field,
+                            end_field=end_field,
+                        )
+                    )
+                )
+                return expressions_proto.Expression(extension=any_proto)
             elif m := DECIMAL_RE.fullmatch(type_name):
                 type_name = "decimal"
                 type_value = expressions_proto.Expression.Literal.Decimal(
@@ -425,19 +636,27 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
             name = str(exp.name())
             value = get_sql_named_arg(name)
             if not value.HasField("literal_type"):
-                raise AnalysisException(f"Found an unbound parameter {name!r}")
+                exception = AnalysisException(f"Found an unbound parameter {name!r}")
+                attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
+                raise exception
             proto = expressions_proto.Expression(literal=value)
         case "NamePlaceholder$":
             # This is a placeholder for an expression name to be resolved later.
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "NamePlaceholder is not supported in SQL expressions."
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         case "PosParameter":
             pos = exp.pos()
             try:
                 value = get_sql_pos_arg(pos)
             except KeyError:
-                raise AnalysisException(f"Found an unbound parameter at position {pos}")
+                exception = AnalysisException(
+                    f"Found an unbound parameter at position {pos}"
+                )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
+                raise exception
             proto = expressions_proto.Expression(literal=value)
         case "ScalarSubquery":
             rel_proto = map_logical_plan_relation(exp.plan())
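
The unbound-parameter cases above follow the construct-tag-raise pattern applied throughout this diff: build the exception, attach a structured error code, then raise. A sketch under the assumption that `attach_custom_error_code` simply annotates the exception object; the attribute name and `ErrorCodes` value below are hypothetical stand-ins:

```python
class ErrorCodes:
    INVALID_SQL_SYNTAX = "INVALID_SQL_SYNTAX"  # stand-in value


def attach_custom_error_code(exc: Exception, code: str) -> None:
    # Assumption: the real helper tags the exception so the server can
    # report a stable error code alongside the message.
    exc.custom_error_code = code


def lookup_named_arg(name: str, bound: dict) -> object:
    if name not in bound:
        exception = ValueError(f"Found an unbound parameter {name!r}")
        attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
        raise exception
    return bound[name]


try:
    lookup_named_arg("limit", bound={})
except ValueError as e:
    assert e.custom_error_code == ErrorCodes.INVALID_SQL_SYNTAX
```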
@@ -507,7 +726,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                    ]
                    + [
                        map_logical_plan_expression(e)
-                        for e in as_java_list(exp.children())
+                        for e in list(as_java_list(exp.children()))
                    ],
                )
            )
@@ -515,32 +734,42 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
             proto = map_logical_plan_expression(exp.child())
         case "UnresolvedAttribute":
             *parents, name = as_java_list(exp.nameParts())
-            plan_id = None
-            is_fully_qualified_name = False
-            if parents:
-                parent_name = ".".join(str(p) for p in parents)
-                plan_id = get_sql_plan(parent_name)
-                # If no plan_id exists, treat the column name as fully qualified by its parent.
-                if plan_id is None:
+            if not parents and name.lower() in NILARY_FUNCTIONS:
+                # this is very likely a function call without parentheses disguised as an attribute, e.g. `CURRENT_TIMESTAMP` instead of `CURRENT_TIMESTAMP()`.
+                # Note limitation: this only works when these names are not real table column names, which should be very rare and is bad practice. E.g., mytable(CURRENT_TIMESTAMP, col2, col3).
+                proto = expressions_proto.Expression(
+                    unresolved_function=expressions_proto.Expression.UnresolvedFunction(
+                        function_name=name.lower(),
+                        is_distinct=False,
+                    )
+                )
+            else:
+                plan_id = None
+                is_fully_qualified_name = False
+                if parents:
+                    parent_name = ".".join(str(p) for p in parents)
+                    plan_id = get_sql_plan(parent_name)
+                    # If no plan_id exists, treat the column name as fully qualified by its parent.
+                    if plan_id is None:
+                        # There's a difference in how Spark sql and dataframe operation passes backticks in column names.
+                        # Spark sql un-escapes the backticks instead of passing the raw string. This
+                        # logic is to escape backticks again to make it consistent with regular spark functions.
+                        parent_chain = ".".join(escape_spark_quoted(p) for p in parents)
+                        name = f"{parent_chain}.{escape_spark_quoted(name)}"
+                        is_fully_qualified_name = True
+
+                if not is_fully_qualified_name:
                     # There's a difference in how Spark sql and dataframe operation passes backticks in column names.
                     # Spark sql un-escapes the backticks instead of passing the raw string. This
                     # logic is to escape backticks again to make it consistent with regular spark functions.
-                    parent_chain = ".".join(escape_spark_quoted(p) for p in parents)
-                    name = f"{parent_chain}.{escape_spark_quoted(name)}"
-                    is_fully_qualified_name = True
-
-            if not is_fully_qualified_name:
-                # There's a difference in how Spark sql and dataframe operation passes backticks in column names.
-                # Spark sql un-escapes the backticks instead of passing the raw string. This
-                # logic is to escape backticks again to make it consistent with regular spark functions.
-                name = escape_spark_quoted(name)
+                    name = escape_spark_quoted(name)
 
-            proto = expressions_proto.Expression(
-                unresolved_attribute=expressions_proto.Expression.UnresolvedAttribute(
-                    unparsed_identifier=str(name),
-                    plan_id=plan_id,
-                ),
-            )
+                proto = expressions_proto.Expression(
+                    unresolved_attribute=expressions_proto.Expression.UnresolvedAttribute(
+                        unparsed_identifier=str(name),
+                        plan_id=plan_id,
+                    ),
+                )
         case "UnresolvedExtractValue":
             proto = expressions_proto.Expression(
                 unresolved_extract_value=expressions_proto.Expression.UnresolvedExtractValue(
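
The `UnresolvedAttribute` hunk routes a bare, unqualified name such as `CURRENT_TIMESTAMP` through `NILARY_FUNCTIONS` so it becomes a zero-argument function call instead of a column lookup, while qualified names keep the attribute path. A minimal sketch of that branch:

```python
NILARY_FUNCTIONS = frozenset(
    ["current_date", "current_timestamp", "current_user", "user"]
)


def map_attribute(name_parts: list[str]) -> str:
    *parents, name = name_parts
    if not parents and name.lower() in NILARY_FUNCTIONS:
        # A bare CURRENT_DATE is almost certainly a no-paren function call;
        # qualified names (t.CURRENT_DATE) stay column references.
        return f"unresolved_function({name.lower()})"
    return f"unresolved_attribute({'.'.join(name_parts)})"


assert map_attribute(["CURRENT_DATE"]) == "unresolved_function(current_date)"
assert map_attribute(["t", "CURRENT_DATE"]) == "unresolved_attribute(t.CURRENT_DATE)"
```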
@@ -550,18 +779,20 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
             )
         case "UnresolvedFunction":
             func_name = ".".join(
-                str(part) for part in as_java_list(exp.nameParts())
+                str(part) for part in list(as_java_list(exp.nameParts()))
             ).lower()
             args = [
                 map_logical_plan_expression(arg)
-                for arg in as_java_list(exp.arguments())
+                for arg in list(as_java_list(exp.arguments()))
             ]
 
             proto = apply_filter_clause(func_name, args, exp, exp.isDistinct())
         case "UnresolvedNamedLambdaVariable":
             proto = expressions_proto.Expression(
                 unresolved_named_lambda_variable=expressions_proto.Expression.UnresolvedNamedLambdaVariable(
-                    name_parts=[str(part) for part in as_java_list(exp.nameParts())],
+                    name_parts=[
+                        str(part) for part in list(as_java_list(exp.nameParts()))
+                    ],
                 )
             )
         case "UnresolvedStar":
@@ -582,9 +813,11 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                 # Build Window expression
                 proto = get_window_expression_proto(window_spec, exp.child())
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Window specification not found {window_spec_reference!r}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+                raise exception
         case "UTF8String":
             proto = expressions_proto.Expression(
                 literal=expressions_proto.Expression.Literal(
@@ -614,13 +847,15 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                     function_name=proto_func,
                     arguments=[
                         map_logical_plan_expression(arg)
-                        for arg in as_java_list(exp.children())
+                        for arg in list(as_java_list(exp.children()))
                     ],
                 )
             )
 
         case other:
-            raise SnowparkConnectNotImplementedError(f"Not implemented: {other}")
+            exception = SnowparkConnectNotImplementedError(f"Not implemented: {other}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
     return proto
 
@@ -643,11 +878,11 @@ def get_window_expression_proto(
         window_function=map_logical_plan_expression(window_function),
         partition_spec=[
             map_logical_plan_expression(e)
-            for e in as_java_list(window_spec.partitionSpec())
+            for e in list(as_java_list(window_spec.partitionSpec()))
         ],
         order_spec=[
             map_logical_plan_expression(e).sort_order
-            for e in as_java_list(window_spec.orderSpec())
+            for e in list(as_java_list(window_spec.orderSpec()))
        ],
        frame_spec=frame_spec_proto,
    )
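
One mechanical change recurs across nearly every hunk: each `as_java_list(...)` iteration is wrapped in `list(...)`, materializing the Java-backed sequence before the comprehension runs. A small sketch of what that buys, on the assumption that the JPype proxy should be snapshotted before the recursive mapping re-enters the JVM; a plain iterator makes the single-pass hazard visible:

```python
def map_children(children) -> list[str]:
    # Snapshot the (possibly JVM-backed, possibly single-pass) sequence up
    # front, so recursive mapping calls cannot disturb the iteration.
    snapshot = list(children)
    return [f"proto({c})" for c in snapshot]


# Even a one-shot iterator, like a lazily converted Java sequence, maps fully:
assert map_children(iter(["a", "b", "c"])) == ["proto(a)", "proto(b)", "proto(c)"]
```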