snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
Potentially problematic release: this version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/column_name_handler.py +200 -102
- snowflake/snowpark_connect/column_qualifier.py +47 -0
- snowflake/snowpark_connect/config.py +51 -16
- snowflake/snowpark_connect/dataframe_container.py +3 -2
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +53 -8
- snowflake/snowpark_connect/expression/map_extension.py +37 -11
- snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
- snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
- snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +6 -1
- snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
- snowflake/snowpark_connect/relation/map_extension.py +38 -17
- snowflake/snowpark_connect/relation/map_join.py +26 -12
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
- snowflake/snowpark_connect/relation/map_sql.py +124 -25
- snowflake/snowpark_connect/relation/map_stats.py +5 -1
- snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +160 -48
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +73 -21
- snowflake/snowpark_connect/type_mapping.py +90 -20
- snowflake/snowpark_connect/typed_column.py +8 -6
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +24 -4
- snowflake/snowpark_connect/utils/telemetry.py +6 -0
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +1 -1
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
```diff
@@ -13,6 +13,8 @@ from snowflake.snowpark.types import (
     YearMonthIntervalType,
 )
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import (
@@ -78,9 +80,11 @@ def map_extension(
             elif value.HasField("unresolved_attribute"):
                 name = "__" + key + "__" + exp_name[0]
             else:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     "Named argument not supported yet for this input."
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
             return [name], typed_col

         case "interval_literal":
@@ -152,9 +156,11 @@ def map_extension(

             queries = df.queries["queries"]
             if len(queries) != 1:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"Unexpected number of queries: {len(queries)}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
             query = f"({queries[0]})"

             match extension.subquery_expression.subquery_type:
@@ -168,7 +174,13 @@ def map_extension(
                     result_type = BooleanType()
                 case snowflake_proto.SubqueryExpression.SUBQUERY_TYPE_TABLE_ARG:
                     # TODO: Currently, map_sql.py handles this, so we never end up here.
-                    raise SnowparkConnectNotImplementedError("Unexpected table arg")
+                    exception = SnowparkConnectNotImplementedError(
+                        "Unexpected table arg"
+                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception
                 case snowflake_proto.SubqueryExpression.SUBQUERY_TYPE_IN:
                     cols = [
                         map_expression(e, column_mapping, typer)
@@ -184,14 +196,22 @@ def map_extension(
                     )
                     result_type = BooleanType()
                 case other:
-                    raise SnowparkConnectNotImplementedError(
+                    exception = SnowparkConnectNotImplementedError(
                         f"Unexpected subquery type: {other}"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception

             return [name], TypedColumn(result_exp, lambda: [result_type])

         case other:
-            raise SnowparkConnectNotImplementedError(f"Unexpected extension {other}")
+            exception = SnowparkConnectNotImplementedError(
+                f"Unexpected extension {other}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception


 def _format_year_month_interval(
```
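The pattern repeated throughout this release: instead of raising directly, unsupported paths now construct the exception, attach a machine-readable error code, then raise. Below is a minimal runnable sketch of that flow; the simplified `attach_custom_error_code` body is an assumption for illustration (the real helper lives in `error/error_utils.py` and its implementation is not shown in this diff).

```python
# A minimal sketch of the error-code pattern introduced above. The helper
# names mirror the diff; this attach_custom_error_code body is assumed,
# not the package's actual implementation.
from enum import Enum


class ErrorCodes(Enum):
    UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION"


def attach_custom_error_code(exc: Exception, code: ErrorCodes) -> None:
    # Hypothetical: stash the code on the exception instance so handlers
    # and telemetry can read it without changing the type or message.
    exc.custom_error_code = code


def handle_named_argument(supported: bool) -> None:
    if not supported:
        exception = NotImplementedError(
            "Named argument not supported yet for this input."
        )
        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
        raise exception
```

Attaching the code as an attribute presumably keeps existing `except` clauses and error messages intact while letting downstream consumers classify failures.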
```diff
@@ -257,6 +277,9 @@ def _format_day_time_interval(
     if is_negative:
         days = -days

+    # Calculate days string representation (handle -0 case)
+    days_str = "-0" if (is_negative and days == 0) else str(days)
+
     # Format based on the specific start/end field context
     if (
         start_field == DayTimeIntervalType.DAY and end_field == DayTimeIntervalType.DAY
@@ -324,7 +347,10 @@ def _format_day_time_interval(
         start_field == DayTimeIntervalType.HOUR
         and end_field == DayTimeIntervalType.MINUTE
     ):  # HOUR TO MINUTE
-        str_value = f"INTERVAL '{_TWO_DIGIT_FORMAT.format(hours)}:{_TWO_DIGIT_FORMAT.format(minutes)}' HOUR TO MINUTE"
+        if is_negative:
+            str_value = f"INTERVAL '-{_TWO_DIGIT_FORMAT.format(hours)}:{_TWO_DIGIT_FORMAT.format(minutes)}' HOUR TO MINUTE"
+        else:
+            str_value = f"INTERVAL '{_TWO_DIGIT_FORMAT.format(hours)}:{_TWO_DIGIT_FORMAT.format(minutes)}' HOUR TO MINUTE"
     elif (
         start_field == DayTimeIntervalType.HOUR
         and end_field == DayTimeIntervalType.SECOND
@@ -348,21 +374,21 @@ def _format_day_time_interval(
         and end_field == DayTimeIntervalType.SECOND
     ):  # DAY TO SECOND
         if seconds == int(seconds):
-            str_value = f"INTERVAL '{
+            str_value = f"INTERVAL '{days_str} {_format_time_component(hours)}:{_format_time_component(minutes)}:{_format_time_component(int(seconds))}' DAY TO SECOND"
         else:
             seconds_str = _format_seconds_precise(seconds)
-            str_value = f"INTERVAL '{
+            str_value = f"INTERVAL '{days_str} {_format_time_component(hours)}:{_format_time_component(minutes)}:{seconds_str}' DAY TO SECOND"
     else:
         # Fallback - use smart formatting like the original literal.py logic
-        if days
+        if days >= 0:
            if hours == 0 and minutes == 0 and seconds == 0:
                str_value = f"INTERVAL '{int(days)}' DAY"
            else:
                if seconds == int(seconds):
-                    str_value = f"INTERVAL '{
+                    str_value = f"INTERVAL '{days_str} {_format_time_component(hours)}:{_format_time_component(minutes)}:{_format_time_component(int(seconds))}' DAY TO SECOND"
                else:
                    seconds_str = _format_seconds_precise(seconds)
-                    str_value = f"INTERVAL '{
+                    str_value = f"INTERVAL '{days_str} {_format_time_component(hours)}:{_format_time_component(minutes)}:{seconds_str}' DAY TO SECOND"
        elif hours > 0:
            if minutes == 0 and seconds == 0:
                str_value = f"INTERVAL '{_format_time_component(hours)}' HOUR"
```

(Several removed lines above were truncated by the diff viewer and are reproduced as captured.)
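The new `days_str` exists because Python integers cannot represent negative zero, so a negative interval whose day component is zero would otherwise print without its sign. A toy illustration (not the package's function):

```python
# Python ints collapse -0 to 0, so str() drops the sign when the day
# component of a negative interval is zero.
def days_repr(days: int, is_negative: bool) -> str:
    return "-0" if (is_negative and days == 0) else str(days)


assert str(-0) == "0"                           # the problem being worked around
assert days_repr(0, is_negative=True) == "-0"   # e.g. INTERVAL '-0 01:00:00'
assert days_repr(3, is_negative=False) == "3"
```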
```diff
@@ -18,8 +18,11 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quot
 from snowflake.snowpark.types import DayTimeIntervalType, YearMonthIntervalType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import (
+    get_jpype_jclass_lock,
     get_sql_named_arg,
     get_sql_plan,
     get_sql_pos_arg,
@@ -73,17 +76,20 @@ def sql_parser():

 @cache
 def _get_sql_parser():
-    return jpype.JClass("org.apache.spark.sql.execution.SparkSqlParser")()
+    with get_jpype_jclass_lock():
+        return jpype.JClass("org.apache.spark.sql.execution.SparkSqlParser")()


 @cache
 def _get_sql_conf():
-    return jpype.JClass("org.apache.spark.sql.internal.SQLConf")
+    with get_jpype_jclass_lock():
+        return jpype.JClass("org.apache.spark.sql.internal.SQLConf")


 @cache
 def _as_java_list():
-    return jpype.JClass("scala.collection.JavaConverters").seqAsJavaList
+    with get_jpype_jclass_lock():
+        return jpype.JClass("scala.collection.JavaConverters").seqAsJavaList


 def as_java_list(obj):
@@ -92,7 +98,8 @@ def as_java_map(obj):

 @cache
 def _as_java_map():
-    return jpype.JClass("scala.collection.JavaConverters").mapAsJavaMap
+    with get_jpype_jclass_lock():
+        return jpype.JClass("scala.collection.JavaConverters").mapAsJavaMap


 def as_java_map(obj):
```
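All of the cached jpype `JClass` lookups above now run under a shared lock. A sketch of the pattern, assuming `get_jpype_jclass_lock()` returns a process-wide `threading.Lock` (the real helper is imported from `utils/context.py`; its implementation is not shown in this diff):

```python
# Sketch of the locking pattern around cached JClass resolution.
import threading
from functools import cache

_JCLASS_LOCK = threading.Lock()


def get_jpype_jclass_lock() -> threading.Lock:
    # Assumed shape: one lock shared by every JClass-resolving helper.
    return _JCLASS_LOCK


@cache
def _get_sql_parser():
    # functools.cache is not atomic: two threads can both miss the cache
    # and enter the body, so the JClass resolution itself is serialized.
    with get_jpype_jclass_lock():
        import jpype  # assumed available with a started JVM
        return jpype.JClass("org.apache.spark.sql.execution.SparkSqlParser")()
```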
```diff
@@ -253,12 +260,47 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
     class_name = str(exp.getClass().getSimpleName())
     match class_name:
         case "AggregateExpression":
-            func_name = as_java_list(exp.children())[0].nodeName()
+            aggregate_func = as_java_list(exp.children())[0]
+            func_name = aggregate_func.nodeName()
             args = [
                 map_logical_plan_expression(e)
-                for e in
+                for e in list(as_java_list(aggregate_func.children()))
             ]
-            proto = apply_filter_clause(func_name, args, exp)
+
+            # Special handling for percentile_cont and percentile_disc
+            # These functions have a 'reverse' property that indicates sort order
+            # Pass it as a 3rd argument (sort_order expression) without modifying children
+            if func_name.lower() in ("percentile_cont", "percentiledisc"):
+                # percentile_cont/disc should always have exactly 2 children: unresolved attribute and percentile value
+                if len(args) != 2:
+                    exception = AssertionError(
+                        f"{func_name} expected 2 args but got {len(args)}"
+                    )
+                    attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                    raise exception
+
+                reverse = bool(aggregate_func.reverse())
+
+                direction = (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING
+                    if reverse
+                    else expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING
+                )
+
+                sort_order_expr = expressions_proto.Expression(
+                    sort_order=expressions_proto.Expression.SortOrder(
+                        child=args[0],
+                        direction=direction,
+                    )
+                )
+                args.append(sort_order_expr)
+                proto = apply_filter_clause(func_name, [args[0]], exp)
+                # second arg is a literal value and it doesn't make sense to apply filter on it.
+                # also skips filtering on sort_order.
+                proto.unresolved_function.arguments.append(args[1])
+                proto.unresolved_function.arguments.append(sort_order_expr)
+            else:
+                proto = apply_filter_clause(func_name, args, exp)
         case "Alias":
             proto = expressions_proto.Expression(
                 alias=expressions_proto.Expression.Alias(
```
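Catalyst's `percentile_cont`/`percentile_disc` aggregates carry their sort direction as a boolean `reverse` property rather than as a child expression; the hunk above translates it into an explicit `SortOrder` argument. A reduced sketch with stand-in dataclasses in place of the generated Spark Connect protos (these are assumed shapes, not the real classes):

```python
# Stand-in types for illustration; the real code builds
# expressions_proto.Expression.SortOrder messages.
from dataclasses import dataclass
from enum import Enum


class SortDirection(Enum):
    ASCENDING = 1
    DESCENDING = 2


@dataclass
class SortOrder:
    child: object
    direction: SortDirection


def percentile_sort_order(column_arg: object, reverse: bool) -> SortOrder:
    # Catalyst exposes descending percentiles via the boolean `reverse`
    # property; map it onto an explicit SortOrder argument.
    direction = SortDirection.DESCENDING if reverse else SortDirection.ASCENDING
    return SortOrder(child=column_arg, direction=direction)


assert percentile_sort_order("col", reverse=True).direction is SortDirection.DESCENDING
```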
```diff
@@ -275,7 +317,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                         function_name="when",
                         arguments=[
                             map_logical_plan_expression(e)
-                            for e in as_java_list(exp.children())
+                            for e in list(as_java_list(exp.children()))
                         ],
                     )
                 )
@@ -289,7 +331,8 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
             )
         case "Coalesce":
             arguments = [
-                map_logical_plan_expression(e)
+                map_logical_plan_expression(e)
+                for e in list(as_java_list(exp.children()))
             ]

             proto = expressions_proto.Expression(
@@ -357,7 +400,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                     subquery_type=snowflake_proto.SubqueryExpression.SUBQUERY_TYPE_IN,
                     in_subquery_values=[
                         map_logical_plan_expression(value)
-                        for value in as_java_list(exp.values())
+                        for value in list(as_java_list(exp.values()))
                     ],
                 )
             )
@@ -366,7 +409,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
         case "LambdaFunction":
             arguments = [
                 map_logical_plan_expression(arg).unresolved_named_lambda_variable
-                for arg in as_java_list(exp.arguments())
+                for arg in list(as_java_list(exp.arguments()))
             ]
             proto = expressions_proto.Expression(
                 lambda_function=expressions_proto.Expression.LambdaFunction(
@@ -380,14 +423,15 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                     function_name=class_name.lower(),
                     arguments=[
                         map_logical_plan_expression(e)
-                        for e in as_java_list(exp.children())
+                        for e in list(as_java_list(exp.children()))
                     ],
                 )
             )
         case "LikeAny" | "NotLikeAny" | "LikeAll" | "NotLikeAll":
-            patterns = as_java_list(exp.patterns())
+            patterns = list(as_java_list(exp.patterns()))
             arguments = [
-                map_logical_plan_expression(e)
+                map_logical_plan_expression(e)
+                for e in list(as_java_list(exp.children()))
             ]
             arguments += [map_logical_plan_expression(e) for e in patterns]
             proto = expressions_proto.Expression(
@@ -421,19 +465,25 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                 end_field = _YEAR_MONTH_FIELD_MAP.get(end_field_name)

                 if start_field is None:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Invalid year-month interval start field: '{start_field_name}'. Expected 'year' or 'month'."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
                 if end_field is None:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Invalid year-month interval end field: '{end_field_name}'. Expected 'year' or 'month'."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception

                 # Validate field ordering (start_field should be <= end_field)
                 if start_field > end_field:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Invalid year-month interval: start field '{start_field_name}' must come before or equal to end field '{end_field_name}'."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception

                 # Use extension for year-month intervals to preserve start/end field info
                 literal = expressions_proto.Expression.Literal(
@@ -466,19 +516,25 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                 end_field = _DAY_TIME_FIELD_MAP.get(end_field_name)

                 if start_field is None:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Invalid day-time interval start field: '{start_field_name}'. Expected 'day', 'hour', 'minute', or 'second'."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
                 if end_field is None:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Invalid day-time interval end field: '{end_field_name}'. Expected 'day', 'hour', 'minute', or 'second'."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception

                 # Validate field ordering (start_field should be <= end_field)
                 if start_field > end_field:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Invalid day-time interval: start field '{start_field_name}' must come before or equal to end field '{end_field_name}'."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception

                 # Use extension for day-time intervals to preserve start/end field info
                 literal = expressions_proto.Expression.Literal(
@@ -534,19 +590,27 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
             name = str(exp.name())
             value = get_sql_named_arg(name)
             if not value.HasField("literal_type"):
-                raise AnalysisException(f"Found an unbound parameter {name!r}")
+                exception = AnalysisException(f"Found an unbound parameter {name!r}")
+                attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
+                raise exception
             proto = expressions_proto.Expression(literal=value)
         case "NamePlaceholder$":
             # This is a placeholder for an expression name to be resolved later.
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "NamePlaceholder is not supported in SQL expressions."
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         case "PosParameter":
             pos = exp.pos()
             try:
                 value = get_sql_pos_arg(pos)
             except KeyError:
-                raise AnalysisException(f"Found an unbound parameter at position {pos}")
+                exception = AnalysisException(
+                    f"Found an unbound parameter at position {pos}"
+                )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
+                raise exception
             proto = expressions_proto.Expression(literal=value)
         case "ScalarSubquery":
             rel_proto = map_logical_plan_relation(exp.plan())
@@ -616,7 +680,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                         ]
                         + [
                             map_logical_plan_expression(e)
-                            for e in as_java_list(exp.children())
+                            for e in list(as_java_list(exp.children()))
                         ],
                     )
                 )
@@ -659,18 +723,20 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
             )
         case "UnresolvedFunction":
             func_name = ".".join(
-                str(part) for part in as_java_list(exp.nameParts())
+                str(part) for part in list(as_java_list(exp.nameParts()))
             ).lower()
             args = [
                 map_logical_plan_expression(arg)
-                for arg in as_java_list(exp.arguments())
+                for arg in list(as_java_list(exp.arguments()))
             ]

             proto = apply_filter_clause(func_name, args, exp, exp.isDistinct())
         case "UnresolvedNamedLambdaVariable":
             proto = expressions_proto.Expression(
                 unresolved_named_lambda_variable=expressions_proto.Expression.UnresolvedNamedLambdaVariable(
-                    name_parts=[
+                    name_parts=[
+                        str(part) for part in list(as_java_list(exp.nameParts()))
+                    ],
                 )
             )
         case "UnresolvedStar":
@@ -691,9 +757,11 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                 # Build Window expression
                 proto = get_window_expression_proto(window_spec, exp.child())
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Window specification not found {window_spec_reference!r}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+                raise exception
         case "UTF8String":
             proto = expressions_proto.Expression(
                 literal=expressions_proto.Expression.Literal(
@@ -723,13 +791,15 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                     function_name=proto_func,
                     arguments=[
                         map_logical_plan_expression(arg)
-                        for arg in as_java_list(exp.children())
+                        for arg in list(as_java_list(exp.children()))
                     ],
                 )
            )

        case other:
-            raise SnowparkConnectNotImplementedError(f"Not implemented: {other}")
+            exception = SnowparkConnectNotImplementedError(f"Not implemented: {other}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

     return proto

@@ -752,11 +822,11 @@ def get_window_expression_proto(
         window_function=map_logical_plan_expression(window_function),
         partition_spec=[
             map_logical_plan_expression(e)
-            for e in as_java_list(window_spec.partitionSpec())
+            for e in list(as_java_list(window_spec.partitionSpec()))
         ],
         order_spec=[
             map_logical_plan_expression(e).sort_order
-            for e in as_java_list(window_spec.orderSpec())
+            for e in list(as_java_list(window_spec.orderSpec()))
         ],
         frame_spec=frame_spec_proto,
     )
```
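The recurring change in these hunks: generator expressions over Java-backed sequences now copy eagerly via `list(...)`. A stand-in sketch; the motivation is not stated in the diff, so the hazard described in the comments is an assumption (lazy iteration over a Java proxy that may no longer be valid by the time the generator is consumed):

```python
# Snapshot Java-backed sequences into a Python list before mapping,
# rather than iterating them lazily. java_children is any iterable proxy.
def map_children(map_fn, java_children):
    materialized = list(java_children)  # copy while the proxy is alive
    return [map_fn(e) for e in materialized]
```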
```diff
@@ -10,6 +10,8 @@ from snowflake import snowpark
 from snowflake.snowpark.types import MapType, StructType, VariantType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.type_mapping import proto_to_snowpark_type
 from snowflake.snowpark_connect.typed_column import TypedColumn
@@ -54,7 +56,11 @@ def cache_external_udf_wrapper(from_register_udf: bool):
             case "python_udf":
                 pass
             case _:
-                raise ValueError(f"Unsupported UDF type: {function_type}")
+                exception = ValueError(f"Unsupported UDF type: {function_type}")
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception

         return cached_udf

@@ -97,9 +103,11 @@ def register_udf(
         case "scalar_scala_udf":
             output_type = udf_proto.scalar_scala_udf.outputType
         case _:
-            raise ValueError(
+            exception = ValueError(
                 f"Unsupported UDF type: {udf_proto.WhichOneof('function')}"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
     processed_return_type, original_return_type = process_udf_return_type(output_type)
     session = get_or_create_snowpark_session()
     kwargs = {
```
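`register_udf` picks the output type based on which oneof field is set on the UDF proto, and unsupported variants now raise `ValueError` with `UNSUPPORTED_OPERATION` attached. A reduced sketch of that dispatch; a plain dict stands in for the protobuf message, and the `python_udf` field name is an assumption (only `scalar_scala_udf.outputType` appears in the diff above):

```python
# Oneof-style dispatch with a dict standing in for the proto message.
def resolve_output_type(udf_proto: dict):
    function_type = next(iter(udf_proto), None)  # stand-in for WhichOneof("function")
    match function_type:
        case "python_udf":
            return udf_proto["python_udf"]["output_type"]  # assumed field name
        case "scalar_scala_udf":
            return udf_proto["scalar_scala_udf"]["outputType"]
        case _:
            raise ValueError(f"Unsupported UDF type: {function_type}")


assert resolve_output_type({"scalar_scala_udf": {"outputType": "LongType"}}) == "LongType"
```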
```diff
@@ -15,6 +15,8 @@ from snowflake.snowpark.exceptions import SnowparkSQLException
 from snowflake.snowpark.types import ArrayType, LongType, MapType, StructType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import (
@@ -85,9 +87,11 @@ def map_unresolved_attribute(
         grouping_spark_columns = get_current_grouping_columns()
         if not grouping_spark_columns:
             # grouping__id can only be used with GROUP BY CUBE/ROLLUP/GROUPING SETS
-            raise AnalysisException(
+            exception = AnalysisException(
                 "[MISSING_GROUP_BY] grouping__id can only be used with GROUP BY (CUBE | ROLLUP | GROUPING SETS)"
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+            raise exception
         # Convert to GROUPING_ID() function call with the grouping columns
         # Map Spark column names to Snowpark column names
         snowpark_cols = []
@@ -99,9 +103,11 @@ def map_unresolved_attribute(
                 )
             )
             if not snowpark_name:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"[INTERNAL_ERROR] Cannot find Snowpark column mapping for grouping column '{spark_col_name}'"
                 )
+                attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                raise exception
             snowpark_cols.append(snowpark_fn.col(snowpark_name))

         # Call GROUPING_ID with all grouping columns using Snowpark names
@@ -155,10 +161,12 @@ def map_unresolved_attribute(

         if is_catalog:
             # This looks like a catalog.database.column.field pattern
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"[UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `{original_attr_name}` cannot be resolved. "
                 f"Cross-catalog column references are not supported in DataFrame API."
             )
+            attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+            raise exception

         attr_name = ".".join(name_parts)

@@ -205,18 +213,24 @@ def map_unresolved_attribute(
                 if compiled_regex.fullmatch(col_name):
                     matched_columns.append(col_name)
             except re.error as e:
-                raise AnalysisException(f"Invalid regex pattern '{regex_pattern}': {e}")
+                exception = AnalysisException(
+                    f"Invalid regex pattern '{regex_pattern}': {e}"
+                )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+                raise exception

             if not matched_columns:
                 # Keep the improved error message for SQL regex patterns
                 # This is only hit for SQL queries like SELECT `(e|f)` FROM table
                 # when spark.sql.parser.quotedRegexColumnNames is enabled
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"No columns match the regex pattern '{regex_pattern}'. "
                     f"Snowflake SQL does not support SELECT statements with no columns. "
                     f"Please ensure your regex pattern matches at least one column. "
                     f"Available columns: {', '.join(available_columns[:10])}{'...' if len(available_columns) > 10 else ''}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

             # When multiple columns match, we need to signal that this should expand to multiple columns
             # Since map_unresolved_attribute can only return one column, we'll use a special marker
```
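When `spark.sql.parser.quotedRegexColumnNames` is enabled, a quoted identifier like `` `(e|f)` `` is treated as a regex that must full-match a column name. A simplified sketch of the selection logic above, with plain `ValueError` standing in for `AnalysisException`:

```python
# Regex column selection: compile the quoted pattern, full-match it
# against the available column names, and fail on empty results.
import re


def match_columns(regex_pattern: str, available_columns: list[str]) -> list[str]:
    try:
        compiled = re.compile(regex_pattern)
    except re.error as e:
        raise ValueError(f"Invalid regex pattern '{regex_pattern}': {e}")
    matched = [c for c in available_columns if compiled.fullmatch(c)]
    if not matched:
        raise ValueError(f"No columns match the regex pattern '{regex_pattern}'.")
    return matched


assert match_columns("(e|f)", ["e", "f", "g"]) == ["e", "f"]
```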
```diff
@@ -233,7 +247,7 @@ def map_unresolved_attribute(
                 )
             )
             col = get_col(snowpark_name)
-            qualifiers = column_mapping.
+            qualifiers = column_mapping.get_qualifiers_for_spark_column(quoted_col_name)
             typed_col = TypedColumn(col, lambda: typer.type(col))
             typed_col.set_qualifiers(qualifiers)
             # Store matched columns info for later use
@@ -248,7 +262,7 @@ def map_unresolved_attribute(
                 )
             )
             col = get_col(snowpark_name)
-            qualifiers = column_mapping.
+            qualifiers = column_mapping.get_qualifiers_for_spark_column(quoted_col_name)
             typed_col = TypedColumn(col, lambda: typer.type(col))
             typed_col.set_qualifiers(qualifiers)
             return (matched_columns[0], typed_col)
@@ -266,7 +280,7 @@ def map_unresolved_attribute(
         )
         if snowpark_name is not None:
             col = get_col(snowpark_name)
-            qualifiers = column_mapping.
+            qualifiers = column_mapping.get_qualifiers_for_spark_column(quoted_attr_name)
         else:
             # this means it has to be a struct column with a field name
             snowpark_name: str | None = None
@@ -324,7 +338,7 @@ def map_unresolved_attribute(
                 )
                 if snowpark_name is not None:
                     col = get_col(snowpark_name)
-                    qualifiers = column_mapping.
+                    qualifiers = column_mapping.get_qualifiers_for_spark_column(
                         unqualified_name
                     )
                     typed_col = TypedColumn(col, lambda: typer.type(col))
@@ -346,16 +360,22 @@ def map_unresolved_attribute(
             )
             if outer_col_name:
                 # This is an outer scope column being referenced inside a lambda
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Reference to non-lambda variable '{attr_name}' within lambda function. "
                     f"Lambda functions can only access their own parameters. "
                     f"If '{attr_name}' is a table column, it must be passed as an explicit parameter to the enclosing function."
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception

             if has_plan_id:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[RESOLVED_REFERENCE_COLUMN_NOT_FOUND] The column "{attr_name}" does not exist in the target dataframe.'
                 )
+                attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                raise exception
             else:
                 # Column does not exist. Pass in dummy column name for lazy error throwing as it could be a built-in function
                 snowpark_name = attr_name
@@ -365,9 +385,11 @@ def map_unresolved_attribute(
                 col_type = typer.type(col)[0]
             except SnowparkSQLException as e:
                 if e.raw_message is not None and "invalid identifier" in e.raw_message:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f'[COLUMN_NOT_FOUND] The column "{attr_name}" does not exist in the target dataframe.'
                     )
+                    attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                    raise exception
                 else:
                     raise
             is_struct = isinstance(col_type, StructType)
@@ -383,7 +405,7 @@ def map_unresolved_attribute(
             for field_name in path:
                 col = col.getItem(field_name)

-            qualifiers =
+            qualifiers = set()

             typed_col = TypedColumn(col, lambda: typer.type(col))
             typed_col.set_qualifiers(qualifiers)
@@ -416,7 +438,9 @@ def _match_path_to_struct(path: list[str], col_type: StructType) -> list[str]:
             typ = typ.value_type if isinstance(typ, MapType) else typ.element_type
         else:
             # If the type is not a struct, map, or array, we cannot access the field.
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"[INVALID_EXTRACT_BASE_FIELD_TYPE] Can't extract a value from \"{'.'.join(path[:i])}\". Need a complex type [STRUCT, ARRAY, MAP] but got \"{typ}\"."
             )
+            attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+            raise exception
     return adjusted_path
```

(Some removed lines in these hunks were truncated by the diff viewer, e.g. `qualifiers = column_mapping.`, and are reproduced as captured.)
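`_match_path_to_struct` walks a dotted field path through nested struct/map/array types and rejects extraction from anything else. A reduced walk in the same spirit, with toy type classes standing in for Snowpark's `StructType`/`MapType`/`ArrayType` (the names and shapes here are illustrative, not the real API):

```python
# Toy nested types; each step of the path must land on a complex type.
from dataclasses import dataclass, field


@dataclass
class Struct:
    fields: dict = field(default_factory=dict)


@dataclass
class Map:
    value_type: object = None


@dataclass
class Array:
    element_type: object = None


def check_path(path: list[str], typ) -> None:
    for i, name in enumerate(path):
        if isinstance(typ, Struct):
            typ = typ.fields[name]
        elif isinstance(typ, (Map, Array)):
            # Maps descend into the value type, arrays into the element type.
            typ = typ.value_type if isinstance(typ, Map) else typ.element_type
        else:
            raise TypeError(
                f"Can't extract a value from \"{'.'.join(path[:i])}\"; "
                f"need a complex type [STRUCT, ARRAY, MAP] but got {typ!r}."
            )


check_path(["a", "b"], Struct(fields={"a": Struct(fields={"b": "int"})}))
```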