snowpark-connect 0.30.0__py3-none-any.whl → 0.31.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect has been flagged as potentially problematic.
- snowflake/snowpark_connect/column_name_handler.py +150 -25
- snowflake/snowpark_connect/config.py +54 -16
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +5 -1
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +48 -4
- snowflake/snowpark_connect/expression/map_extension.py +25 -5
- snowflake/snowpark_connect/expression/map_sql_expression.py +65 -30
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +33 -9
- snowflake/snowpark_connect/expression/map_unresolved_function.py +627 -205
- snowflake/snowpark_connect/expression/map_unresolved_star.py +5 -1
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +34 -12
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +66 -4
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +88 -56
- snowflake/snowpark_connect/relation/map_extension.py +28 -8
- snowflake/snowpark_connect/relation/map_join.py +21 -10
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +36 -9
- snowflake/snowpark_connect/relation/map_sql.py +91 -24
- snowflake/snowpark_connect/relation/map_stats.py +25 -6
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +24 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +15 -5
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/utils.py +19 -2
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +146 -63
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +72 -19
- snowflake/snowpark_connect/type_mapping.py +54 -17
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +5 -2
- snowflake/snowpark_connect/utils/telemetry.py +81 -18
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/METADATA +3 -2
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/RECORD +81 -78
- {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/top_level.txt +0 -0
@@ -13,6 +13,8 @@ from snowflake.snowpark.types import (
     YearMonthIntervalType,
 )
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import (
@@ -78,9 +80,11 @@ def map_extension(
             elif value.HasField("unresolved_attribute"):
                 name = "__" + key + "__" + exp_name[0]
             else:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     "Named argument not supported yet for this input."
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
             return [name], typed_col

         case "interval_literal":
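The change above is the pattern repeated throughout this release: instead of raising directly, the exception is constructed, tagged with a code from the new error_codes module, and then raised. A minimal, hypothetical sketch of how such a helper pair can work (the real ErrorCodes and attach_custom_error_code live under snowflake/snowpark_connect/error/ and may differ in detail):

    from enum import Enum, auto

    class ErrorCodes(Enum):
        # Hypothetical members; the shipped enum defines its own values.
        UNSUPPORTED_OPERATION = auto()
        INVALID_INPUT = auto()

    def attach_custom_error_code(exception: Exception, code: ErrorCodes) -> Exception:
        # Stash the code on the exception instance so outer handlers and
        # telemetry can read it without changing the exception type or message.
        exception.custom_error_code = code
        return exception

    exc = NotImplementedError("Named argument not supported yet for this input.")
    attach_custom_error_code(exc, ErrorCodes.UNSUPPORTED_OPERATION)
    try:
        raise exc
    except NotImplementedError as e:
        print(e.custom_error_code)  # ErrorCodes.UNSUPPORTED_OPERATION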
@@ -152,9 +156,11 @@ def map_extension(

             queries = df.queries["queries"]
             if len(queries) != 1:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"Unexpected number of queries: {len(queries)}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
             query = f"({queries[0]})"

             match extension.subquery_expression.subquery_type:
@@ -168,7 +174,13 @@
                     result_type = BooleanType()
                 case snowflake_proto.SubqueryExpression.SUBQUERY_TYPE_TABLE_ARG:
                     # TODO: Currently, map_sql.py handles this, so we never end up here.
-                    raise SnowparkConnectNotImplementedError("Unexpected table arg")
+                    exception = SnowparkConnectNotImplementedError(
+                        "Unexpected table arg"
+                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception
                 case snowflake_proto.SubqueryExpression.SUBQUERY_TYPE_IN:
                     cols = [
                         map_expression(e, column_mapping, typer)
@@ -184,14 +196,22 @@
                     )
                     result_type = BooleanType()
                 case other:
-                    raise SnowparkConnectNotImplementedError(
+                    exception = SnowparkConnectNotImplementedError(
                         f"Unexpected subquery type: {other}"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception

             return [name], TypedColumn(result_exp, lambda: [result_type])

         case other:
-            raise SnowparkConnectNotImplementedError(f"Unexpected extension {other}")
+            exception = SnowparkConnectNotImplementedError(
+                f"Unexpected extension {other}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception


 def _format_year_month_interval(
@@ -18,8 +18,11 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quot
 from snowflake.snowpark.types import DayTimeIntervalType, YearMonthIntervalType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import (
+    get_jpype_jclass_lock,
     get_sql_named_arg,
     get_sql_plan,
     get_sql_pos_arg,
@@ -73,17 +76,20 @@ def sql_parser():

 @cache
 def _get_sql_parser():
-    return jpype.JClass("org.apache.spark.sql.execution.SparkSqlParser")()
+    with get_jpype_jclass_lock():
+        return jpype.JClass("org.apache.spark.sql.execution.SparkSqlParser")()


 @cache
 def _get_sql_conf():
-    return jpype.JClass("org.apache.spark.sql.internal.SQLConf")
+    with get_jpype_jclass_lock():
+        return jpype.JClass("org.apache.spark.sql.internal.SQLConf")


 @cache
 def _as_java_list():
-    return jpype.JClass("scala.collection.JavaConverters").seqAsJavaList
+    with get_jpype_jclass_lock():
+        return jpype.JClass("scala.collection.JavaConverters").seqAsJavaList


 def as_java_list(obj):
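These hunks take a lock (get_jpype_jclass_lock from utils/context.py) around the cached jpype.JClass lookups, presumably so that concurrent first calls do not race inside the JVM bridge. A rough sketch of the lock-plus-cache shape, with hypothetical stand-ins for the lock and the class resolution:

    import threading
    from functools import cache

    _jclass_lock = threading.Lock()  # stand-in for get_jpype_jclass_lock()

    def _resolve_jclass(name: str) -> str:
        # Stand-in for jpype.JClass(name); imagine this is slow and not thread-safe.
        return f"<resolved {name}>"

    @cache
    def _get_sql_parser():
        # Only the first call pays for the lock-protected resolution;
        # @cache hands the same object to every later caller.
        with _jclass_lock:
            return _resolve_jclass("org.apache.spark.sql.execution.SparkSqlParser")

    print(_get_sql_parser())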
@@ -92,7 +98,8 @@ def as_java_list(obj):

 @cache
 def _as_java_map():
-    return jpype.JClass("scala.collection.JavaConverters").mapAsJavaMap
+    with get_jpype_jclass_lock():
+        return jpype.JClass("scala.collection.JavaConverters").mapAsJavaMap


 def as_java_map(obj):
@@ -256,7 +263,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
             func_name = as_java_list(exp.children())[0].nodeName()
             args = [
                 map_logical_plan_expression(e)
-                for e in as_java_list(as_java_list(exp.children())[0].children())
+                for e in list(as_java_list(as_java_list(exp.children())[0].children()))
             ]
             proto = apply_filter_clause(func_name, args, exp)
         case "Alias":
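This and the following hunks all make the same change: `for x in as_java_list(...)` becomes `for x in list(as_java_list(...))`, so the Java-backed sequence exposed through the JPype proxy is copied into a plain Python list before it is iterated. One plausible motivation (not stated in the diff) is taking a stable snapshot instead of iterating a live JVM-backed view; a tiny, hypothetical illustration:

    class LiveView:
        # Stand-in for a JPype proxy over a Java/Scala sequence.
        def __init__(self, items):
            self._items = items

        def __iter__(self):
            return iter(self._items)

    view = LiveView([1, 2, 3])
    snapshot = list(view)      # eager copy, like list(as_java_list(...))
    view._items.append(4)      # the backing collection changes afterwards

    print(snapshot)            # [1, 2, 3]  (unaffected)
    print(list(view))          # [1, 2, 3, 4]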
@@ -275,7 +282,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                     function_name="when",
                     arguments=[
                         map_logical_plan_expression(e)
-                        for e in as_java_list(exp.children())
+                        for e in list(as_java_list(exp.children()))
                     ],
                 )
             )
@@ -289,7 +296,8 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
             )
         case "Coalesce":
             arguments = [
-                map_logical_plan_expression(e)
+                map_logical_plan_expression(e)
+                for e in list(as_java_list(exp.children()))
             ]

             proto = expressions_proto.Expression(
@@ -357,7 +365,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                     subquery_type=snowflake_proto.SubqueryExpression.SUBQUERY_TYPE_IN,
                     in_subquery_values=[
                         map_logical_plan_expression(value)
-                        for value in as_java_list(exp.values())
+                        for value in list(as_java_list(exp.values()))
                     ],
                 )
             )
@@ -366,7 +374,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
         case "LambdaFunction":
             arguments = [
                 map_logical_plan_expression(arg).unresolved_named_lambda_variable
-                for arg in as_java_list(exp.arguments())
+                for arg in list(as_java_list(exp.arguments()))
             ]
             proto = expressions_proto.Expression(
                 lambda_function=expressions_proto.Expression.LambdaFunction(
@@ -380,14 +388,15 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                     function_name=class_name.lower(),
                     arguments=[
                         map_logical_plan_expression(e)
-                        for e in as_java_list(exp.children())
+                        for e in list(as_java_list(exp.children()))
                     ],
                 )
             )
         case "LikeAny" | "NotLikeAny" | "LikeAll" | "NotLikeAll":
-            patterns = as_java_list(exp.patterns())
+            patterns = list(as_java_list(exp.patterns()))
             arguments = [
-                map_logical_plan_expression(e)
+                map_logical_plan_expression(e)
+                for e in list(as_java_list(exp.children()))
             ]
             arguments += [map_logical_plan_expression(e) for e in patterns]
             proto = expressions_proto.Expression(
@@ -421,19 +430,25 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
             end_field = _YEAR_MONTH_FIELD_MAP.get(end_field_name)

             if start_field is None:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Invalid year-month interval start field: '{start_field_name}'. Expected 'year' or 'month'."
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception
             if end_field is None:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Invalid year-month interval end field: '{end_field_name}'. Expected 'year' or 'month'."
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

             # Validate field ordering (start_field should be <= end_field)
             if start_field > end_field:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Invalid year-month interval: start field '{start_field_name}' must come before or equal to end field '{end_field_name}'."
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

             # Use extension for year-month intervals to preserve start/end field info
             literal = expressions_proto.Expression.Literal(
@@ -466,19 +481,25 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
             end_field = _DAY_TIME_FIELD_MAP.get(end_field_name)

             if start_field is None:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Invalid day-time interval start field: '{start_field_name}'. Expected 'day', 'hour', 'minute', or 'second'."
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception
             if end_field is None:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Invalid day-time interval end field: '{end_field_name}'. Expected 'day', 'hour', 'minute', or 'second'."
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

             # Validate field ordering (start_field should be <= end_field)
             if start_field > end_field:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Invalid day-time interval: start field '{start_field_name}' must come before or equal to end field '{end_field_name}'."
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

             # Use extension for day-time intervals to preserve start/end field info
             literal = expressions_proto.Expression.Literal(
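These checks run when a SQL interval literal is mapped; invalid or misordered field names are now rejected with an INVALID_INPUT code. For context, the kind of query that exercises this path (standard Spark SQL interval literals; assumes an existing session named `spark`):

    # Assumes an existing Spark Connect / Snowpark Connect session named `spark`.
    spark.sql("SELECT INTERVAL '1-2' YEAR TO MONTH AS ym").show()
    spark.sql("SELECT INTERVAL '1 10:30:00' DAY TO SECOND AS dt").show()

    # A misordered field list such as MONTH TO YEAR is the
    # "start field ... must come before or equal to end field" case above.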
@@ -534,19 +555,27 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
             name = str(exp.name())
             value = get_sql_named_arg(name)
             if not value.HasField("literal_type"):
-                raise AnalysisException(f"Found an unbound parameter {name!r}")
+                exception = AnalysisException(f"Found an unbound parameter {name!r}")
+                attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
+                raise exception
             proto = expressions_proto.Expression(literal=value)
         case "NamePlaceholder$":
             # This is a placeholder for an expression name to be resolved later.
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "NamePlaceholder is not supported in SQL expressions."
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         case "PosParameter":
             pos = exp.pos()
             try:
                 value = get_sql_pos_arg(pos)
             except KeyError:
-                raise AnalysisException(f"Found an unbound parameter at position {pos}")
+                exception = AnalysisException(
+                    f"Found an unbound parameter at position {pos}"
+                )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
+                raise exception
             proto = expressions_proto.Expression(literal=value)
         case "ScalarSubquery":
             rel_proto = map_logical_plan_relation(exp.plan())
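The NamedParameter and PosParameter branches correspond to parameterized spark.sql() calls; a parameter that was never bound now raises AnalysisException tagged INVALID_SQL_SYNTAX. For context, this is how such parameters are normally supplied (named parameters since PySpark 3.4, positional ones since 3.5; assumes an existing session named `spark`):

    # Named parameter bound through args=:
    spark.sql("SELECT :threshold AS t", args={"threshold": 42}).show()

    # Positional parameter bound through a list:
    spark.sql("SELECT ? AS t", args=[42]).show()

    # Leaving a parameter unbound (e.g. passing no args at all) is what
    # produces the "Found an unbound parameter ..." errors in the hunk above.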
@@ -616,7 +645,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                     ]
                     + [
                         map_logical_plan_expression(e)
-                        for e in as_java_list(exp.children())
+                        for e in list(as_java_list(exp.children()))
                     ],
                 )
             )
@@ -659,18 +688,20 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
             )
         case "UnresolvedFunction":
             func_name = ".".join(
-                str(part) for part in as_java_list(exp.nameParts())
+                str(part) for part in list(as_java_list(exp.nameParts()))
             ).lower()
             args = [
                 map_logical_plan_expression(arg)
-                for arg in as_java_list(exp.arguments())
+                for arg in list(as_java_list(exp.arguments()))
             ]

             proto = apply_filter_clause(func_name, args, exp, exp.isDistinct())
         case "UnresolvedNamedLambdaVariable":
             proto = expressions_proto.Expression(
                 unresolved_named_lambda_variable=expressions_proto.Expression.UnresolvedNamedLambdaVariable(
-                    name_parts=[
+                    name_parts=[
+                        str(part) for part in list(as_java_list(exp.nameParts()))
+                    ],
                 )
             )
         case "UnresolvedStar":
@@ -691,9 +722,11 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                 # Build Window expression
                 proto = get_window_expression_proto(window_spec, exp.child())
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Window specification not found {window_spec_reference!r}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+                raise exception
         case "UTF8String":
             proto = expressions_proto.Expression(
                 literal=expressions_proto.Expression.Literal(
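The window-reference branch resolves OVER clauses that refer to a named WINDOW specification; a reference to an undefined name now carries an INSUFFICIENT_INPUT code. A query of the shape that exercises it (standard Spark SQL; assumes a session `spark` and a table t(k, v)):

    spark.sql(
        "SELECT k, SUM(v) OVER w AS s FROM t "
        "WINDOW w AS (PARTITION BY k ORDER BY v)"
    ).show()

    # Referencing a window name that was never defined (e.g. OVER w2) is the
    # "Window specification not found ..." case in the hunk above.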
@@ -723,13 +756,15 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                     function_name=proto_func,
                     arguments=[
                         map_logical_plan_expression(arg)
-                        for arg in as_java_list(exp.children())
+                        for arg in list(as_java_list(exp.children()))
                     ],
                 )
             )

         case other:
-            raise SnowparkConnectNotImplementedError(f"Not implemented: {other}")
+            exception = SnowparkConnectNotImplementedError(f"Not implemented: {other}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

     return proto

@@ -752,11 +787,11 @@ def get_window_expression_proto(
         window_function=map_logical_plan_expression(window_function),
         partition_spec=[
             map_logical_plan_expression(e)
-            for e in as_java_list(window_spec.partitionSpec())
+            for e in list(as_java_list(window_spec.partitionSpec()))
         ],
         order_spec=[
             map_logical_plan_expression(e).sort_order
-            for e in as_java_list(window_spec.orderSpec())
+            for e in list(as_java_list(window_spec.orderSpec()))
         ],
         frame_spec=frame_spec_proto,
     )
@@ -10,6 +10,8 @@ from snowflake import snowpark
 from snowflake.snowpark.types import MapType, StructType, VariantType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.type_mapping import proto_to_snowpark_type
 from snowflake.snowpark_connect.typed_column import TypedColumn
@@ -54,7 +56,11 @@ def cache_external_udf_wrapper(from_register_udf: bool):
         case "python_udf":
             pass
         case _:
-            raise ValueError(f"Unsupported UDF type: {function_type}")
+            exception = ValueError(f"Unsupported UDF type: {function_type}")
+            attach_custom_error_code(
+                exception, ErrorCodes.UNSUPPORTED_OPERATION
+            )
+            raise exception

     return cached_udf

@@ -97,9 +103,11 @@ def register_udf(
         case "scalar_scala_udf":
             output_type = udf_proto.scalar_scala_udf.outputType
         case _:
-            raise ValueError(
+            exception = ValueError(
                 f"Unsupported UDF type: {udf_proto.WhichOneof('function')}"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
     processed_return_type, original_return_type = process_udf_return_type(output_type)
     session = get_or_create_snowpark_session()
     kwargs = {
@@ -15,6 +15,8 @@ from snowflake.snowpark.exceptions import SnowparkSQLException
 from snowflake.snowpark.types import ArrayType, LongType, MapType, StructType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import (
@@ -85,9 +87,11 @@ def map_unresolved_attribute(
         grouping_spark_columns = get_current_grouping_columns()
         if not grouping_spark_columns:
             # grouping__id can only be used with GROUP BY CUBE/ROLLUP/GROUPING SETS
-            raise AnalysisException(
+            exception = AnalysisException(
                 "[MISSING_GROUP_BY] grouping__id can only be used with GROUP BY (CUBE | ROLLUP | GROUPING SETS)"
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+            raise exception
         # Convert to GROUPING_ID() function call with the grouping columns
         # Map Spark column names to Snowpark column names
         snowpark_cols = []
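grouping__id is only defined under GROUP BY CUBE / ROLLUP / GROUPING SETS; otherwise the code now raises the [MISSING_GROUP_BY] error with an INVALID_FUNCTION_ARGUMENT code. Roughly what is and is not accepted (standard Spark SQL behavior; assumes a session `spark` and a table sales(region, product, amount)):

    # Valid: grouping__id is defined because of ROLLUP.
    spark.sql(
        "SELECT region, product, grouping__id, SUM(amount) "
        "FROM sales GROUP BY ROLLUP (region, product)"
    ).show()

    # Raises [MISSING_GROUP_BY]: a plain GROUP BY has no grouping__id.
    # spark.sql(
    #     "SELECT region, grouping__id, SUM(amount) FROM sales GROUP BY region"
    # ).show()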
@@ -99,9 +103,11 @@
                 )
             )
             if not snowpark_name:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"[INTERNAL_ERROR] Cannot find Snowpark column mapping for grouping column '{spark_col_name}'"
                 )
+                attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                raise exception
             snowpark_cols.append(snowpark_fn.col(snowpark_name))

         # Call GROUPING_ID with all grouping columns using Snowpark names
@@ -155,10 +161,12 @@

         if is_catalog:
             # This looks like a catalog.database.column.field pattern
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"[UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `{original_attr_name}` cannot be resolved. "
                 f"Cross-catalog column references are not supported in DataFrame API."
             )
+            attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+            raise exception

         attr_name = ".".join(name_parts)

@@ -205,18 +213,24 @@
                     if compiled_regex.fullmatch(col_name):
                         matched_columns.append(col_name)
             except re.error as e:
-                raise AnalysisException(f"Invalid regex pattern '{regex_pattern}': {e}")
+                exception = AnalysisException(
+                    f"Invalid regex pattern '{regex_pattern}': {e}"
+                )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+                raise exception

             if not matched_columns:
                 # Keep the improved error message for SQL regex patterns
                 # This is only hit for SQL queries like SELECT `(e|f)` FROM table
                 # when spark.sql.parser.quotedRegexColumnNames is enabled
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"No columns match the regex pattern '{regex_pattern}'. "
                     f"Snowflake SQL does not support SELECT statements with no columns. "
                     f"Please ensure your regex pattern matches at least one column. "
                     f"Available columns: {', '.join(available_columns[:10])}{'...' if len(available_columns) > 10 else ''}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

             # When multiple columns match, we need to signal that this should expand to multiple columns
             # Since map_unresolved_attribute can only return one column, we'll use a special marker
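The regex branch is driven by quoted regex column names in SQL, which Spark enables via spark.sql.parser.quotedRegexColumnNames; both a bad pattern and a pattern that matches nothing now carry custom error codes. Typical usage (standard Spark config; assumes a session `spark` and a view t with columns e, f, g):

    spark.conf.set("spark.sql.parser.quotedRegexColumnNames", "true")

    # Backquoted identifiers are treated as regexes over column names,
    # so this selects both e and f:
    spark.sql("SELECT `(e|f)` FROM t").show()

    # A pattern matching no columns hits the
    # "No columns match the regex pattern ..." error in the hunk above.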
@@ -346,16 +360,22 @@
             )
             if outer_col_name:
                 # This is an outer scope column being referenced inside a lambda
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Reference to non-lambda variable '{attr_name}' within lambda function. "
                     f"Lambda functions can only access their own parameters. "
                     f"If '{attr_name}' is a table column, it must be passed as an explicit parameter to the enclosing function."
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception

             if has_plan_id:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[RESOLVED_REFERENCE_COLUMN_NOT_FOUND] The column "{attr_name}" does not exist in the target dataframe.'
                 )
+                attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                raise exception
             else:
                 # Column does not exist. Pass in dummy column name for lazy error throwing as it could be a built-in function
                 snowpark_name = attr_name
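The lambda branch rejects a lambda body that refers to a column of the enclosing query rather than to one of its own parameters, and now tags that with UNSUPPORTED_OPERATION. The shape that triggers it versus the accepted form (PySpark higher-order functions; assumes a session `spark`; open-source Spark itself generally allows such outer references):

    from pyspark.sql import functions as F

    df = spark.createDataFrame([(1, [1, 2, 3])], ["offset", "xs"])

    # Accepted: the lambda uses only its own parameter.
    df.select(F.transform("xs", lambda x: x + 1)).show()

    # Rejected here: the lambda closes over the outer column `offset`,
    # which is the "Reference to non-lambda variable ..." case above.
    # df.select(F.transform("xs", lambda x: x + F.col("offset"))).show()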
@@ -365,9 +385,11 @@
             col_type = typer.type(col)[0]
         except SnowparkSQLException as e:
             if e.raw_message is not None and "invalid identifier" in e.raw_message:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[COLUMN_NOT_FOUND] The column "{attr_name}" does not exist in the target dataframe.'
                 )
+                attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                raise exception
             else:
                 raise
         is_struct = isinstance(col_type, StructType)
@@ -416,7 +438,9 @@ def _match_path_to_struct(path: list[str], col_type: StructType) -> list[str]:
             typ = typ.value_type if isinstance(typ, MapType) else typ.element_type
         else:
             # If the type is not a struct, map, or array, we cannot access the field.
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"[INVALID_EXTRACT_BASE_FIELD_TYPE] Can't extract a value from \"{'.'.join(path[:i])}\". Need a complex type [STRUCT, ARRAY, MAP] but got \"{typ}\"."
             )
+            attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+            raise exception
     return adjusted_path
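_match_path_to_struct walks a dotted path through struct/map/array types; drilling into a field of a plain scalar column now fails with a TYPE_MISMATCH code. The kind of access involved (standard PySpark; assumes a session `spark`):

    from pyspark.sql import Row

    df = spark.createDataFrame([Row(x=1, s=Row(a=2))])

    df.select("s.a").show()    # fine: s is a struct column
    # df.select("x.a").show()  # x is a plain integer column, so field extraction
    #                          # fails with [INVALID_EXTRACT_BASE_FIELD_TYPE]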
|