snowpark-connect 0.21.0__py3-none-any.whl → 0.22.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (41)
  1. snowflake/snowpark_connect/config.py +19 -3
  2. snowflake/snowpark_connect/error/error_utils.py +25 -0
  3. snowflake/snowpark_connect/expression/map_udf.py +4 -4
  4. snowflake/snowpark_connect/expression/map_unresolved_function.py +203 -128
  5. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
  6. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
  7. snowflake/snowpark_connect/relation/map_aggregate.py +102 -18
  8. snowflake/snowpark_connect/relation/map_column_ops.py +21 -2
  9. snowflake/snowpark_connect/relation/map_map_partitions.py +3 -1
  10. snowflake/snowpark_connect/relation/map_sql.py +18 -191
  11. snowflake/snowpark_connect/relation/map_udtf.py +4 -4
  12. snowflake/snowpark_connect/relation/read/map_read_json.py +12 -1
  13. snowflake/snowpark_connect/relation/read/reader_config.py +1 -0
  14. snowflake/snowpark_connect/relation/write/map_write.py +68 -24
  15. snowflake/snowpark_connect/server.py +9 -0
  16. snowflake/snowpark_connect/type_mapping.py +4 -0
  17. snowflake/snowpark_connect/utils/describe_query_cache.py +2 -9
  18. snowflake/snowpark_connect/utils/session.py +0 -4
  19. snowflake/snowpark_connect/utils/telemetry.py +213 -61
  20. snowflake/snowpark_connect/utils/udxf_import_utils.py +14 -0
  21. snowflake/snowpark_connect/version.py +1 -1
  22. snowflake/snowpark_decoder/__init__.py +0 -0
  23. snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.py +36 -0
  24. snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.pyi +156 -0
  25. snowflake/snowpark_decoder/dp_session.py +111 -0
  26. snowflake/snowpark_decoder/spark_decoder.py +76 -0
  27. {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.22.1.dist-info}/METADATA +2 -2
  28. {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.22.1.dist-info}/RECORD +40 -29
  29. {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.22.1.dist-info}/top_level.txt +1 -0
  30. spark/__init__.py +0 -0
  31. spark/connect/__init__.py +0 -0
  32. spark/connect/envelope_pb2.py +31 -0
  33. spark/connect/envelope_pb2.pyi +46 -0
  34. snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
  35. {snowpark_connect-0.21.0.data → snowpark_connect-0.22.1.data}/scripts/snowpark-connect +0 -0
  36. {snowpark_connect-0.21.0.data → snowpark_connect-0.22.1.data}/scripts/snowpark-session +0 -0
  37. {snowpark_connect-0.21.0.data → snowpark_connect-0.22.1.data}/scripts/snowpark-submit +0 -0
  38. {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.22.1.dist-info}/WHEEL +0 -0
  39. {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.22.1.dist-info}/licenses/LICENSE-binary +0 -0
  40. {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.22.1.dist-info}/licenses/LICENSE.txt +0 -0
  41. {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.22.1.dist-info}/licenses/NOTICE-binary +0 -0
@@ -168,6 +168,9 @@ class GlobalConfig:
168
168
  "snowpark.connect.udf.packages": lambda session, packages: session.add_packages(
169
169
  *packages.strip("[] ").split(",")
170
170
  ),
171
+ "snowpark.connect.udf.imports": lambda session, imports: parse_imports(
172
+ session, imports
173
+ ),
171
174
  }
172
175
 
173
176
  float_config_list = []
@@ -332,7 +335,7 @@ def route_config_proto(
332
335
  match op_type:
333
336
  case "set":
334
337
  logger.info("SET")
335
-
338
+ telemetry.report_config_set(config.operation.set.pairs)
336
339
  for pair in config.operation.set.pairs:
337
340
  # Check if the value field is present, not present when invalid fields are set in conf.
338
341
  if not pair.HasField("value"):
@@ -342,7 +345,6 @@ def route_config_proto(
342
345
  f"Cannot set config '{pair.key}' to None"
343
346
  )
344
347
 
345
- telemetry.report_config_set(pair.key, pair.value)
346
348
  set_config_param(
347
349
  config.session_id, pair.key, pair.value, snowpark_session
348
350
  )
@@ -350,14 +352,15 @@ def route_config_proto(
350
352
  return proto_base.ConfigResponse(session_id=config.session_id)
351
353
  case "unset":
352
354
  logger.info("UNSET")
355
+ telemetry.report_config_unset(config.operation.unset.keys)
353
356
  for key in config.operation.unset.keys:
354
- telemetry.report_config_unset(key)
355
357
  unset_config_param(config.session_id, key, snowpark_session)
356
358
 
357
359
  return proto_base.ConfigResponse(session_id=config.session_id)
358
360
  case "get":
359
361
  logger.info("GET")
360
362
  res = proto_base.ConfigResponse(session_id=config.session_id)
363
+ telemetry.report_config_get(config.operation.get.keys)
361
364
  for key in config.operation.get.keys:
362
365
  pair = res.pairs.add()
363
366
  pair.key = key
@@ -367,6 +370,9 @@ def route_config_proto(
367
370
  return res
368
371
  case "get_with_default":
369
372
  logger.info("GET_WITH_DEFAULT")
373
+ telemetry.report_config_get(
374
+ [pair.key for pair in config.operation.get_with_default.pairs]
375
+ )
370
376
  result_pairs = [
371
377
  proto_base.KeyValue(
372
378
  key=pair.key,
@@ -383,6 +389,7 @@ def route_config_proto(
383
389
  case "get_option":
384
390
  logger.info("GET_OPTION")
385
391
  res = proto_base.ConfigResponse(session_id=config.session_id)
392
+ telemetry.report_config_get(config.operation.get_option.keys)
386
393
  for key in config.operation.get_option.keys:
387
394
  pair = res.pairs.add()
388
395
  pair.key = key
@@ -411,6 +418,7 @@ def route_config_proto(
411
418
  case "is_modifiable":
412
419
  logger.info("IS_MODIFIABLE")
413
420
  res = proto_base.ConfigResponse(session_id=config.session_id)
421
+ telemetry.report_config_get(config.operation.is_modifiable.keys)
414
422
  for key in config.operation.is_modifiable.keys:
415
423
  pair = res.pairs.add()
416
424
  pair.key = key
@@ -588,3 +596,11 @@ def auto_uppercase_non_column_identifiers() -> bool:
588
596
  return session_config[
589
597
  "snowpark.connect.sql.identifiers.auto-uppercase"
590
598
  ].lower() in ("all", "all_except_columns")
599
+
600
+
601
+ def parse_imports(session: snowpark.Session, imports: str | None) -> None:
602
+ if not imports:
603
+ return
604
+
605
+ for udf_import in imports.strip("[] ").split(","):
606
+ session.add_import(udf_import)
@@ -28,7 +28,9 @@ from pyspark.errors.exceptions.base import (
28
28
  PySparkException,
29
29
  PythonException,
30
30
  SparkRuntimeException,
31
+ UnsupportedOperationException,
31
32
  )
33
+ from pyspark.errors.exceptions.connect import SparkConnectGrpcException
32
34
  from snowflake.core.exceptions import NotFoundError
33
35
 
34
36
  from snowflake.connector.errors import ProgrammingError
@@ -49,7 +51,9 @@ SPARK_PYTHON_TO_JAVA_EXCEPTION = {
49
51
  ArrayIndexOutOfBoundsException: "java.lang.ArrayIndexOutOfBoundsException",
50
52
  NumberFormatException: "java.lang.NumberFormatException",
51
53
  SparkRuntimeException: "org.apache.spark.SparkRuntimeException",
54
+ SparkConnectGrpcException: "pyspark.errors.exceptions.connect.SparkConnectGrpcException",
52
55
  PythonException: "org.apache.spark.api.python.PythonException",
56
+ UnsupportedOperationException: "java.lang.UnsupportedOperationException",
53
57
  }
54
58
 
55
59
  WINDOW_FUNCTION_ANALYSIS_EXCEPTION_SQL_ERROR_CODE = {1005, 2303}
@@ -68,6 +72,9 @@ init_multi_args_exception_pattern = (
68
72
  terminate_multi_args_exception_pattern = (
69
73
  r"terminate\(\) missing \d+ required positional argument"
70
74
  )
75
+ snowpark_connect_exception_pattern = re.compile(
76
+ r"\[snowpark-connect-exception(?::(\w+))?\]\s*(.+?)'\s*is not recognized"
77
+ )
71
78
 
72
79
 
73
80
  def contains_udtf_select(sql_string):
@@ -100,6 +107,19 @@ def _get_converted_known_sql_or_custom_exception(
100
107
  return SparkRuntimeException(
101
108
  message="Unexpected value for start in function slice: SQL array indices start at 1."
102
109
  )
110
+ match = snowpark_connect_exception_pattern.search(
111
+ ex.message if hasattr(ex, "message") else str(ex)
112
+ )
113
+ if match:
114
+ class_name = match.group(1)
115
+ message = match.group(2)
116
+ exception_class = (
117
+ globals().get(class_name, SparkConnectGrpcException)
118
+ if class_name
119
+ else SparkConnectGrpcException
120
+ )
121
+ return exception_class(message=message)
122
+
103
123
  if "select with no columns" in msg and contains_udtf_select(query):
104
124
  # We try our best to detect if the SQL string contains a UDTF call and the output schema is empty.
105
125
  return PythonException(message=f"[UDTF_RETURN_SCHEMA_MISMATCH] {ex.message}")
@@ -131,6 +151,11 @@ def _get_converted_known_sql_or_custom_exception(
131
151
  message=f"[UDTF_EXEC_ERROR] User defined table function encountered an error in the terminate method: {ex.message}"
132
152
  )
133
153
 
154
+ if "failed to split string, provided pattern:" in msg:
155
+ return IllegalArgumentException(
156
+ message=f"Failed to split string using provided pattern. {ex.message}"
157
+ )
158
+
134
159
  if "100357" in msg and "wrong tuple size for returned value" in msg:
135
160
  return PythonException(
136
161
  message=f"[UDTF_RETURN_SCHEMA_MISMATCH] The number of columns in the result does not match the specified schema. {ex.message}"
@@ -13,10 +13,7 @@ from snowflake.snowpark_connect.config import global_config
13
13
  from snowflake.snowpark_connect.expression.typer import ExpressionTyper
14
14
  from snowflake.snowpark_connect.type_mapping import proto_to_snowpark_type
15
15
  from snowflake.snowpark_connect.typed_column import TypedColumn
16
- from snowflake.snowpark_connect.utils.session import (
17
- get_or_create_snowpark_session,
18
- get_python_udxf_import_files,
19
- )
16
+ from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
20
17
  from snowflake.snowpark_connect.utils.udf_helper import (
21
18
  SnowparkUDF,
22
19
  gen_input_types,
@@ -28,6 +25,9 @@ from snowflake.snowpark_connect.utils.udf_helper import (
28
25
  from snowflake.snowpark_connect.utils.udf_utils import (
29
26
  ProcessCommonInlineUserDefinedFunction,
30
27
  )
28
+ from snowflake.snowpark_connect.utils.udxf_import_utils import (
29
+ get_python_udxf_import_files,
30
+ )
31
31
 
32
32
 
33
33
  def process_udf_return_type(