PyPI - snowflake-ml-python - Versions diffs - 1.16.0__py3-none-any.whl → 1.17.0__py3-none-any.whl - Mend

snowflake-ml-python 1.16.0__py3-none-any.whl → 1.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

snowflake/ml/_internal/human_readable_id/adjectives.txt CHANGED Viewed

@@ -1,3 +1,4 @@
+aerial
 afraid
 ancient
 angry
@@ -26,7 +27,6 @@ dull
 empty
 evil
 fast
-fat
 fluffy
 foolish
 fresh
@@ -57,10 +57,10 @@ lovely
 lucky
 massive
 mean
+metallic
 mighty
 modern
 moody
-nasty
 neat
 nervous
 new
@@ -85,7 +85,6 @@ rotten
 rude
 selfish
 serious
-shaggy
 sharp
 short
 shy
@@ -96,14 +95,15 @@ slippery
 smart
 smooth
 soft
+solid
 sour
 spicy
 splendid
 spotty
+squishy
 stale
 strange
 strong
-stupid
 sweet
 swift
 tall
@@ -116,7 +116,6 @@ tidy
 tiny
 tough
 tricky
-ugly
 warm
 weak
 wet
@@ -124,5 +123,6 @@ wicked
 wise
 witty
 wonderful
+wooden
 yellow
 young

snowflake/ml/_internal/human_readable_id/animals.txt CHANGED Viewed

@@ -1,10 +1,9 @@
 anaconda
 ant
-ape
-baboon
 badger
 bat
 bear
+beetle
 bird
 bobcat
 bulldog
@@ -73,7 +72,6 @@ lobster
 mayfly
 mamba
 mole
-monkey
 moose
 moth
 mouse
@@ -114,6 +112,7 @@ swan
 termite
 tiger
 treefrog
+tuna
 turkey
 turtle
 vampirebat
@@ -126,3 +125,4 @@ worm
 yak
 yeti
 zebra
+zebrafish

snowflake/ml/jobs/__init__.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from snowflake.ml.jobs._interop.exception_utils import install_exception_display_hooks
 from snowflake.ml.jobs._utils.types import JOB_STATUS
 from snowflake.ml.jobs.decorators import remote
 from snowflake.ml.jobs.job import MLJob
@@ -10,6 +11,9 @@ from snowflake.ml.jobs.manager import (
     submit_from_stage,
 )
+# Initialize exception display hooks for remote job error handling
+install_exception_display_hooks()
 __all__ = [
     "remote",
     "submit_file",

snowflake/ml/jobs/_interop/__init__.py ADDED Viewed

File without changes

snowflake/ml/jobs/_interop/data_utils.py ADDED Viewed

@@ -0,0 +1,124 @@
+import io
+import json
+from typing import Any, Literal, Optional, Protocol, Union, cast, overload
+from snowflake import snowpark
+from snowflake.ml.jobs._interop import dto_schema
+class StageFileWriter(io.IOBase):
+    """
+    A context manager IOBase implementation that proxies writes to an internal BytesIO
+    and uploads to Snowflake stage on close.
+    """
+    def __init__(self, session: snowpark.Session, path: str) -> None:
+        self._session = session
+        self._path = path
+        self._buffer = io.BytesIO()
+        self._closed = False
+        self._exception_occurred = False
+    def write(self, data: Union[bytes, bytearray]) -> int:
+        """Write data to the internal buffer."""
+        if self._closed:
+            raise ValueError("I/O operation on closed file")
+        return self._buffer.write(data)
+    def close(self, write_contents: bool = True) -> None:
+        """Close the file and upload the buffer contents to the stage."""
+        if not self._closed:
+            # Only upload if buffer has content and no exception occurred
+            if write_contents and self._buffer.tell() > 0:
+                self._buffer.seek(0)
+                self._session.file.put_stream(self._buffer, self._path)
+            self._buffer.close()
+            self._closed = True
+    def __enter__(self) -> "StageFileWriter":
+        return self
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        exception_occurred = exc_type is not None
+        self.close(write_contents=not exception_occurred)
+    @property
+    def closed(self) -> bool:
+        return self._closed
+    def writable(self) -> bool:
+        return not self._closed
+    def readable(self) -> bool:
+        return False
+    def seekable(self) -> bool:
+        return not self._closed
+def _is_stage_path(path: str) -> bool:
+    return path.startswith("@") or path.startswith("snow://")
+def open_stream(path: str, mode: str = "rb", session: Optional[snowpark.Session] = None) -> io.IOBase:
+    if _is_stage_path(path):
+        if session is None:
+            raise ValueError("Session is required when opening a stage path")
+        if "r" in mode:
+            stream: io.IOBase = session.file.get_stream(path)  # type: ignore[assignment]
+            return stream
+        elif "w" in mode:
+            return StageFileWriter(session, path)
+        else:
+            raise ValueError(f"Unsupported mode '{mode}' for stage path")
+    else:
+        result: io.IOBase = open(path, mode)  # type: ignore[assignment]
+        return result
+class DtoCodec(Protocol):
+    @overload
+    @staticmethod
+    def decode(stream: io.IOBase, as_dict: Literal[True]) -> dict[str, Any]:
+        ...
+    @overload
+    @staticmethod
+    def decode(stream: io.IOBase, as_dict: Literal[False] = False) -> dto_schema.ResultDTO:
+        ...
+    @staticmethod
+    def decode(stream: io.IOBase, as_dict: bool = False) -> Union[dto_schema.ResultDTO, dict[str, Any]]:
+        pass
+    @staticmethod
+    def encode(dto: dto_schema.ResultDTO) -> bytes:
+        pass
+class JsonDtoCodec(DtoCodec):
+    @overload
+    @staticmethod
+    def decode(stream: io.IOBase, as_dict: Literal[True]) -> dict[str, Any]:
+        ...
+    @overload
+    @staticmethod
+    def decode(stream: io.IOBase, as_dict: Literal[False] = False) -> dto_schema.ResultDTO:
+        ...
+    @staticmethod
+    def decode(stream: io.IOBase, as_dict: bool = False) -> Union[dto_schema.ResultDTO, dict[str, Any]]:
+        data = cast(dict[str, Any], json.load(stream))
+        if as_dict:
+            return data
+        return dto_schema.ResultDTO.model_validate(data)
+    @staticmethod
+    def encode(dto: dto_schema.ResultDTO) -> bytes:
+        # Temporarily extract the value to avoid accidentally applying model_dump() on it
+        result_value = dto.value
+        dto.value = None  # Clear value to avoid serializing it in the model_dump
+        result_dict = dto.model_dump()
+        result_dict["value"] = result_value  # Put back the value
+        return json.dumps(result_dict).encode("utf-8")

snowflake/ml/jobs/_interop/dto_schema.py ADDED Viewed

@@ -0,0 +1,95 @@
+from typing import Any, Optional, Union
+from pydantic import BaseModel, model_validator
+from typing_extensions import NotRequired, TypedDict
+class BinaryManifest(TypedDict):
+    """
+    Binary data manifest schema.
+    Contains one of: path, bytes, or base64 for the serialized data.
+    """
+    path: NotRequired[str]  # Path to file
+    bytes: NotRequired[bytes]  # In-line byte string (not supported with JSON codec)
+    base64: NotRequired[str]  # Base64 encoded string
+class ParquetManifest(TypedDict):
+    """Protocol manifest schema for parquet files."""
+    paths: list[str]  # File paths
+# Union type for all manifest types, including catch-all dict[str, Any] for backward compatibility
+PayloadManifest = Union[BinaryManifest, ParquetManifest, dict[str, Any]]
+class ProtocolInfo(BaseModel):
+    """
+    The protocol used to serialize the result and the manifest of the result.
+    """
+    name: str
+    version: Optional[str] = None
+    metadata: Optional[dict[str, str]] = None
+    manifest: Optional[PayloadManifest] = None
+    def __str__(self) -> str:
+        result = self.name
+        if self.version:
+            result += f"-{self.version}"
+        return result
+    def with_manifest(self, manifest: PayloadManifest) -> "ProtocolInfo":
+        """
+        Return a new ProtocolInfo object with the manifest.
+        """
+        return ProtocolInfo(
+            name=self.name,
+            version=self.version,
+            metadata=self.metadata,
+            manifest=manifest,
+        )
+class ResultMetadata(BaseModel):
+    """
+    The metadata of a result.
+    """
+    type: str
+    repr: str
+class ExceptionMetadata(ResultMetadata):
+    message: str
+    traceback: str
+class ResultDTO(BaseModel):
+    """
+    A JSON representation of an execution result.
+    Args:
+        success: Whether the execution was successful.
+        value: The value of the execution or the exception if the execution failed.
+        protocol: The protocol used to serialize the result.
+        metadata: The metadata of the result.
+    """
+    success: bool
+    value: Optional[Any] = None
+    protocol: Optional[ProtocolInfo] = None
+    metadata: Optional[Union[ResultMetadata, ExceptionMetadata]] = None
+    serialize_error: Optional[str] = None
+    @model_validator(mode="before")
+    @classmethod
+    def validate_fields(cls, data: Any) -> Any:
+        """Ensure at least one of value, protocol, or metadata keys is specified."""
+        if isinstance(data, dict):
+            required_fields = {"value", "protocol", "metadata"}
+            if not any(field in data for field in required_fields):
+                raise ValueError("At least one of 'value', 'protocol', or 'metadata' must be specified")
+        return data

snowflake/ml/jobs/{_utils/interop_utils.py → _interop/exception_utils.py} RENAMED Viewed

@@ -1,19 +1,12 @@
 import builtins
 import functools
 import importlib
-import json
-import os
-import pickle
 import re
 import sys
 import traceback
 from collections import namedtuple
-from dataclasses import dataclass
 from types import TracebackType
-from typing import Any, Callable, Optional, Union, cast
-from snowflake import snowpark
-from snowflake.snowpark import exceptions as sp_exceptions
+from typing import Any, Callable, Optional, cast
 _TRACEBACK_ENTRY_PATTERN = re.compile(
     r'File "(?P<filename>[^"]+)", line (?P<lineno>\d+), in (?P<name>[^\n]+)(?:\n(?!^\s*File)^\s*(?P<line>[^\n]+))?\n',
@@ -21,175 +14,46 @@ _TRACEBACK_ENTRY_PATTERN = re.compile(
 )
 _REMOTE_ERROR_ATTR_NAME = "_remote_error"
-RemoteError = namedtuple("RemoteError", ["exc_type", "exc_msg", "exc_tb"])
-@dataclass(frozen=True)
-class ExecutionResult:
-    result: Any = None
-    exception: Optional[BaseException] = None
-    @property
-    def success(self) -> bool:
-        return self.exception is None
-    def to_dict(self) -> dict[str, Any]:
-        """Return the serializable dictionary."""
-        if isinstance(self.exception, BaseException):
-            exc_type = type(self.exception)
-            return {
-                "success": False,
-                "exc_type": f"{exc_type.__module__}.{exc_type.__name__}",
-                "exc_value": self.exception,
-                "exc_tb": "".join(traceback.format_tb(self.exception.__traceback__)),
-            }
-        return {
-            "success": True,
-            "result_type": type(self.result).__qualname__,
-            "result": self.result,
-        }
-    @classmethod
-    def from_dict(cls, result_dict: dict[str, Any]) -> "ExecutionResult":
-        if not isinstance(result_dict.get("success"), bool):
-            raise ValueError("Invalid result dictionary")
-        if result_dict["success"]:
-            # Load successful result
-            return cls(result=result_dict.get("result"))
-        # Load exception
-        exc_type = result_dict.get("exc_type", "RuntimeError")
-        exc_value = result_dict.get("exc_value", "Unknown error")
-        exc_tb = result_dict.get("exc_tb", "")
-        return cls(exception=load_exception(exc_type, exc_value, exc_tb))
-def fetch_result(session: snowpark.Session, result_path: str) -> ExecutionResult:
-    """
-    Fetch the serialized result from the specified path.
+RemoteErrorInfo = namedtuple("RemoteErrorInfo", ["exc_type", "exc_msg", "exc_tb"])
-    Args:
-        session: Snowpark Session to use for file operations.
-        result_path: The path to the serialized result file.
-    Returns:
-        A dictionary containing the execution result if available, None otherwise.
+class RemoteError(RuntimeError):
+    """Base exception for errors from remote execution environment which could not be reconstructed locally."""
-    Raises:
-        RuntimeError: If both pickle and JSON result retrieval fail.
-    """
-    try:
-        # TODO: Check if file exists
-        with session.file.get_stream(result_path) as result_stream:
-            return ExecutionResult.from_dict(pickle.load(result_stream))
-    except (
-        sp_exceptions.SnowparkSQLException,
-        pickle.UnpicklingError,
-        TypeError,
-        ImportError,
-        AttributeError,
-        MemoryError,
-    ) as pickle_error:
-        # Fall back to JSON result if loading pickled result fails for any reason
-        try:
-            result_json_path = os.path.splitext(result_path)[0] + ".json"
-            with session.file.get_stream(result_json_path) as result_stream:
-                return ExecutionResult.from_dict(json.load(result_stream))
-        except Exception as json_error:
-            # Both pickle and JSON failed - provide helpful error message
-            raise RuntimeError(_fetch_result_error_message(pickle_error, result_path, json_error)) from pickle_error
-def _fetch_result_error_message(error: Exception, result_path: str, json_error: Optional[Exception] = None) -> str:
-    """Create helpful error messages for common result retrieval failures."""
-    # Package import issues
-    if isinstance(error, ImportError):
-        return f"Failed to retrieve job result: Package not installed in your local environment. Error: {str(error)}"
-    # Package versions differ between runtime and local environment
-    if isinstance(error, AttributeError):
-        return f"Failed to retrieve job result: Package version mismatch. Error: {str(error)}"
-    # Serialization issues
-    if isinstance(error, TypeError):
-        return f"Failed to retrieve job result: Non-serializable objects were returned. Error: {str(error)}"
-    # Python version pickling incompatibility
-    if isinstance(error, pickle.UnpicklingError) and "protocol" in str(error).lower():
-        # TODO: Update this once we support different Python versions
-        client_version = f"Python {sys.version_info.major}.{sys.version_info.minor}"
-        runtime_version = "Python 3.10"
-        return (
-            f"Failed to retrieve job result: Python version mismatch - job ran on {runtime_version}, "
-            f"local environment using Python {client_version}. Error: {str(error)}"
-        )
-    # File access issues
-    if isinstance(error, sp_exceptions.SnowparkSQLException):
-        if "not found" in str(error).lower() or "does not exist" in str(error).lower():
-            return (
-                f"Failed to retrieve job result: No result file found. Check job.get_logs() for execution "
-                f"errors. Error: {str(error)}"
-            )
-        else:
-            return f"Failed to retrieve job result: Cannot access result file. Error: {str(error)}"
-    if isinstance(error, MemoryError):
-        return f"Failed to retrieve job result: Result too large for memory. Error: {str(error)}"
-    # Generic fallback
-    base_message = f"Failed to retrieve job result: {str(error)}"
-    if json_error:
-        base_message += f" (JSON fallback also failed: {str(json_error)})"
-    return base_message
-def load_exception(exc_type_name: str, exc_value: Union[Exception, str], exc_tb: str) -> Exception:
-    """
-    Create an exception with a string-formatted traceback.
-    When this exception is raised and not caught, it will display the original traceback.
-    When caught, it behaves like a regular exception without showing the traceback.
-    Args:
-        exc_type_name: Name of the exception type (e.g., 'ValueError', 'RuntimeError')
-        exc_value: The deserialized exception value or exception string (i.e. message)
-        exc_tb: String representation of the traceback
+def build_exception(type_str: str, message: str, traceback: str, original_repr: Optional[str] = None) -> BaseException:
+    """Build an exception from metadata, attaching remote error info."""
+    if not original_repr:
+        original_repr = f"{type_str}('{message}')"
+    try:
+        ex = reconstruct_exception(type_str=type_str, message=message)
+    except Exception as e:
+        # Fallback to a generic error type if reconstruction fails
+        ex = RemoteError(original_repr)
+        ex.__cause__ = e
+    return attach_remote_error_info(ex, type_str, message, traceback)
-    Returns:
-        An exception object with the original traceback information
-    # noqa: DAR401
-    """
-    if isinstance(exc_value, Exception):
-        exception = exc_value
-    else:
-        # Try to load the original exception type if possible
-        try:
-            # First check built-in exceptions
-            exc_type = getattr(builtins, exc_type_name, None)
-            if exc_type is None and "." in exc_type_name:
-                # Try to import from module path if it's a qualified name
-                module_path, class_name = exc_type_name.rsplit(".", 1)
-                module = importlib.import_module(module_path)
-                exc_type = getattr(module, class_name)
-            if exc_type is None or not issubclass(exc_type, Exception):
-                raise TypeError(f"{exc_type_name} is not a known exception type")
-            # Create the exception instance
-            exception = exc_type(exc_value)
-        except (ImportError, AttributeError, TypeError):
-            # Fall back to a generic exception
-            exception = RuntimeError(
-                f"Exception deserialization failed, original exception: {exc_type_name}: {exc_value}"
-            )
+def reconstruct_exception(type_str: str, message: str) -> BaseException:
+    """Best effort reconstruction of an exception from metadata."""
+    try:
+        type_split = type_str.rsplit(".", 1)
+        if len(type_split) == 1:
+            module = builtins
+        else:
+            module = importlib.import_module(type_split[0])
+        exc_type = getattr(module, type_split[-1])
+    except (ImportError, AttributeError):
+        raise ModuleNotFoundError(
+            f"Unrecognized exception type '{type_str}', likely due to a missing or unavailable package"
+        ) from None
-    # Attach the traceback information to the exception
-    return _attach_remote_error_info(exception, exc_type_name, str(exc_value), exc_tb)
+    if not issubclass(exc_type, BaseException):
+        raise TypeError(f"Imported type {type_str} is not a known exception type, possibly due to a name conflict")
+    return cast(BaseException, exc_type(message))
-def _attach_remote_error_info(ex: Exception, exc_type: str, exc_msg: str, traceback_str: str) -> Exception:
+def attach_remote_error_info(ex: BaseException, exc_type: str, exc_msg: str, traceback_str: str) -> BaseException:
     """
     Attach a string-formatted traceback to an exception.
@@ -207,11 +71,11 @@ def _attach_remote_error_info(ex: Exception, exc_type: str, exc_msg: str, traceb
     """
     # Store the traceback information
     exc_type = exc_type.rsplit(".", 1)[-1]  # Remove module path
-    setattr(ex, _REMOTE_ERROR_ATTR_NAME, RemoteError(exc_type=exc_type, exc_msg=exc_msg, exc_tb=traceback_str))
+    setattr(ex, _REMOTE_ERROR_ATTR_NAME, RemoteErrorInfo(exc_type=exc_type, exc_msg=exc_msg, exc_tb=traceback_str))
     return ex
-def _retrieve_remote_error_info(ex: Optional[BaseException]) -> Optional[RemoteError]:
+def retrieve_remote_error_info(ex: Optional[BaseException]) -> Optional[RemoteErrorInfo]:
     """
     Retrieve the string-formatted traceback from an exception if it exists.
@@ -285,7 +149,7 @@ def _install_sys_excepthook() -> None:
     sys.excepthook is the global hook that Python calls when an unhandled exception occurs.
     By default it prints the exception type, message and traceback to stderr.
-    We override sys.excepthook to intercept exceptions that contain our special RemoteError
+    We override sys.excepthook to intercept exceptions that contain our special RemoteErrorInfo
     attribute. These exceptions come from deserialized remote execution results and contain
     the original traceback information from where they occurred.
@@ -327,7 +191,7 @@ def _install_sys_excepthook() -> None:
                     "\nDuring handling of the above exception, another exception occurred:\n", file=sys.stderr
                 )
-            if (remote_err := _retrieve_remote_error_info(exc_value)) and isinstance(remote_err, RemoteError):
+            if (remote_err := retrieve_remote_error_info(exc_value)) and isinstance(remote_err, RemoteErrorInfo):
                 # Display stored traceback for deserialized exceptions
                 print("Traceback (from remote execution):", file=sys.stderr)  # noqa: T201
                 print(remote_err.exc_tb, end="", file=sys.stderr)  # noqa: T201
@@ -408,7 +272,7 @@ def _install_ipython_hook() -> bool:
             tb_offset: Optional[int],
             **kwargs: Any,
         ) -> list[list[str]]:
-            if (remote_err := _retrieve_remote_error_info(evalue)) and isinstance(remote_err, RemoteError):
+            if (remote_err := retrieve_remote_error_info(evalue)) and isinstance(remote_err, RemoteErrorInfo):
                 # Implementation forked from IPython.core.ultratb.VerboseTB.format_exception_as_a_whole
                 head = self.prepare_header(remote_err.exc_type, long_version=False).replace(
                     "(most recent call last)",
@@ -448,7 +312,7 @@ def _install_ipython_hook() -> bool:
             tb_offset: Optional[int] = None,
             **kwargs: Any,
         ) -> list[str]:
-            if (remote_err := _retrieve_remote_error_info(evalue)) and isinstance(remote_err, RemoteError):
+            if (remote_err := retrieve_remote_error_info(evalue)) and isinstance(remote_err, RemoteErrorInfo):
                 tb_list = [
                     (m.group("filename"), m.group("lineno"), m.group("name"), m.group("line"))
                     for m in re.finditer(_TRACEBACK_ENTRY_PATTERN, remote_err.exc_tb or "")
@@ -493,9 +357,16 @@ def _uninstall_ipython_hook() -> None:
 def install_exception_display_hooks() -> None:
-    if not _install_ipython_hook():
-        _install_sys_excepthook()
+    """Install custom exception display hooks for improved remote error reporting.
+    This function should be called once during package initialization to set up
+    enhanced error handling for remote job execution errors. The hooks will:
-# ------ Install the custom traceback hooks by default ------ #
-install_exception_display_hooks()
+    - Display original remote tracebacks instead of local deserialization traces
+    - Work in both standard Python and IPython/Jupyter environments
+    - Safely fall back to original behavior if errors occur
+    Note: This function is idempotent and safe to call multiple times.
+    """
+    if not _install_ipython_hook():
+        _install_sys_excepthook()

snowflake-ml-python 1.16.0__py3-none-any.whl → 1.17.0__py3-none-any.whl

snowflake-ml-python 1.16.0__py3-none-any.whl → 1.17.0__py3-none-any.whl