snowflake-ml-python 1.24.0__py3-none-any.whl → 1.25.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/utils/mixins.py +26 -1
- snowflake/ml/data/_internal/arrow_ingestor.py +5 -1
- snowflake/ml/data/data_connector.py +2 -2
- snowflake/ml/data/data_ingestor.py +2 -1
- snowflake/ml/experiment/_experiment_info.py +3 -3
- snowflake/ml/jobs/_interop/data_utils.py +8 -8
- snowflake/ml/jobs/_interop/dto_schema.py +52 -7
- snowflake/ml/jobs/_interop/protocols.py +124 -7
- snowflake/ml/jobs/_interop/utils.py +92 -33
- snowflake/ml/jobs/_utils/arg_protocol.py +7 -0
- snowflake/ml/jobs/_utils/constants.py +4 -0
- snowflake/ml/jobs/_utils/feature_flags.py +97 -13
- snowflake/ml/jobs/_utils/payload_utils.py +6 -40
- snowflake/ml/jobs/_utils/runtime_env_utils.py +12 -111
- snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +204 -27
- snowflake/ml/jobs/decorators.py +17 -22
- snowflake/ml/jobs/job.py +25 -10
- snowflake/ml/jobs/job_definition.py +100 -8
- snowflake/ml/model/_client/model/model_version_impl.py +25 -14
- snowflake/ml/model/_client/ops/service_ops.py +6 -6
- snowflake/ml/model/_client/service/model_deployment_spec.py +3 -0
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +1 -0
- snowflake/ml/model/models/huggingface_pipeline.py +3 -0
- snowflake/ml/model/openai_signatures.py +154 -0
- snowflake/ml/registry/_manager/model_parameter_reconciler.py +2 -3
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.24.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/METADATA +41 -2
- {snowflake_ml_python-1.24.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/RECORD +31 -32
- {snowflake_ml_python-1.24.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/WHEEL +1 -1
- snowflake/ml/jobs/_utils/function_payload_utils.py +0 -43
- snowflake/ml/jobs/_utils/spec_utils.py +0 -22
- {snowflake_ml_python-1.24.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.24.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/top_level.txt +0 -0
snowflake/ml/_internal/utils/mixins.py

@@ -9,6 +9,7 @@ _SESSION_ACCOUNT_KEY = "session$account"
 _SESSION_ROLE_KEY = "session$role"
 _SESSION_DATABASE_KEY = "session$database"
 _SESSION_SCHEMA_KEY = "session$schema"
+_SESSION_STATE_ATTR = "_session_state"


 def _identifiers_match(saved: Optional[str], current: Optional[str]) -> bool:
@@ -61,7 +62,7 @@ class SerializableSessionMixin:
         else:
             self.__dict__.update(state)

-        self
+        setattr(self, _SESSION_STATE_ATTR, session_state)

     def _set_session(self, session_state: _SessionState) -> None:

@@ -86,3 +87,27 @@ class SerializableSessionMixin:
             ),
         ),
     )
+
+    @property
+    def session(self) -> Optional[snowpark_session.Session]:
+        if _SESSION_KEY not in self.__dict__:
+            session_state = getattr(self, _SESSION_STATE_ATTR, None)
+            if session_state is not None:
+                self._set_session(session_state)
+        return self.__dict__.get(_SESSION_KEY)
+
+    @session.setter
+    def session(self, value: Optional[snowpark_session.Session]) -> None:
+        self.__dict__[_SESSION_KEY] = value
+
+    # __getattr__ is only called when an attribute is NOT found through normal lookup, i.e. through:
+    # 1. Data descriptors (like @property with setter) from the class hierarchy
+    # 2. Instance __dict__ (e.g., self.x = 10)
+    # 3. Non-data descriptors (methods, @property without setter) from the class hierarchy
+    # __getattr__ is only called if steps 1-3 all fail
+    def __getattr__(self, name: str) -> Any:
+        if name == _SESSION_KEY:
+            return self.session
+        if hasattr(super(), "__getattr__"):
+            return super().__getattr__(name)  # type: ignore[misc]
+        raise AttributeError(f"{type(self).__name__!s} object has no attribute {name!r}")
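Taken together with the `__setstate__` change above, the intent appears to be lazy session re-attachment after unpickling: the pickled state carries only the saved session identifiers, and the new `session` property re-resolves a live session on first access. A rough, hedged sketch of the consumer-side effect; the `Holder` class and the active-session setup are illustrative assumptions, not package code:

```python
# Illustrative sketch only; Holder and the surrounding setup are assumptions.
import pickle

from snowflake.snowpark.context import get_active_session
from snowflake.ml._internal.utils.mixins import SerializableSessionMixin


class Holder(SerializableSessionMixin):
    def __init__(self, session):
        self.session = session  # routed through the new property setter


holder = Holder(get_active_session())          # assumes code runs with an active Snowpark session
restored = pickle.loads(pickle.dumps(holder))  # pickling captures session identifiers, not the connection
_ = restored.session                           # property lazily re-attaches a matching live session
```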
snowflake/ml/data/_internal/arrow_ingestor.py

@@ -73,15 +73,19 @@ class ArrowIngestor(data_ingestor.DataIngestor, mixins.SerializableSessionMixin)
         self._schema: Optional[pa.Schema] = None

     @classmethod
-    def from_sources(
+    def from_sources(
+        cls, session: snowpark.Session, sources: Sequence[data_source.DataSource], **kwargs: Any
+    ) -> "ArrowIngestor":
         if session is None:
             raise ValueError("Session is required")
+        # Skipping kwargs until needed to avoid impacting other workflows.
         return cls(session, sources)

     @classmethod
     def from_ray_dataset(
         cls,
         ray_ds: "ray.data.Dataset",
+        **kwargs: Any,
     ) -> "ArrowIngestor":
         raise NotImplementedError

snowflake/ml/data/data_connector.py

@@ -94,7 +94,7 @@ class DataConnector:
         **kwargs: Any,
     ) -> DataConnectorType:
         ingestor_class = ingestor_class or cls.DEFAULT_INGESTOR_CLASS
-        ray_ingestor = ingestor_class.from_ray_dataset(ray_ds=ray_ds)
+        ray_ingestor = ingestor_class.from_ray_dataset(ray_ds=ray_ds, **kwargs)
         return cls(ray_ingestor, **kwargs)

     @classmethod
@@ -111,7 +111,7 @@ class DataConnector:
         **kwargs: Any,
     ) -> DataConnectorType:
         ingestor_class = ingestor_class or cls.DEFAULT_INGESTOR_CLASS
-        ingestor = ingestor_class.from_sources(session, sources)
+        ingestor = ingestor_class.from_sources(session, sources, **kwargs)
         return cls(ingestor, **kwargs)

     @property
snowflake/ml/data/data_ingestor.py

@@ -16,7 +16,7 @@ DataIngestorType = TypeVar("DataIngestorType", bound="DataIngestor")
 class DataIngestor(Protocol):
     @classmethod
     def from_sources(
-        cls: type[DataIngestorType], session: snowpark.Session, sources: Sequence[data_source.DataSource]
+        cls: type[DataIngestorType], session: snowpark.Session, sources: Sequence[data_source.DataSource], **kwargs: Any
     ) -> DataIngestorType:
         raise NotImplementedError

@@ -24,6 +24,7 @@ class DataIngestor(Protocol):
     def from_ray_dataset(
         cls: type[DataIngestorType],
         ray_ds: "ray.data.Dataset",
+        **kwargs: Any,
     ) -> DataIngestorType:
         raise NotImplementedError

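Both ingestor factory methods now accept arbitrary keyword arguments, and `DataConnector.from_sources` / `from_ray_dataset` forward their `**kwargs` through, as the hunks above show. A minimal sketch of what a conforming custom ingestor looks like under the widened protocol; `MyIngestor` is illustrative and not part of the package, and the `data_source` import path is assumed from the signatures above:

```python
# Illustrative custom ingestor satisfying the widened DataIngestor protocol.
# MyIngestor is an assumption for demonstration purposes only.
from typing import Any, Sequence

from snowflake import snowpark
from snowflake.ml.data import data_source


class MyIngestor:
    def __init__(self, session: snowpark.Session, sources: Sequence[data_source.DataSource]) -> None:
        self._session = session
        self._sources = sources

    @classmethod
    def from_sources(
        cls, session: snowpark.Session, sources: Sequence[data_source.DataSource], **kwargs: Any
    ) -> "MyIngestor":
        # Extra kwargs forwarded by DataConnector can be consumed or ignored here.
        return cls(session, sources)

    @classmethod
    def from_ray_dataset(cls, ray_ds: Any, **kwargs: Any) -> "MyIngestor":
        raise NotImplementedError
```

With this shape, `DataConnector.from_sources(session, sources, ingestor_class=MyIngestor)` keeps working while any new connector-level keyword options flow through to the ingestor untouched.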
snowflake/ml/experiment/_experiment_info.py

@@ -3,7 +3,7 @@ import functools
 import types
 from typing import Callable, Optional

-from snowflake.ml import
+from snowflake.ml.model._client.model import model_version_impl
 from snowflake.ml.registry._manager import model_manager


@@ -23,7 +23,7 @@ class ExperimentInfoPatcher:
     """

     # Store original method at class definition time to avoid recursive patching
-    _original_log_model: Callable[...,
+    _original_log_model: Callable[..., model_version_impl.ModelVersion] = model_manager.ModelManager.log_model

     # Stack of active experiment_info contexts for nested experiment support
     _experiment_info_stack: list[ExperimentInfo] = []
@@ -36,7 +36,7 @@ class ExperimentInfoPatcher:
         if not ExperimentInfoPatcher._experiment_info_stack:

             @functools.wraps(ExperimentInfoPatcher._original_log_model)
-            def patched(*args, **kwargs) ->
+            def patched(*args, **kwargs) -> model_version_impl.ModelVersion:  # type: ignore[no-untyped-def]
                 # Use the most recent (top of stack) experiment_info for nested contexts
                 current_experiment_info = ExperimentInfoPatcher._experiment_info_stack[-1]
                 return ExperimentInfoPatcher._original_log_model(
snowflake/ml/jobs/_interop/data_utils.py

@@ -31,7 +31,7 @@ class StageFileWriter(io.IOBase):
         # Only upload if buffer has content and no exception occurred
         if write_contents and self._buffer.tell() > 0:
             self._buffer.seek(0)
-            self._session.file.put_stream(self._buffer, self._path)
+            self._session.file.put_stream(self._buffer, self._path, auto_compress=False)
         self._buffer.close()
         self._closed = True

@@ -84,15 +84,15 @@ class DtoCodec(Protocol):

     @overload
     @staticmethod
-    def decode(stream: io.IOBase, as_dict: Literal[False] = False) -> dto_schema.
+    def decode(stream: io.IOBase, as_dict: Literal[False] = False) -> dto_schema.PayloadDTO:
         ...

     @staticmethod
-    def decode(stream: io.IOBase, as_dict: bool = False) -> Union[dto_schema.
+    def decode(stream: io.IOBase, as_dict: bool = False) -> Union[dto_schema.PayloadDTO, dict[str, Any]]:
         pass

     @staticmethod
-    def encode(dto: dto_schema.
+    def encode(dto: dto_schema.PayloadDTO) -> bytes:
         pass


@@ -104,18 +104,18 @@ class JsonDtoCodec(DtoCodec):

     @overload
     @staticmethod
-    def decode(stream: io.IOBase, as_dict: Literal[False] = False) -> dto_schema.
+    def decode(stream: io.IOBase, as_dict: Literal[False] = False) -> dto_schema.PayloadDTO:
         ...

     @staticmethod
-    def decode(stream: io.IOBase, as_dict: bool = False) -> Union[dto_schema.
+    def decode(stream: io.IOBase, as_dict: bool = False) -> Union[dto_schema.PayloadDTO, dict[str, Any]]:
         data = cast(dict[str, Any], json.load(stream))
         if as_dict:
             return data
-        return dto_schema.
+        return dto_schema.ResultDTOAdapter.validate_python(data)

     @staticmethod
-    def encode(dto: dto_schema.
+    def encode(dto: dto_schema.PayloadDTO) -> bytes:
         # Temporarily extract the value to avoid accidentally applying model_dump() on it
         result_value = dto.value
         dto.value = None  # Clear value to avoid serializing it in the model_dump
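The `auto_compress=False` change in `StageFileWriter` above keeps the uploaded file's name and bytes exactly as written, instead of letting the stage PUT gzip it to `<name>.gz`. A hedged sketch of the underlying Snowpark call; the stage path and file contents are illustrative assumptions:

```python
# Hedged sketch of the underlying Snowpark upload; stage path is illustrative.
import io

from snowflake.snowpark import Session


def upload_result(session: Session) -> None:
    buf = io.BytesIO(b'{"kind": "result", "success": true, "value": 42}')
    # auto_compress=False keeps the exact file name (no ".gz" suffix) and contents.
    session.file.put_stream(buf, "@my_stage/jobs/result.json", auto_compress=False)
```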
snowflake/ml/jobs/_interop/dto_schema.py

@@ -1,7 +1,7 @@
-from typing import Any, Optional, Union
+from typing import Any, Literal, Optional, Union

-from pydantic import BaseModel, model_validator
-from typing_extensions import NotRequired, TypedDict
+from pydantic import BaseModel, Discriminator, Tag, TypeAdapter, model_validator
+from typing_extensions import Annotated, NotRequired, TypedDict


 class BinaryManifest(TypedDict):
@@ -67,22 +67,47 @@ class ExceptionMetadata(ResultMetadata):
     traceback: str


-class
+class PayloadDTO(BaseModel):
+    """
+    Base class for serializable payloads.
+
+    Args:
+        kind: Discriminator field for DTO type dispatch.
+        value: The payload value (if JSON-serializable).
+        protocol: The protocol used to serialize the payload (if not JSON-serializable).
+    """
+
+    kind: Literal["base"] = "base"
+    value: Optional[Any] = None
+    protocol: Optional[ProtocolInfo] = None
+    serialize_error: Optional[str] = None
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate_fields(cls, data: Any) -> Any:
+        """Ensure at least one of value or protocol keys is specified."""
+        if cls is PayloadDTO and isinstance(data, dict):
+            required_fields = {"value", "protocol"}
+            if not any(field in data for field in required_fields):
+                raise ValueError("At least one of 'value' or 'protocol' must be specified")
+        return data
+
+
+class ResultDTO(PayloadDTO):
     """
     A JSON representation of an execution result.

     Args:
+        kind: Discriminator field for DTO type dispatch.
         success: Whether the execution was successful.
         value: The value of the execution or the exception if the execution failed.
         protocol: The protocol used to serialize the result.
         metadata: The metadata of the result.
     """

+    kind: Literal["result"] = "result"  # type: ignore[assignment]
     success: bool
-    value: Optional[Any] = None
-    protocol: Optional[ProtocolInfo] = None
     metadata: Optional[Union[ResultMetadata, ExceptionMetadata]] = None
-    serialize_error: Optional[str] = None

     @model_validator(mode="before")
     @classmethod
@@ -93,3 +118,23 @@ class ResultDTO(BaseModel):
             if not any(field in data for field in required_fields):
                 raise ValueError("At least one of 'value', 'protocol', or 'metadata' must be specified")
         return data
+
+
+def _get_dto_kind(data: Any) -> str:
+    """Extract the 'kind' discriminator from input, defaulting to 'result' for backward compatibility."""
+    if isinstance(data, dict):
+        kind = data.get("kind", "result")
+    else:
+        kind = getattr(data, "kind", "result")
+    return str(kind)
+
+
+AnyResultDTO = Annotated[
+    Union[
+        Annotated[ResultDTO, Tag("result")],
+        Annotated[PayloadDTO, Tag("base")],
+    ],
+    Discriminator(_get_dto_kind),
+]
+
+ResultDTOAdapter: TypeAdapter[AnyResultDTO] = TypeAdapter(AnyResultDTO)
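Concretely, `ResultDTOAdapter` dispatches on the new `kind` field and falls back to `ResultDTO` when the field is absent, which is how documents written by older clients keep validating. A hedged usage sketch based only on the schema shown above; the example data is made up:

```python
# Hedged usage sketch based on the schema in this diff; example data is illustrative.
from snowflake.ml.jobs._interop.dto_schema import PayloadDTO, ResultDTO, ResultDTOAdapter

# "kind" routes validation to the matching DTO class.
result = ResultDTOAdapter.validate_python({"kind": "result", "success": True, "value": 42})
assert isinstance(result, ResultDTO)

payload = ResultDTOAdapter.validate_python({"kind": "base", "value": {"x": 1}})
assert isinstance(payload, PayloadDTO) and not isinstance(payload, ResultDTO)

# Documents without "kind" (written before this release) default to ResultDTO.
legacy = ResultDTOAdapter.validate_python({"success": True, "value": 1})
assert isinstance(legacy, ResultDTO)
```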
snowflake/ml/jobs/_interop/protocols.py

@@ -17,6 +17,8 @@ Condition = Union[type, tuple[type, ...], Callable[[Any], bool], None]

 logger = logging.getLogger(__name__)

+SESSION_KEY_PREFIX = "session@"
+

 class SerializationError(TypeError):
     """Exception raised when a serialization protocol fails."""
@@ -136,9 +138,10 @@ class CloudPickleProtocol(SerializationProtocol):

     def save(self, obj: Any, dest_dir: str, session: Optional[snowpark.Session] = None) -> ProtocolInfo:
         """Save the object to the destination directory."""
+        replaced_obj = self._pack_obj(obj)
         result_path = posixpath.join(dest_dir, self.DEFAULT_PATH)
         with data_utils.open_stream(result_path, "wb", session=session) as f:
-            self._backend.dump(
+            self._backend.dump(replaced_obj, f)
         manifest: BinaryManifest = {"path": result_path}
         return self.protocol_info.with_manifest(manifest)

@@ -157,12 +160,15 @@ class CloudPickleProtocol(SerializationProtocol):
         payload_manifest = cast(BinaryManifest, payload_info.manifest)
         try:
             if payload_bytes := payload_manifest.get("bytes"):
-
-
-
-
-
-
+                result = self._backend.loads(payload_bytes)
+            elif payload_b64 := payload_manifest.get("base64"):
+                result = self._backend.loads(base64.b64decode(payload_b64))
+            else:
+                result_path = path_transform(payload_manifest["path"]) if path_transform else payload_manifest["path"]
+                with data_utils.open_stream(result_path, "rb", session=session) as f:
+                    result = self._backend.load(f)
+
+            return self._unpack_obj(result, session=session)
         except (
             pickle.UnpicklingError,
             TypeError,
@@ -173,6 +179,117 @@ class CloudPickleProtocol(SerializationProtocol):
                 raise error from pickle_error
             raise

+    def _pack_obj(self, obj: Any) -> Any:
+        """Pack objects into JSON-safe dicts using reserved marker keys.
+
+        Markers:
+            - "type@": container type for list/tuple (list or tuple)
+            - "#<i>": positional element for list/tuple at index i
+            - "session@": placeholder for snowpark.Session values
+            - "session@#<i>" for list/tuple entries
+            - "session@<key>" for dict entries
+            - {"session@": None} for a bare Session object
+
+        Example:
+            obj = {"x": [1, session], "s": session}
+            packed = {
+                "x": {"type@": list, "#0": 1, "session@#1": None},
+                "session@s": None,
+            }
+            _unpack_obj(packed, session) == obj
+
+        Args:
+            obj: Object to pack into JSON-safe marker dictionaries.
+
+        Returns:
+            Packed representation with markers for session references.
+        """
+        arguments: dict[str, Any] = {}
+        if isinstance(obj, tuple) or isinstance(obj, list):
+            arguments = {"type@": type(obj)}
+            for i, arg in enumerate(obj):
+                if isinstance(arg, snowpark.Session):
+                    arguments[f"{SESSION_KEY_PREFIX}#{i}"] = None
+                else:
+                    arguments[f"#{i}"] = self._pack_obj(arg)
+            return arguments
+        elif isinstance(obj, dict):
+            for k, v in obj.items():
+                if isinstance(v, snowpark.Session):
+                    arguments[f"{SESSION_KEY_PREFIX}{k}"] = None
+                else:
+                    arguments[k] = self._pack_obj(v)
+            return arguments
+        elif isinstance(obj, snowpark.Session):
+            # Box session into a dict marker so we can distinguish it from other plain objects.
+            arguments[f"{SESSION_KEY_PREFIX}"] = None
+            return arguments
+        else:
+            return obj
+
+    def _unpack_obj(self, obj: Any, session: Optional[snowpark.Session] = None) -> Any:
+        """Unpack dict markers back into containers and Session references.
+
+        Markers:
+            - "type@": container type for list/tuple (list or tuple)
+            - "#<i>": positional element for list/tuple at index i
+            - "session@": placeholder for snowpark.Session values
+            - "session@#<i>" for list/tuple entries
+            - "session@<key>" for dict entries
+            - {"session@": None} for a bare Session object
+
+        Example:
+            packed = {
+                "x": {"type@": list, "#0": 1, "session@#1": None},
+                "session@s": None,
+            }
+            obj = _unpack_obj(packed, session)
+            # obj == {"x": [1, session], "s": session}
+
+        Args:
+            obj: Packed object with marker dictionaries.
+            session: Session to inject for session markers.
+
+        Returns:
+            Unpacked object with session references restored.
+        """
+        if not isinstance(obj, dict):
+            return obj
+        elif len(obj) == 1 and SESSION_KEY_PREFIX in obj:
+            return session
+        else:
+            type = obj.get("type@", None)
+            # If type is None, we are unpacking a dict
+            if type is None:
+                result_dict = {}
+                for k, v in obj.items():
+                    if k.startswith(SESSION_KEY_PREFIX):
+                        result_key = k[len(SESSION_KEY_PREFIX) :]
+                        result_dict[result_key] = session
+                    else:
+                        result_dict[k] = self._unpack_obj(v, session)
+                return result_dict
+            # If type is not None, we are unpacking a tuple or list
+            else:
+                indexes = []
+                for k, _ in obj.items():
+                    if "#" in k:
+                        indexes.append(int(k.split("#")[-1]))
+
+                if not indexes:
+                    return tuple() if type is tuple else []
+                result_list: list[Any] = [None] * (max(indexes) + 1)
+
+                for k, v in obj.items():
+                    if k == "type@":
+                        continue
+                    idx = int(k.split("#")[-1])
+                    if k.startswith(SESSION_KEY_PREFIX):
+                        result_list[idx] = session
+                    else:
+                        result_list[idx] = self._unpack_obj(v, session)
+                return tuple(result_list) if type is tuple else result_list
+

 class ArrowTableProtocol(SerializationProtocol):
     """
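The methods above let `CloudPickleProtocol` strip live `snowpark.Session` objects out of a payload before pickling and re-inject the current session on load. Below is a self-contained miniature of the marker scheme (dict case only), using a stand-in `FakeSession` class; the real logic lives on `CloudPickleProtocol._pack_obj` / `_unpack_obj` and also handles lists and tuples:

```python
# Self-contained miniature of the session-marker scheme (dict case only).
# FakeSession stands in for snowpark.Session, which is not safe to pickle.
from typing import Any, Optional

SESSION_KEY_PREFIX = "session@"


class FakeSession:
    pass


def pack(obj: dict[str, Any]) -> dict[str, Any]:
    # Replace Session values with a '"session@<key>": None' marker.
    return {
        (SESSION_KEY_PREFIX + k if isinstance(v, FakeSession) else k): (None if isinstance(v, FakeSession) else v)
        for k, v in obj.items()
    }


def unpack(obj: dict[str, Any], session: Optional[FakeSession]) -> dict[str, Any]:
    # Strip the marker prefix and substitute the live session back in.
    return {
        (k[len(SESSION_KEY_PREFIX):] if k.startswith(SESSION_KEY_PREFIX) else k): (
            session if k.startswith(SESSION_KEY_PREFIX) else v
        )
        for k, v in obj.items()
    }


live = FakeSession()
packed = pack({"x": 1, "s": live})   # {"x": 1, "session@s": None}
restored = unpack(packed, live)      # {"x": 1, "s": live}
assert restored == {"x": 1, "s": live}
```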
snowflake/ml/jobs/_interop/utils.py

@@ -1,3 +1,4 @@
+import io
 import logging
 import os
 import traceback
@@ -10,7 +11,9 @@ from snowflake import snowpark
 from snowflake.ml.jobs._interop import data_utils, exception_utils, legacy, protocols
 from snowflake.ml.jobs._interop.dto_schema import (
     ExceptionMetadata,
+    PayloadDTO,
     ResultDTO,
+    ResultDTOAdapter,
     ResultMetadata,
 )
 from snowflake.ml.jobs._interop.results import ExecutionResult, LoadedExecutionResult
@@ -23,79 +26,137 @@ DEFAULT_PROTOCOL.try_register_protocol(protocols.ArrowTableProtocol)
 DEFAULT_PROTOCOL.try_register_protocol(protocols.PandasDataFrameProtocol)
 DEFAULT_PROTOCOL.try_register_protocol(protocols.NumpyArrayProtocol)

+# Constants for argument encoding
+_MAX_INLINE_SIZE = 1024 * 1024  # 1MB - https://docs.snowflake.com/en/user-guide/query-size-limits

 logger = logging.getLogger(__name__)


-def
+def save(
+    value: Any,
+    path: str,
+    session: Optional[snowpark.Session] = None,
+    max_inline_size: int = 0,
+) -> Optional[bytes]:
     """
-
+    Serialize a value. Returns inline bytes if small enough, else writes to file.
+
+    Args:
+        value: The value to serialize. If ExecutionResult, creates ResultDTO with success flag.
+        path: Full file path for writing the DTO (if needed). Protocol data saved to path's parent.
+        session: Snowpark session for stage operations.
+        max_inline_size: Max bytes for inline return. 0 = always write to file.
+
+    Returns:
+        Encoded bytes if <= max_inline_size, else None (written to file).
+
+    Raises:
+        Exception: If session validation fails during serialization.
     """
-
-    success=
-    value
-
+    if isinstance(value, ExecutionResult):
+        dto: PayloadDTO = ResultDTO(success=value.success, value=value.value)
+        raw_value = value.value
+    else:
+        dto = PayloadDTO(value=value)
+        raw_value = value

     try:
-
-        payload = DEFAULT_CODEC.encode(result_dto)
+        payload = DEFAULT_CODEC.encode(dto)
     except TypeError:
-
-
+        dto.value = None  # Remove raw value to avoid serialization error
+        if isinstance(dto, ResultDTO):
+            # Metadata enables client fallback display when result can't be deserialized (protocol mismatch).
+            dto.metadata = _get_metadata(raw_value)
         try:
             path_dir = PurePath(path).parent.as_posix()
-            protocol_info = DEFAULT_PROTOCOL.save(
-
+            protocol_info = DEFAULT_PROTOCOL.save(raw_value, path_dir, session=session)
+            dto.protocol = protocol_info

         except Exception as e:
             logger.warning(f"Error dumping result value: {repr(e)}")
-
-
-
-
+            # We handle serialization failures differently based on the DTO type:
+            # 1. Job Results (ResultDTO): Allow a "soft-fail."
+            #    Since the job has already executed, we return the serialization error
+            #    to the client so they can debug the output or update their protocol version.
+            # 2. Input Arguments: Trigger a "hard-fail."
+            #    If arguments cannot be saved, the job script cannot run. We raise
+            #    an immediate exception to prevent execution with invalid state.
+            if not isinstance(dto, ResultDTO):
+                raise
+            dto.serialize_error = repr(e)
+
+        # Encode the modified DTO
+        payload = DEFAULT_CODEC.encode(dto)
+
+    if not isinstance(dto, ResultDTO) and len(payload) <= max_inline_size:
+        return payload

     with data_utils.open_stream(path, "wb", session=session) as stream:
         stream.write(payload)
+    return None
+
+
+save_result = save  # Backwards compatibility


-def
-
-
-
+def load(
+    path_or_data: str,
+    session: Optional[snowpark.Session] = None,
+    path_transform: Optional[Callable[[str], str]] = None,
+) -> Any:
+    """Load data from a file path or inline string."""
+
     try:
-        with data_utils.open_stream(
+        with data_utils.open_stream(path_or_data, "r", session=session) as stream:
             # Load the DTO as a dict for easy fallback to legacy loading if necessary
             dto_dict = DEFAULT_CODEC.decode(stream, as_dict=True)
+    # The exception could be OSError or BlockingIOError (e.g. the file name is too long)
+    except OSError as e:
+        # path_or_data might be inline data
+        try:
+            dto_dict = DEFAULT_CODEC.decode(io.StringIO(path_or_data), as_dict=True)
+        except Exception:
+            raise e
     except UnicodeDecodeError:
         # Path may be a legacy result file (cloudpickle)
-        # TODO: Re-use the stream
         assert session is not None
-        return legacy.load_legacy_result(session,
+        return legacy.load_legacy_result(session, path_or_data)

     try:
-        dto =
+        dto = ResultDTOAdapter.validate_python(dto_dict)
     except pydantic.ValidationError as e:
         if "success" in dto_dict:
             assert session is not None
-            if
-
-            return legacy.load_legacy_result(session,
+            if path_or_data.endswith(".json"):
+                path_or_data = os.path.splitext(path_or_data)[0] + ".pkl"
+            return legacy.load_legacy_result(session, path_or_data, result_json=dto_dict)
         raise ValueError("Invalid result schema") from e

     # Try loading data from file using the protocol info
-
+    payload_value = None
     data_load_error = None
     if dto.protocol is not None:
         try:
             logger.debug(f"Loading result value with protocol {dto.protocol}")
-
+            payload_value = DEFAULT_PROTOCOL.load(dto.protocol, session=session, path_transform=path_transform)
         except sp_exceptions.SnowparkSQLException:
             raise  # Data retrieval errors should be bubbled up
         except Exception as e:
             logger.debug(f"Error loading result value with protocol {dto.protocol}: {repr(e)}")
+            # Error handling strategy depends on the DTO type:
+            # 1. ResultDTO: Soft-fail. The job has already finished.
+            #    We package the load error into the result so the client can
+            #    debug or adjust their protocol version to retrieve the output.
+            # 2. PayloadDTO: Raise a hard error. If arguments cannot be
+            #    loaded, the job cannot run. We abort early to prevent execution.
+            if not isinstance(dto, ResultDTO):
+                raise
             data_load_error = e

-    #
+    # Prepare to assemble the final result
+    if not isinstance(dto, ResultDTO):
+        return payload_value
+
     if dto.serialize_error:
         serialize_error = TypeError("Original result serialization failed with error: " + dto.serialize_error)
         if data_load_error:
@@ -103,8 +164,7 @@ def load_result(
         else:
             data_load_error = serialize_error

-
-    result_value = result_value if result_value is not None else dto.value
+    result_value = payload_value if payload_value is not None else dto.value
     if not dto.success and result_value is None:
         # Try to reconstruct exception from metadata if available
         if isinstance(dto.metadata, ExceptionMetadata):
@@ -115,7 +175,6 @@ def load_result(
                 traceback=dto.metadata.traceback,
                 original_repr=dto.metadata.repr,
             )
-
     # Generate a generic error if we still don't have a value,
     # attaching the data load error if any
     if result_value is None:
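For orientation, the JSON document that `save` writes (and `load` decodes via `ResultDTOAdapter`) now carries the `kind` discriminator alongside the fields shown in the dto_schema hunks above. The sketch below is illustrative only; the exact contents of the `protocol` entry depend on which serialization protocol handled the value and are only partially shown:

```python
# Illustrative shape of a serialized result document; all values are made up.
example_result_document = {
    "kind": "result",        # new discriminator; older payloads without it default to "result"
    "success": True,
    "value": None,           # cleared when the raw value is not JSON-serializable
    "protocol": {            # ProtocolInfo written by e.g. CloudPickleProtocol; fields abbreviated
        "manifest": {"path": "..."},
    },
    "metadata": None,
    "serialize_error": None,
}
```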
snowflake/ml/jobs/_utils/constants.py

@@ -5,6 +5,7 @@ from snowflake.ml.jobs._utils.types import ComputeResources
 DEFAULT_CONTAINER_NAME = "main"
 MEMORY_VOLUME_NAME = "dshm"
 STAGE_VOLUME_NAME = "stage-volume"
+RESULT_VOLUME_NAME = "result-volume"
 DEFAULT_PYTHON_VERSION = "3.10"

 # Environment variables
@@ -109,3 +110,6 @@ CLOUD_INSTANCE_FAMILIES = {
     SnowflakeCloudType.AZURE: AZURE_INSTANCE_FAMILIES,
     SnowflakeCloudType.GCP: GCP_INSTANCE_FAMILIES,
 }
+
+# Magic attributes
+IS_MLJOB_REMOTE_ATTR = "_is_mljob_remote_callable"
|