snowflake-ml-python 1.23.0__py3-none-any.whl → 1.25.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/platform_capabilities.py +0 -4
- snowflake/ml/_internal/utils/mixins.py +26 -1
- snowflake/ml/data/_internal/arrow_ingestor.py +5 -1
- snowflake/ml/data/data_connector.py +2 -2
- snowflake/ml/data/data_ingestor.py +2 -1
- snowflake/ml/experiment/_experiment_info.py +3 -3
- snowflake/ml/feature_store/__init__.py +2 -0
- snowflake/ml/feature_store/aggregation.py +367 -0
- snowflake/ml/feature_store/feature.py +366 -0
- snowflake/ml/feature_store/feature_store.py +234 -20
- snowflake/ml/feature_store/feature_view.py +189 -4
- snowflake/ml/feature_store/metadata_manager.py +425 -0
- snowflake/ml/feature_store/tile_sql_generator.py +1079 -0
- snowflake/ml/jobs/_interop/data_utils.py +8 -8
- snowflake/ml/jobs/_interop/dto_schema.py +52 -7
- snowflake/ml/jobs/_interop/protocols.py +124 -7
- snowflake/ml/jobs/_interop/utils.py +92 -33
- snowflake/ml/jobs/_utils/arg_protocol.py +7 -0
- snowflake/ml/jobs/_utils/constants.py +4 -0
- snowflake/ml/jobs/_utils/feature_flags.py +97 -13
- snowflake/ml/jobs/_utils/payload_utils.py +6 -40
- snowflake/ml/jobs/_utils/runtime_env_utils.py +12 -111
- snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +204 -27
- snowflake/ml/jobs/decorators.py +17 -22
- snowflake/ml/jobs/job.py +25 -10
- snowflake/ml/jobs/job_definition.py +100 -8
- snowflake/ml/model/__init__.py +4 -0
- snowflake/ml/model/_client/model/batch_inference_specs.py +38 -2
- snowflake/ml/model/_client/model/model_version_impl.py +56 -28
- snowflake/ml/model/_client/ops/model_ops.py +2 -8
- snowflake/ml/model/_client/ops/service_ops.py +6 -11
- snowflake/ml/model/_client/service/model_deployment_spec.py +3 -0
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +1 -0
- snowflake/ml/model/_client/sql/service.py +21 -29
- snowflake/ml/model/_model_composer/model_method/model_method.py +2 -1
- snowflake/ml/model/_packager/model_handlers/huggingface.py +20 -0
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +70 -14
- snowflake/ml/model/_signatures/utils.py +76 -1
- snowflake/ml/model/models/huggingface_pipeline.py +3 -0
- snowflake/ml/model/openai_signatures.py +154 -0
- snowflake/ml/registry/_manager/model_parameter_reconciler.py +2 -3
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.23.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/METADATA +79 -2
- {snowflake_ml_python-1.23.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/RECORD +47 -44
- {snowflake_ml_python-1.23.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/WHEEL +1 -1
- snowflake/ml/jobs/_utils/function_payload_utils.py +0 -43
- snowflake/ml/jobs/_utils/spec_utils.py +0 -22
- {snowflake_ml_python-1.23.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.23.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/top_level.txt +0 -0
@@ -31,7 +31,7 @@ class StageFileWriter(io.IOBase):
         # Only upload if buffer has content and no exception occurred
         if write_contents and self._buffer.tell() > 0:
             self._buffer.seek(0)
-            self._session.file.put_stream(self._buffer, self._path)
+            self._session.file.put_stream(self._buffer, self._path, auto_compress=False)
         self._buffer.close()
         self._closed = True
 
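
For context on the `auto_compress=False` addition above: Snowpark's `put_stream` gzips uploads by default, which stores the staged file under a `.gz` name and would break reading it back at the original path. A minimal sketch, using a hypothetical `upload_uncompressed` helper (not from the diff):

```python
import io

from snowflake.snowpark import Session


def upload_uncompressed(session: Session, data: bytes, stage_path: str) -> None:
    # With the Snowpark default (auto_compress=True) the uploaded file is gzipped
    # and stored as "<name>.gz"; reading back the original name would then fail.
    session.file.put_stream(io.BytesIO(data), stage_path, auto_compress=False)
```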
@@ -84,15 +84,15 @@ class DtoCodec(Protocol):
 
     @overload
     @staticmethod
-    def decode(stream: io.IOBase, as_dict: Literal[False] = False) -> dto_schema.ResultDTO:
+    def decode(stream: io.IOBase, as_dict: Literal[False] = False) -> dto_schema.PayloadDTO:
         ...
 
     @staticmethod
-    def decode(stream: io.IOBase, as_dict: bool = False) -> Union[dto_schema.ResultDTO, dict[str, Any]]:
+    def decode(stream: io.IOBase, as_dict: bool = False) -> Union[dto_schema.PayloadDTO, dict[str, Any]]:
         pass
 
     @staticmethod
-    def encode(dto: dto_schema.ResultDTO) -> bytes:
+    def encode(dto: dto_schema.PayloadDTO) -> bytes:
         pass
 
 
@@ -104,18 +104,18 @@ class JsonDtoCodec(DtoCodec):
 
     @overload
     @staticmethod
-    def decode(stream: io.IOBase, as_dict: Literal[False] = False) -> dto_schema.ResultDTO:
+    def decode(stream: io.IOBase, as_dict: Literal[False] = False) -> dto_schema.PayloadDTO:
         ...
 
     @staticmethod
-    def decode(stream: io.IOBase, as_dict: bool = False) -> Union[dto_schema.ResultDTO, dict[str, Any]]:
+    def decode(stream: io.IOBase, as_dict: bool = False) -> Union[dto_schema.PayloadDTO, dict[str, Any]]:
         data = cast(dict[str, Any], json.load(stream))
         if as_dict:
             return data
-        return dto_schema.
+        return dto_schema.ResultDTOAdapter.validate_python(data)
 
     @staticmethod
-    def encode(dto: dto_schema.ResultDTO) -> bytes:
+    def encode(dto: dto_schema.PayloadDTO) -> bytes:
         # Temporarily extract the value to avoid accidentally applying model_dump() on it
         result_value = dto.value
         dto.value = None # Clear value to avoid serializing it in the model_dump
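
A hedged usage sketch of the updated codec; the import location of `JsonDtoCodec` is assumed (the hunks do not name its module), so treat it as illustrative:

```python
import io

from snowflake.ml.jobs._interop import dto_schema
from snowflake.ml.jobs._interop.data_utils import JsonDtoCodec  # assumed location

dto = dto_schema.PayloadDTO(value={"answer": 42})
payload: bytes = JsonDtoCodec.encode(dto)

# as_dict=True returns the raw dict; the default path validates into a DTO
# via the new ResultDTOAdapter discriminator.
raw = JsonDtoCodec.decode(io.BytesIO(payload), as_dict=True)
validated = JsonDtoCodec.decode(io.BytesIO(payload))
assert isinstance(validated, dto_schema.PayloadDTO)
```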
@@ -1,7 +1,7 @@
-from typing import Any, Optional, Union
+from typing import Any, Literal, Optional, Union
 
-from pydantic import BaseModel, model_validator
-from typing_extensions import NotRequired, TypedDict
+from pydantic import BaseModel, Discriminator, Tag, TypeAdapter, model_validator
+from typing_extensions import Annotated, NotRequired, TypedDict
 
 
 class BinaryManifest(TypedDict):
@@ -67,22 +67,47 @@ class ExceptionMetadata(ResultMetadata):
     traceback: str
 
 
-class ResultDTO(BaseModel):
+class PayloadDTO(BaseModel):
+    """
+    Base class for serializable payloads.
+
+    Args:
+        kind: Discriminator field for DTO type dispatch.
+        value: The payload value (if JSON-serializable).
+        protocol: The protocol used to serialize the payload (if not JSON-serializable).
+    """
+
+    kind: Literal["base"] = "base"
+    value: Optional[Any] = None
+    protocol: Optional[ProtocolInfo] = None
+    serialize_error: Optional[str] = None
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate_fields(cls, data: Any) -> Any:
+        """Ensure at least one of value or protocol keys is specified."""
+        if cls is PayloadDTO and isinstance(data, dict):
+            required_fields = {"value", "protocol"}
+            if not any(field in data for field in required_fields):
+                raise ValueError("At least one of 'value' or 'protocol' must be specified")
+        return data
+
+
+class ResultDTO(PayloadDTO):
     """
     A JSON representation of an execution result.
 
     Args:
+        kind: Discriminator field for DTO type dispatch.
         success: Whether the execution was successful.
         value: The value of the execution or the exception if the execution failed.
         protocol: The protocol used to serialize the result.
         metadata: The metadata of the result.
     """
 
+    kind: Literal["result"] = "result" # type: ignore[assignment]
     success: bool
-    value: Optional[Any] = None
-    protocol: Optional[ProtocolInfo] = None
     metadata: Optional[Union[ResultMetadata, ExceptionMetadata]] = None
-    serialize_error: Optional[str] = None
 
     @model_validator(mode="before")
     @classmethod
@@ -93,3 +118,23 @@ class ResultDTO(BaseModel):
         if not any(field in data for field in required_fields):
             raise ValueError("At least one of 'value', 'protocol', or 'metadata' must be specified")
         return data
+
+
+def _get_dto_kind(data: Any) -> str:
+    """Extract the 'kind' discriminator from input, defaulting to 'result' for backward compatibility."""
+    if isinstance(data, dict):
+        kind = data.get("kind", "result")
+    else:
+        kind = getattr(data, "kind", "result")
+    return str(kind)
+
+
+AnyResultDTO = Annotated[
+    Union[
+        Annotated[ResultDTO, Tag("result")],
+        Annotated[PayloadDTO, Tag("base")],
+    ],
+    Discriminator(_get_dto_kind),
+]
+
+ResultDTOAdapter: TypeAdapter[AnyResultDTO] = TypeAdapter(AnyResultDTO)
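
The discriminated union above lets one adapter parse both argument payloads and results; when `kind` is absent, `_get_dto_kind` falls back to `"result"`, so payloads written by older clients still validate. A minimal sketch (illustrative only; the import path matches the hunks):

```python
from snowflake.ml.jobs._interop.dto_schema import (
    PayloadDTO,
    ResultDTO,
    ResultDTOAdapter,
)

# "kind" missing -> defaults to "result", so legacy result payloads still parse as ResultDTO.
legacy = ResultDTOAdapter.validate_python({"success": True, "value": 123})
assert isinstance(legacy, ResultDTO)

# Explicit kinds route to the matching model.
arg = ResultDTOAdapter.validate_python({"kind": "base", "value": {"x": 1}})
assert isinstance(arg, PayloadDTO) and not isinstance(arg, ResultDTO)
```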
@@ -17,6 +17,8 @@ Condition = Union[type, tuple[type, ...], Callable[[Any], bool], None]
 
 logger = logging.getLogger(__name__)
 
+SESSION_KEY_PREFIX = "session@"
+
 
 class SerializationError(TypeError):
     """Exception raised when a serialization protocol fails."""
@@ -136,9 +138,10 @@ class CloudPickleProtocol(SerializationProtocol):
 
     def save(self, obj: Any, dest_dir: str, session: Optional[snowpark.Session] = None) -> ProtocolInfo:
         """Save the object to the destination directory."""
+        replaced_obj = self._pack_obj(obj)
         result_path = posixpath.join(dest_dir, self.DEFAULT_PATH)
         with data_utils.open_stream(result_path, "wb", session=session) as f:
-            self._backend.dump(obj, f)
+            self._backend.dump(replaced_obj, f)
         manifest: BinaryManifest = {"path": result_path}
         return self.protocol_info.with_manifest(manifest)
 
@@ -157,12 +160,15 @@ class CloudPickleProtocol(SerializationProtocol):
         payload_manifest = cast(BinaryManifest, payload_info.manifest)
         try:
             if payload_bytes := payload_manifest.get("bytes"):
-
-
-
-
-
-
+                result = self._backend.loads(payload_bytes)
+            elif payload_b64 := payload_manifest.get("base64"):
+                result = self._backend.loads(base64.b64decode(payload_b64))
+            else:
+                result_path = path_transform(payload_manifest["path"]) if path_transform else payload_manifest["path"]
+                with data_utils.open_stream(result_path, "rb", session=session) as f:
+                    result = self._backend.load(f)
+
+            return self._unpack_obj(result, session=session)
         except (
             pickle.UnpicklingError,
             TypeError,
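
For reference, a hedged sketch of the three manifest shapes the branch above handles; the key names (`bytes`, `base64`, `path`) come from this hunk, while the values and the use of `cloudpickle` as the backend are illustrative assumptions:

```python
import base64

import cloudpickle

small_inline = {"bytes": cloudpickle.dumps({"x": 1})}  # raw pickled bytes, kept in memory
text_safe = {"base64": base64.b64encode(cloudpickle.dumps(2)).decode()}  # JSON-safe transport
staged = {"path": "@my_stage/mljob/result.pkl"}  # pointer to a staged file to stream back
```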
@@ -173,6 +179,117 @@ class CloudPickleProtocol(SerializationProtocol):
                 raise error from pickle_error
             raise
 
+    def _pack_obj(self, obj: Any) -> Any:
+        """Pack objects into JSON-safe dicts using reserved marker keys.
+
+        Markers:
+            - "type@": container type for list/tuple (list or tuple)
+            - "#<i>": positional element for list/tuple at index i
+            - "session@": placeholder for snowpark.Session values
+              - "session@#<i>" for list/tuple entries
+              - "session@<key>" for dict entries
+              - {"session@": None} for a bare Session object
+
+        Example:
+            obj = {"x": [1, session], "s": session}
+            packed = {
+                "x": {"type@": list, "#0": 1, "session@#1": None},
+                "session@s": None,
+            }
+            _unpack_obj(packed, session) == obj
+
+        Args:
+            obj: Object to pack into JSON-safe marker dictionaries.
+
+        Returns:
+            Packed representation with markers for session references.
+        """
+        arguments: dict[str, Any] = {}
+        if isinstance(obj, tuple) or isinstance(obj, list):
+            arguments = {"type@": type(obj)}
+            for i, arg in enumerate(obj):
+                if isinstance(arg, snowpark.Session):
+                    arguments[f"{SESSION_KEY_PREFIX}#{i}"] = None
+                else:
+                    arguments[f"#{i}"] = self._pack_obj(arg)
+            return arguments
+        elif isinstance(obj, dict):
+            for k, v in obj.items():
+                if isinstance(v, snowpark.Session):
+                    arguments[f"{SESSION_KEY_PREFIX}{k}"] = None
+                else:
+                    arguments[k] = self._pack_obj(v)
+            return arguments
+        elif isinstance(obj, snowpark.Session):
+            # Box session into a dict marker so we can distinguish it from other plain objects.
+            arguments[f"{SESSION_KEY_PREFIX}"] = None
+            return arguments
+        else:
+            return obj
+
+    def _unpack_obj(self, obj: Any, session: Optional[snowpark.Session] = None) -> Any:
+        """Unpack dict markers back into containers and Session references.
+
+        Markers:
+            - "type@": container type for list/tuple (list or tuple)
+            - "#<i>": positional element for list/tuple at index i
+            - "session@": placeholder for snowpark.Session values
+              - "session@#<i>" for list/tuple entries
+              - "session@<key>" for dict entries
+              - {"session@": None} for a bare Session object
+
+        Example:
+            packed = {
+                "x": {"type@": list, "#0": 1, "session@#1": None},
+                "session@s": None,
+            }
+            obj = _unpack_obj(packed, session)
+            # obj == {"x": [1, session], "s": session}
+
+        Args:
+            obj: Packed object with marker dictionaries.
+            session: Session to inject for session markers.
+
+        Returns:
+            Unpacked object with session references restored.
+        """
+        if not isinstance(obj, dict):
+            return obj
+        elif len(obj) == 1 and SESSION_KEY_PREFIX in obj:
+            return session
+        else:
+            type = obj.get("type@", None)
+            # If type is None, we are unpacking a dict
+            if type is None:
+                result_dict = {}
+                for k, v in obj.items():
+                    if k.startswith(SESSION_KEY_PREFIX):
+                        result_key = k[len(SESSION_KEY_PREFIX) :]
+                        result_dict[result_key] = session
+                    else:
+                        result_dict[k] = self._unpack_obj(v, session)
+                return result_dict
+            # If type is not None, we are unpacking a tuple or list
+            else:
+                indexes = []
+                for k, _ in obj.items():
+                    if "#" in k:
+                        indexes.append(int(k.split("#")[-1]))
+
+                if not indexes:
+                    return tuple() if type is tuple else []
+                result_list: list[Any] = [None] * (max(indexes) + 1)
+
+                for k, v in obj.items():
+                    if k == "type@":
+                        continue
+                    idx = int(k.split("#")[-1])
+                    if k.startswith(SESSION_KEY_PREFIX):
+                        result_list[idx] = session
+                    else:
+                        result_list[idx] = self._unpack_obj(v, session)
+                return tuple(result_list) if type is tuple else result_list
+
 
 class ArrowTableProtocol(SerializationProtocol):
     """
@@ -1,3 +1,4 @@
+import io
 import logging
 import os
 import traceback
@@ -10,7 +11,9 @@ from snowflake import snowpark
 from snowflake.ml.jobs._interop import data_utils, exception_utils, legacy, protocols
 from snowflake.ml.jobs._interop.dto_schema import (
     ExceptionMetadata,
+    PayloadDTO,
     ResultDTO,
+    ResultDTOAdapter,
     ResultMetadata,
 )
 from snowflake.ml.jobs._interop.results import ExecutionResult, LoadedExecutionResult
@@ -23,79 +26,137 @@ DEFAULT_PROTOCOL.try_register_protocol(protocols.ArrowTableProtocol)
 DEFAULT_PROTOCOL.try_register_protocol(protocols.PandasDataFrameProtocol)
 DEFAULT_PROTOCOL.try_register_protocol(protocols.NumpyArrayProtocol)
 
+# Constants for argument encoding
+_MAX_INLINE_SIZE = 1024 * 1024 # 1MB - https://docs.snowflake.com/en/user-guide/query-size-limits
 
 logger = logging.getLogger(__name__)
 
 
-def
+def save(
+    value: Any,
+    path: str,
+    session: Optional[snowpark.Session] = None,
+    max_inline_size: int = 0,
+) -> Optional[bytes]:
     """
-
+    Serialize a value. Returns inline bytes if small enough, else writes to file.
+
+    Args:
+        value: The value to serialize. If ExecutionResult, creates ResultDTO with success flag.
+        path: Full file path for writing the DTO (if needed). Protocol data saved to path's parent.
+        session: Snowpark session for stage operations.
+        max_inline_size: Max bytes for inline return. 0 = always write to file.
+
+    Returns:
+        Encoded bytes if <= max_inline_size, else None (written to file).
+
+    Raises:
+        Exception: If session validation fails during serialization.
     """
-
-        success=
-        value
-
+    if isinstance(value, ExecutionResult):
+        dto: PayloadDTO = ResultDTO(success=value.success, value=value.value)
+        raw_value = value.value
+    else:
+        dto = PayloadDTO(value=value)
+        raw_value = value
 
     try:
-
-        payload = DEFAULT_CODEC.encode(result_dto)
+        payload = DEFAULT_CODEC.encode(dto)
     except TypeError:
-
-
+        dto.value = None # Remove raw value to avoid serialization error
+        if isinstance(dto, ResultDTO):
+            # Metadata enables client fallback display when result can't be deserialized (protocol mismatch)..
+            dto.metadata = _get_metadata(raw_value)
         try:
             path_dir = PurePath(path).parent.as_posix()
-            protocol_info = DEFAULT_PROTOCOL.save(
-
+            protocol_info = DEFAULT_PROTOCOL.save(raw_value, path_dir, session=session)
+            dto.protocol = protocol_info
 
         except Exception as e:
             logger.warning(f"Error dumping result value: {repr(e)}")
-
-
-
-
+            # We handle serialization failures differently based on the DTO type:
+            # 1. Job Results (ResultDTO): Allow a "soft-fail."
+            #    Since the job has already executed, we return the serialization error
+            #    to the client so they can debug the output or update their protocol version.
+            # 2. Input Arguments: Trigger a "hard-fail."
+            #    If arguments cannot be saved, the job script cannot run. We raise
+            #    an immediate exception to prevent execution with invalid state.
+            if not isinstance(dto, ResultDTO):
+                raise
+            dto.serialize_error = repr(e)
+
+        # Encode the modified DTO
+        payload = DEFAULT_CODEC.encode(dto)
+
+    if not isinstance(dto, ResultDTO) and len(payload) <= max_inline_size:
+        return payload
 
     with data_utils.open_stream(path, "wb", session=session) as stream:
         stream.write(payload)
+    return None
+
+
+save_result = save # Backwards compatibility
 
 
-def
-
-
-
+def load(
+    path_or_data: str,
+    session: Optional[snowpark.Session] = None,
+    path_transform: Optional[Callable[[str], str]] = None,
+) -> Any:
+    """Load data from a file path or inline string."""
+
     try:
-        with data_utils.open_stream(
+        with data_utils.open_stream(path_or_data, "r", session=session) as stream:
             # Load the DTO as a dict for easy fallback to legacy loading if necessary
             dto_dict = DEFAULT_CODEC.decode(stream, as_dict=True)
+    # the exception could be OSError or BlockingIOError(the file name is too long)
+    except OSError as e:
+        # path_or_data might be inline data
+        try:
+            dto_dict = DEFAULT_CODEC.decode(io.StringIO(path_or_data), as_dict=True)
+        except Exception:
+            raise e
     except UnicodeDecodeError:
         # Path may be a legacy result file (cloudpickle)
-        # TODO: Re-use the stream
         assert session is not None
-        return legacy.load_legacy_result(session,
+        return legacy.load_legacy_result(session, path_or_data)
 
     try:
-        dto =
+        dto = ResultDTOAdapter.validate_python(dto_dict)
     except pydantic.ValidationError as e:
         if "success" in dto_dict:
            assert session is not None
-            if
-
-            return legacy.load_legacy_result(session,
+            if path_or_data.endswith(".json"):
+                path_or_data = os.path.splitext(path_or_data)[0] + ".pkl"
+            return legacy.load_legacy_result(session, path_or_data, result_json=dto_dict)
         raise ValueError("Invalid result schema") from e
 
     # Try loading data from file using the protocol info
-
+    payload_value = None
     data_load_error = None
     if dto.protocol is not None:
         try:
             logger.debug(f"Loading result value with protocol {dto.protocol}")
-
+            payload_value = DEFAULT_PROTOCOL.load(dto.protocol, session=session, path_transform=path_transform)
         except sp_exceptions.SnowparkSQLException:
             raise # Data retrieval errors should be bubbled up
         except Exception as e:
             logger.debug(f"Error loading result value with protocol {dto.protocol}: {repr(e)}")
+            # Error handling strategy depends on the DTO type:
+            # 1. ResultDTO: Soft-fail. The job has already finished.
+            #    We package the load error into the result so the client can
+            #    debug or adjust their protocol version to retrieve the output.
+            # 2. PayloadDTO : Raise a hard error. If arguments cannot be
+            #    loaded, the job cannot run. We abort early to prevent execution.
+            if not isinstance(dto, ResultDTO):
+                raise
             data_load_error = e
 
-    #
+    # Prepare to assemble the final result
+    if not isinstance(dto, ResultDTO):
+        return payload_value
+
     if dto.serialize_error:
         serialize_error = TypeError("Original result serialization failed with error: " + dto.serialize_error)
         if data_load_error:
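
A hedged sketch of the `save()` contract documented in the hunk above; the module path is taken from the package file list (`snowflake/ml/jobs/_interop/utils.py`) and is an assumption, as is the helper name:

```python
from typing import Any, Optional

from snowflake.ml.jobs._interop import utils  # assumed module for the hunk above
from snowflake.snowpark import Session


def stash_args(session: Session, args: dict[str, Any], stage_path: str) -> Optional[bytes]:
    # Returns the encoded DTO bytes when they fit under the inline limit;
    # otherwise returns None and the DTO is written to stage_path instead.
    return utils.save(args, stage_path, session=session, max_inline_size=utils._MAX_INLINE_SIZE)
```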
@@ -103,8 +164,7 @@ def load_result(
         else:
             data_load_error = serialize_error
 
-
-    result_value = result_value if result_value is not None else dto.value
+    result_value = payload_value if payload_value is not None else dto.value
     if not dto.success and result_value is None:
         # Try to reconstruct exception from metadata if available
         if isinstance(dto.metadata, ExceptionMetadata):
@@ -115,7 +175,6 @@ def load_result(
                 traceback=dto.metadata.traceback,
                 original_repr=dto.metadata.repr,
             )
-
     # Generate a generic error if we still don't have a value,
     # attaching the data load error if any
     if result_value is None:
@@ -5,6 +5,7 @@ from snowflake.ml.jobs._utils.types import ComputeResources
 DEFAULT_CONTAINER_NAME = "main"
 MEMORY_VOLUME_NAME = "dshm"
 STAGE_VOLUME_NAME = "stage-volume"
+RESULT_VOLUME_NAME = "result-volume"
 DEFAULT_PYTHON_VERSION = "3.10"
 
 # Environment variables
@@ -109,3 +110,6 @@ CLOUD_INSTANCE_FAMILIES = {
     SnowflakeCloudType.AZURE: AZURE_INSTANCE_FAMILIES,
     SnowflakeCloudType.GCP: GCP_INSTANCE_FAMILIES,
 }
+
+# Magic attributes
+IS_MLJOB_REMOTE_ATTR = "_is_mljob_remote_callable"
@@ -1,6 +1,11 @@
 import os
-from
-
+from typing import Callable, Optional, Union
+
+from snowflake.ml._internal.utils.snowflake_env import SnowflakeCloudType
+from snowflake.snowpark import context as sp_context
+
+# Default value type: can be a bool or a callable that returns a bool
+DefaultValue = Union[bool, Callable[[], bool]]
 
 
 def parse_bool_env_value(value: Optional[str], default: bool = False) -> bool:
@@ -28,22 +33,101 @@ def parse_bool_env_value(value: Optional[str], default: bool = False) -> bool:
     return default
 
 
-
-
-
-
+def _enabled_in_clouds(*clouds: SnowflakeCloudType) -> Callable[[], bool]:
+    """Create a callable that checks if the current environment is in any of the specified clouds.
+
+    This factory function returns a callable that can be used as a dynamic default
+    for feature flags. The returned callable will check if the current Snowflake
+    session is connected to a region in any of the specified cloud providers.
+
+    Args:
+        *clouds: One or more SnowflakeCloudType values to check against.
+
+    Returns:
+        A callable that returns True if running in any of the specified clouds,
+        False otherwise (including when no session is available).
+
+    Example:
+        >>> # Enable feature only in GCP
+        >>> default=_enabled_in_clouds(SnowflakeCloudType.GCP)
+        >>>
+        >>> # Enable feature in both GCP and Azure
+        >>> default=_enabled_in_clouds(SnowflakeCloudType.GCP, SnowflakeCloudType.AZURE)
+    """
+    cloud_set = frozenset(clouds)
+
+    def check() -> bool:
+        try:
+            from snowflake.ml._internal.utils.snowflake_env import get_current_cloud
+
+            session = sp_context.get_active_session()
+            current_cloud = get_current_cloud(session, default=SnowflakeCloudType.AWS)
+            return current_cloud in cloud_set
+        except Exception:
+            # If we can't determine the cloud (no session, SQL error, etc.),
+            # default to False for safety
+            return False
+
+    return check
 
-
-
+
+
+class _FeatureFlag:
+    """A feature flag backed by an environment variable with a configurable default.
+
+    The default value can be a constant boolean or a callable that dynamically
+    determines the default based on runtime context (e.g., cloud provider).
+    """
+
+    def __init__(self, env_var: str, default: DefaultValue = False) -> None:
+        """Initialize a feature flag.
 
         Args:
-
+            env_var: The environment variable name that controls this flag.
+            default: The default value when the env var is not set. Can be:
+                - A boolean constant (True/False)
+                - A callable that returns a boolean (evaluated at check time)
+        """
+        self._env_var = env_var
+        self._default = default
+
+    @property
+    def value(self) -> str:
+        """Return the environment variable name (for compatibility with Enum-style access)."""
+        return self._env_var
+
+    def _get_default(self) -> bool:
+        """Get the default value, calling it if it's a callable."""
+        if callable(self._default):
+            return self._default()
+        return self._default
+
+    def is_enabled(self) -> bool:
+        """Check if the feature flag is enabled.
+
+        First checks the environment variable. If not set or unrecognized,
+        falls back to the configured default value.
 
         Returns:
-            True if the
-            False if set to a falsy value, or the default value if not set.
+            True if the feature is enabled, False otherwise.
         """
-
+        env_value = os.getenv(self._env_var)
+        if env_value is not None:
+            # Environment variable is set, parse it
+            result = parse_bool_env_value(env_value, default=self._get_default())
+            return result
+        else:
+            # Environment variable not set, use the default
+            return self._get_default()
 
     def __str__(self) -> str:
-        return self.
+        return self._env_var
+
+
+class FeatureFlags:
+    """Collection of feature flags for ML Jobs."""
+
+    ENABLE_RUNTIME_VERSIONS = _FeatureFlag("MLRS_ENABLE_RUNTIME_VERSIONS", default=True)
+    ENABLE_STAGE_MOUNT_V2 = _FeatureFlag(
+        "MLRS_ENABLE_STAGE_MOUNT_V2",
+        default=_enabled_in_clouds(SnowflakeCloudType.GCP),
+    )