snowflake-ml-python 1.9.2__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/utils/service_logger.py +31 -17
- snowflake/ml/experiment/callback/keras.py +63 -0
- snowflake/ml/experiment/callback/lightgbm.py +59 -0
- snowflake/ml/experiment/callback/xgboost.py +67 -0
- snowflake/ml/experiment/utils.py +14 -0
- snowflake/ml/jobs/_utils/__init__.py +0 -0
- snowflake/ml/jobs/_utils/constants.py +4 -1
- snowflake/ml/jobs/_utils/payload_utils.py +55 -21
- snowflake/ml/jobs/_utils/query_helper.py +5 -1
- snowflake/ml/jobs/_utils/runtime_env_utils.py +63 -0
- snowflake/ml/jobs/_utils/scripts/get_instance_ip.py +2 -2
- snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +5 -5
- snowflake/ml/jobs/_utils/spec_utils.py +41 -8
- snowflake/ml/jobs/_utils/stage_utils.py +22 -9
- snowflake/ml/jobs/_utils/types.py +5 -7
- snowflake/ml/jobs/job.py +1 -1
- snowflake/ml/jobs/manager.py +1 -13
- snowflake/ml/model/_client/model/model_version_impl.py +219 -55
- snowflake/ml/model/_client/ops/service_ops.py +230 -30
- snowflake/ml/model/_client/service/model_deployment_spec.py +103 -27
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +11 -5
- snowflake/ml/model/_model_composer/model_composer.py +1 -70
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +2 -43
- snowflake/ml/model/event_handler.py +87 -18
- snowflake/ml/model/inference_engine.py +5 -0
- snowflake/ml/model/models/huggingface_pipeline.py +74 -51
- snowflake/ml/model/type_hints.py +26 -1
- snowflake/ml/registry/_manager/model_manager.py +37 -70
- snowflake/ml/registry/_manager/model_parameter_reconciler.py +294 -0
- snowflake/ml/registry/registry.py +0 -19
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.9.2.dist-info → snowflake_ml_python-1.11.0.dist-info}/METADATA +523 -491
- {snowflake_ml_python-1.9.2.dist-info → snowflake_ml_python-1.11.0.dist-info}/RECORD +36 -29
- snowflake/ml/experiment/callback.py +0 -121
- {snowflake_ml_python-1.9.2.dist-info → snowflake_ml_python-1.11.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.9.2.dist-info → snowflake_ml_python-1.11.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.9.2.dist-info → snowflake_ml_python-1.11.0.dist-info}/top_level.txt +0 -0
@@ -9,6 +9,15 @@ from typing import Optional
 
 import platformdirs
 
+# Module-level logger for operational messages that should appear on console
+stdout_handler = logging.StreamHandler(sys.stdout)
+stdout_handler.setFormatter(logging.Formatter("%(message)s"))
+
+console_logger = logging.getLogger(__name__)
+console_logger.addHandler(stdout_handler)
+console_logger.setLevel(logging.INFO)
+console_logger.propagate = False
+
 
 class LogColor(enum.Enum):
     GREY = "\x1b[38;20m"
@@ -109,42 +118,36 @@ def _get_or_create_parent_logger(operation_id: str) -> logging.Logger:
     """Get or create a parent logger with FileHandler for the operation."""
     parent_logger_name = f"snowflake_ml_operation_{operation_id}"
     parent_logger = logging.getLogger(parent_logger_name)
+    parent_logger.setLevel(logging.DEBUG)
+    parent_logger.propagate = False
 
-    # Only add handler if it doesn't exist yet
     if not parent_logger.handlers:
         log_file_path = _get_log_file_path(operation_id)
 
         if log_file_path:
-            # Successfully found a writable location
             try:
                 file_handler = logging.FileHandler(log_file_path)
                 file_handler.setFormatter(logging.Formatter("%(name)s [%(asctime)s] [%(levelname)s] %(message)s"))
                 parent_logger.addHandler(file_handler)
-                parent_logger.setLevel(logging.DEBUG)
-                parent_logger.propagate = False  # Don't propagate to root logger
 
-
-                parent_logger.warning(f"Operation logs saved to: {log_file_path}")
+                console_logger.info(f"create_service logs saved to: {log_file_path}")
             except OSError as e:
-
-                # Fall back to console-only logging
-                parent_logger.setLevel(logging.DEBUG)
-                parent_logger.propagate = False
-                parent_logger.warning(f"Could not create log file at {log_file_path}: {e}. Using console-only logging.")
+                console_logger.warning(f"Could not create log file at {log_file_path}: {e}.")
         else:
             # No writable location found, use console-only logging
-
-
-
+            console_logger.warning("No writable location found for create_service log file.")
+
+    if logging.getLogger().level > logging.INFO:
+        console_logger.info(
+            "To see logs in console, set log level to INFO: logging.getLogger().setLevel(logging.INFO)"
+        )
 
     return parent_logger
 
 
 def get_logger(logger_name: str, info_color: LogColor, operation_id: Optional[str] = None) -> logging.Logger:
     logger = logging.getLogger(logger_name)
-
-    handler.setFormatter(CustomFormatter(info_color))
-    logger.addHandler(handler)
+    root_logger = logging.getLogger()
 
     # If operation_id provided, set up parent logger with file handler
     if operation_id:
@@ -152,6 +155,17 @@ def get_logger(logger_name: str, info_color: LogColor, operation_id: Optional[st
         logger.parent = parent_logger
         logger.propagate = True
 
+        if root_logger.level <= logging.INFO:
+            handler = logging.StreamHandler(sys.stdout)
+            handler.setFormatter(CustomFormatter(info_color))
+            logger.addHandler(handler)
+    else:
+        # No operation_id - add console handler only if user wants verbose logging
+        if root_logger.level <= logging.INFO and not logger.handlers:
+            handler = logging.StreamHandler(sys.stdout)
+            handler.setFormatter(CustomFormatter(info_color))
+            logger.addHandler(handler)
+
     return logger
 
 
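
Note: per the hunks above, the per-operation console handlers are only attached when the root logger is at INFO or below, and the module emits the opt-in hint itself. On the caller side that opt-in would look roughly like this (a minimal sketch, not part of the diff):

import logging

# Opt in to seeing create_service progress messages on stdout,
# per the hint printed by _get_or_create_parent_logger above.
logging.getLogger().setLevel(logging.INFO)
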
@@ -0,0 +1,63 @@
+import json
+from typing import TYPE_CHECKING, Any, Optional
+from warnings import warn
+
+import keras
+
+from snowflake.ml.experiment import utils
+
+if TYPE_CHECKING:
+    from snowflake.ml.experiment.experiment_tracking import ExperimentTracking
+    from snowflake.ml.model.model_signature import ModelSignature
+
+
+class SnowflakeKerasCallback(keras.callbacks.Callback):
+    def __init__(
+        self,
+        experiment_tracking: "ExperimentTracking",
+        log_model: bool = True,
+        log_metrics: bool = True,
+        log_params: bool = True,
+        log_every_n_epochs: int = 1,
+        model_name: Optional[str] = None,
+        model_signature: Optional["ModelSignature"] = None,
+    ) -> None:
+        self._experiment_tracking = experiment_tracking
+        self.log_model = log_model
+        self.log_metrics = log_metrics
+        self.log_params = log_params
+        if log_every_n_epochs < 1:
+            raise ValueError("`log_every_n_epochs` must be positive.")
+        self.log_every_n_epochs = log_every_n_epochs
+        self.model_name = model_name
+        self.model_signature = model_signature
+
+    def on_train_begin(self, logs: Optional[dict[str, Any]] = None) -> None:
+        if self.log_params:
+            params = json.loads(self.model.to_json())
+            self._experiment_tracking.log_params(utils.flatten_nested_params(params))
+
+    def on_epoch_end(self, epoch: int, logs: Optional[dict[str, Any]] = None) -> None:
+        if self.log_metrics and logs and epoch % self.log_every_n_epochs == 0:
+            for key, value in logs.items():
+                try:
+                    value = float(value)
+                except Exception:
+                    pass
+                else:
+                    self._experiment_tracking.log_metric(key=key, value=value, step=epoch)
+
+    def on_train_end(self, logs: Optional[dict[str, Any]] = None) -> None:
+        if self.log_model:
+            if not self.model_signature:
+                warn(
+                    "Model will not be logged because model signature is missing. "
+                    "To autolog the model, please specify `model_signature` when constructing SnowflakeKerasCallback."
+                )
+                return
+            model_name = self.model_name or self._experiment_tracking._get_or_set_experiment().name + "_model"
+            self._experiment_tracking.log_model(  # type: ignore[call-arg]
+                model=self.model,
+                model_name=model_name,
+                signatures={"predict": self.model_signature},
+            )
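
For orientation, a minimal usage sketch of the new Keras callback; the ExperimentTracking instance, the model signature, and the model/data names are placeholders, since their construction is not part of this diff.

# Hypothetical usage of SnowflakeKerasCallback (exp, sig, model and data are placeholders).
from snowflake.ml.experiment.callback.keras import SnowflakeKerasCallback

exp = ...  # an ExperimentTracking instance, set up elsewhere
sig = ...  # a ModelSignature describing the model's predict inputs/outputs

callback = SnowflakeKerasCallback(
    experiment_tracking=exp,
    log_every_n_epochs=5,   # log metrics on every 5th epoch
    model_signature=sig,    # required for the model to be logged at the end of training
)
model.fit(x_train, y_train, epochs=20, callbacks=[callback])
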
@@ -0,0 +1,59 @@
+from typing import TYPE_CHECKING, Optional
+from warnings import warn
+
+import lightgbm as lgb
+
+if TYPE_CHECKING:
+    from snowflake.ml.experiment.experiment_tracking import ExperimentTracking
+    from snowflake.ml.model.model_signature import ModelSignature
+
+
+class SnowflakeLightgbmCallback(lgb.callback._RecordEvaluationCallback):
+    def __init__(
+        self,
+        experiment_tracking: "ExperimentTracking",
+        log_model: bool = True,
+        log_metrics: bool = True,
+        log_params: bool = True,
+        log_every_n_epochs: int = 1,
+        model_name: Optional[str] = None,
+        model_signature: Optional["ModelSignature"] = None,
+    ) -> None:
+        self._experiment_tracking = experiment_tracking
+        self.log_model = log_model
+        self.log_metrics = log_metrics
+        self.log_params = log_params
+        if log_every_n_epochs < 1:
+            raise ValueError("`log_every_n_epochs` must be positive.")
+        self.log_every_n_epochs = log_every_n_epochs
+        self.model_name = model_name
+        self.model_signature = model_signature
+
+        super().__init__(eval_result={})
+
+    def __call__(self, env: lgb.callback.CallbackEnv) -> None:
+        if self.log_params:
+            if env.iteration == env.begin_iteration:  # Log params only at the first iteration
+                self._experiment_tracking.log_params(env.params)
+
+        if self.log_metrics and env.iteration % self.log_every_n_epochs == 0:
+            super().__call__(env)
+            for dataset_name, metrics in self.eval_result.items():
+                for metric_name, log in metrics.items():
+                    metric_key = dataset_name + ":" + metric_name
+                    self._experiment_tracking.log_metric(key=metric_key, value=log[-1], step=env.iteration)
+
+        if self.log_model:
+            if env.iteration == env.end_iteration - 1:  # Log model only at the last iteration
+                if self.model_signature:
+                    model_name = self.model_name or self._experiment_tracking._get_or_set_experiment().name + "_model"
+                    self._experiment_tracking.log_model(  # type: ignore[call-arg]
+                        model=env.model,
+                        model_name=model_name,
+                        signatures={"predict": self.model_signature},
+                    )
+                else:
+                    warn(
+                        "Model will not be logged because model signature is missing. To autolog the model, "
+                        "please specify `model_signature` when constructing SnowflakeLightgbmCallback."
+                    )
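
A similar sketch for the LightGBM variant, which hooks in as an lgb.train callback; the ExperimentTracking instance, signature, and training data are placeholders.

# Hypothetical usage of SnowflakeLightgbmCallback (exp, sig, X/y are placeholders).
import lightgbm as lgb
from snowflake.ml.experiment.callback.lightgbm import SnowflakeLightgbmCallback

exp, sig = ..., ...  # ExperimentTracking instance and ModelSignature
train_set = lgb.Dataset(X_train, label=y_train)
valid_set = lgb.Dataset(X_valid, label=y_valid, reference=train_set)
booster = lgb.train(
    {"objective": "regression", "metric": "l2"},
    train_set,
    valid_sets=[valid_set],
    valid_names=["valid"],  # metrics are logged under keys like "valid:l2"
    callbacks=[SnowflakeLightgbmCallback(experiment_tracking=exp, model_signature=sig)],
)
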
@@ -0,0 +1,67 @@
+import json
+from typing import TYPE_CHECKING, Any, Optional
+from warnings import warn
+
+import xgboost as xgb
+
+from snowflake.ml.experiment import utils
+
+if TYPE_CHECKING:
+    from snowflake.ml.experiment.experiment_tracking import ExperimentTracking
+    from snowflake.ml.model.model_signature import ModelSignature
+
+
+class SnowflakeXgboostCallback(xgb.callback.TrainingCallback):
+    def __init__(
+        self,
+        experiment_tracking: "ExperimentTracking",
+        log_model: bool = True,
+        log_metrics: bool = True,
+        log_params: bool = True,
+        log_every_n_epochs: int = 1,
+        model_name: Optional[str] = None,
+        model_signature: Optional["ModelSignature"] = None,
+    ) -> None:
+        self._experiment_tracking = experiment_tracking
+        self.log_model = log_model
+        self.log_metrics = log_metrics
+        self.log_params = log_params
+        if log_every_n_epochs < 1:
+            raise ValueError("`log_every_n_epochs` must be positive.")
+        self.log_every_n_epochs = log_every_n_epochs
+        self.model_name = model_name
+        self.model_signature = model_signature
+
+    def before_training(self, model: xgb.Booster) -> xgb.Booster:
+        if self.log_params:
+            params = json.loads(model.save_config())
+            self._experiment_tracking.log_params(utils.flatten_nested_params(params))
+
+        return model
+
+    def after_iteration(self, model: Any, epoch: int, evals_log: dict[str, dict[str, Any]]) -> bool:
+        if self.log_metrics and epoch % self.log_every_n_epochs == 0:
+            for dataset_name, metrics in evals_log.items():
+                for metric_name, log in metrics.items():
+                    metric_key = dataset_name + ":" + metric_name
+                    self._experiment_tracking.log_metric(key=metric_key, value=log[-1], step=epoch)
+
+        return False
+
+    def after_training(self, model: xgb.Booster) -> xgb.Booster:
+        if self.log_model:
+            if not self.model_signature:
+                warn(
+                    "Model will not be logged because model signature is missing. "
+                    "To autolog the model, please specify `model_signature` when constructing SnowflakeXgboostCallback."
+                )
+                return model
+
+            model_name = self.model_name or self._experiment_tracking._get_or_set_experiment().name + "_model"
+            self._experiment_tracking.log_model(  # type: ignore[call-arg]
+                model=model,
+                model_name=model_name,
+                signatures={"predict": self.model_signature},
+            )
+
+        return model
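
And for XGBoost, where the callback plugs into xgb.train and metrics come from the evals list; again, the ExperimentTracking instance, signature, and data are placeholders.

# Hypothetical usage of SnowflakeXgboostCallback (exp, sig, X/y are placeholders).
import xgboost as xgb
from snowflake.ml.experiment.callback.xgboost import SnowflakeXgboostCallback

exp, sig = ..., ...  # ExperimentTracking instance and ModelSignature
dtrain = xgb.DMatrix(X_train, label=y_train)
dvalid = xgb.DMatrix(X_valid, label=y_valid)
booster = xgb.train(
    {"objective": "reg:squarederror"},
    dtrain,
    num_boost_round=50,
    evals=[(dvalid, "valid")],  # metrics are logged under keys like "valid:rmse"
    callbacks=[SnowflakeXgboostCallback(experiment_tracking=exp, model_signature=sig)],
)
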
@@ -0,0 +1,14 @@
+from typing import Any, Union
+
+
+def flatten_nested_params(params: Union[list[Any], dict[str, Any]], prefix: str = "") -> dict[str, Any]:
+    flat_params = {}
+    items = params.items() if isinstance(params, dict) else enumerate(params)
+    for key, value in items:
+        key = str(key).replace(".", "_")  # Replace dots in keys to avoid collisions involving nested keys
+        new_prefix = f"{prefix}.{key}" if prefix else key
+        if isinstance(value, (dict, list)):
+            flat_params.update(flatten_nested_params(value, new_prefix))
+        else:
+            flat_params[new_prefix] = value
+    return flat_params
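
The helper flattens nested dicts and lists into dot-joined keys; a quick worked example (standalone illustration):

flatten_nested_params({"learner": {"eta": 0.3, "eval": ["rmse", "mae"]}})
# -> {"learner.eta": 0.3, "learner.eval.0": "rmse", "learner.eval.1": "mae"}
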
File without changes
@@ -28,7 +28,7 @@ OUTPUT_MOUNT_PATH = f"{STAGE_VOLUME_MOUNT_PATH}/{OUTPUT_STAGE_SUBPATH}"
 DEFAULT_IMAGE_REPO = "/snowflake/images/snowflake_images"
 DEFAULT_IMAGE_CPU = "st_plat/runtime/x86/runtime_image/snowbooks"
 DEFAULT_IMAGE_GPU = "st_plat/runtime/x86/generic_gpu/runtime_image/snowbooks"
-DEFAULT_IMAGE_TAG = "1.
+DEFAULT_IMAGE_TAG = "1.6.2"
 DEFAULT_ENTRYPOINT_PATH = "func.py"
 
 # Percent of container memory to allocate for /dev/shm volume
@@ -98,3 +98,6 @@ CLOUD_INSTANCE_FAMILIES = {
     SnowflakeCloudType.AWS: AWS_INSTANCE_FAMILIES,
     SnowflakeCloudType.AZURE: AZURE_INSTANCE_FAMILIES,
 }
+
+# runtime version environment variable
+ENABLE_IMAGE_VERSION_ENV_VAR = "MLRS_ENABLE_RUNTIME_VERSIONS"
@@ -1,4 +1,5 @@
 import functools
+import importlib
 import inspect
 import io
 import itertools
@@ -7,6 +8,7 @@ import logging
 import pickle
 import sys
 import textwrap
+from importlib.abc import Traversable
 from pathlib import Path, PurePath
 from typing import Any, Callable, Optional, Union, cast, get_args, get_origin
 
@@ -63,6 +65,13 @@ _STARTUP_SCRIPT_CODE = textwrap.dedent(
 
     ##### Set up Python environment #####
     export PYTHONPATH=/opt/env/site-packages/
+    MLRS_SYSTEM_REQUIREMENTS_FILE=${{MLRS_SYSTEM_REQUIREMENTS_FILE:-"${{SYSTEM_DIR}}/requirements.txt"}}
+
+    if [ -f "${{MLRS_SYSTEM_REQUIREMENTS_FILE}}" ]; then
+        echo "Installing packages from $MLRS_SYSTEM_REQUIREMENTS_FILE"
+        pip install -r $MLRS_SYSTEM_REQUIREMENTS_FILE
+    fi
+
     MLRS_REQUIREMENTS_FILE=${{MLRS_REQUIREMENTS_FILE:-"requirements.txt"}}
     if [ -f "${{MLRS_REQUIREMENTS_FILE}}" ]; then
         # TODO: Prevent collisions with MLRS packages using virtualenvs
@@ -255,11 +264,24 @@ def upload_payloads(session: snowpark.Session, stage_path: PurePath, *payload_sp
     # Manually traverse the directory and upload each file, since Snowflake PUT
     # can't handle directories. Reduce the number of PUT operations by using
     # wildcard patterns to batch upload files with the same extension.
-
-
-
-
-
+    upload_path_patterns = set()
+    for p in source_path.resolve().rglob("*"):
+        if p.is_dir():
+            continue
+        if p.name.startswith("."):
+            # Hidden files: use .* pattern for batch upload
+            if p.suffix:
+                upload_path_patterns.add(p.parent.joinpath(f".*{p.suffix}"))
+            else:
+                upload_path_patterns.add(p.parent.joinpath(".*"))
+        else:
+            # Regular files: use * pattern for batch upload
+            if p.suffix:
+                upload_path_patterns.add(p.parent.joinpath(f"*{p.suffix}"))
+            else:
+                upload_path_patterns.add(p)
+
+    for path in upload_path_patterns:
         session.file.put(
             str(path),
             payload_stage_path.joinpath(path.parent.relative_to(source_path)).as_posix(),
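
The new loop batches PUT calls by directory and extension; a standalone sketch of the grouping it produces (paths are made up, and this is not the snowflake-ml code itself):

from pathlib import Path

files = [Path("app/train.py"), Path("app/util.py"), Path("app/data.csv"), Path("app/.env"), Path("app/Makefile")]
patterns = set()
for p in files:
    if p.name.startswith("."):
        patterns.add(p.parent / (f".*{p.suffix}" if p.suffix else ".*"))
    elif p.suffix:
        patterns.add(p.parent / f"*{p.suffix}")
    else:
        patterns.add(p)
print(sorted(str(x) for x in patterns))
# ['app/*.csv', 'app/*.py', 'app/.*', 'app/Makefile']
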
@@ -275,6 +297,27 @@ def upload_payloads(session: snowpark.Session, stage_path: PurePath, *payload_sp
         )
 
 
+def upload_system_resources(session: snowpark.Session, stage_path: PurePath) -> None:
+    resource_ref = importlib.resources.files(__package__).joinpath("scripts")
+
+    def upload_dir(ref: Traversable, relative_path: str = "") -> None:
+        for item in ref.iterdir():
+            current_path = Path(relative_path) / item.name if relative_path else Path(item.name)
+            if item.is_dir():
+                # Recursively process subdirectories
+                upload_dir(item, str(current_path))
+            elif item.is_file():
+                content = item.read_bytes()
+                session.file.put_stream(
+                    io.BytesIO(content),
+                    stage_path.joinpath(current_path).as_posix(),
+                    auto_compress=False,
+                    overwrite=True,
+                )
+
+    upload_dir(resource_ref)
+
+
 def resolve_source(
     source: Union[types.PayloadPath, Callable[..., Any]]
 ) -> Union[types.PayloadPath, Callable[..., Any]]:
@@ -454,8 +497,6 @@ class JobPayload:
                 overwrite=True,
             )
             source = Path(entrypoint.file_path.parent)
-            if not any(r.startswith("cloudpickle") for r in pip_requirements):
-                pip_requirements.append(f"cloudpickle~={version.parse(cp.__version__).major}.0")
 
         elif isinstance(source, stage_utils.StagePath):
             # copy payload to stage
@@ -470,19 +511,20 @@ class JobPayload:
 
         upload_payloads(session, app_stage_path, *additional_payload_specs)
 
-
-
+        if not any(r.startswith("cloudpickle") for r in pip_requirements):
+            pip_requirements.append(f"cloudpickle~={version.parse(cp.__version__).major}.0")
+
+        # Upload system scripts and requirements.txt generated by pip_requirements to system/ directory
+        system_stage_path = stage_path.joinpath(constants.SYSTEM_STAGE_SUBPATH)
         if pip_requirements:
             # Upload requirements.txt to stage
             session.file.put_stream(
                 io.BytesIO("\n".join(pip_requirements).encode()),
-                stage_location=
+                stage_location=system_stage_path.joinpath("requirements.txt").as_posix(),
                 auto_compress=False,
                 overwrite=True,
             )
 
-        # Upload startup script to system/ directory within payload
-        system_stage_path = stage_path.joinpath(constants.SYSTEM_STAGE_SUBPATH)
         # TODO: Make sure payload does not include file with same name
         session.file.put_stream(
             io.BytesIO(_STARTUP_SCRIPT_CODE.encode()),
@@ -491,15 +533,7 @@ class JobPayload:
             overwrite=False,  # FIXME
         )
 
-
-        for script_file in scripts_dir.glob("*"):
-            if script_file.is_file():
-                session.file.put(
-                    script_file.as_posix(),
-                    system_stage_path.as_posix(),
-                    overwrite=True,
-                    auto_compress=False,
-                )
+        upload_system_resources(session, system_stage_path)
         python_entrypoint: list[Union[str, PurePath]] = [
             PurePath(f"{constants.SYSTEM_MOUNT_PATH}/mljob_launcher.py"),
             PurePath(f"{constants.APP_MOUNT_PATH}/{entrypoint.file_path.relative_to(source).as_posix()}"),
@@ -4,6 +4,7 @@ from snowflake import snowpark
 from snowflake.snowpark import Row
 from snowflake.snowpark._internal import utils
 from snowflake.snowpark._internal.analyzer import snowflake_plan
+from snowflake.snowpark._internal.utils import is_in_stored_procedure
 
 
 def result_set_to_rows(session: snowpark.Session, result: dict[str, Any]) -> list[Row]:
@@ -14,7 +15,10 @@ def result_set_to_rows(session: snowpark.Session, result: dict[str, Any]) -> lis
 
 @snowflake_plan.SnowflakePlan.Decorator.wrap_exception  # type: ignore[misc]
 def run_query(session: snowpark.Session, query_text: str, params: Optional[Sequence[Any]] = None) -> list[Row]:
-
+    kwargs: dict[str, Any] = {"query": query_text, "params": params}
+    if not is_in_stored_procedure():  # type: ignore[no-untyped-call]
+        kwargs["_force_qmark_paramstyle"] = True
+    result = session._conn.run_query(**kwargs)
     if not isinstance(result, dict) or "data" not in result:
         raise ValueError(f"Unprocessable result: {result}")
     return result_set_to_rows(session, result)
@@ -0,0 +1,63 @@
+from typing import Any, Optional, Union
+
+from packaging.version import Version
+from pydantic import BaseModel, Field, RootModel, field_validator
+
+
+class SpcsContainerRuntime(BaseModel):
+    python_version: Version = Field(alias="pythonVersion")
+    hardware_type: str = Field(alias="hardwareType")
+    runtime_container_image: str = Field(alias="runtimeContainerImage")
+
+    @field_validator("python_version", mode="before")
+    @classmethod
+    def validate_python_version(cls, v: Union[str, Version]) -> Version:
+        if isinstance(v, Version):
+            return v
+        try:
+            return Version(v)
+        except Exception:
+            raise ValueError(f"Invalid Python version format: {v}")
+
+    class Config:
+        frozen = True
+        extra = "allow"
+        arbitrary_types_allowed = True
+
+
+class RuntimeEnvironmentEntry(BaseModel):
+    spcs_container_runtime: Optional[SpcsContainerRuntime] = Field(alias="spcsContainerRuntime", default=None)
+
+    class Config:
+        extra = "allow"
+        frozen = True
+
+
+class RuntimeEnvironmentsDict(RootModel[dict[str, RuntimeEnvironmentEntry]]):
+    @field_validator("root", mode="before")
+    @classmethod
+    def _filter_to_dict_entries(cls, data: Any) -> dict[str, dict[str, Any]]:
+        """
+        Pre-validation hook: keep only those items at the root level
+        whose values are dicts. Non-dict values will be dropped.
+
+        Args:
+            data: The input data to filter, expected to be a dictionary.
+
+        Returns:
+            A dictionary containing only the key-value pairs where values are dictionaries.
+
+        Raises:
+            ValueError: If input data is not a dictionary.
+        """
+        # If the entire root is not a dict, raise error immediately
+        if not isinstance(data, dict):
+            raise ValueError(f"Expected dictionary data, but got {type(data).__name__}: {data}")
+
+        # Filter out any key whose value is not a dict
+        return {key: value for key, value in data.items() if isinstance(value, dict)}
+
+    def get_spcs_container_runtimes(self) -> list[SpcsContainerRuntime]:
+        return [
+            entry.spcs_container_runtime for entry in self.root.values() if entry.spcs_container_runtime is not None
+        ]
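
The root model tolerates non-dict entries at the top level and only surfaces entries carrying a spcsContainerRuntime block; a hedged example with a made-up payload, shaped after the field aliases above:

# Hypothetical payload; field names are the aliases declared above, the values are invented.
sample = (
    '{"runtime_a": {"spcsContainerRuntime": {"pythonVersion": "3.10", "hardwareType": "CPU",'
    ' "runtimeContainerImage": "repo/runtime:1.0"}}, "schemaVersion": "1"}'
)
envs = RuntimeEnvironmentsDict.model_validate_json(sample)  # "schemaVersion" is dropped by the pre-validator
for rt in envs.get_spcs_container_runtimes():
    print(rt.hardware_type, rt.python_version, rt.runtime_container_image)
# CPU 3.10 repo/runtime:1.0
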
@@ -47,8 +47,8 @@ def get_first_instance(service_name: str) -> Optional[tuple[str, str, str]]:
     if not result:
         return None
 
-    # Sort by start_time first, then by instance_id
-    sorted_instances = sorted(result, key=lambda x: (x["start_time"], int(x["instance_id"])))
+    # Sort by start_time first, then by instance_id. If start_time is null/empty, it will be sorted to the end.
+    sorted_instances = sorted(result, key=lambda x: (not bool(x["start_time"]), x["start_time"], int(x["instance_id"])))
     head_instance = sorted_instances[0]
     if not head_instance["instance_id"] or not head_instance["ip_address"]:
         return None
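
The extra leading boolean in the sort key is what pushes rows without a start_time to the back; a tiny worked example with plain dicts standing in for the query rows (real rows carry datetime values):

rows = [
    {"instance_id": "1", "start_time": ""},
    {"instance_id": "0", "start_time": "2024-01-01 00:00:02"},
    {"instance_id": "2", "start_time": "2024-01-01 00:00:01"},
]
ordered = sorted(rows, key=lambda x: (not bool(x["start_time"]), x["start_time"], int(x["instance_id"])))
print([r["instance_id"] for r in ordered])  # ['2', '0', '1'] -- the empty start_time sorts last
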
@@ -173,10 +173,10 @@ def wait_for_instances(
 
     start_time = time.time()
     current_interval = max(min(1, check_interval), 0.1)  # Default 1s, minimum 0.1s
-    logger.
+    logger.info(
         "Waiting for instances to be ready "
-        "(min_instances={}, target_instances={}, timeout={}s, max_check_interval={}s)".format(
-            min_instances, target_instances, timeout, check_interval
+        "(min_instances={}, target_instances={}, min_wait_time={}s, timeout={}s, max_check_interval={}s)".format(
+            min_instances, target_instances, min_wait_time, timeout, check_interval
         )
     )
 
@@ -191,7 +191,7 @@ def wait_for_instances(
             logger.info(f"Minimum instance requirement met: {total_nodes} instances available after {elapsed:.1f}s")
             return
 
-        logger.
+        logger.info(
             f"Waiting for instances: current_instances={total_nodes}, min_instances={min_instances}, "
             f"target_instances={target_instances}, elapsed={elapsed:.1f}s, next check in {current_interval:.1f}s"
         )
@@ -199,7 +199,7 @@ def wait_for_instances(
         current_interval = min(current_interval * 2, check_interval)  # Exponential backoff
 
     raise TimeoutError(
-        f"Timed out after {
+        f"Timed out after {elapsed}s waiting for {min_instances} instances, only " f"{total_nodes} available"
     )
 
 
@@ -1,12 +1,14 @@
 import logging
 import os
+import sys
 from math import ceil
 from pathlib import PurePath
-from typing import Any, Optional, Union
+from typing import Any, Literal, Optional, Union
 
 from snowflake import snowpark
 from snowflake.ml._internal.utils import snowflake_env
 from snowflake.ml.jobs._utils import constants, query_helper, types
+from snowflake.ml.jobs._utils.runtime_env_utils import RuntimeEnvironmentsDict
 
 
 def _get_node_resources(session: snowpark.Session, compute_pool: str) -> types.ComputeResources:
@@ -28,22 +30,53 @@ def _get_node_resources(session: snowpark.Session, compute_pool: str) -> types.C
     )
 
 
+def _get_runtime_image(session: snowpark.Session, target_hardware: Literal["CPU", "GPU"]) -> Optional[str]:
+    rows = query_helper.run_query(session, "CALL SYSTEM$NOTEBOOKS_FIND_LABELED_RUNTIMES()")
+    if not rows:
+        return None
+    try:
+        runtime_envs = RuntimeEnvironmentsDict.model_validate_json(rows[0][0])
+        spcs_container_runtimes = runtime_envs.get_spcs_container_runtimes()
+    except Exception as e:
+        logging.warning(f"Failed to parse runtime image name from {rows[0][0]}, error: {e}")
+        return None
+
+    selected_runtime = next(
+        (
+            runtime
+            for runtime in spcs_container_runtimes
+            if (
+                runtime.hardware_type.lower() == target_hardware.lower()
+                and runtime.python_version.major == sys.version_info.major
+                and runtime.python_version.minor == sys.version_info.minor
+            )
+        ),
+        None,
+    )
+    return selected_runtime.runtime_container_image if selected_runtime else None
+
+
 def _get_image_spec(session: snowpark.Session, compute_pool: str) -> types.ImageSpec:
     # Retrieve compute pool node resources
     resources = _get_node_resources(session, compute_pool=compute_pool)
 
     # Use MLRuntime image
-
-
-
+    hardware = "GPU" if resources.gpu > 0 else "CPU"
+    container_image = None
+    if os.environ.get(constants.ENABLE_IMAGE_VERSION_ENV_VAR, "").lower() == "true":
+        container_image = _get_runtime_image(session, hardware)  # type: ignore[arg-type]
+
+    if not container_image:
+        image_repo = constants.DEFAULT_IMAGE_REPO
+        image_name = constants.DEFAULT_IMAGE_GPU if resources.gpu > 0 else constants.DEFAULT_IMAGE_CPU
+        image_tag = _get_runtime_image_tag()
+        container_image = f"{image_repo}/{image_name}:{image_tag}"
 
     # TODO: Should each instance consume the entire pod?
     return types.ImageSpec(
-        repo=image_repo,
-        image_name=image_name,
-        image_tag=image_tag,
         resource_requests=resources,
         resource_limits=resources,
+        container_image=container_image,
     )
 
 
@@ -220,7 +253,7 @@ def generate_service_spec(
         "containers": [
             {
                 "name": constants.DEFAULT_CONTAINER_NAME,
-                "image": image_spec.
+                "image": image_spec.container_image,
                 "command": ["/usr/local/bin/_entrypoint.sh"],
                 "args": [
                     (stage_mount.joinpath(v).as_posix() if isinstance(v, PurePath) else v) for v in payload.entrypoint