snowflake-ml-python 1.10.0__py3-none-any.whl → 1.12.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported public registry. It is provided for informational purposes only.
- snowflake/cortex/_complete.py +3 -2
- snowflake/ml/_internal/utils/service_logger.py +26 -1
- snowflake/ml/experiment/_client/artifact.py +76 -0
- snowflake/ml/experiment/_client/experiment_tracking_sql_client.py +64 -1
- snowflake/ml/experiment/callback/keras.py +63 -0
- snowflake/ml/experiment/callback/lightgbm.py +5 -1
- snowflake/ml/experiment/callback/xgboost.py +5 -1
- snowflake/ml/experiment/experiment_tracking.py +89 -4
- snowflake/ml/feature_store/feature_store.py +1150 -131
- snowflake/ml/feature_store/feature_view.py +122 -0
- snowflake/ml/jobs/_utils/__init__.py +0 -0
- snowflake/ml/jobs/_utils/constants.py +9 -14
- snowflake/ml/jobs/_utils/feature_flags.py +16 -0
- snowflake/ml/jobs/_utils/payload_utils.py +61 -19
- snowflake/ml/jobs/_utils/query_helper.py +5 -1
- snowflake/ml/jobs/_utils/runtime_env_utils.py +63 -0
- snowflake/ml/jobs/_utils/scripts/get_instance_ip.py +18 -7
- snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +15 -7
- snowflake/ml/jobs/_utils/spec_utils.py +44 -13
- snowflake/ml/jobs/_utils/stage_utils.py +22 -9
- snowflake/ml/jobs/_utils/types.py +7 -8
- snowflake/ml/jobs/job.py +34 -18
- snowflake/ml/jobs/manager.py +107 -24
- snowflake/ml/model/__init__.py +6 -1
- snowflake/ml/model/_client/model/batch_inference_specs.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +225 -73
- snowflake/ml/model/_client/ops/service_ops.py +128 -174
- snowflake/ml/model/_client/service/model_deployment_spec.py +123 -64
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +25 -9
- snowflake/ml/model/_model_composer/model_composer.py +1 -70
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +2 -43
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +207 -2
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -1
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +3 -3
- snowflake/ml/model/_signatures/snowpark_handler.py +1 -1
- snowflake/ml/model/_signatures/utils.py +4 -2
- snowflake/ml/model/inference_engine.py +5 -0
- snowflake/ml/model/models/huggingface_pipeline.py +4 -3
- snowflake/ml/model/openai_signatures.py +57 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +43 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +14 -3
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +17 -6
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -1
- snowflake/ml/modeling/cluster/affinity_propagation.py +1 -1
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +1 -1
- snowflake/ml/modeling/cluster/birch.py +1 -1
- snowflake/ml/modeling/cluster/bisecting_k_means.py +1 -1
- snowflake/ml/modeling/cluster/dbscan.py +1 -1
- snowflake/ml/modeling/cluster/feature_agglomeration.py +1 -1
- snowflake/ml/modeling/cluster/k_means.py +1 -1
- snowflake/ml/modeling/cluster/mean_shift.py +1 -1
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +1 -1
- snowflake/ml/modeling/cluster/optics.py +1 -1
- snowflake/ml/modeling/cluster/spectral_biclustering.py +1 -1
- snowflake/ml/modeling/cluster/spectral_clustering.py +1 -1
- snowflake/ml/modeling/cluster/spectral_coclustering.py +1 -1
- snowflake/ml/modeling/compose/column_transformer.py +1 -1
- snowflake/ml/modeling/compose/transformed_target_regressor.py +1 -1
- snowflake/ml/modeling/covariance/elliptic_envelope.py +1 -1
- snowflake/ml/modeling/covariance/empirical_covariance.py +1 -1
- snowflake/ml/modeling/covariance/graphical_lasso.py +1 -1
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +1 -1
- snowflake/ml/modeling/covariance/ledoit_wolf.py +1 -1
- snowflake/ml/modeling/covariance/min_cov_det.py +1 -1
- snowflake/ml/modeling/covariance/oas.py +1 -1
- snowflake/ml/modeling/covariance/shrunk_covariance.py +1 -1
- snowflake/ml/modeling/decomposition/dictionary_learning.py +1 -1
- snowflake/ml/modeling/decomposition/factor_analysis.py +1 -1
- snowflake/ml/modeling/decomposition/fast_ica.py +1 -1
- snowflake/ml/modeling/decomposition/incremental_pca.py +1 -1
- snowflake/ml/modeling/decomposition/kernel_pca.py +1 -1
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +1 -1
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +1 -1
- snowflake/ml/modeling/decomposition/pca.py +1 -1
- snowflake/ml/modeling/decomposition/sparse_pca.py +1 -1
- snowflake/ml/modeling/decomposition/truncated_svd.py +1 -1
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +1 -1
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +1 -1
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/isolation_forest.py +1 -1
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/stacking_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/voting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/voting_regressor.py +1 -1
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fdr.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fpr.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fwe.py +1 -1
- snowflake/ml/modeling/feature_selection/select_k_best.py +1 -1
- snowflake/ml/modeling/feature_selection/select_percentile.py +1 -1
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +1 -1
- snowflake/ml/modeling/feature_selection/variance_threshold.py +1 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +1 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +1 -1
- snowflake/ml/modeling/impute/iterative_imputer.py +1 -1
- snowflake/ml/modeling/impute/knn_imputer.py +1 -1
- snowflake/ml/modeling/impute/missing_indicator.py +1 -1
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +1 -1
- snowflake/ml/modeling/kernel_approximation/nystroem.py +1 -1
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +1 -1
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +1 -1
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +1 -1
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +1 -1
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +1 -1
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ard_regression.py +1 -1
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +1 -1
- snowflake/ml/modeling/linear_model/elastic_net.py +1 -1
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/gamma_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/huber_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/lars.py +1 -1
- snowflake/ml/modeling/linear_model/lars_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +1 -1
- snowflake/ml/modeling/linear_model/linear_regression.py +1 -1
- snowflake/ml/modeling/linear_model/logistic_regression.py +1 -1
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +1 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/perceptron.py +1 -1
- snowflake/ml/modeling/linear_model/poisson_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ransac_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ridge.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_cv.py +1 -1
- snowflake/ml/modeling/linear_model/sgd_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +1 -1
- snowflake/ml/modeling/linear_model/sgd_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +1 -1
- snowflake/ml/modeling/manifold/isomap.py +1 -1
- snowflake/ml/modeling/manifold/mds.py +1 -1
- snowflake/ml/modeling/manifold/spectral_embedding.py +1 -1
- snowflake/ml/modeling/manifold/tsne.py +1 -1
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +1 -1
- snowflake/ml/modeling/mixture/gaussian_mixture.py +1 -1
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +1 -1
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +1 -1
- snowflake/ml/modeling/multiclass/output_code_classifier.py +1 -1
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/complement_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +1 -1
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +1 -1
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +1 -1
- snowflake/ml/modeling/neighbors/kernel_density.py +1 -1
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +1 -1
- snowflake/ml/modeling/neighbors/nearest_centroid.py +1 -1
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +1 -1
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +1 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +1 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +1 -1
- snowflake/ml/modeling/neural_network/mlp_classifier.py +1 -1
- snowflake/ml/modeling/neural_network/mlp_regressor.py +1 -1
- snowflake/ml/modeling/preprocessing/polynomial_features.py +1 -1
- snowflake/ml/modeling/semi_supervised/label_propagation.py +1 -1
- snowflake/ml/modeling/semi_supervised/label_spreading.py +1 -1
- snowflake/ml/modeling/svm/linear_svc.py +1 -1
- snowflake/ml/modeling/svm/linear_svr.py +1 -1
- snowflake/ml/modeling/svm/nu_svc.py +1 -1
- snowflake/ml/modeling/svm/nu_svr.py +1 -1
- snowflake/ml/modeling/svm/svc.py +1 -1
- snowflake/ml/modeling/svm/svr.py +1 -1
- snowflake/ml/modeling/tree/decision_tree_classifier.py +1 -1
- snowflake/ml/modeling/tree/decision_tree_regressor.py +1 -1
- snowflake/ml/modeling/tree/extra_tree_classifier.py +1 -1
- snowflake/ml/modeling/tree/extra_tree_regressor.py +1 -1
- snowflake/ml/modeling/xgboost/xgb_classifier.py +1 -1
- snowflake/ml/modeling/xgboost/xgb_regressor.py +1 -1
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +1 -1
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +1 -1
- snowflake/ml/monitoring/_client/model_monitor_sql_client.py +91 -6
- snowflake/ml/monitoring/_manager/model_monitor_manager.py +3 -0
- snowflake/ml/monitoring/entities/model_monitor_config.py +3 -0
- snowflake/ml/monitoring/model_monitor.py +26 -0
- snowflake/ml/registry/_manager/model_manager.py +7 -35
- snowflake/ml/registry/_manager/model_parameter_reconciler.py +194 -5
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.10.0.dist-info → snowflake_ml_python-1.12.0.dist-info}/METADATA +87 -7
- {snowflake_ml_python-1.10.0.dist-info → snowflake_ml_python-1.12.0.dist-info}/RECORD +205 -197
- {snowflake_ml_python-1.10.0.dist-info → snowflake_ml_python-1.12.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.10.0.dist-info → snowflake_ml_python-1.12.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.10.0.dist-info → snowflake_ml_python-1.12.0.dist-info}/top_level.txt +0 -0
snowflake/ml/feature_store/feature_view.py:

@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import json
+import logging
 import re
 import warnings
 from collections import OrderedDict
@@ -31,10 +32,12 @@ from snowflake.snowpark.types import (
     _NumericType,
 )
 
+_DEFAULT_TARGET_LAG = "10 seconds"
 _FEATURE_VIEW_NAME_DELIMITER = "$"
 _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS = ["FS_TIMESTAMP_COL_PLACEHOLDER_VAL", "NULL"]
 _TIMESTAMP_COL_PLACEHOLDER = "NULL"
 _FEATURE_OBJ_TYPE = "FEATURE_OBJ_TYPE"
+_ONLINE_TABLE_SUFFIX = "$ONLINE"
 # Feature view version rule is aligned with dataset version rule in SQL.
 _FEATURE_VIEW_VERSION_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_.\-]*$")
 _FEATURE_VIEW_VERSION_MAX_LENGTH = 128
@@ -45,6 +48,44 @@ _RESULT_SCAN_QUERY_PATTERN = re.compile(
 )
 
 
+@dataclass(frozen=True)
+class OnlineConfig:
+    """Configuration for online feature storage."""
+
+    enable: bool = False
+    target_lag: Optional[str] = None
+
+    def __post_init__(self) -> None:
+        if self.target_lag is None:
+            return
+        if not isinstance(self.target_lag, str) or not self.target_lag.strip():
+            raise ValueError("target_lag must be a non-empty string")
+
+        object.__setattr__(self, "target_lag", self.target_lag.strip())
+
+    def to_json(self) -> str:
+        data: dict[str, Any] = asdict(self)
+        return json.dumps(data)
+
+    @classmethod
+    def from_json(cls, json_str: str) -> OnlineConfig:
+        data = json.loads(json_str)
+        return cls(**data)
+
+
+class StoreType(Enum):
+    """
+    Enumeration for specifying the storage type when reading from or refreshing feature views.
+
+    The Feature View supports two storage modes:
+    - OFFLINE: Traditional batch storage for historical feature data and training
+    - ONLINE: Low-latency storage optimized for real-time feature serving
+    """
+
+    ONLINE = "online"
+    OFFLINE = "offline"
+
+
 @dataclass(frozen=True)
 class _FeatureViewMetadata:
     """Represent metadata tracked on top of FV backend object"""
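
A minimal sketch of how the new OnlineConfig behaves, assuming it is importable from this module as defined in the hunk above; the values are illustrative:

# Sketch only: OnlineConfig validation and JSON round-trip.
from snowflake.ml.feature_store.feature_view import OnlineConfig, StoreType

config = OnlineConfig(enable=True, target_lag="  15s  ")
print(config.target_lag)  # "15s" -- __post_init__ strips surrounding whitespace

restored = OnlineConfig.from_json(config.to_json())
assert restored == config  # frozen dataclass, so equality is field-wise

print(StoreType.ONLINE.value)  # "online"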
@@ -171,6 +212,7 @@ class FeatureView(lineage_node.LineageNode):
         initialize: str = "ON_CREATE",
         refresh_mode: str = "AUTO",
         cluster_by: Optional[list[str]] = None,
+        online_config: Optional[OnlineConfig] = None,
         **_kwargs: Any,
     ) -> None:
         """
@@ -204,6 +246,8 @@ class FeatureView(lineage_node.LineageNode):
             cluster_by: Columns to cluster the feature view by.
                 - Defaults to the join keys from entities.
                 - If `timestamp_col` is provided, it is added to the default clustering keys.
+            online_config: Optional configuration for online storage. If provided with enable=True,
+                online storage will be enabled. Defaults to None (no online storage).
             _kwargs: reserved kwargs for system generated args. NOTE: DO NOT USE.
 
         Example::
@@ -227,9 +271,26 @@ class FeatureView(lineage_node.LineageNode):
             >>> registered_fv = fs.register_feature_view(draft_fv, "v1")
             >>> print(registered_fv.status)
             FeatureViewStatus.ACTIVE
+            <BLANKLINE>
+            >>> # Example with online configuration for online feature storage
+            >>> config = OnlineConfig(enable=True, target_lag='15s')
+            >>> online_fv = FeatureView(
+            ...     name="my_online_fv",
+            ...     entities=[e1, e2],
+            ...     feature_df=feature_df,
+            ...     timestamp_col='TS',
+            ...     refresh_freq='1d',
+            ...     desc='Feature view with online storage',
+            ...     online_config=config  # optional, enables online feature storage
+            ... )
+            >>> registered_online_fv = fs.register_feature_view(online_fv, "v1")
+            >>> print(registered_online_fv.online)
+            True
 
         # noqa: DAR401
         """
+        if online_config is not None:
+            logging.warning("'online_config' is in private preview since 1.12.0. Do not use it in production.")
 
         self._name: SqlIdentifier = SqlIdentifier(name)
         self._entities: list[Entity] = entities
@@ -257,6 +318,7 @@ class FeatureView(lineage_node.LineageNode):
         self._cluster_by: list[SqlIdentifier] = (
             [SqlIdentifier(col) for col in cluster_by] if cluster_by is not None else self._get_default_cluster_by()
         )
+        self._online_config: Optional[OnlineConfig] = online_config
 
         # Validate kwargs
         if _kwargs:
@@ -470,6 +532,31 @@ class FeatureView(lineage_node.LineageNode):
     def feature_descs(self) -> Optional[dict[SqlIdentifier, str]]:
         return self._feature_desc
 
+    @property
+    def online(self) -> bool:
+        return self._online_config.enable if self._online_config else False
+
+    @property
+    def online_config(self) -> Optional[OnlineConfig]:
+        return self._online_config
+
+    def fully_qualified_online_table_name(self) -> str:
+        """Get the fully qualified name for the online feature table.
+
+        Returns:
+            The fully qualified name (<database_name>.<schema_name>.<online_table_name>) for the
+            online feature table in Snowflake.
+
+        Raises:
+            RuntimeError: if the FeatureView is not registered or not configured for online storage.
+        """
+        if self.status == FeatureViewStatus.DRAFT or self.version is None:
+            raise RuntimeError(f"FeatureView {self.name} has not been registered.")
+        if not self.online:
+            raise RuntimeError(f"FeatureView {self.name} is not configured for online storage.")
+        online_table_name = self._get_online_table_name(self.name, self.version)
+        return f"{self._database}.{self._schema}.{online_table_name}"
+
     def list_columns(self) -> DataFrame:
         """List all columns and their information.
 
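
A short sketch of the guard behavior added above; the entity and dataframe here are hypothetical placeholders:

# Sketch only: an unregistered draft view refuses to produce an online table name.
fv = FeatureView(
    name="MY_FV",
    entities=[my_entity],      # hypothetical Entity
    feature_df=my_feature_df,  # hypothetical Snowpark DataFrame
    online_config=OnlineConfig(enable=True),
)
print(fv.online)  # True
try:
    fv.fully_qualified_online_table_name()
except RuntimeError as err:
    print(err)  # FeatureView MY_FV has not been registered.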
@@ -756,6 +843,8 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
             feature_desc_dict[k.identifier()] = v
         fv_dict["_feature_desc"] = feature_desc_dict
 
+        fv_dict["_online_config"] = self._online_config.to_json() if self._online_config is not None else None
+
         lineage_node_keys = [key for key in fv_dict if key.startswith("_node") or key == "_session"]
 
         for key in lineage_node_keys:
@@ -844,6 +933,9 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
             owner=json_dict["_owner"],
             infer_schema_df=session.sql(json_dict.get("_infer_schema_query", None)),
             session=session,
+            online_config=OnlineConfig.from_json(json_dict["_online_config"])
+            if json_dict.get("_online_config")
+            else None,
         )
 
     def _get_compact_repr(self) -> _CompactRepresentation:
@@ -916,6 +1008,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
         infer_schema_df: Optional[DataFrame],
         session: Session,
         cluster_by: Optional[list[str]] = None,
+        online_config: Optional[OnlineConfig] = None,
     ) -> FeatureView:
         fv = FeatureView(
             name=name,
@@ -925,6 +1018,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
             desc=desc,
             _infer_schema_df=infer_schema_df,
             cluster_by=cluster_by,
+            online_config=online_config,
         )
         fv._version = FeatureViewVersion(version) if version is not None else None
         fv._status = status
@@ -961,5 +1055,33 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
 
         return default_cluster_by_cols
 
+    @staticmethod
+    def _get_online_table_name(
+        feature_view_name: Union[SqlIdentifier, str], version: Optional[Union[FeatureViewVersion, str]] = None
+    ) -> SqlIdentifier:
+        """Get the online feature table name without qualification.
+
+        Args:
+            feature_view_name: Offline feature view name.
+            version: Feature view version. If not provided, feature_view_name must be a SqlIdentifier.
+
+        Returns:
+            The online table name SqlIdentifier
+        """
+        if version is None:
+            assert isinstance(feature_view_name, SqlIdentifier), "Single argument must be SqlIdentifier"
+            online_name = f"{feature_view_name.resolved()}{_ONLINE_TABLE_SUFFIX}"
+            return SqlIdentifier(online_name, case_sensitive=True)
+        else:
+            fv_name = (
+                feature_view_name
+                if isinstance(feature_view_name, SqlIdentifier)
+                else SqlIdentifier(feature_view_name, case_sensitive=True)
+            )
+            fv_version = version if isinstance(version, FeatureViewVersion) else FeatureViewVersion(version)
+            physical_name = FeatureView._get_physical_name(fv_name, fv_version).resolved()
+            online_name = f"{physical_name}{_ONLINE_TABLE_SUFFIX}"
+            return SqlIdentifier(online_name, case_sensitive=True)
+
 
 lineage_node.DOMAIN_LINEAGE_REGISTRY["feature_view"] = FeatureView
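
Assuming the module's existing physical-name scheme joins name and version with the "$" delimiter defined at the top of the file, the helper above produces names like the following (illustrative; FeatureView._get_physical_name itself is unchanged by this diff):

physical_name = "MY_FV$V1"                # assumed NAME$VERSION form
online_table = f"{physical_name}$ONLINE"  # _ONLINE_TABLE_SUFFIX
print(online_table)                       # MY_FV$V1$ONLINE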
snowflake/ml/jobs/_utils/__init__.py: file without changes.

snowflake/ml/jobs/_utils/constants.py:

@@ -3,32 +3,29 @@ from snowflake.ml.jobs._utils.types import ComputeResources
 
 # SPCS specification constants
 DEFAULT_CONTAINER_NAME = "main"
+MEMORY_VOLUME_NAME = "dshm"
+STAGE_VOLUME_NAME = "stage-volume"
+
+# Environment variables
+STAGE_MOUNT_PATH_ENV_VAR = "MLRS_STAGE_MOUNT_PATH"
 PAYLOAD_DIR_ENV_VAR = "MLRS_PAYLOAD_DIR"
 RESULT_PATH_ENV_VAR = "MLRS_RESULT_PATH"
 MIN_INSTANCES_ENV_VAR = "MLRS_MIN_INSTANCES"
 TARGET_INSTANCES_ENV_VAR = "SNOWFLAKE_JOBS_COUNT"
 RUNTIME_IMAGE_TAG_ENV_VAR = "MLRS_CONTAINER_IMAGE_TAG"
-MEMORY_VOLUME_NAME = "dshm"
-STAGE_VOLUME_NAME = "stage-volume"
-# Base mount path
-STAGE_VOLUME_MOUNT_PATH = "/mnt/job_stage"
 
-# Stage
+# Stage mount paths
+STAGE_VOLUME_MOUNT_PATH = "/mnt/job_stage"
 APP_STAGE_SUBPATH = "app"
 SYSTEM_STAGE_SUBPATH = "system"
 OUTPUT_STAGE_SUBPATH = "output"
-
-# Complete mount paths (automatically generated from base + subpath)
-APP_MOUNT_PATH = f"{STAGE_VOLUME_MOUNT_PATH}/{APP_STAGE_SUBPATH}"
-SYSTEM_MOUNT_PATH = f"{STAGE_VOLUME_MOUNT_PATH}/{SYSTEM_STAGE_SUBPATH}"
-OUTPUT_MOUNT_PATH = f"{STAGE_VOLUME_MOUNT_PATH}/{OUTPUT_STAGE_SUBPATH}"
-
+RESULT_PATH_DEFAULT_VALUE = f"{OUTPUT_STAGE_SUBPATH}/mljob_result.pkl"
 
 # Default container image information
 DEFAULT_IMAGE_REPO = "/snowflake/images/snowflake_images"
 DEFAULT_IMAGE_CPU = "st_plat/runtime/x86/runtime_image/snowbooks"
 DEFAULT_IMAGE_GPU = "st_plat/runtime/x86/generic_gpu/runtime_image/snowbooks"
-DEFAULT_IMAGE_TAG = "1.
+DEFAULT_IMAGE_TAG = "1.6.2"
 DEFAULT_ENTRYPOINT_PATH = "func.py"
 
 # Percent of container memory to allocate for /dev/shm volume
@@ -59,8 +56,6 @@ ENABLE_HEALTH_CHECKS = "false"
 JOB_POLL_INITIAL_DELAY_SECONDS = 0.1
 JOB_POLL_MAX_DELAY_SECONDS = 30
 
-RESULT_PATH_DEFAULT_VALUE = f"{OUTPUT_MOUNT_PATH}/mljob_result.pkl"
-
 # Log start and end messages
 LOG_START_MSG = "--------------------------------\nML job started\n--------------------------------"
 LOG_END_MSG = "--------------------------------\nML job finished\n--------------------------------"
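
With the precomputed APP_MOUNT_PATH, SYSTEM_MOUNT_PATH, and OUTPUT_MOUNT_PATH constants removed, full container paths are now composed at the call site from the base mount path plus a stage-relative subpath. A standalone sketch using the values above:

import os

STAGE_VOLUME_MOUNT_PATH = "/mnt/job_stage"
OUTPUT_STAGE_SUBPATH = "output"
RESULT_PATH_DEFAULT_VALUE = f"{OUTPUT_STAGE_SUBPATH}/mljob_result.pkl"  # now stage-relative

print(os.path.join(STAGE_VOLUME_MOUNT_PATH, RESULT_PATH_DEFAULT_VALUE))
# /mnt/job_stage/output/mljob_result.pkl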
snowflake/ml/jobs/_utils/feature_flags.py (new file):

@@ -0,0 +1,16 @@
+import os
+from enum import Enum
+
+
+class FeatureFlags(Enum):
+    USE_SUBMIT_JOB_V2 = "MLRS_USE_SUBMIT_JOB_V2"
+    ENABLE_IMAGE_VERSION_ENV_VAR = "MLRS_ENABLE_RUNTIME_VERSIONS"
+
+    def is_enabled(self) -> bool:
+        return os.getenv(self.value, "false").lower() == "true"
+
+    def is_disabled(self) -> bool:
+        return not self.is_enabled()
+
+    def __str__(self) -> str:
+        return self.value
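
Usage sketch for the new FeatureFlags helper; the check is case-insensitive and an unset variable counts as disabled:

import os

from snowflake.ml.jobs._utils.feature_flags import FeatureFlags

os.environ["MLRS_USE_SUBMIT_JOB_V2"] = "TRUE"
assert FeatureFlags.USE_SUBMIT_JOB_V2.is_enabled()              # "TRUE".lower() == "true"
assert FeatureFlags.ENABLE_IMAGE_VERSION_ENV_VAR.is_disabled()  # unset -> "false"
print(FeatureFlags.USE_SUBMIT_JOB_V2)                           # MLRS_USE_SUBMIT_JOB_V2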
snowflake/ml/jobs/_utils/payload_utils.py:

@@ -1,4 +1,5 @@
 import functools
+import importlib
 import inspect
 import io
 import itertools
@@ -7,6 +8,7 @@ import logging
 import pickle
 import sys
 import textwrap
+from importlib.abc import Traversable
 from pathlib import Path, PurePath
 from typing import Any, Callable, Optional, Union, cast, get_args, get_origin
 
@@ -58,7 +60,7 @@ _STARTUP_SCRIPT_CODE = textwrap.dedent(
 
     # Change directory to user payload directory
     if [ -n "${constants.PAYLOAD_DIR_ENV_VAR}" ]; then
-        cd ${constants.PAYLOAD_DIR_ENV_VAR}
+        cd ${constants.STAGE_MOUNT_PATH_ENV_VAR}/${constants.PAYLOAD_DIR_ENV_VAR}
     fi
 
     ##### Set up Python environment #####
@@ -67,7 +69,10 @@ _STARTUP_SCRIPT_CODE = textwrap.dedent(
 
    if [ -f "${{MLRS_SYSTEM_REQUIREMENTS_FILE}}" ]; then
        echo "Installing packages from $MLRS_SYSTEM_REQUIREMENTS_FILE"
-       pip install -r $MLRS_SYSTEM_REQUIREMENTS_FILE
+       if ! pip install --no-index -r $MLRS_SYSTEM_REQUIREMENTS_FILE; then
+           echo "Offline install failed, falling back to regular pip install"
+           pip install -r $MLRS_SYSTEM_REQUIREMENTS_FILE
+       fi
    fi
 
    MLRS_REQUIREMENTS_FILE=${{MLRS_REQUIREMENTS_FILE:-"requirements.txt"}}
@@ -262,11 +267,24 @@ def upload_payloads(session: snowpark.Session, stage_path: PurePath, *payload_sp
     # Manually traverse the directory and upload each file, since Snowflake PUT
     # can't handle directories. Reduce the number of PUT operations by using
     # wildcard patterns to batch upload files with the same extension.
-
-
-
-
-
+    upload_path_patterns = set()
+    for p in source_path.resolve().rglob("*"):
+        if p.is_dir():
+            continue
+        if p.name.startswith("."):
+            # Hidden files: use .* pattern for batch upload
+            if p.suffix:
+                upload_path_patterns.add(p.parent.joinpath(f".*{p.suffix}"))
+            else:
+                upload_path_patterns.add(p.parent.joinpath(".*"))
+        else:
+            # Regular files: use * pattern for batch upload
+            if p.suffix:
+                upload_path_patterns.add(p.parent.joinpath(f"*{p.suffix}"))
+            else:
+                upload_path_patterns.add(p)
+
+    for path in upload_path_patterns:
         session.file.put(
             str(path),
             payload_stage_path.joinpath(path.parent.relative_to(source_path)).as_posix(),
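
A standalone sketch of the pattern-batching logic added above: many files collapse into a few wildcard PUT patterns (pure pathlib, no Snowflake session needed):

from pathlib import Path

def batch_patterns(source_path: Path) -> set[Path]:
    patterns: set[Path] = set()
    for p in source_path.resolve().rglob("*"):
        if p.is_dir():
            continue
        if p.name.startswith("."):
            # Hidden files share a ".*<suffix>" (or ".*") pattern per directory
            patterns.add(p.parent / (f".*{p.suffix}" if p.suffix else ".*"))
        elif p.suffix:
            # Regular files with an extension share one "*<suffix>" pattern per directory
            patterns.add(p.parent / f"*{p.suffix}")
        else:
            # Extensionless regular files are uploaded individually
            patterns.add(p)
    return patterns

# e.g. src/a.py, src/b.py, src/.env  ->  {src/*.py, src/.*}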
@@ -282,6 +300,27 @@ def upload_payloads(session: snowpark.Session, stage_path: PurePath, *payload_sp
     )
 
 
+def upload_system_resources(session: snowpark.Session, stage_path: PurePath) -> None:
+    resource_ref = importlib.resources.files(__package__).joinpath("scripts")
+
+    def upload_dir(ref: Traversable, relative_path: str = "") -> None:
+        for item in ref.iterdir():
+            current_path = Path(relative_path) / item.name if relative_path else Path(item.name)
+            if item.is_dir():
+                # Recursively process subdirectories
+                upload_dir(item, str(current_path))
+            elif item.is_file():
+                content = item.read_bytes()
+                session.file.put_stream(
+                    io.BytesIO(content),
+                    stage_path.joinpath(current_path).as_posix(),
+                    auto_compress=False,
+                    overwrite=True,
+                )
+
+    upload_dir(resource_ref)
+
+
 def resolve_source(
     source: Union[types.PayloadPath, Callable[..., Any]]
 ) -> Union[types.PayloadPath, Callable[..., Any]]:
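
The new helper walks packaged resources via importlib.resources instead of globbing the filesystem, so it also works when the package is imported from a zip or wheel. A sketch of the same traversal that just lists the bundled scripts:

# Sketch only: enumerate the packaged scripts the helper would upload.
import importlib.resources

scripts = importlib.resources.files("snowflake.ml.jobs._utils").joinpath("scripts")
for item in scripts.iterdir():
    if item.is_file():
        print(item.name)  # e.g. mljob_launcher.py, get_instance_ip.py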
@@ -497,29 +536,32 @@ class JobPayload:
             overwrite=False,  # FIXME
         )
 
-
-        for script_file in scripts_dir.glob("*"):
-            if script_file.is_file():
-                session.file.put(
-                    script_file.as_posix(),
-                    system_stage_path.as_posix(),
-                    overwrite=True,
-                    auto_compress=False,
-                )
+        upload_system_resources(session, system_stage_path)
         python_entrypoint: list[Union[str, PurePath]] = [
-            PurePath(
-            PurePath(
+            PurePath(constants.STAGE_VOLUME_MOUNT_PATH, constants.SYSTEM_STAGE_SUBPATH, "mljob_launcher.py"),
+            PurePath(
+                constants.STAGE_VOLUME_MOUNT_PATH,
+                constants.APP_STAGE_SUBPATH,
+                entrypoint.file_path.relative_to(source).as_posix(),
+            ),
         ]
         if entrypoint.main_func:
             python_entrypoint += ["--script_main_func", entrypoint.main_func]
 
+        env_vars = {
+            constants.STAGE_MOUNT_PATH_ENV_VAR: constants.STAGE_VOLUME_MOUNT_PATH,
+            constants.PAYLOAD_DIR_ENV_VAR: constants.APP_STAGE_SUBPATH,
+            constants.RESULT_PATH_ENV_VAR: constants.RESULT_PATH_DEFAULT_VALUE,
+        }
+
         return types.UploadedPayload(
             stage_path=stage_path,
             entrypoint=[
                 "bash",
-                f"{constants.
+                f"{constants.STAGE_VOLUME_MOUNT_PATH}/{constants.SYSTEM_STAGE_SUBPATH}/{_STARTUP_SCRIPT_PATH}",
                 *python_entrypoint,
             ],
+            env_vars=env_vars,
         )
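
The uploaded payload now carries stage-relative environment variables and an entrypoint built from the mount-path constants. A sketch of the resulting command; the startup-script and user-entrypoint filenames are assumptions, since _STARTUP_SCRIPT_PATH and the user payload are defined elsewhere:

from pathlib import PurePath

STAGE = "/mnt/job_stage"  # constants.STAGE_VOLUME_MOUNT_PATH
entrypoint = [
    "bash",
    f"{STAGE}/system/startup.sh",  # assumed value of _STARTUP_SCRIPT_PATH
    str(PurePath(STAGE, "system", "mljob_launcher.py")),
    str(PurePath(STAGE, "app", "main.py")),  # hypothetical user entrypoint
]
env_vars = {
    "MLRS_STAGE_MOUNT_PATH": STAGE,
    "MLRS_PAYLOAD_DIR": "app",
    "MLRS_RESULT_PATH": "output/mljob_result.pkl",
}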
snowflake/ml/jobs/_utils/query_helper.py:

@@ -4,6 +4,7 @@ from snowflake import snowpark
 from snowflake.snowpark import Row
 from snowflake.snowpark._internal import utils
 from snowflake.snowpark._internal.analyzer import snowflake_plan
+from snowflake.snowpark._internal.utils import is_in_stored_procedure
 
 
 def result_set_to_rows(session: snowpark.Session, result: dict[str, Any]) -> list[Row]:
@@ -14,7 +15,10 @@ def result_set_to_rows(session: snowpark.Session, result: dict[str, Any]) -> lis
 
 @snowflake_plan.SnowflakePlan.Decorator.wrap_exception  # type: ignore[misc]
 def run_query(session: snowpark.Session, query_text: str, params: Optional[Sequence[Any]] = None) -> list[Row]:
-
+    kwargs: dict[str, Any] = {"query": query_text, "params": params}
+    if not is_in_stored_procedure():  # type: ignore[no-untyped-call]
+        kwargs["_force_qmark_paramstyle"] = True
+    result = session._conn.run_query(**kwargs)
     if not isinstance(result, dict) or "data" not in result:
         raise ValueError(f"Unprocessable result: {result}")
     return result_set_to_rows(session, result)
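
With qmark paramstyle forced outside stored procedures, callers can bind positional parameters with "?" placeholders. A hedged usage sketch, assuming an existing Snowpark session and a hypothetical table:

from snowflake.ml.jobs._utils.query_helper import run_query

# "?" placeholders are bound positionally from params.
rows = run_query(session, "select name, status from my_jobs where status = ?", params=["RUNNING"])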
snowflake/ml/jobs/_utils/runtime_env_utils.py (new file):

@@ -0,0 +1,63 @@
+from typing import Any, Optional, Union
+
+from packaging.version import Version
+from pydantic import BaseModel, Field, RootModel, field_validator
+
+
+class SpcsContainerRuntime(BaseModel):
+    python_version: Version = Field(alias="pythonVersion")
+    hardware_type: str = Field(alias="hardwareType")
+    runtime_container_image: str = Field(alias="runtimeContainerImage")
+
+    @field_validator("python_version", mode="before")
+    @classmethod
+    def validate_python_version(cls, v: Union[str, Version]) -> Version:
+        if isinstance(v, Version):
+            return v
+        try:
+            return Version(v)
+        except Exception:
+            raise ValueError(f"Invalid Python version format: {v}")
+
+    class Config:
+        frozen = True
+        extra = "allow"
+        arbitrary_types_allowed = True
+
+
+class RuntimeEnvironmentEntry(BaseModel):
+    spcs_container_runtime: Optional[SpcsContainerRuntime] = Field(alias="spcsContainerRuntime", default=None)
+
+    class Config:
+        extra = "allow"
+        frozen = True
+
+
+class RuntimeEnvironmentsDict(RootModel[dict[str, RuntimeEnvironmentEntry]]):
+    @field_validator("root", mode="before")
+    @classmethod
+    def _filter_to_dict_entries(cls, data: Any) -> dict[str, dict[str, Any]]:
+        """
+        Pre-validation hook: keep only those items at the root level
+        whose values are dicts. Non-dict values will be dropped.
+
+        Args:
+            data: The input data to filter, expected to be a dictionary.
+
+        Returns:
+            A dictionary containing only the key-value pairs where values are dictionaries.
+
+        Raises:
+            ValueError: If input data is not a dictionary.
+        """
+        # If the entire root is not a dict, raise error immediately
+        if not isinstance(data, dict):
+            raise ValueError(f"Expected dictionary data, but got {type(data).__name__}: {data}")
+
+        # Filter out any key whose value is not a dict
+        return {key: value for key, value in data.items() if isinstance(value, dict)}
+
+    def get_spcs_container_runtimes(self) -> list[SpcsContainerRuntime]:
+        return [
+            entry.spcs_container_runtime for entry in self.root.values() if entry.spcs_container_runtime is not None
+        ]
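
A sketch of validating a runtime-environments payload with the new models; the payload shape is illustrative, inferred from the field aliases above:

from snowflake.ml.jobs._utils.runtime_env_utils import RuntimeEnvironmentsDict

payload = {
    "runtime_1.0": {
        "spcsContainerRuntime": {
            "pythonVersion": "3.10",
            "hardwareType": "CPU",
            "runtimeContainerImage": "st_plat/runtime/x86/runtime_image/snowbooks:1.6.2",
        }
    },
    "note": "non-dict entries like this string are dropped by the pre-validator",
}
envs = RuntimeEnvironmentsDict.model_validate(payload)
for rt in envs.get_spcs_container_runtimes():
    print(rt.python_version, rt.hardware_type)  # 3.10 CPU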
snowflake/ml/jobs/_utils/scripts/get_instance_ip.py:

@@ -41,18 +41,29 @@ def get_first_instance(service_name: str) -> Optional[tuple[str, str, str]]:
     from snowflake.runtime.utils import session_utils
 
     session = session_utils.get_session()
-
-    result = df.select('"instance_id"', '"ip_address"', '"start_time"', '"status"').collect()
+    result = session.sql(f"show service instances in service {service_name}").collect()
 
     if not result:
         return None
-
-    #
-
-
+    # we have already integrated with first_instance startup policy,
+    # the instance 0 is guaranteed to be the head instance
+    head_instance = next(
+        (
+            row
+            for row in result
+            if "instance_id" in row and row["instance_id"] is not None and int(row["instance_id"]) == 0
+        ),
+        None,
+    )
+    # fallback to find the first instance if the instance 0 is not found
+    if not head_instance:
+        # Sort by start_time first, then by instance_id. If start_time is null/empty, it will be sorted to the end.
+        sorted_instances = sorted(
+            result, key=lambda x: (not bool(x["start_time"]), x["start_time"], int(x["instance_id"]))
+        )
+        head_instance = sorted_instances[0]
     if not head_instance["instance_id"] or not head_instance["ip_address"]:
         return None
-
     # Validate head instance IP
     ip_address = head_instance["ip_address"]
     try:
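
A standalone sketch of the fallback ordering added above: rows with an empty start_time sort last, earlier start_time wins, and ties break by numeric instance_id:

rows = [
    {"instance_id": "2", "start_time": "2025-01-01T00:00:02"},
    {"instance_id": "1", "start_time": ""},
    {"instance_id": "3", "start_time": "2025-01-01T00:00:01"},
]
head = sorted(rows, key=lambda x: (not bool(x["start_time"]), x["start_time"], int(x["instance_id"])))[0]
print(head["instance_id"])  # 3 -- earliest non-empty start_time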
snowflake/ml/jobs/_utils/scripts/mljob_launcher.py:

@@ -48,8 +48,8 @@ MIN_INSTANCES_ENV_VAR = getattr(constants, "MIN_INSTANCES_ENV_VAR", "MLRS_MIN_IN
 TARGET_INSTANCES_ENV_VAR = getattr(constants, "TARGET_INSTANCES_ENV_VAR", "SNOWFLAKE_JOBS_COUNT")
 
 # Fallbacks in case of SnowML version mismatch
+STAGE_MOUNT_PATH_ENV_VAR = getattr(constants, "STAGE_MOUNT_PATH_ENV_VAR", "MLRS_STAGE_MOUNT_PATH")
 RESULT_PATH_ENV_VAR = getattr(constants, "RESULT_PATH_ENV_VAR", "MLRS_RESULT_PATH")
-JOB_RESULT_PATH = os.environ.get(RESULT_PATH_ENV_VAR, "/mnt/job_stage/output/mljob_result.pkl")
 PAYLOAD_DIR_ENV_VAR = getattr(constants, "PAYLOAD_DIR_ENV_VAR", "MLRS_PAYLOAD_DIR")
 
 # Constants for the wait_for_instances function
@@ -57,6 +57,9 @@ MIN_WAIT_TIME = float(os.getenv("MLRS_INSTANCES_MIN_WAIT") or -1)  # seconds
 TIMEOUT = float(os.getenv("MLRS_INSTANCES_TIMEOUT") or 720)  # seconds
 CHECK_INTERVAL = float(os.getenv("MLRS_INSTANCES_CHECK_INTERVAL") or 10)  # seconds
 
+STAGE_MOUNT_PATH = os.environ.get(STAGE_MOUNT_PATH_ENV_VAR, "/mnt/job_stage")
+JOB_RESULT_PATH = os.environ.get(RESULT_PATH_ENV_VAR, "output/mljob_result.pkl")
+
 
 try:
     from snowflake.ml.jobs._utils.interop_utils import ExecutionResult
@@ -173,10 +176,10 @@ def wait_for_instances(
 
     start_time = time.time()
     current_interval = max(min(1, check_interval), 0.1)  # Default 1s, minimum 0.1s
-    logger.
+    logger.info(
         "Waiting for instances to be ready "
-        "(min_instances={}, target_instances={}, timeout={}s, max_check_interval={}s)".format(
-            min_instances, target_instances, timeout, check_interval
+        "(min_instances={}, target_instances={}, min_wait_time={}s, timeout={}s, max_check_interval={}s)".format(
+            min_instances, target_instances, min_wait_time, timeout, check_interval
        )
     )
 
@@ -226,6 +229,8 @@ def run_script(script_path: str, *script_args: Any, main_func: Optional[str] = N
     # This is needed because mljob_launcher.py is now in /mnt/job_stage/system
     # but user scripts are in the payload directory and may import from each other
     payload_dir = os.environ.get(PAYLOAD_DIR_ENV_VAR)
+    if payload_dir and not os.path.isabs(payload_dir):
+        payload_dir = os.path.join(STAGE_MOUNT_PATH, payload_dir)
     if payload_dir and payload_dir not in sys.path:
         sys.path.insert(0, payload_dir)
 
@@ -276,7 +281,10 @@ def main(script_path: str, *script_args: Any, script_main_func: Optional[str] =
         Exception: Re-raises any exception caught during script execution.
     """
     # Ensure the output directory exists before trying to write result files.
-
+    result_abs_path = (
+        JOB_RESULT_PATH if os.path.isabs(JOB_RESULT_PATH) else os.path.join(STAGE_MOUNT_PATH, JOB_RESULT_PATH)
+    )
+    output_dir = os.path.dirname(result_abs_path)
     os.makedirs(output_dir, exist_ok=True)
 
     try:
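
The launcher now resolves the result path against the stage mount unless an absolute path was supplied, which keeps older clients that set an absolute MLRS_RESULT_PATH working. A standalone sketch:

import os

STAGE_MOUNT_PATH = "/mnt/job_stage"
for job_result_path in ("output/mljob_result.pkl", "/custom/result.pkl"):
    print(job_result_path if os.path.isabs(job_result_path) else os.path.join(STAGE_MOUNT_PATH, job_result_path))
# /mnt/job_stage/output/mljob_result.pkl
# /custom/result.pkl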
@@ -317,7 +325,7 @@ def main(script_path: str, *script_args: Any, script_main_func: Optional[str] =
     result_dict = result_obj.to_dict()
     try:
         # Serialize result using cloudpickle
-        result_pickle_path =
+        result_pickle_path = result_abs_path
         with open(result_pickle_path, "wb") as f:
             cloudpickle.dump(result_dict, f)  # Pickle dictionary form for compatibility
     except Exception as pkl_exc:
@@ -326,7 +334,7 @@ def main(script_path: str, *script_args: Any, script_main_func: Optional[str] =
         try:
             # Serialize result to JSON as fallback path in case of cross version incompatibility
             # TODO: Manually convert non-serializable types to strings
-            result_json_path = os.path.splitext(
+            result_json_path = os.path.splitext(result_abs_path)[0] + ".json"
             with open(result_json_path, "w") as f:
                 json.dump(result_dict, f, indent=2, cls=SimpleJSONEncoder)
         except Exception as json_exc: