snowflake-ml-python 1.8.5__py3-none-any.whl → 1.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/telemetry.py +6 -9
- snowflake/ml/_internal/utils/connection_params.py +196 -0
- snowflake/ml/_internal/utils/identifier.py +1 -1
- snowflake/ml/_internal/utils/mixins.py +61 -0
- snowflake/ml/jobs/__init__.py +2 -0
- snowflake/ml/jobs/_utils/constants.py +3 -2
- snowflake/ml/jobs/_utils/function_payload_utils.py +43 -0
- snowflake/ml/jobs/_utils/interop_utils.py +63 -4
- snowflake/ml/jobs/_utils/payload_utils.py +89 -40
- snowflake/ml/jobs/_utils/query_helper.py +9 -0
- snowflake/ml/jobs/_utils/scripts/constants.py +19 -3
- snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +8 -26
- snowflake/ml/jobs/_utils/spec_utils.py +29 -5
- snowflake/ml/jobs/_utils/stage_utils.py +119 -0
- snowflake/ml/jobs/_utils/types.py +5 -1
- snowflake/ml/jobs/decorators.py +20 -28
- snowflake/ml/jobs/job.py +197 -61
- snowflake/ml/jobs/manager.py +253 -121
- snowflake/ml/model/_client/model/model_impl.py +58 -0
- snowflake/ml/model/_client/model/model_version_impl.py +90 -0
- snowflake/ml/model/_client/ops/model_ops.py +18 -6
- snowflake/ml/model/_client/ops/service_ops.py +23 -6
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +2 -0
- snowflake/ml/model/_client/sql/service.py +68 -20
- snowflake/ml/model/_client/sql/stage.py +5 -2
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -10
- snowflake/ml/model/_packager/model_env/model_env.py +35 -27
- snowflake/ml/model/_packager/model_handlers/pytorch.py +5 -1
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +103 -73
- snowflake/ml/model/_packager/model_meta/model_meta.py +3 -1
- snowflake/ml/model/_signatures/core.py +24 -0
- snowflake/ml/model/_signatures/snowpark_handler.py +55 -3
- snowflake/ml/model/target_platform.py +11 -0
- snowflake/ml/model/task.py +9 -0
- snowflake/ml/model/type_hints.py +5 -13
- snowflake/ml/modeling/metrics/metrics_utils.py +2 -0
- snowflake/ml/monitoring/explain_visualize.py +2 -2
- snowflake/ml/monitoring/model_monitor.py +0 -4
- snowflake/ml/registry/_manager/model_manager.py +30 -15
- snowflake/ml/registry/registry.py +144 -47
- snowflake/ml/utils/connection_params.py +1 -1
- snowflake/ml/utils/html_utils.py +263 -0
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.8.5.dist-info → snowflake_ml_python-1.9.0.dist-info}/METADATA +64 -19
- {snowflake_ml_python-1.8.5.dist-info → snowflake_ml_python-1.9.0.dist-info}/RECORD +48 -41
- snowflake/ml/monitoring/model_monitor_version.py +0 -1
- {snowflake_ml_python-1.8.5.dist-info → snowflake_ml_python-1.9.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.8.5.dist-info → snowflake_ml_python-1.9.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.8.5.dist-info → snowflake_ml_python-1.9.0.dist-info}/top_level.txt +0 -0
snowflake/ml/jobs/_utils/payload_utils.py
CHANGED
@@ -12,10 +12,17 @@ import cloudpickle as cp
 from packaging import version
 
 from snowflake import snowpark
-from snowflake.
-from snowflake.
+from snowflake.connector import errors
+from snowflake.ml.jobs._utils import (
+    constants,
+    function_payload_utils,
+    stage_utils,
+    types,
+)
 from snowflake.snowpark._internal import code_generation
 
+cp.register_pickle_by_value(function_payload_utils)
+
 _SUPPORTED_ARG_TYPES = {str, int, float}
 _SUPPORTED_ENTRYPOINT_EXTENSIONS = {".py"}
 _ENTRYPOINT_FUNC_NAME = "func"
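The new module-level `cp.register_pickle_by_value(function_payload_utils)` call makes cloudpickle serialize that module by value rather than by reference, so a pickled `FunctionPayload` can be unpickled on the server even when the server-side snowml build does not ship `function_payload_utils`. A minimal sketch of the mechanism (the `helpers` module is a hypothetical stand-in):

import cloudpickle

import helpers  # hypothetical local module, not installed on the remote side

# By default, functions from importable modules are pickled "by reference"
# (only the module path is stored), so unpickling would try to re-import
# helpers. Registering the module by value embeds its code in the pickle.
cloudpickle.register_pickle_by_value(helpers)
blob = cloudpickle.dumps(helpers.some_function)  # self-contained payload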
@@ -217,20 +224,23 @@ _STARTUP_SCRIPT_CODE = textwrap.dedent(
 ).strip()
 
 
-def resolve_source(
+def resolve_source(
+    source: Union[Path, stage_utils.StagePath, Callable[..., Any]]
+) -> Union[Path, stage_utils.StagePath, Callable[..., Any]]:
     if callable(source):
         return source
-    elif isinstance(source, Path):
-        # Validate source
-        source = source
+    elif isinstance(source, (Path, stage_utils.StagePath)):
         if not source.exists():
             raise FileNotFoundError(f"{source} does not exist")
         return source.absolute()
     else:
-        raise ValueError("Unsupported source type. Source must be a file, directory, or callable.")
+        raise ValueError("Unsupported source type. Source must be a stage, file, directory, or callable.")
 
 
-def resolve_entrypoint(
+def resolve_entrypoint(
+    source: Union[Path, stage_utils.StagePath, Callable[..., Any]],
+    entrypoint: Optional[Union[stage_utils.StagePath, Path]],
+) -> types.PayloadEntrypoint:
     if callable(source):
         # Entrypoint is generated for callable payloads
         return types.PayloadEntrypoint(
@@ -245,11 +255,11 @@ def resolve_entrypoint(source: Union[Path, Callable[..., Any]], entrypoint: Opti
             # Infer entrypoint from source
             entrypoint = parent
         else:
-            raise ValueError("
+            raise ValueError("Entrypoint must be provided when source is a directory")
     elif entrypoint.is_absolute():
         # Absolute path - validate it's a subpath of source dir
         if not entrypoint.is_relative_to(parent):
-            raise ValueError(f"Entrypoint must be a subpath of {parent}, got: {entrypoint}
+            raise ValueError(f"Entrypoint must be a subpath of {parent}, got: {entrypoint}")
     else:
         # Relative path
         if (abs_entrypoint := entrypoint.absolute()).is_relative_to(parent) and abs_entrypoint.is_file():
@@ -265,6 +275,7 @@ def resolve_entrypoint(source: Union[Path, Callable[..., Any]], entrypoint: Opti
                 "Entrypoint not found. Ensure the entrypoint is a valid file and is under"
                 f" the source directory (source={parent}, entrypoint={entrypoint})"
             )
+
     if entrypoint.suffix not in _SUPPORTED_ENTRYPOINT_EXTENSIONS:
         raise ValueError(
             "Unsupported entrypoint type:"
@@ -285,8 +296,9 @@ class JobPayload:
         *,
         pip_requirements: Optional[list[str]] = None,
     ) -> None:
-
-        self.
+        # for stage path like snow://domain....., Path(path) will remove duplicate /, it will become snow:/domain...
+        self.source = stage_utils.identify_stage_path(source) if isinstance(source, str) else source
+        self.entrypoint = stage_utils.identify_stage_path(entrypoint) if isinstance(entrypoint, str) else entrypoint
         self.pip_requirements = pip_requirements
 
     def upload(self, session: snowpark.Session, stage_path: Union[str, PurePath]) -> types.UploadedPayload:
@@ -300,17 +312,18 @@ class JobPayload:
         stage_name = stage_path.parts[0].lstrip("@")
         # Explicitly check if stage exists first since we may not have CREATE STAGE privilege
         try:
-            session.
-        except
-            session.
+            session._conn.run_query("describe stage identifier(?)", params=[stage_name], _force_qmark_paramstyle=True)
+        except errors.ProgrammingError:
+            session._conn.run_query(
                 "create stage if not exists identifier(?)"
                 " encryption = ( type = 'SNOWFLAKE_SSE' )"
                 " comment = 'Created by snowflake.ml.jobs Python API'",
                 params=[stage_name],
-
+                _force_qmark_paramstyle=True,
+            )
 
         # Upload payload to stage
-        if not isinstance(source, Path):
+        if not isinstance(source, (Path, stage_utils.StagePath)):
             source_code = generate_python_code(source, source_code_display=True)
             _ = session.file.put_stream(
                 io.BytesIO(source_code.encode()),
@@ -321,27 +334,38 @@ class JobPayload:
             source = Path(entrypoint.file_path.parent)
             if not any(r.startswith("cloudpickle") for r in pip_requirements):
                 pip_requirements.append(f"cloudpickle~={version.parse(cp.__version__).major}.0")
-
-
-            #
-
-
-
-            }
+
+        elif isinstance(source, stage_utils.StagePath):
+            # copy payload to stage
+            if source == entrypoint.file_path:
+                source = source.parent
+            source_path = source.as_posix() + "/"
+            session.sql(f"copy files into {stage_path}/ from {source_path}").collect()
+
+        elif isinstance(source, Path):
+            if source.is_dir():
+                # Manually traverse the directory and upload each file, since Snowflake PUT
+                # can't handle directories. Reduce the number of PUT operations by using
+                # wildcard patterns to batch upload files with the same extension.
+                for path in {
+                    p.parent.joinpath(f"*{p.suffix}") if p.suffix else p
+                    for p in source.resolve().rglob("*")
+                    if p.is_file()
+                }:
+                    session.file.put(
+                        str(path),
+                        stage_path.joinpath(path.parent.relative_to(source)).as_posix(),
+                        overwrite=True,
+                        auto_compress=False,
+                    )
+            else:
                 session.file.put(
-                    str(
-                    stage_path.
+                    str(source.resolve()),
+                    stage_path.as_posix(),
                     overwrite=True,
                     auto_compress=False,
                 )
-
-            session.file.put(
-                str(source.resolve()),
-                stage_path.as_posix(),
-                overwrite=True,
-                auto_compress=False,
-            )
-            source = source.parent
+                source = source.parent
 
         # Upload requirements
         # TODO: Check if payload includes both a requirements.txt file and pip_requirements
@@ -502,9 +526,15 @@ def _generate_param_handler_code(signature: inspect.Signature, output_name: str
     return param_code
 
 
-def generate_python_code(
+def generate_python_code(payload: Callable[..., Any], source_code_display: bool = False) -> str:
     """Generate an entrypoint script from a Python function."""
-
+
+    if isinstance(payload, function_payload_utils.FunctionPayload):
+        function = payload.function
+    else:
+        function = payload
+
+    signature = inspect.signature(function)
     if any(
         p.kind in {inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD}
        for p in signature.parameters.values()
@@ -513,21 +543,20 @@ def generate_python_code(func: Callable[..., Any], source_code_display: bool = F
 
     # Mirrored from Snowpark generate_python_code() function
     # https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/_internal/udf_utils.py
-    source_code_comment = _generate_source_code_comment(
+    source_code_comment = _generate_source_code_comment(function) if source_code_display else ""
 
     arg_dict_name = "kwargs"
-    if
+    if isinstance(payload, function_payload_utils.FunctionPayload):
         param_code = f"{arg_dict_name} = {{}}"
     else:
         param_code = _generate_param_handler_code(signature, arg_dict_name)
-
     return f"""
 import sys
 import pickle
 
 try:
 {textwrap.indent(source_code_comment, '    ')}
-    {_ENTRYPOINT_FUNC_NAME} = pickle.loads(bytes.fromhex('{_serialize_callable(
+    {_ENTRYPOINT_FUNC_NAME} = pickle.loads(bytes.fromhex('{_serialize_callable(payload).hex()}'))
 except (TypeError, pickle.PickleError):
     if sys.version_info.major != {sys.version_info.major} or sys.version_info.minor != {sys.version_info.minor}:
         raise RuntimeError(
@@ -551,3 +580,23 @@ if __name__ == '__main__':
 
     __return__ = {_ENTRYPOINT_FUNC_NAME}(**{arg_dict_name})
 """
+
+
+def create_function_payload(
+    func: Callable[..., Any], *args: Any, **kwargs: Any
+) -> function_payload_utils.FunctionPayload:
+    signature = inspect.signature(func)
+    bound = signature.bind(*args, **kwargs)
+    bound.apply_defaults()
+    session_argument = ""
+    session = None
+    for name, val in list(bound.arguments.items()):
+        if isinstance(val, snowpark.Session):
+            if session:
+                raise TypeError(f"Expected only one Session-type argument, but got both {session_argument} and {name}.")
+            session = val
+            session_argument = name
+            del bound.arguments[name]
+    payload = function_payload_utils.FunctionPayload(func, session, session_argument, *bound.args, **bound.kwargs)
+
+    return payload
snowflake/ml/jobs/_utils/query_helper.py
ADDED
@@ -0,0 +1,9 @@
+from snowflake import snowpark
+
+
+def get_attribute_map(session: snowpark.Session, requested_attributes: dict[str, int]) -> dict[str, int]:
+    metadata = session._conn._cursor.description
+    for index in range(len(metadata)):
+        if metadata[index].name in requested_attributes.keys():
+            requested_attributes[metadata[index].name] = index
+    return requested_attributes
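`get_attribute_map` reads the cursor description of the most recently executed query and rewrites the requested name-to-index map with the actual column positions; the seed values act as assumed positions for names the cursor does not report. A sketch of how `spec_utils._get_node_resources` (further down) drives it, with `MY_POOL` as a placeholder:

rows = session._conn.run_query(
    "show compute pools like ?", params=["MY_POOL"], _force_qmark_paramstyle=True
)
# {"instance_family": 4} seeds index 4; get_attribute_map overwrites it with
# the position reported by the cursor metadata for that column name.
attrs = get_attribute_map(session, {"instance_family": 4})
instance_family = rows["data"][0][attrs["instance_family"]]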
snowflake/ml/jobs/_utils/scripts/constants.py
CHANGED
@@ -1,10 +1,26 @@
+from snowflake.ml.jobs._utils import constants as mljob_constants
+
 # Constants defining the shutdown signal actor configuration.
 SHUTDOWN_ACTOR_NAME = "ShutdownSignal"
 SHUTDOWN_ACTOR_NAMESPACE = "default"
 SHUTDOWN_RPC_TIMEOUT_SECONDS = 5.0
 
 
+# The followings are Inherited from snowflake.ml.jobs._utils.constants
+# We need to copy them here since snowml package on the server side does
+# not have the latest version of the code
+
 # Log start and end messages
-
-
-
+LOG_START_MSG = getattr(
+    mljob_constants,
+    "LOG_START_MSG",
+    "--------------------------------\nML job started\n--------------------------------",
+)
+LOG_END_MSG = getattr(
+    mljob_constants,
+    "LOG_END_MSG",
+    "--------------------------------\nML job finished\n--------------------------------",
+)
+
+# min_instances environment variable name
+MIN_INSTANCES_ENV_VAR = getattr(mljob_constants, "MIN_INSTANCES_ENV_VAR", "MLRS_MIN_INSTANCES")
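The `getattr(module, name, default)` pattern guards against version skew: this script ships with the job payload, while `snowflake.ml.jobs._utils.constants` on the server may be an older build that predates the newer attributes. A toy demonstration of the fallback, with both classes as stand-ins:

class OldConstants:  # stand-in for an outdated server-side constants module
    pass

class NewConstants:  # stand-in for a current constants module
    MIN_INSTANCES_ENV_VAR = "MLRS_MIN_INSTANCES"

assert getattr(OldConstants, "MIN_INSTANCES_ENV_VAR", "fallback") == "fallback"
assert getattr(NewConstants, "MIN_INSTANCES_ENV_VAR", "fallback") == "MLRS_MIN_INSTANCES"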
snowflake/ml/jobs/_utils/scripts/mljob_launcher.py
CHANGED
@@ -13,7 +13,7 @@ from pathlib import Path
 from typing import Any, Optional
 
 import cloudpickle
-from constants import LOG_END_MSG, LOG_START_MSG
+from constants import LOG_END_MSG, LOG_START_MSG, MIN_INSTANCES_ENV_VAR
 
 from snowflake.ml.jobs._utils import constants
 from snowflake.ml.utils.connection_params import SnowflakeLoginOptions
@@ -72,28 +72,6 @@ class SimpleJSONEncoder(json.JSONEncoder):
         return f"Unserializable object: {repr(obj)}"
 
 
-def get_active_node_count() -> int:
-    """
-    Count the number of active nodes in the Ray cluster.
-
-    Returns:
-        int: Total count of active nodes
-    """
-    import ray
-
-    if not ray.is_initialized():
-        ray.init(address="auto", ignore_reinit_error=True, log_to_driver=False)
-    try:
-        nodes = [node for node in ray.nodes() if node.get("Alive")]
-        total_active = len(nodes)
-
-        logger.info(f"Active nodes: {total_active}")
-        return total_active
-    except Exception as e:
-        logger.warning(f"Error getting active node count: {e}")
-        return 0
-
-
 def wait_for_min_instances(min_instances: int) -> None:
     """
     Wait until the specified minimum number of instances are available in the Ray cluster.
@@ -108,13 +86,16 @@ def wait_for_min_instances(min_instances: int) -> None:
         logger.debug("Minimum instances is 1 or less, no need to wait for additional instances")
         return
 
+    # mljob_launcher runs inside the CR where mlruntime libraries are available, so we can import common_util directly
+    from common_utils import common_util as mlrs_util
+
     start_time = time.time()
     timeout = os.getenv("JOB_MIN_INSTANCES_TIMEOUT", TIMEOUT)
     check_interval = os.getenv("JOB_MIN_INSTANCES_CHECK_INTERVAL", CHECK_INTERVAL)
     logger.debug(f"Waiting for at least {min_instances} instances to be ready (timeout: {timeout}s)")
 
     while time.time() - start_time < timeout:
-        total_nodes =
+        total_nodes = mlrs_util.get_num_ray_nodes()
 
         if total_nodes >= min_instances:
             elapsed = time.time() - start_time
@@ -128,7 +109,8 @@ def wait_for_min_instances(min_instances: int) -> None:
         time.sleep(check_interval)
 
     raise TimeoutError(
-        f"Timed out after {timeout}s waiting for {min_instances} instances, only
+        f"Timed out after {timeout}s waiting for {min_instances} instances, only "
+        f"{mlrs_util.get_num_ray_nodes()} available"
     )
 
 
@@ -199,7 +181,7 @@ def main(script_path: str, *script_args: Any, script_main_func: Optional[str] =
     """
     try:
         # Wait for minimum required instances if specified
-        min_instances_str = os.environ.get("
+        min_instances_str = os.environ.get(MIN_INSTANCES_ENV_VAR) or "1"
         if min_instances_str and int(min_instances_str) > 1:
             wait_for_min_instances(int(min_instances_str))
snowflake/ml/jobs/_utils/spec_utils.py
CHANGED
@@ -1,20 +1,23 @@
 import logging
+import os
 from math import ceil
 from pathlib import PurePath
 from typing import Any, Optional, Union
 
 from snowflake import snowpark
 from snowflake.ml._internal.utils import snowflake_env
-from snowflake.ml.jobs._utils import constants, types
+from snowflake.ml.jobs._utils import constants, query_helper, types
 
 
 def _get_node_resources(session: snowpark.Session, compute_pool: str) -> types.ComputeResources:
     """Extract resource information for the specified compute pool"""
     # Get the instance family
-    rows = session.
-    if not rows:
+    rows = session._conn.run_query("show compute pools like ?", params=[compute_pool], _force_qmark_paramstyle=True)
+    if not rows or not isinstance(rows, dict) or not rows.get("data"):
         raise ValueError(f"Compute pool '{compute_pool}' not found")
-
+    requested_attributes = query_helper.get_attribute_map(session, {"instance_family": 4})
+    compute_pool_info = rows["data"]
+    instance_family: str = compute_pool_info[0][requested_attributes["instance_family"]]
     cloud = snowflake_env.get_current_cloud(session, default=snowflake_env.SnowflakeCloudType.AWS)
 
     return (
@@ -30,7 +33,7 @@ def _get_image_spec(session: snowpark.Session, compute_pool: str) -> types.Image
     # Use MLRuntime image
     image_repo = constants.DEFAULT_IMAGE_REPO
     image_name = constants.DEFAULT_IMAGE_GPU if resources.gpu > 0 else constants.DEFAULT_IMAGE_CPU
-    image_tag =
+    image_tag = _get_runtime_image_tag()
 
     # TODO: Should each instance consume the entire pod?
     return types.ImageSpec(
@@ -346,3 +349,24 @@ def _merge_lists_of_dicts(
             result[key] = d
 
     return list(result.values())
+
+
+def _get_runtime_image_tag() -> str:
+    """
+    Detect runtime image tag from container environment.
+
+    Checks in order:
+    1. Environment variable MLRS_CONTAINER_IMAGE_TAG
+    2. Falls back to hardcoded default
+
+    Returns:
+        str: The runtime image tag to use for job containers
+    """
+    env_tag = os.environ.get(constants.RUNTIME_IMAGE_TAG_ENV_VAR)
+    if env_tag:
+        logging.debug(f"Using runtime image tag from environment: {env_tag}")
+        return env_tag
+
+    # Fall back to default
+    logging.debug(f"Using default runtime image tag: {constants.DEFAULT_IMAGE_TAG}")
+    return constants.DEFAULT_IMAGE_TAG
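Per the docstring, `constants.RUNTIME_IMAGE_TAG_ENV_VAR` names the `MLRS_CONTAINER_IMAGE_TAG` environment variable, so a job submitted from inside a runtime container can pin the image tag without code changes. A sketch of the two branches (the tag value is hypothetical):

import os

os.environ["MLRS_CONTAINER_IMAGE_TAG"] = "my-runtime-tag"  # hypothetical tag
assert _get_runtime_image_tag() == "my-runtime-tag"

del os.environ["MLRS_CONTAINER_IMAGE_TAG"]
assert _get_runtime_image_tag() == constants.DEFAULT_IMAGE_TAG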
snowflake/ml/jobs/_utils/stage_utils.py
ADDED
@@ -0,0 +1,119 @@
+import os
+import re
+from os import PathLike
+from pathlib import Path, PurePath
+from typing import Union
+
+from snowflake.ml._internal.utils import identifier
+
+PROTOCOL_NAME = "snow"
+_SNOWURL_PATH_RE = re.compile(
+    rf"^(?:(?:{PROTOCOL_NAME}://)?"
+    r"(?<!@)(?P<domain>\w+)/"
+    rf"(?P<name>(?:{identifier._SF_IDENTIFIER}\.){{,2}}{identifier._SF_IDENTIFIER})/)?"
+    r"(?P<path>versions(?:/(?P<version>[^/]+)(?:/(?P<relpath>.*))?)?)$"
+)
+
+_STAGEF_PATH_RE = re.compile(r"^@(?P<stage>~|%?\w+)(?:/(?P<relpath>[\w\-./]*))?$")
+
+
+class StagePath:
+    def __init__(self, path: str) -> None:
+        stage_match = _SNOWURL_PATH_RE.fullmatch(path) or _STAGEF_PATH_RE.fullmatch(path)
+        if not stage_match:
+            raise ValueError(f"{path} is not a valid stage path")
+        path = path.strip()
+        self._raw_path = path
+        relpath = stage_match.group("relpath")
+        start, _ = stage_match.span("relpath")
+        self._root = self._raw_path[0:start].rstrip("/") if relpath else self._raw_path.rstrip("/")
+        self._path = Path(relpath or "")
+
+    @property
+    def parent(self) -> "StagePath":
+        if self._path.parent == Path(""):
+            return StagePath(self._root)
+        else:
+            return StagePath(f"{self._root}/{self._path.parent}")
+
+    @property
+    def root(self) -> str:
+        return self._root
+
+    @property
+    def suffix(self) -> str:
+        return self._path.suffix
+
+    def _compose_path(self, path: Path) -> str:
+        # in pathlib, Path("") = "."
+        if path == Path(""):
+            return self.root
+        else:
+            return f"{self.root}/{path}"
+
+    def is_relative_to(self, path: Union[str, PathLike[str], "StagePath"]) -> bool:
+        stage_path = path if isinstance(path, StagePath) else StagePath(os.fspath(path))
+        if stage_path.root == self.root:
+            return self._path.is_relative_to(stage_path._path)
+        else:
+            return False
+
+    def relative_to(self, path: Union[str, PathLike[str], "StagePath"]) -> PurePath:
+        stage_path = path if isinstance(path, StagePath) else StagePath(os.fspath(path))
+        if self.root == stage_path.root:
+            return self._path.relative_to(stage_path._path)
+        raise ValueError(f"{self._raw_path} does not start with {stage_path._raw_path}")
+
+    def absolute(self) -> "StagePath":
+        return self
+
+    def as_posix(self) -> str:
+        return self._compose_path(self._path)
+
+    # TODO Add actual implementation https://snowflakecomputing.atlassian.net/browse/SNOW-2112795
+    def exists(self) -> bool:
+        return True
+
+    # TODO Add actual implementation https://snowflakecomputing.atlassian.net/browse/SNOW-2112795
+    def is_file(self) -> bool:
+        return True
+
+    # TODO Add actual implementation https://snowflakecomputing.atlassian.net/browse/SNOW-2112795
+    def is_dir(self) -> bool:
+        return True
+
+    def is_absolute(self) -> bool:
+        return True
+
+    def __str__(self) -> str:
+        return self.as_posix()
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, StagePath):
+            raise NotImplementedError
+        return bool(self.root == other.root and self._path == other._path)
+
+    def __fspath__(self) -> str:
+        return self._compose_path(self._path)
+
+    def joinpath(self, *args: Union[str, PathLike[str], "StagePath"]) -> "StagePath":
+        path = self
+        for arg in args:
+            path = path._make_child(arg)
+        return path
+
+    def _make_child(self, path: Union[str, PathLike[str], "StagePath"]) -> "StagePath":
+        stage_path = path if isinstance(path, StagePath) else StagePath(os.fspath(path))
+        if self.root == stage_path.root:
+            child_path = self._path.joinpath(stage_path._path)
+            return StagePath(self._compose_path(child_path))
+        else:
+            return stage_path
+
+
+def identify_stage_path(path: str) -> Union[StagePath, Path]:
+    try:
+        stage_path = StagePath(path)
+    except ValueError:
+        return Path(path)
+    return stage_path
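`StagePath` mirrors just enough of the `pathlib.Path` surface (`exists`, `is_file`, `absolute`, `joinpath`, `relative_to`, `suffix`, ...) for `payload_utils` to treat stage and local sources uniformly, with the filesystem probes stubbed to `True` until SNOW-2112795 lands. Some illustrative values under the two accepted syntaxes (stage and object names are placeholders):

p = identify_stage_path("@my_stage/jobs/payload.py")
p.root                   # "@my_stage"
p.parent.as_posix()      # "@my_stage/jobs"
p.suffix                 # ".py"

v = identify_stage_path("snow://dataset/my_db.my_schema.my_ds/versions/v1/data.parquet")
v.root                   # "snow://dataset/my_db.my_schema.my_ds/versions/v1"
v.relative_to(v.parent)  # PurePath("data.parquet")

identify_stage_path("./local/payload.py")  # matches neither regex -> pathlib.Path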
snowflake/ml/jobs/_utils/types.py
CHANGED
@@ -2,18 +2,22 @@ from dataclasses import dataclass
 from pathlib import PurePath
 from typing import Literal, Optional, Union
 
+from snowflake.ml.jobs._utils import stage_utils
+
 JOB_STATUS = Literal[
     "PENDING",
     "RUNNING",
     "FAILED",
     "DONE",
+    "CANCELLING",
+    "CANCELLED",
     "INTERNAL_ERROR",
 ]
 
 
 @dataclass(frozen=True)
 class PayloadEntrypoint:
-    file_path: PurePath
+    file_path: Union[PurePath, stage_utils.StagePath]
     main_func: Optional[str]
 
 
snowflake/ml/jobs/decorators.py
CHANGED
@@ -1,13 +1,13 @@
 import copy
 import functools
-from typing import Callable, Optional, TypeVar
+from typing import Any, Callable, Optional, TypeVar
 
 from typing_extensions import ParamSpec
 
 from snowflake import snowpark
 from snowflake.ml._internal import telemetry
 from snowflake.ml.jobs import job as jb, manager as jm
-from snowflake.ml.jobs._utils import
+from snowflake.ml.jobs._utils import payload_utils
 
 _PROJECT = "MLJob"
 
@@ -20,16 +20,11 @@ def remote(
     compute_pool: str,
     *,
     stage_name: str,
+    target_instances: int = 1,
     pip_requirements: Optional[list[str]] = None,
     external_access_integrations: Optional[list[str]] = None,
-    query_warehouse: Optional[str] = None,
-    env_vars: Optional[dict[str, str]] = None,
-    target_instances: int = 1,
-    min_instances: int = 1,
-    enable_metrics: bool = False,
-    database: Optional[str] = None,
-    schema: Optional[str] = None,
     session: Optional[snowpark.Session] = None,
+    **kwargs: Any,
 ) -> Callable[[Callable[_Args, _ReturnValue]], Callable[_Args, jb.MLJob[_ReturnValue]]]:
     """
     Submit a job to the compute pool.
@@ -37,17 +32,20 @@ def remote(
     Args:
         compute_pool: The compute pool to use for the job.
         stage_name: The name of the stage where the job payload will be uploaded.
+        target_instances: The number of nodes in the job. If none specified, create a single node job.
         pip_requirements: A list of pip requirements for the job.
         external_access_integrations: A list of external access integrations.
-        query_warehouse: The query warehouse to use. Defaults to session warehouse.
-        env_vars: Environment variables to set in container
-        target_instances: The number of nodes in the job. If none specified, create a single node job.
-        min_instances: The minimum number of nodes required to start the job. If none specified, defaults to 1.
-            If set, the job will not start until the minimum number of nodes is available.
-        enable_metrics: Whether to enable metrics publishing for the job.
-        database: The database to use for the job.
-        schema: The schema to use for the job.
         session: The Snowpark session to use. If none specified, uses active session.
+        kwargs: Additional keyword arguments. Supported arguments:
+            database (str): The database to use for the job.
+            schema (str): The schema to use for the job.
+            min_instances (int): The minimum number of nodes required to start the job.
+                If none specified, defaults to target_instances. If set, the job
+                will not start until the minimum number of nodes is available.
+            env_vars (dict): Environment variables to set in container.
+            enable_metrics (bool): Whether to enable metrics publishing for the job.
+            query_warehouse (str): The query warehouse to use. Defaults to session warehouse.
+            spec_overrides (dict): A dictionary of overrides for the service spec.
 
     Returns:
         Decorator that dispatches invocations of the decorated function as remote jobs.
@@ -61,23 +59,17 @@
         wrapped_func.__code__ = wrapped_func.__code__.replace(co_firstlineno=func.__code__.co_firstlineno + 1)
 
         @functools.wraps(func)
-        def wrapper(*
-            payload =
-            setattr(payload, constants.IS_MLJOB_REMOTE_ATTR, True)
+        def wrapper(*_args: _Args.args, **_kwargs: _Args.kwargs) -> jb.MLJob[_ReturnValue]:
+            payload = payload_utils.create_function_payload(func, *_args, **_kwargs)
             job = jm._submit_job(
                 source=payload,
                 stage_name=stage_name,
                 compute_pool=compute_pool,
+                target_instances=target_instances,
                 pip_requirements=pip_requirements,
                 external_access_integrations=external_access_integrations,
-
-
-                target_instances=target_instances,
-                min_instances=min_instances,
-                enable_metrics=enable_metrics,
-                database=database,
-                schema=schema,
-                session=session,
+                session=payload.session or session,
+                **kwargs,
             )
             assert isinstance(job, jb.MLJob), f"Unexpected job type: {type(job)}"
             return job