PyPI - snowflake-ml-python - Versions diffs - 1.10.0__py3-none-any.whl → 1.12.0__py3-none-any.whl - Mend

snowflake-ml-python 1.10.0__py3-none-any.whl → 1.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (205)

snowflake/ml/jobs/_utils/spec_utils.py CHANGED Viewed

@@ -1,12 +1,14 @@
 import logging
 import os
+import sys
 from math import ceil
 from pathlib import PurePath
-from typing import Any, Optional, Union
+from typing import Any, Literal, Optional, Union
 from snowflake import snowpark
 from snowflake.ml._internal.utils import snowflake_env
-from snowflake.ml.jobs._utils import constants, query_helper, types
+from snowflake.ml.jobs._utils import constants, feature_flags, query_helper, types
+from snowflake.ml.jobs._utils.runtime_env_utils import RuntimeEnvironmentsDict
 def _get_node_resources(session: snowpark.Session, compute_pool: str) -> types.ComputeResources:
@@ -28,22 +30,53 @@ def _get_node_resources(session: snowpark.Session, compute_pool: str) -> types.C
     )
+def _get_runtime_image(session: snowpark.Session, target_hardware: Literal["CPU", "GPU"]) -> Optional[str]:
+    rows = query_helper.run_query(session, "CALL SYSTEM$NOTEBOOKS_FIND_LABELED_RUNTIMES()")
+    if not rows:
+        return None
+    try:
+        runtime_envs = RuntimeEnvironmentsDict.model_validate_json(rows[0][0])
+        spcs_container_runtimes = runtime_envs.get_spcs_container_runtimes()
+    except Exception as e:
+        logging.warning(f"Failed to parse runtime image name from {rows[0][0]}, error: {e}")
+        return None
+    selected_runtime = next(
+        (
+            runtime
+            for runtime in spcs_container_runtimes
+            if (
+                runtime.hardware_type.lower() == target_hardware.lower()
+                and runtime.python_version.major == sys.version_info.major
+                and runtime.python_version.minor == sys.version_info.minor
+            )
+        ),
+        None,
+    )
+    return selected_runtime.runtime_container_image if selected_runtime else None
 def _get_image_spec(session: snowpark.Session, compute_pool: str) -> types.ImageSpec:
     # Retrieve compute pool node resources
     resources = _get_node_resources(session, compute_pool=compute_pool)
     # Use MLRuntime image
-    image_repo = constants.DEFAULT_IMAGE_REPO
-    image_name = constants.DEFAULT_IMAGE_GPU if resources.gpu > 0 else constants.DEFAULT_IMAGE_CPU
-    image_tag = _get_runtime_image_tag()
+    hardware = "GPU" if resources.gpu > 0 else "CPU"
+    container_image = None
+    if feature_flags.FeatureFlags.ENABLE_IMAGE_VERSION_ENV_VAR.is_enabled():
+        container_image = _get_runtime_image(session, hardware)  # type: ignore[arg-type]
+    if not container_image:
+        image_repo = constants.DEFAULT_IMAGE_REPO
+        image_name = constants.DEFAULT_IMAGE_GPU if resources.gpu > 0 else constants.DEFAULT_IMAGE_CPU
+        image_tag = _get_runtime_image_tag()
+        container_image = f"{image_repo}/{image_name}:{image_tag}"
     # TODO: Should each instance consume the entire pod?
     return types.ImageSpec(
-        repo=image_repo,
-        image_name=image_name,
-        image_tag=image_tag,
         resource_requests=resources,
         resource_limits=resources,
+        container_image=container_image,
     )
@@ -65,6 +98,7 @@ def generate_spec_overrides(
     container_spec: dict[str, Any] = {
         "name": constants.DEFAULT_CONTAINER_NAME,
     }
     if environment_vars:
         # TODO: Validate environment variables
         container_spec["env"] = environment_vars
@@ -180,10 +214,7 @@ def generate_service_spec(
     # TODO: Add hooks for endpoints for integration with TensorBoard etc
-    env_vars = {
-        constants.PAYLOAD_DIR_ENV_VAR: constants.APP_MOUNT_PATH,
-        constants.RESULT_PATH_ENV_VAR: constants.RESULT_PATH_DEFAULT_VALUE,
-    }
+    env_vars = payload.env_vars
     endpoints: list[dict[str, Any]] = []
     if target_instances > 1:
@@ -220,7 +251,7 @@ def generate_service_spec(
         "containers": [
             {
                 "name": constants.DEFAULT_CONTAINER_NAME,
-                "image": image_spec.full_name,
+                "image": image_spec.container_image,
                 "command": ["/usr/local/bin/_entrypoint.sh"],
                 "args": [
                     (stage_mount.joinpath(v).as_posix() if isinstance(v, PurePath) else v) for v in payload.entrypoint

snowflake/ml/jobs/_utils/stage_utils.py CHANGED Viewed

@@ -121,15 +121,28 @@ class StagePath:
         return self._compose_path(self._path)
     def joinpath(self, *args: Union[str, PathLike[str]]) -> "StagePath":
+        """
+        Joins the given path arguments to the current path,
+        mimicking the behavior of pathlib.Path.joinpath.
+        If the argument is a stage path (i.e., an absolute path),
+        it overrides the current path and is returned as the final path.
+        If the argument is a normal path, it is joined with the current relative path
+        using self._path.joinpath(arg).
+        Args:
+            *args: Path components to join.
+        Returns:
+            A new StagePath with the joined path.
+        Raises:
+            NotImplementedError: the argument is a stage path.
+        """
         path = self
         for arg in args:
-            path = path._make_child(arg)
+            if isinstance(arg, StagePath):
+                raise NotImplementedError
+            else:
+                # the arg might be an absolute path, so we need to remove the leading '/'
+                path = StagePath(f"{path.root}/{path._path.joinpath(arg).as_posix().lstrip('/')}")
         return path
-    def _make_child(self, path: Union[str, PathLike[str]]) -> "StagePath":
-        stage_path = path if isinstance(path, StagePath) else StagePath(os.fspath(path))
-        if self.root == stage_path.root:
-            child_path = self._path.joinpath(stage_path._path)
-            return StagePath(self._compose_path(child_path))
-        else:
-            return stage_path

snowflake/ml/jobs/_utils/types.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import os
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import PurePath
 from typing import Iterator, Literal, Optional, Protocol, Union, runtime_checkable
@@ -30,6 +30,10 @@ class PayloadPath(Protocol):
     def parent(self) -> "PayloadPath":
         ...
+    @property
+    def root(self) -> str:
+        ...
     def exists(self) -> bool:
         ...
@@ -86,6 +90,7 @@ class UploadedPayload:
     # TODO: Include manifest of payload files for validation
     stage_path: PurePath
     entrypoint: list[Union[str, PurePath]]
+    env_vars: dict[str, str] = field(default_factory=dict)
 @dataclass(frozen=True)
@@ -98,12 +103,6 @@ class ComputeResources:
 @dataclass(frozen=True)
 class ImageSpec:
-    repo: str
-    image_name: str
-    image_tag: str
     resource_requests: ComputeResources
     resource_limits: ComputeResources
-    @property
-    def full_name(self) -> str:
-        return f"{self.repo}/{self.image_name}:{self.image_tag}"
+    container_image: str

snowflake/ml/jobs/job.py CHANGED Viewed

@@ -99,21 +99,23 @@ class MLJob(Generic[T], SerializableSessionMixin):
         result_path_str = self._container_spec["env"].get(constants.RESULT_PATH_ENV_VAR)
         if result_path_str is None:
             raise RuntimeError(f"Job {self.name} doesn't have a result path configured")
-        volume_mounts = self._container_spec["volumeMounts"]
-        stage_mount_str = next(v for v in volume_mounts if v.get("name") == constants.STAGE_VOLUME_NAME)["mountPath"]
+        # If result path is relative, it is relative to the stage mount path
         result_path = Path(result_path_str)
+        if not result_path.is_absolute():
+            return f"{self._stage_path}/{result_path.as_posix()}"
+        # If result path is absolute, it is relative to the stage mount path
+        volume_mounts = self._container_spec["volumeMounts"]
+        stage_mount_str = next(v for v in volume_mounts if v.get("name") == constants.STAGE_VOLUME_NAME)["mountPath"]
         stage_mount = Path(stage_mount_str)
         try:
             relative_path = result_path.relative_to(stage_mount)
+            return f"{self._stage_path}/{relative_path.as_posix()}"
         except ValueError:
-            if result_path.is_absolute():
-                raise ValueError(
-                    f"Result path {result_path} is absolute, but should be relative to stage mount {stage_mount}"
-                )
-            relative_path = result_path
-        return f"{self._stage_path}/{relative_path.as_posix()}"
+            raise ValueError(
+                f"Result path {result_path} is absolute, but should be relative to stage mount {stage_mount}"
+            )
     @overload
     def get_logs(
@@ -199,7 +201,7 @@ class MLJob(Generic[T], SerializableSessionMixin):
             elapsed = time.monotonic() - start_time
             if elapsed >= timeout >= 0:
                 raise TimeoutError(f"Job {self.name} did not complete within {timeout} seconds")
-            elif status == "PENDING" and not warning_shown and elapsed >= 2:  # Only show warning after 2s
+            elif status == "PENDING" and not warning_shown and elapsed >= 5:  # Only show warning after 5s
                 pool_info = _get_compute_pool_info(self._session, self._compute_pool)
                 if (pool_info.max_nodes - pool_info.active_nodes) < self.min_instances:
                     logger.warning(
@@ -419,15 +421,29 @@ def _get_head_instance_id(session: snowpark.Session, job_id: str) -> Optional[in
     if not rows:
         return None
-    if target_instances > len(rows):
-        raise RuntimeError("Couldn’t retrieve head instance due to missing instances.")
+    # we have already integrated with first_instance startup policy,
+    # the instance 0 is guaranteed to be the head instance
+    head_instance = next(
+        (
+            row
+            for row in rows
+            if "instance_id" in row and row["instance_id"] is not None and int(row["instance_id"]) == 0
+        ),
+        None,
+    )
+    # fallback to find the first instance if the instance 0 is not found
+    if not head_instance:
+        if target_instances > len(rows):
+            raise RuntimeError(
+                f"Couldn’t retrieve head instance due to missing instances. {target_instances} > {len(rows)}"
+            )
+        # Sort by start_time first, then by instance_id
+        try:
+            sorted_instances = sorted(rows, key=lambda x: (x["start_time"], int(x["instance_id"])))
+        except TypeError:
+            raise RuntimeError("Job instance information unavailable.")
+        head_instance = sorted_instances[0]
-    # Sort by start_time first, then by instance_id
-    try:
-        sorted_instances = sorted(rows, key=lambda x: (x["start_time"], int(x["instance_id"])))
-    except TypeError:
-        raise RuntimeError("Job instance information unavailable.")
-    head_instance = sorted_instances[0]
     if not head_instance["start_time"]:
         # If head instance hasn't started yet, return None
         return None

snowflake/ml/jobs/manager.py CHANGED Viewed

@@ -1,6 +1,8 @@
+import json
 import logging
 import pathlib
 import textwrap
+from pathlib import PurePath
 from typing import Any, Callable, Optional, TypeVar, Union, cast, overload
 from uuid import uuid4
@@ -11,7 +13,13 @@ from snowflake import snowpark
 from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.utils import identifier
 from snowflake.ml.jobs import job as jb
-from snowflake.ml.jobs._utils import payload_utils, query_helper, spec_utils
+from snowflake.ml.jobs._utils import (
+    feature_flags,
+    payload_utils,
+    query_helper,
+    spec_utils,
+    types,
+)
 from snowflake.snowpark.context import get_active_session
 from snowflake.snowpark.exceptions import SnowparkSQLException
 from snowflake.snowpark.functions import coalesce, col, lit, when
@@ -426,7 +434,6 @@ def _submit_job(
     Raises:
         ValueError: If database or schema value(s) are invalid
-        SnowparkSQLException: If there is an error submitting the job.
     """
     session = session or get_active_session()
@@ -446,7 +453,7 @@ def _submit_job(
     env_vars = kwargs.pop("env_vars", None)
     spec_overrides = kwargs.pop("spec_overrides", None)
     enable_metrics = kwargs.pop("enable_metrics", True)
-    query_warehouse = kwargs.pop("query_warehouse", None)
+    query_warehouse = kwargs.pop("query_warehouse", session.get_current_warehouse())
     additional_payloads = kwargs.pop("additional_payloads", None)
     if additional_payloads:
@@ -484,6 +491,27 @@ def _submit_job(
         source, entrypoint=entrypoint, pip_requirements=pip_requirements, additional_payloads=additional_payloads
     ).upload(session, stage_path)
+    if feature_flags.FeatureFlags.USE_SUBMIT_JOB_V2.is_enabled():
+        # Add default env vars (extracted from spec_utils.generate_service_spec)
+        combined_env_vars = {**uploaded_payload.env_vars, **(env_vars or {})}
+        return _do_submit_job_v2(
+            session=session,
+            payload=uploaded_payload,
+            args=args,
+            env_vars=combined_env_vars,
+            spec_overrides=spec_overrides,
+            compute_pool=compute_pool,
+            job_id=job_id,
+            external_access_integrations=external_access_integrations,
+            query_warehouse=query_warehouse,
+            target_instances=target_instances,
+            min_instances=min_instances,
+            enable_metrics=enable_metrics,
+            use_async=True,
+        )
+    # Fall back to v1
     # Generate service spec
     spec = spec_utils.generate_service_spec(
         session,
@@ -494,6 +522,8 @@ def _submit_job(
         min_instances=min_instances,
         enable_metrics=enable_metrics,
     )
+    # Generate spec overrides
     spec_overrides = spec_utils.generate_spec_overrides(
         environment_vars=env_vars,
         custom_overrides=spec_overrides,
@@ -501,37 +531,25 @@ def _submit_job(
     if spec_overrides:
         spec = spec_utils.merge_patch(spec, spec_overrides, display_name="spec_overrides")
-    query_text, params = _generate_submission_query(
-        spec, external_access_integrations, query_warehouse, target_instances, session, compute_pool, job_id
+    return _do_submit_job_v1(
+        session, spec, external_access_integrations, query_warehouse, target_instances, compute_pool, job_id
     )
-    try:
-        _ = query_helper.run_query(session, query_text, params=params)
-    except SnowparkSQLException as e:
-        if "Invalid spec: unknown option 'resourceManagement' for 'spec'." in e.message:
-            logger.warning("Dropping 'resourceManagement' from spec because control policy is not enabled.")
-            spec["spec"].pop("resourceManagement", None)
-            query_text, params = _generate_submission_query(
-                spec, external_access_integrations, query_warehouse, target_instances, session, compute_pool, job_id
-            )
-            _ = query_helper.run_query(session, query_text, params=params)
-        else:
-            raise
-    return get_job(job_id, session=session)
-def _generate_submission_query(
+def _do_submit_job_v1(
+    session: snowpark.Session,
     spec: dict[str, Any],
     external_access_integrations: list[str],
     query_warehouse: Optional[str],
     target_instances: int,
-    session: snowpark.Session,
     compute_pool: str,
     job_id: str,
-) -> tuple[str, list[Any]]:
+) -> jb.MLJob[Any]:
     """
     Generate the SQL query for job submission.
     Args:
+        session: The Snowpark session to use.
         spec: The service spec for the job.
         external_access_integrations: The external access integrations for the job.
         query_warehouse: The query warehouse for the job.
@@ -541,7 +559,7 @@ def _generate_submission_query(
         job_id: The ID of the job.
     Returns:
-        A tuple containing the SQL query text and the parameters for the query.
+        The job object.
     """
     query_template = textwrap.dedent(
         """\
@@ -559,12 +577,77 @@ def _generate_submission_query(
     if external_access_integrations:
         external_access_integration_list = ",".join(f"{e}" for e in external_access_integrations)
         query.append(f"EXTERNAL_ACCESS_INTEGRATIONS = ({external_access_integration_list})")
-    query_warehouse = query_warehouse or session.get_current_warehouse()
     if query_warehouse:
         query.append("QUERY_WAREHOUSE = IDENTIFIER(?)")
         params.append(query_warehouse)
     if target_instances > 1:
         query.append("REPLICAS = ?")
         params.append(target_instances)
     query_text = "\n".join(line for line in query if line)
-    return query_text, params
+    _ = query_helper.run_query(session, query_text, params=params)
+    return get_job(job_id, session=session)
+def _do_submit_job_v2(
+    session: snowpark.Session,
+    payload: types.UploadedPayload,
+    args: Optional[list[str]],
+    env_vars: dict[str, str],
+    spec_overrides: dict[str, Any],
+    compute_pool: str,
+    job_id: Optional[str] = None,
+    external_access_integrations: Optional[list[str]] = None,
+    query_warehouse: Optional[str] = None,
+    target_instances: int = 1,
+    min_instances: int = 1,
+    enable_metrics: bool = True,
+    use_async: bool = True,
+) -> jb.MLJob[Any]:
+    """
+    Generate the SQL query for job submission.
+    Args:
+        session: The Snowpark session to use.
+        payload: The uploaded job payload.
+        args: Arguments to pass to the entrypoint script.
+        env_vars: Environment variables to set in the job container.
+        spec_overrides: Custom service specification overrides.
+        compute_pool: The compute pool to use for job execution.
+        job_id: The ID of the job.
+        external_access_integrations: Optional list of external access integrations.
+        query_warehouse: Optional query warehouse to use.
+        target_instances: Number of instances for multi-node job.
+        min_instances: Minimum number of instances required to start the job.
+        enable_metrics: Whether to enable platform metrics for the job.
+        use_async: Whether to run the job asynchronously.
+    Returns:
+        The job object.
+    """
+    args = [
+        (payload.stage_path.joinpath(v).as_posix() if isinstance(v, PurePath) else v) for v in payload.entrypoint
+    ] + (args or [])
+    spec_options = {
+        "STAGE_PATH": payload.stage_path.as_posix(),
+        "ENTRYPOINT": ["/usr/local/bin/_entrypoint.sh"],
+        "ARGS": args,
+        "ENV_VARS": env_vars,
+        "ENABLE_METRICS": enable_metrics,
+        "SPEC_OVERRIDES": spec_overrides,
+    }
+    job_options = {
+        "EXTERNAL_ACCESS_INTEGRATIONS": external_access_integrations,
+        "QUERY_WAREHOUSE": query_warehouse,
+        "TARGET_INSTANCES": target_instances,
+        "MIN_INSTANCES": min_instances,
+        "ASYNC": use_async,
+    }
+    job_options = {k: v for k, v in job_options.items() if v is not None}
+    query_template = "CALL SYSTEM$EXECUTE_ML_JOB(?, ?, ?, ?)"
+    params = [job_id, compute_pool, json.dumps(spec_options), json.dumps(job_options)]
+    actual_job_id = query_helper.run_query(session, query_template, params=params)[0][0]
+    return get_job(actual_job_id, session=session)

snowflake/ml/model/__init__.py CHANGED Viewed

@@ -1,5 +1,10 @@
+from snowflake.ml.model._client.model.batch_inference_specs import (
+    InputSpec,
+    JobSpec,
+    OutputSpec,
+)
 from snowflake.ml.model._client.model.model_impl import Model
 from snowflake.ml.model._client.model.model_version_impl import ExportMode, ModelVersion
 from snowflake.ml.model.models.huggingface_pipeline import HuggingFacePipelineModel
-__all__ = ["Model", "ModelVersion", "ExportMode", "HuggingFacePipelineModel"]
+__all__ = ["Model", "ModelVersion", "ExportMode", "HuggingFacePipelineModel", "InputSpec", "JobSpec", "OutputSpec"]

snowflake/ml/model/_client/model/batch_inference_specs.py ADDED Viewed

@@ -0,0 +1,27 @@
+from typing import Optional, Union
+from pydantic import BaseModel
+class InputSpec(BaseModel):
+    input_stage_location: str
+    input_file_pattern: str = "*"
+class OutputSpec(BaseModel):
+    output_stage_location: str
+    output_file_prefix: Optional[str] = None
+    completion_filename: str = "_SUCCESS"
+class JobSpec(BaseModel):
+    image_repo: Optional[str] = None
+    job_name: Optional[str] = None
+    num_workers: Optional[int] = None
+    function_name: Optional[str] = None
+    gpu: Optional[Union[str, int]] = None
+    force_rebuild: bool = False
+    max_batch_rows: int = 1024
+    warehouse: Optional[str] = None
+    cpu_requests: Optional[str] = None
+    memory_requests: Optional[str] = None

snowflake-ml-python 1.10.0__py3-none-any.whl → 1.12.0__py3-none-any.whl

snowflake-ml-python 1.10.0__py3-none-any.whl → 1.12.0__py3-none-any.whl