snowflake-ml-python 1.24.0__py3-none-any.whl → 1.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. snowflake/ml/_internal/utils/mixins.py +26 -1
  2. snowflake/ml/data/_internal/arrow_ingestor.py +5 -1
  3. snowflake/ml/data/data_connector.py +2 -2
  4. snowflake/ml/data/data_ingestor.py +2 -1
  5. snowflake/ml/experiment/_experiment_info.py +3 -3
  6. snowflake/ml/jobs/_interop/data_utils.py +8 -8
  7. snowflake/ml/jobs/_interop/dto_schema.py +52 -7
  8. snowflake/ml/jobs/_interop/protocols.py +124 -7
  9. snowflake/ml/jobs/_interop/utils.py +92 -33
  10. snowflake/ml/jobs/_utils/arg_protocol.py +7 -0
  11. snowflake/ml/jobs/_utils/constants.py +4 -0
  12. snowflake/ml/jobs/_utils/feature_flags.py +97 -13
  13. snowflake/ml/jobs/_utils/payload_utils.py +6 -40
  14. snowflake/ml/jobs/_utils/runtime_env_utils.py +12 -111
  15. snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +204 -27
  16. snowflake/ml/jobs/decorators.py +17 -22
  17. snowflake/ml/jobs/job.py +25 -10
  18. snowflake/ml/jobs/job_definition.py +100 -8
  19. snowflake/ml/model/_client/model/model_version_impl.py +25 -14
  20. snowflake/ml/model/_client/ops/service_ops.py +6 -6
  21. snowflake/ml/model/_client/service/model_deployment_spec.py +3 -0
  22. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +1 -0
  23. snowflake/ml/model/models/huggingface_pipeline.py +3 -0
  24. snowflake/ml/model/openai_signatures.py +154 -0
  25. snowflake/ml/registry/_manager/model_parameter_reconciler.py +2 -3
  26. snowflake/ml/version.py +1 -1
  27. {snowflake_ml_python-1.24.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/METADATA +41 -2
  28. {snowflake_ml_python-1.24.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/RECORD +31 -32
  29. {snowflake_ml_python-1.24.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/WHEEL +1 -1
  30. snowflake/ml/jobs/_utils/function_payload_utils.py +0 -43
  31. snowflake/ml/jobs/_utils/spec_utils.py +0 -22
  32. {snowflake_ml_python-1.24.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/licenses/LICENSE.txt +0 -0
  33. {snowflake_ml_python-1.24.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/top_level.txt +0 -0
snowflake/ml/jobs/decorators.py CHANGED
@@ -1,13 +1,12 @@
 import copy
-import functools
 from typing import Any, Callable, Optional, TypeVar
 
 from typing_extensions import ParamSpec
 
 from snowflake import snowpark
 from snowflake.ml._internal import telemetry
-from snowflake.ml.jobs import job as jb, manager as jm
-from snowflake.ml.jobs._utils import payload_utils
+from snowflake.ml.jobs import job_definition as jd
+from snowflake.ml.jobs._utils import arg_protocol, constants
 
 _PROJECT = "MLJob"
 
@@ -25,7 +24,7 @@ def remote(
     external_access_integrations: Optional[list[str]] = None,
     session: Optional[snowpark.Session] = None,
     **kwargs: Any,
-) -> Callable[[Callable[_Args, _ReturnValue]], Callable[_Args, jb.MLJob[_ReturnValue]]]:
+) -> Callable[[Callable[_Args, _ReturnValue]], jd.MLJobDefinition[_Args, _ReturnValue]]:
     """
     Submit a job to the compute pool.
 
@@ -51,29 +50,25 @@ def remote(
         Decorator that dispatches invocations of the decorated function as remote jobs.
     """
 
-    def decorator(func: Callable[_Args, _ReturnValue]) -> Callable[_Args, jb.MLJob[_ReturnValue]]:
+    def decorator(func: Callable[_Args, _ReturnValue]) -> jd.MLJobDefinition[_Args, _ReturnValue]:
         # Copy the function to avoid modifying the original
         # We need to modify the line number of the function to exclude the
         # decorator from the copied source code
         wrapped_func = copy.copy(func)
        wrapped_func.__code__ = wrapped_func.__code__.replace(co_firstlineno=func.__code__.co_firstlineno + 1)
 
-        @functools.wraps(func)
-        def wrapper(*_args: _Args.args, **_kwargs: _Args.kwargs) -> jb.MLJob[_ReturnValue]:
-            payload = payload_utils.create_function_payload(func, *_args, **_kwargs)
-            job = jm._submit_job(
-                source=payload,
-                stage_name=stage_name,
-                compute_pool=compute_pool,
-                target_instances=target_instances,
-                pip_requirements=pip_requirements,
-                external_access_integrations=external_access_integrations,
-                session=payload.session or session,
-                **kwargs,
-            )
-            assert isinstance(job, jb.MLJob), f"Unexpected job type: {type(job)}"
-            return job
-
-        return wrapper
+        setattr(wrapped_func, constants.IS_MLJOB_REMOTE_ATTR, True)
+        return jd.MLJobDefinition.register(
+            source=wrapped_func,
+            compute_pool=compute_pool,
+            stage_name=stage_name,
+            target_instances=target_instances,
+            pip_requirements=pip_requirements,
+            external_access_integrations=external_access_integrations,
+            session=session or snowpark.context.get_active_session(),
+            arg_protocol=arg_protocol.ArgProtocol.PICKLE,
+            generate_suffix=True,
+            **kwargs,
+        )
 
     return decorator
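
The net effect is that `@remote` now returns an `MLJobDefinition` rather than a per-call wrapper function, though invoking the decorated function still dispatches a job. A minimal usage sketch under that reading (pool and stage names are placeholders, and an active Snowpark session is assumed to exist):

```python
# Hedged sketch of the new decorator semantics; names are placeholders.
from snowflake.ml.jobs import remote

@remote("MY_COMPUTE_POOL", stage_name="payload_stage")
def train(n_epochs: int) -> float:
    return 0.95  # stand-in for real training logic

job = train(10)      # MLJobDefinition.__call__ submits an MLJob
print(job.result())  # blocks until completion, then loads the pickled result
```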
snowflake/ml/jobs/job.py CHANGED
@@ -123,26 +123,41 @@ class MLJob(Generic[T], SerializableSessionMixin):
 
         return self._transform_path(result_path_str)
 
-    def _transform_path(self, path_str: str) -> str:
+    # After introducing ML Job definitions, we have additional stage mount for result path
+    # the result path is like @payload_stage/{job_definition_name}/{job_name}/mljob_result
+    @property
+    def _result_stage_path(self) -> Optional[str]:
+        volumes = self._service_spec["spec"]["volumes"]
+        stage_volume = next((v for v in volumes if v["name"] == constants.RESULT_VOLUME_NAME), None)
+        if stage_volume is None:
+            return self._stage_path
+        elif "stageConfig" in stage_volume:
+            return cast(str, stage_volume["stageConfig"]["name"])
+        else:
+            return cast(str, stage_volume["source"])
+
+    def _transform_path(
+        self,
+        path_str: str,
+    ) -> str:
         """Transform a local path within the container to a stage path."""
         path = stage_utils.resolve_path(path_str)
         if isinstance(path, stage_utils.StagePath):
-            # Stage paths need no transformation
             return path.as_posix()
         if not path.is_absolute():
-            # Assume relative paths are relative to stage mount path
-            return f"{self._stage_path}/{path.as_posix()}"
+            return f"{self._result_stage_path}/{path.as_posix()}"
 
-        # If result path is absolute, rebase it onto the stage mount path
-        # TODO: Rather than matching by name, use the longest mount path which matches
         volume_mounts = self._container_spec["volumeMounts"]
-        stage_mount_str = next(v for v in volume_mounts if v.get("name") == constants.STAGE_VOLUME_NAME)["mountPath"]
+        stage_volume = next((v for v in volume_mounts if v["name"] == constants.RESULT_VOLUME_NAME), None)
+        if stage_volume is None:
+            stage_volume = next(v for v in volume_mounts if v["name"] == constants.STAGE_VOLUME_NAME)
+        stage_mount_str = stage_volume["mountPath"]
         stage_mount = Path(stage_mount_str)
         try:
             relative_path = path.relative_to(stage_mount)
-            return f"{self._stage_path}/{relative_path.as_posix()}"
+            return f"{self._result_stage_path}/{relative_path.as_posix()}"
         except ValueError:
-            raise ValueError(f"Result path {path} is absolute, but should be relative to stage mount {stage_mount}")
+            raise ValueError(f"Result Path {path} is absolute, but should be relative to stage mount {stage_mount}")
 
     @overload
     def get_logs(
@@ -279,7 +294,7 @@ class MLJob(Generic[T], SerializableSessionMixin):
         if self._result is None:
             self.wait(timeout)
         try:
-            self._result = interop_utils.load_result(
+            self._result = interop_utils.load(
                 self._result_path, session=self._session, path_transform=self._transform_path
             )
         except Exception as e:
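
The path handling above is the core of this change: absolute container paths are rebased from the stage mount onto the result stage path, while relative paths are joined to it directly. A standalone sketch of that logic, with hypothetical mount and stage values:

```python
# Isolated illustration of the rebasing performed by MLJob._transform_path;
# not the actual method, and the paths below are made up.
from pathlib import Path

def rebase(path_str: str, stage_mount: str, result_stage_path: str) -> str:
    path = Path(path_str)
    if not path.is_absolute():
        # Relative paths are assumed to be relative to the result stage path.
        return f"{result_stage_path}/{path.as_posix()}"
    # Absolute container paths are rebased onto the stage mount.
    relative_path = path.relative_to(stage_mount)
    return f"{result_stage_path}/{relative_path.as_posix()}"

print(rebase("/mnt/job_stage/mljob_result", "/mnt/job_stage", "@payload_stage/my_def/run_01"))
# -> @payload_stage/my_def/run_01/mljob_result
```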
snowflake/ml/jobs/job_definition.py CHANGED
@@ -14,11 +14,14 @@ from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.utils import identifier
 from snowflake.ml._internal.utils.mixins import SerializableSessionMixin
 from snowflake.ml.jobs import job as jb
+from snowflake.ml.jobs._interop import utils as interop_utils
 from snowflake.ml.jobs._utils import (
+    arg_protocol,
     constants,
     feature_flags,
     payload_utils,
     query_helper,
+    runtime_env_utils,
     types,
 )
 from snowflake.snowpark import context as sp_context
@@ -40,6 +43,8 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
         compute_pool: str,
         name: str,
         entrypoint_args: list[Any],
+        arg_protocol: Optional[arg_protocol.ArgProtocol] = arg_protocol.ArgProtocol.NONE,
+        default_args: Optional[list[Any]] = None,
         database: Optional[str] = None,
         schema: Optional[str] = None,
         session: Optional[snowpark.Session] = None,
@@ -49,12 +54,22 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
         self.spec_options = spec_options
         self.compute_pool = compute_pool
         self.session = session or sp_context.get_active_session()
-        self.database = database or self.session.get_current_database()
-        self.schema = schema or self.session.get_current_schema()
+        resolved_database = database or self.session.get_current_database()
+        resolved_schema = schema or self.session.get_current_schema()
+        if resolved_database is None:
+            raise ValueError("Database must be specified either in the session context or as a parameter.")
+        if resolved_schema is None:
+            raise ValueError("Schema must be specified either in the session context or as a parameter.")
+        self.database = identifier.resolve_identifier(resolved_database)
+        self.schema = identifier.resolve_identifier(resolved_schema)
         self.job_definition_id = identifier.get_schema_level_object_identifier(self.database, self.schema, name)
         self.entrypoint_args = entrypoint_args
+        self.arg_protocol = arg_protocol
+        self.default_args = default_args
 
     def delete(self) -> None:
+        if self.session is None:
+            raise RuntimeError("Session is required to delete job definition")
         if self.stage_name:
             try:
                 self.session.sql(f"REMOVE {self.stage_name}/").collect()
@@ -62,9 +77,27 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
             except Exception as e:
                 logger.warning(f"Failed to clean up stage files for job definition {self.stage_name}: {e}")
 
-    def _prepare_arguments(self, *args: _Args.args, **kwargs: _Args.kwargs) -> list[Any]:
-        # TODO: Add ArgProtocol and respective logics
-        return [arg for arg in args]
+    def _prepare_arguments(self, *args: _Args.args, **kwargs: _Args.kwargs) -> Optional[list[Any]]:
+        if self.arg_protocol == arg_protocol.ArgProtocol.NONE:
+            if len(kwargs) > 0:
+                raise ValueError(f"Keyword arguments are not supported with {self.arg_protocol}")
+            return list(args)
+        elif self.arg_protocol == arg_protocol.ArgProtocol.CLI:
+            return _combine_runtime_arguments(self.default_args, *args, **kwargs)
+        elif self.arg_protocol == arg_protocol.ArgProtocol.PICKLE:
+            if not args and not kwargs:
+                return []
+            uid = uuid4().hex[:8]
+            rel_path = f"{uid}/function_args"
+            file_path = f"{self.stage_name}/{constants.APP_STAGE_SUBPATH}/{rel_path}"
+            payload = interop_utils.save_result(
+                (args, kwargs), file_path, session=self.session, max_inline_size=interop_utils._MAX_INLINE_SIZE
+            )
+            if payload is not None:
+                return [f"--function_args={payload.decode('utf-8')}"]
+            return [f"--function_args={rel_path}"]
+        else:
+            raise ValueError(f"Invalid arg_protocol: {self.arg_protocol}")
 
     @telemetry.send_api_usage_telemetry(project=_PROJECT)
     def __call__(self, *args: _Args.args, **kwargs: _Args.kwargs) -> jb.MLJob[_ReturnValue]:
@@ -98,6 +131,7 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
             json.dumps(job_options_dict),
         ]
         query_template = "CALL SYSTEM$EXECUTE_ML_JOB(%s, %s, %s, %s)"
+        assert self.session is not None, "Session is required to generate MLJob SQL query"
         sql = self.session._conn._cursor._preprocess_pyformat_query(query_template, params)
         return sql
 
@@ -123,6 +157,7 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
         entrypoint: Optional[Union[str, list[str]]] = None,
         target_instances: int = 1,
         generate_suffix: bool = True,
+        arg_protocol: Optional[arg_protocol.ArgProtocol] = arg_protocol.ArgProtocol.NONE,
         **kwargs: Any,
     ) -> "MLJobDefinition[_Args, _ReturnValue]":
         # Use kwargs for less common optional parameters
@@ -142,6 +177,7 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
         )
         overwrite = kwargs.pop("overwrite", False)
         name = kwargs.pop("name", None)
+        default_args = kwargs.pop("default_args", None)
         # Warn if there are unknown kwargs
         if kwargs:
             logger.warning(f"Ignoring unknown kwargs: {kwargs.keys()}")
@@ -149,6 +185,11 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
         # Validate parameters
         if database and not schema:
             raise ValueError("Schema must be specified if database is specified.")
+
+        compute_pool = identifier.resolve_identifier(compute_pool)
+        if query_warehouse is not None:
+            query_warehouse = identifier.resolve_identifier(query_warehouse)
+
         if target_instances < 1:
             raise ValueError("target_instances must be greater than 0.")
         if not (0 < min_instances <= target_instances):
@@ -190,10 +231,11 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
             )
             raise
 
-        if runtime_environment is None and feature_flags.FeatureFlags.ENABLE_RUNTIME_VERSIONS.is_enabled(default=True):
+        if runtime_environment is None and feature_flags.FeatureFlags.ENABLE_RUNTIME_VERSIONS.is_enabled():
             # Pass a JSON object for runtime versions so it serializes as nested JSON in options
             runtime_environment = json.dumps({"pythonVersion": f"{sys.version_info.major}.{sys.version_info.minor}"})
 
+        runtime = runtime_env_utils.get_runtime_image(session, compute_pool, runtime_environment)
         combined_env_vars = {**uploaded_payload.env_vars, **(env_vars or {})}
         entrypoint_args = [v.as_posix() if isinstance(v, PurePath) else v for v in uploaded_payload.entrypoint]
         spec_options = types.SpecOptions(
@@ -203,8 +245,8 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
             env_vars=combined_env_vars,
             enable_metrics=enable_metrics,
             spec_overrides=spec_overrides,
-            runtime=runtime_environment if runtime_environment else None,
-            enable_stage_mount_v2=feature_flags.FeatureFlags.ENABLE_STAGE_MOUNT_V2.is_enabled(default=True),
+            runtime=runtime,
+            enable_stage_mount_v2=feature_flags.FeatureFlags.ENABLE_STAGE_MOUNT_V2.is_enabled(),
         )
 
         job_options = types.JobOptions(
@@ -222,6 +264,8 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
             compute_pool=compute_pool,
             entrypoint_args=entrypoint_args,
             session=session,
+            arg_protocol=arg_protocol,
+            default_args=default_args,
             database=database,
             schema=schema,
             name=name,
@@ -230,3 +274,51 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
 
 def _generate_suffix() -> str:
     return str(uuid4().hex)[:8]
+
+
+def _combine_runtime_arguments(
+    default_runtime_args: Optional[list[Any]] = None, *args: Any, **kwargs: Any
+) -> list[Any]:
+    """Merge default CLI arguments with runtime overrides into a flat argument list.
+
+    Parses `default_runtime_args` for flags (e.g., `--key value`) and merges them with
+    `kwargs`. Keyword arguments override defaults unless their value is None. Positional
+    arguments from both `default_args` and `*args` are preserved in order.
+
+    Args:
+        default_runtime_args: Optional list of default CLI arguments to parse for flags and positional args.
+        *args: Additional positional arguments to include in the output.
+        **kwargs: Keyword arguments that override default flags. Values of None are ignored.
+
+    Returns:
+        A list of CLI-style arguments: positional args followed by `--key value` pairs.
+    """
+    cli_args = list(args)
+    flags: dict[str, Any] = {}
+    if default_runtime_args:
+        i = 0
+        while i < len(default_runtime_args):
+            arg = default_runtime_args[i]
+            if isinstance(arg, str) and arg.startswith("--"):
+                key = arg[2:]
+                # Check if next arg is a value (not a flag)
+                if i + 1 < len(default_runtime_args):
+                    next_arg = default_runtime_args[i + 1]
+                    if not (isinstance(next_arg, str) and next_arg.startswith("--")):
+                        flags[key] = next_arg
+                        i += 2
+                        continue
+
+                flags[key] = None
+            else:
+                cli_args.append(arg)
+            i += 1
+    # Prioritize kwargs over default_args. Explicit None values in kwargs
+    # serve as overrides and are converted to the string "None" to match
+    # CLI flag conventions (--key=value)
+    # Downstream logic must handle the parsing of these string-based nulls.
+    for k, v in kwargs.items():
+        flags[k] = v
+    for k, v in flags.items():
+        cli_args.extend([f"--{k}", str(v)])
+    return cli_args
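
The flag-merging behavior is easiest to see with concrete values. A hedged illustration that calls the private helper directly (for exposition only; the import path is inferred from the file list above):

```python
# Hypothetical invocation of the private CLI-merge helper shown above.
from snowflake.ml.jobs.job_definition import _combine_runtime_arguments

defaults = ["input.csv", "--epochs", "10", "--verbose"]
merged = _combine_runtime_arguments(defaults, "extra.csv", epochs=20)

# Runtime positionals come first, then the defaults' positionals, then flags;
# the kwarg overrides --epochs, and the valueless --verbose flag is emitted
# with the string "None", which downstream parsing must handle.
assert merged == ["extra.csv", "input.csv", "--epochs", "20", "--verbose", "None"]
```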
snowflake/ml/model/_client/model/model_version_impl.py CHANGED
@@ -33,6 +33,12 @@ _BATCH_INFERENCE_TEMPORARY_FOLDER = "_temporary"
 VLLM_SUPPORTED_TASKS = [
     "text-generation",
     "image-text-to-text",
+    "video-text-to-text",
+    "audio-text-to-text",
+]
+VALID_OPENAI_SIGNATURES = [
+    openai_signatures.OPENAI_CHAT_SIGNATURE,
+    openai_signatures.OPENAI_CHAT_SIGNATURE_WITH_CONTENT_FORMAT_STRING,
 ]
 
 
@@ -1140,16 +1146,11 @@ class ModelVersion(lineage_node.LineageNode):
             func_name: core.ModelSignature.from_dict(sig_dict) for func_name, sig_dict in signatures_dict.items()
         }
 
-        if deserialized_signatures not in [
-            openai_signatures.OPENAI_CHAT_SIGNATURE,
-            openai_signatures.OPENAI_CHAT_SIGNATURE_WITH_CONTENT_FORMAT_STRING,
-        ]:
+        if deserialized_signatures not in VALID_OPENAI_SIGNATURES:
             raise ValueError(
-                "Inference engine requires the model to be logged with openai_signatures.OPENAI_CHAT_SIGNATURE or "
-                "openai_signatures.OPENAI_CHAT_SIGNATURE_WITH_CONTENT_FORMAT_STRING. "
+                "Inference engine requires the model to be logged with one of the following signatures: "
+                f"{VALID_OPENAI_SIGNATURES}. Please log the model again with one of these supported signatures."
                 f"Found signatures: {signatures_dict}. "
-                "Please log the model again with: signatures=openai_signatures.OPENAI_CHAT_SIGNATURE or "
-                "signatures=openai_signatures.OPENAI_CHAT_SIGNATURE_WITH_CONTENT_FORMAT_STRING"
             )
 
     @overload
@@ -1161,6 +1162,7 @@ class ModelVersion(lineage_node.LineageNode):
         service_compute_pool: str,
         image_repo: Optional[str] = None,
         ingress_enabled: bool = False,
+        min_instances: int = 0,
         max_instances: int = 1,
         cpu_requests: Optional[str] = None,
         memory_requests: Optional[str] = None,
@@ -1187,8 +1189,10 @@
                 will be used.
             ingress_enabled: If true, creates an service endpoint associated with the service. User must have
                 BIND SERVICE ENDPOINT privilege on the account.
-            max_instances: The maximum number of inference service instances to run. The same value it set to
-                MIN_INSTANCES property of the service.
+            min_instances: The minimum number of instances for the inference service. The service will automatically
+                scale between min_instances and max_instances based on traffic and hardware utilization. If set to
+                0 (default), the service will automatically suspend after a period of inactivity.
+            max_instances: The maximum number of instances for the inference service.
             cpu_requests: The cpu limit for CPU based inference. Can be an integer, fractional or string values. If
                 None, we attempt to utilize all the vCPU of the node.
             memory_requests: The memory limit with for CPU based inference. Can be an integer or a fractional value, but
@@ -1224,6 +1228,7 @@
         service_compute_pool: str,
         image_repo: Optional[str] = None,
         ingress_enabled: bool = False,
+        min_instances: int = 0,
         max_instances: int = 1,
         cpu_requests: Optional[str] = None,
         memory_requests: Optional[str] = None,
@@ -1250,8 +1255,10 @@
                 will be used.
             ingress_enabled: If true, creates an service endpoint associated with the service. User must have
                 BIND SERVICE ENDPOINT privilege on the account.
-            max_instances: The maximum number of inference service instances to run. The same value it set to
-                MIN_INSTANCES property of the service.
+            min_instances: The minimum number of instances for the inference service. The service will automatically
+                scale between min_instances and max_instances based on traffic and hardware utilization. If set to
+                0 (default), the service will automatically suspend after a period of inactivity.
+            max_instances: The maximum number of instances for the inference service.
             cpu_requests: The cpu limit for CPU based inference. Can be an integer, fractional or string values. If
                 None, we attempt to utilize all the vCPU of the node.
             memory_requests: The memory limit with for CPU based inference. Can be an integer or a fractional value, but
@@ -1301,6 +1308,7 @@ class ModelVersion(lineage_node.LineageNode):
         service_compute_pool: str,
         image_repo: Optional[str] = None,
         ingress_enabled: bool = False,
+        min_instances: int = 0,
         max_instances: int = 1,
         cpu_requests: Optional[str] = None,
         memory_requests: Optional[str] = None,
@@ -1328,8 +1336,10 @@
                 will be used.
             ingress_enabled: If true, creates an service endpoint associated with the service. User must have
                 BIND SERVICE ENDPOINT privilege on the account.
-            max_instances: The maximum number of inference service instances to run. The same value it set to
-                MIN_INSTANCES property of the service.
+            min_instances: The minimum number of instances for the inference service. The service will automatically
+                scale between min_instances and max_instances based on traffic and hardware utilization. If set to
+                0 (default), the service will automatically suspend after a period of inactivity.
+            max_instances: The maximum number of instances for the inference service.
             cpu_requests: The cpu limit for CPU based inference. Can be an integer, fractional or string values. If
                 None, we attempt to utilize all the vCPU of the node.
             memory_requests: The memory limit with for CPU based inference. Can be an integer or a fractional value, but
@@ -1419,6 +1429,7 @@
             service_compute_pool_name=sql_identifier.SqlIdentifier(service_compute_pool),
             image_repo_name=image_repo,
             ingress_enabled=ingress_enabled,
+            min_instances=min_instances,
             max_instances=max_instances,
             cpu_requests=cpu_requests,
             memory_requests=memory_requests,
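
With `min_instances` threaded through the overloads into the deployment spec, a service can either pin a floor of running instances or opt into scale-to-zero. A hedged sketch (the registry handle and all names are placeholders):

```python
# Hypothetical deployment call exercising the new min_instances parameter;
# `registry` is assumed to be an existing snowflake.ml.registry.Registry.
mv = registry.get_model("MY_MODEL").version("V1")
mv.create_service(
    service_name="MY_SERVICE",
    service_compute_pool="MY_POOL",
    ingress_enabled=True,
    min_instances=0,  # 0 (default): suspend after a period of inactivity
    max_instances=3,  # autoscaling ceiling
)
```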
snowflake/ml/model/_client/ops/service_ops.py CHANGED
@@ -175,6 +175,7 @@ class ServiceOperator:
         service_compute_pool_name: sql_identifier.SqlIdentifier,
         image_repo_name: Optional[str],
         ingress_enabled: bool,
+        min_instances: int,
         max_instances: int,
         cpu_requests: Optional[str],
         memory_requests: Optional[str],
@@ -241,6 +242,7 @@ class ServiceOperator:
             service_name=service_name,
             inference_compute_pool_name=service_compute_pool_name,
             ingress_enabled=ingress_enabled,
+            min_instances=min_instances,
             max_instances=max_instances,
             cpu=cpu_requests,
             memory=memory_requests,
@@ -829,15 +831,13 @@ class ServiceOperator:
         service_seen_before = False
 
         while True:
-            # Check if async job has failed (but don't return on success - we need specific service status)
+            # Check if async job has completed
             if async_job.is_done():
                 try:
                     async_job.result()
-                    # Async job completed successfully, but we're waiting for a specific service status
-                    # This might mean the service completed and was cleaned up
-                    module_logger.debug(
-                        f"Async job completed but we're still waiting for {service_name} to reach {target_status.value}"
-                    )
+                    # Async job completed successfully - deployment is done
+                    module_logger.debug(f"Async job completed successfully, returning from wait for {service_name}")
+                    return
                 except Exception as e:
                     raise RuntimeError(f"Service deployment failed: {e}")
 
snowflake/ml/model/_client/service/model_deployment_spec.py CHANGED
@@ -140,6 +140,7 @@ class ModelDeploymentSpec:
         service_database_name: Optional[sql_identifier.SqlIdentifier] = None,
         service_schema_name: Optional[sql_identifier.SqlIdentifier] = None,
         ingress_enabled: bool = True,
+        min_instances: int = 0,
         max_instances: int = 1,
         cpu: Optional[str] = None,
         memory: Optional[str] = None,
@@ -156,6 +157,7 @@ class ModelDeploymentSpec:
             service_database_name: Database name for the service.
             service_schema_name: Schema name for the service.
             ingress_enabled: Whether ingress is enabled.
+            min_instances: Minimum number of service instances.
             max_instances: Maximum number of service instances.
             cpu: CPU requirement.
             memory: Memory requirement.
@@ -187,6 +189,7 @@ class ModelDeploymentSpec:
             name=fq_service_name,
             compute_pool=inference_compute_pool_name.identifier(),
             ingress_enabled=ingress_enabled,
+            min_instances=min_instances,
             max_instances=max_instances,
             autocapture=autocapture,
             **self._inference_spec,
snowflake/ml/model/_client/service/model_deployment_spec_schema.py CHANGED
@@ -26,6 +26,7 @@ class Service(BaseModel):
     name: str
     compute_pool: str
     ingress_enabled: bool
+    min_instances: int
     max_instances: int
     cpu: Optional[str] = None
    memory: Optional[str] = None
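
Abridged to the fields visible in this hunk, the spec model now looks roughly like the sketch below (the real class defines additional fields, e.g. the `autocapture` value passed in the spec-building hunk above):

```python
# Abridged sketch of the Service spec model after this change; the actual
# class defines more fields than shown in the hunk above.
from typing import Optional

from pydantic import BaseModel

class Service(BaseModel):
    name: str
    compute_pool: str
    ingress_enabled: bool
    min_instances: int  # new in 1.25.0
    max_instances: int
    cpu: Optional[str] = None
    memory: Optional[str] = None
```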
snowflake/ml/model/models/huggingface_pipeline.py CHANGED
@@ -105,6 +105,7 @@ class HuggingFacePipelineModel(huggingface.TransformersPipeline):
         image_repo: Optional[str] = None,
         image_build_compute_pool: Optional[str] = None,
         ingress_enabled: bool = False,
+        min_instances: int = 0,
         max_instances: int = 1,
         cpu_requests: Optional[str] = None,
         memory_requests: Optional[str] = None,
@@ -133,6 +134,7 @@ class HuggingFacePipelineModel(huggingface.TransformersPipeline):
             image_build_compute_pool: The name of the compute pool used to build the model inference image. It uses
                 the service compute pool if None.
             ingress_enabled: Whether ingress is enabled. Defaults to False.
+            min_instances: Minimum number of instances. Defaults to 0.
             max_instances: Maximum number of instances. Defaults to 1.
             cpu_requests: CPU requests configuration. Defaults to None.
             memory_requests: Memory requests configuration. Defaults to None.
@@ -225,6 +227,7 @@ class HuggingFacePipelineModel(huggingface.TransformersPipeline):
             service_compute_pool_name=sql_identifier.SqlIdentifier(service_compute_pool),
             image_repo_name=image_repo,
             ingress_enabled=ingress_enabled,
+            min_instances=min_instances,
             max_instances=max_instances,
             cpu_requests=cpu_requests,
             memory_requests=memory_requests,