snowflake-ml-python 1.23.0__py3-none-any.whl → 1.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. snowflake/ml/_internal/platform_capabilities.py +0 -4
  2. snowflake/ml/_internal/utils/mixins.py +26 -1
  3. snowflake/ml/data/_internal/arrow_ingestor.py +5 -1
  4. snowflake/ml/data/data_connector.py +2 -2
  5. snowflake/ml/data/data_ingestor.py +2 -1
  6. snowflake/ml/experiment/_experiment_info.py +3 -3
  7. snowflake/ml/feature_store/__init__.py +2 -0
  8. snowflake/ml/feature_store/aggregation.py +367 -0
  9. snowflake/ml/feature_store/feature.py +366 -0
  10. snowflake/ml/feature_store/feature_store.py +234 -20
  11. snowflake/ml/feature_store/feature_view.py +189 -4
  12. snowflake/ml/feature_store/metadata_manager.py +425 -0
  13. snowflake/ml/feature_store/tile_sql_generator.py +1079 -0
  14. snowflake/ml/jobs/_interop/data_utils.py +8 -8
  15. snowflake/ml/jobs/_interop/dto_schema.py +52 -7
  16. snowflake/ml/jobs/_interop/protocols.py +124 -7
  17. snowflake/ml/jobs/_interop/utils.py +92 -33
  18. snowflake/ml/jobs/_utils/arg_protocol.py +7 -0
  19. snowflake/ml/jobs/_utils/constants.py +4 -0
  20. snowflake/ml/jobs/_utils/feature_flags.py +97 -13
  21. snowflake/ml/jobs/_utils/payload_utils.py +6 -40
  22. snowflake/ml/jobs/_utils/runtime_env_utils.py +12 -111
  23. snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +204 -27
  24. snowflake/ml/jobs/decorators.py +17 -22
  25. snowflake/ml/jobs/job.py +25 -10
  26. snowflake/ml/jobs/job_definition.py +100 -8
  27. snowflake/ml/model/__init__.py +4 -0
  28. snowflake/ml/model/_client/model/batch_inference_specs.py +38 -2
  29. snowflake/ml/model/_client/model/model_version_impl.py +56 -28
  30. snowflake/ml/model/_client/ops/model_ops.py +2 -8
  31. snowflake/ml/model/_client/ops/service_ops.py +6 -11
  32. snowflake/ml/model/_client/service/model_deployment_spec.py +3 -0
  33. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +1 -0
  34. snowflake/ml/model/_client/sql/service.py +21 -29
  35. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -1
  36. snowflake/ml/model/_packager/model_handlers/huggingface.py +20 -0
  37. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +70 -14
  38. snowflake/ml/model/_signatures/utils.py +76 -1
  39. snowflake/ml/model/models/huggingface_pipeline.py +3 -0
  40. snowflake/ml/model/openai_signatures.py +154 -0
  41. snowflake/ml/registry/_manager/model_parameter_reconciler.py +2 -3
  42. snowflake/ml/version.py +1 -1
  43. {snowflake_ml_python-1.23.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/METADATA +79 -2
  44. {snowflake_ml_python-1.23.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/RECORD +47 -44
  45. {snowflake_ml_python-1.23.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/WHEEL +1 -1
  46. snowflake/ml/jobs/_utils/function_payload_utils.py +0 -43
  47. snowflake/ml/jobs/_utils/spec_utils.py +0 -22
  48. {snowflake_ml_python-1.23.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/licenses/LICENSE.txt +0 -0
  49. {snowflake_ml_python-1.23.0.dist-info → snowflake_ml_python-1.25.0.dist-info}/top_level.txt +0 -0
snowflake/ml/jobs/job_definition.py

@@ -14,11 +14,14 @@ from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.utils import identifier
 from snowflake.ml._internal.utils.mixins import SerializableSessionMixin
 from snowflake.ml.jobs import job as jb
+from snowflake.ml.jobs._interop import utils as interop_utils
 from snowflake.ml.jobs._utils import (
+    arg_protocol,
     constants,
     feature_flags,
     payload_utils,
     query_helper,
+    runtime_env_utils,
     types,
 )
 from snowflake.snowpark import context as sp_context
@@ -40,6 +43,8 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
         compute_pool: str,
         name: str,
         entrypoint_args: list[Any],
+        arg_protocol: Optional[arg_protocol.ArgProtocol] = arg_protocol.ArgProtocol.NONE,
+        default_args: Optional[list[Any]] = None,
         database: Optional[str] = None,
         schema: Optional[str] = None,
         session: Optional[snowpark.Session] = None,
@@ -49,12 +54,22 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
         self.spec_options = spec_options
         self.compute_pool = compute_pool
         self.session = session or sp_context.get_active_session()
-        self.database = database or self.session.get_current_database()
-        self.schema = schema or self.session.get_current_schema()
+        resolved_database = database or self.session.get_current_database()
+        resolved_schema = schema or self.session.get_current_schema()
+        if resolved_database is None:
+            raise ValueError("Database must be specified either in the session context or as a parameter.")
+        if resolved_schema is None:
+            raise ValueError("Schema must be specified either in the session context or as a parameter.")
+        self.database = identifier.resolve_identifier(resolved_database)
+        self.schema = identifier.resolve_identifier(resolved_schema)
         self.job_definition_id = identifier.get_schema_level_object_identifier(self.database, self.schema, name)
         self.entrypoint_args = entrypoint_args
+        self.arg_protocol = arg_protocol
+        self.default_args = default_args

     def delete(self) -> None:
+        if self.session is None:
+            raise RuntimeError("Session is required to delete job definition")
         if self.stage_name:
             try:
                 self.session.sql(f"REMOVE {self.stage_name}/").collect()
@@ -62,9 +77,27 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
             except Exception as e:
                 logger.warning(f"Failed to clean up stage files for job definition {self.stage_name}: {e}")

-    def _prepare_arguments(self, *args: _Args.args, **kwargs: _Args.kwargs) -> list[Any]:
-        # TODO: Add ArgProtocol and respective logics
-        return [arg for arg in args]
+    def _prepare_arguments(self, *args: _Args.args, **kwargs: _Args.kwargs) -> Optional[list[Any]]:
+        if self.arg_protocol == arg_protocol.ArgProtocol.NONE:
+            if len(kwargs) > 0:
+                raise ValueError(f"Keyword arguments are not supported with {self.arg_protocol}")
+            return list(args)
+        elif self.arg_protocol == arg_protocol.ArgProtocol.CLI:
+            return _combine_runtime_arguments(self.default_args, *args, **kwargs)
+        elif self.arg_protocol == arg_protocol.ArgProtocol.PICKLE:
+            if not args and not kwargs:
+                return []
+            uid = uuid4().hex[:8]
+            rel_path = f"{uid}/function_args"
+            file_path = f"{self.stage_name}/{constants.APP_STAGE_SUBPATH}/{rel_path}"
+            payload = interop_utils.save_result(
+                (args, kwargs), file_path, session=self.session, max_inline_size=interop_utils._MAX_INLINE_SIZE
+            )
+            if payload is not None:
+                return [f"--function_args={payload.decode('utf-8')}"]
+            return [f"--function_args={rel_path}"]
+        else:
+            raise ValueError(f"Invalid arg_protocol: {self.arg_protocol}")

     @telemetry.send_api_usage_telemetry(project=_PROJECT)
     def __call__(self, *args: _Args.args, **kwargs: _Args.kwargs) -> jb.MLJob[_ReturnValue]:
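
For reference, a rough sketch of what the three protocols produce for a call like definition(42, mode="fast"). The values below are illustrative only, derived from the branches above; they are not emitted by the package itself:

    # ArgProtocol.NONE   -> raises ValueError (keyword arguments are not supported)
    # ArgProtocol.CLI    -> positional args first, then one "--key value" pair per kwarg
    # ArgProtocol.PICKLE -> (args, kwargs) is serialized and referenced via a single flag
    expected = {
        "CLI": [42, "--mode", "fast"],
        "PICKLE": ["--function_args=<inline payload or stage-relative path>"],
    }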
@@ -98,6 +131,7 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
             json.dumps(job_options_dict),
         ]
         query_template = "CALL SYSTEM$EXECUTE_ML_JOB(%s, %s, %s, %s)"
+        assert self.session is not None, "Session is required to generate MLJob SQL query"
         sql = self.session._conn._cursor._preprocess_pyformat_query(query_template, params)
         return sql

@@ -123,6 +157,7 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
         entrypoint: Optional[Union[str, list[str]]] = None,
         target_instances: int = 1,
         generate_suffix: bool = True,
+        arg_protocol: Optional[arg_protocol.ArgProtocol] = arg_protocol.ArgProtocol.NONE,
         **kwargs: Any,
     ) -> "MLJobDefinition[_Args, _ReturnValue]":
         # Use kwargs for less common optional parameters
@@ -142,6 +177,7 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
         )
         overwrite = kwargs.pop("overwrite", False)
         name = kwargs.pop("name", None)
+        default_args = kwargs.pop("default_args", None)
         # Warn if there are unknown kwargs
         if kwargs:
             logger.warning(f"Ignoring unknown kwargs: {kwargs.keys()}")
@@ -149,6 +185,11 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
         # Validate parameters
         if database and not schema:
             raise ValueError("Schema must be specified if database is specified.")
+
+        compute_pool = identifier.resolve_identifier(compute_pool)
+        if query_warehouse is not None:
+            query_warehouse = identifier.resolve_identifier(query_warehouse)
+
         if target_instances < 1:
             raise ValueError("target_instances must be greater than 0.")
         if not (0 < min_instances <= target_instances):
@@ -190,10 +231,11 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
            )
            raise

-        if runtime_environment is None and feature_flags.FeatureFlags.ENABLE_RUNTIME_VERSIONS.is_enabled(default=True):
+        if runtime_environment is None and feature_flags.FeatureFlags.ENABLE_RUNTIME_VERSIONS.is_enabled():
            # Pass a JSON object for runtime versions so it serializes as nested JSON in options
            runtime_environment = json.dumps({"pythonVersion": f"{sys.version_info.major}.{sys.version_info.minor}"})

+        runtime = runtime_env_utils.get_runtime_image(session, compute_pool, runtime_environment)
        combined_env_vars = {**uploaded_payload.env_vars, **(env_vars or {})}
        entrypoint_args = [v.as_posix() if isinstance(v, PurePath) else v for v in uploaded_payload.entrypoint]
        spec_options = types.SpecOptions(
@@ -203,8 +245,8 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
            env_vars=combined_env_vars,
            enable_metrics=enable_metrics,
            spec_overrides=spec_overrides,
-            runtime=runtime_environment if runtime_environment else None,
-            enable_stage_mount_v2=feature_flags.FeatureFlags.ENABLE_STAGE_MOUNT_V2.is_enabled(default=True),
+            runtime=runtime,
+            enable_stage_mount_v2=feature_flags.FeatureFlags.ENABLE_STAGE_MOUNT_V2.is_enabled(),
        )

        job_options = types.JobOptions(
@@ -222,6 +264,8 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):
            compute_pool=compute_pool,
            entrypoint_args=entrypoint_args,
            session=session,
+            arg_protocol=arg_protocol,
+            default_args=default_args,
            database=database,
            schema=schema,
            name=name,
@@ -230,3 +274,51 @@ class MLJobDefinition(Generic[_Args, _ReturnValue], SerializableSessionMixin):

 def _generate_suffix() -> str:
     return str(uuid4().hex)[:8]
+
+
+def _combine_runtime_arguments(
+    default_runtime_args: Optional[list[Any]] = None, *args: Any, **kwargs: Any
+) -> list[Any]:
+    """Merge default CLI arguments with runtime overrides into a flat argument list.
+
+    Parses `default_runtime_args` for flags (e.g., `--key value`) and merges them with
+    `kwargs`. Keyword arguments override defaults unless their value is None. Positional
+    arguments from both `default_args` and `*args` are preserved in order.
+
+    Args:
+        default_runtime_args: Optional list of default CLI arguments to parse for flags and positional args.
+        *args: Additional positional arguments to include in the output.
+        **kwargs: Keyword arguments that override default flags. Values of None are ignored.
+
+    Returns:
+        A list of CLI-style arguments: positional args followed by `--key value` pairs.
+    """
+    cli_args = list(args)
+    flags: dict[str, Any] = {}
+    if default_runtime_args:
+        i = 0
+        while i < len(default_runtime_args):
+            arg = default_runtime_args[i]
+            if isinstance(arg, str) and arg.startswith("--"):
+                key = arg[2:]
+                # Check if next arg is a value (not a flag)
+                if i + 1 < len(default_runtime_args):
+                    next_arg = default_runtime_args[i + 1]
+                    if not (isinstance(next_arg, str) and next_arg.startswith("--")):
+                        flags[key] = next_arg
+                        i += 2
+                        continue
+
+                flags[key] = None
+            else:
+                cli_args.append(arg)
+            i += 1
+    # Prioritize kwargs over default_args. Explicit None values in kwargs
+    # serve as overrides and are converted to the string "None" to match
+    # CLI flag conventions (--key=value)
+    # Downstream logic must handle the parsing of these string-based nulls.
+    for k, v in kwargs.items():
+        flags[k] = v
+    for k, v in flags.items():
+        cli_args.extend([f"--{k}", str(v)])
+    return cli_args
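
The merge order matters: runtime positional args come first, default positional args follow, and keyword arguments win over any matching flag parsed from the defaults. A minimal sketch, assuming the module is importable as snowflake.ml.jobs.job_definition (this is a private helper, not public API):

    from snowflake.ml.jobs.job_definition import _combine_runtime_arguments

    merged = _combine_runtime_arguments(
        ["input.csv", "--epochs", "5", "--verbose"],  # default_args stored on the definition
        "run-2024",                                   # extra positional supplied at call time
        epochs=10,                                    # overrides the default --epochs flag
    )
    # -> ['run-2024', 'input.csv', '--epochs', '10', '--verbose', 'None']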
snowflake/ml/model/__init__.py

@@ -4,6 +4,8 @@ import warnings
 from snowflake.ml.model._client.model.batch_inference_specs import (
     ColumnHandlingOptions,
     FileEncoding,
+    InputFormat,
+    InputSpec,
     JobSpec,
     OutputSpec,
     SaveMode,
@@ -20,6 +22,8 @@ __all__ = [
     "ModelVersion",
     "ExportMode",
     "HuggingFacePipelineModel",
+    "InputSpec",
+    "InputFormat",
     "JobSpec",
     "OutputSpec",
     "SaveMode",
snowflake/ml/model/_client/model/batch_inference_specs.py

@@ -1,5 +1,5 @@
 from enum import Enum
-from typing import Optional
+from typing import Any, Optional

 from pydantic import BaseModel
 from typing_extensions import TypedDict
@@ -19,6 +19,12 @@ class SaveMode(str, Enum):
     ERROR = "error"


+class InputFormat(str, Enum):
+    """The format of the input column data."""
+
+    FULL_STAGE_PATH = "full_stage_path"
+
+
 class FileEncoding(str, Enum):
     """The encoding of the file content that will be passed to the custom model."""

@@ -30,7 +36,37 @@ class FileEncoding(str, Enum):
 class ColumnHandlingOptions(TypedDict):
     """Options for handling specific columns during run_batch for file I/O."""

-    encoding: FileEncoding
+    input_format: InputFormat
+    convert_to: FileEncoding
+
+
+class InputSpec(BaseModel):
+    """Specification for batch inference input options.
+
+    Defines optional configuration for processing input data during batch inference.
+
+    Attributes:
+        params (Optional[dict[str, Any]]): Optional dictionary of model inference parameters
+            (e.g., temperature, top_k for LLMs). These are passed as keyword arguments to the
+            model's inference method. Defaults to None.
+        column_handling (Optional[dict[str, ColumnHandlingOptions]]): Optional dictionary
+            specifying how to handle specific columns during file I/O. Maps column names to their
+            input format and file encoding configuration.
+
+    Example:
+        >>> input_spec = InputSpec(
+        ...     params={"temperature": 0.7, "top_k": 50},
+        ...     column_handling={
+        ...         "image_col": {
+        ...             "input_format": InputFormat.FULL_STAGE_PATH,
+        ...             "convert_to": FileEncoding.BASE64
+        ...         }
+        ...     }
+        ... )
+    """
+
+    params: Optional[dict[str, Any]] = None
+    column_handling: Optional[dict[str, ColumnHandlingOptions]] = None


 class OutputSpec(BaseModel):
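
Note that the single encoding key of ColumnHandlingOptions is replaced by the input_format / convert_to pair. A minimal sketch of the new shape (the column name "image_col" and the parameter values are illustrative):

    from snowflake.ml.model._client.model.batch_inference_specs import (
        FileEncoding,
        InputFormat,
        InputSpec,
    )

    spec = InputSpec(
        params={"temperature": 0.7},
        column_handling={
            "image_col": {
                "input_format": InputFormat.FULL_STAGE_PATH,  # how the column references the file
                "convert_to": FileEncoding.BASE64,            # replaces the former "encoding" key
            }
        },
    )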
snowflake/ml/model/_client/model/model_version_impl.py

@@ -33,6 +33,12 @@ _BATCH_INFERENCE_TEMPORARY_FOLDER = "_temporary"
 VLLM_SUPPORTED_TASKS = [
     "text-generation",
     "image-text-to-text",
+    "video-text-to-text",
+    "audio-text-to-text",
+]
+VALID_OPENAI_SIGNATURES = [
+    openai_signatures.OPENAI_CHAT_SIGNATURE,
+    openai_signatures.OPENAI_CHAT_SIGNATURE_WITH_CONTENT_FORMAT_STRING,
 ]


@@ -661,13 +667,12 @@ class ModelVersion(lineage_node.LineageNode):
     @snowpark._internal.utils.private_preview(version="1.18.0")
     def run_batch(
         self,
+        X: dataframe.DataFrame,
         *,
         compute_pool: str,
-        input_spec: dataframe.DataFrame,
+        input_spec: Optional[batch_inference_specs.InputSpec] = None,
         output_spec: batch_inference_specs.OutputSpec,
         job_spec: Optional[batch_inference_specs.JobSpec] = None,
-        params: Optional[dict[str, Any]] = None,
-        column_handling: Optional[dict[str, batch_inference_specs.ColumnHandlingOptions]] = None,
         inference_engine_options: Optional[dict[str, Any]] = None,
     ) -> job.MLJob[Any]:
         """Execute batch inference on datasets as an SPCS job.
@@ -675,19 +680,16 @@ class ModelVersion(lineage_node.LineageNode):
         Args:
             compute_pool (str): Name of the compute pool to use for building the image containers and batch
                 inference execution.
-            input_spec (dataframe.DataFrame): Snowpark DataFrame containing the input data for inference.
+            X (dataframe.DataFrame): Snowpark DataFrame containing the input data for inference.
                 The DataFrame should contain all required features for model prediction and passthrough columns.
             output_spec (batch_inference_specs.OutputSpec): Configuration for where and how to save
                 the inference results. Specifies the stage location and file handling behavior.
+            input_spec (Optional[batch_inference_specs.InputSpec]): Optional configuration for input
+                processing including model inference parameters and column handling options.
+                If None, default values will be used for params and column_handling.
             job_spec (Optional[batch_inference_specs.JobSpec]): Optional configuration for job
                 execution parameters such as compute resources, worker counts, and job naming.
                 If None, default values will be used.
-            params (Optional[dict[str, Any]]): Optional dictionary of model inference parameters
-                (e.g., temperature, top_k for LLMs). These are passed as keyword arguments to the
-                model's inference method. Defaults to None.
-            column_handling (Optional[dict[str, batch_inference_specs.FileEncoding]]): Optional dictionary
-                specifying how to handle specific columns during file I/O. Maps column names to their
-                file encoding configuration.
             inference_engine_options: Options for the service creation with custom inference engine.
                 Supports `engine` and `engine_args_override`.
                 `engine` is the type of the inference engine to use.
@@ -699,7 +701,7 @@ class ModelVersion(lineage_node.LineageNode):

         Raises:
             ValueError: If warehouse is not set in job_spec and no current warehouse is available.
-            RuntimeError: If the input_spec cannot be processed or written to the staging location.
+            RuntimeError: If the input data cannot be processed or written to the staging location.

         Example:
             >>> # Prepare input data - Example 1: From a table
@@ -732,10 +734,24 @@ class ModelVersion(lineage_node.LineageNode):
             >>> # Run batch inference
             >>> job = model_version.run_batch(
             ...     compute_pool="my_compute_pool",
-            ...     input_spec=input_df,
+            ...     X=input_df,
             ...     output_spec=output_spec,
             ...     job_spec=job_spec
             ... )
+            >>>
+            >>> # Run batch inference with InputSpec for additional options
+            >>> from snowflake.ml.model._client.model.batch_inference_specs import InputSpec, FileEncoding
+            >>> input_spec = InputSpec(
+            ...     params={"temperature": 0.7, "top_k": 50},
+            ...     column_handling={"image_col": {"encoding": FileEncoding.BASE64}}
+            ... )
+            >>> job = model_version.run_batch(
+            ...     compute_pool="my_compute_pool",
+            ...     X=input_df,
+            ...     output_spec=output_spec,
+            ...     input_spec=input_spec,
+            ...     job_spec=job_spec
+            ... )

         Note:
             This method is currently in private preview and requires Snowflake version 1.18.0 or later.
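
For callers upgrading from 1.23.x, the DataFrame that used to be passed as input_spec is now the X argument, and the former params / column_handling keyword arguments are wrapped in an InputSpec. A rough before/after sketch (model_version, input_df, and output_spec are placeholders from the example above):

    from snowflake.ml.model import InputSpec

    # 1.23.x
    # job = model_version.run_batch(
    #     compute_pool="my_compute_pool", input_spec=input_df,
    #     output_spec=output_spec, params={"temperature": 0.7})

    # 1.25.x
    job = model_version.run_batch(
        X=input_df,
        compute_pool="my_compute_pool",
        output_spec=output_spec,
        input_spec=InputSpec(params={"temperature": 0.7}),
    )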
@@ -747,6 +763,13 @@ class ModelVersion(lineage_node.LineageNode):
             subproject=_TELEMETRY_SUBPROJECT,
         )

+        # Extract params and column_handling from input_spec if provided
+        if input_spec is None:
+            input_spec = batch_inference_specs.InputSpec()
+
+        params = input_spec.params
+        column_handling = input_spec.column_handling
+
         if job_spec is None:
             job_spec = batch_inference_specs.JobSpec()

@@ -772,10 +795,10 @@ class ModelVersion(lineage_node.LineageNode):
         self._service_ops._enforce_save_mode(output_spec.mode, output_stage_location)

         try:
-            input_spec.write.copy_into_location(location=input_stage_location, file_format_type="parquet", header=True)
+            X.write.copy_into_location(location=input_stage_location, file_format_type="parquet", header=True)
             # todo: be specific about the type of errors to provide better error messages.
         except Exception as e:
-            raise RuntimeError(f"Failed to process input_spec: {e}")
+            raise RuntimeError(f"Failed to process input data: {e}")

         if job_spec.job_name is None:
             # Same as the MLJob ID generation logic with a different prefix
@@ -1123,16 +1146,11 @@ class ModelVersion(lineage_node.LineageNode):
             func_name: core.ModelSignature.from_dict(sig_dict) for func_name, sig_dict in signatures_dict.items()
         }

-        if deserialized_signatures not in [
-            openai_signatures.OPENAI_CHAT_SIGNATURE,
-            openai_signatures.OPENAI_CHAT_SIGNATURE_WITH_CONTENT_FORMAT_STRING,
-        ]:
+        if deserialized_signatures not in VALID_OPENAI_SIGNATURES:
             raise ValueError(
-                "Inference engine requires the model to be logged with openai_signatures.OPENAI_CHAT_SIGNATURE or "
-                "openai_signatures.OPENAI_CHAT_SIGNATURE_WITH_CONTENT_FORMAT_STRING. "
+                "Inference engine requires the model to be logged with one of the following signatures: "
+                f"{VALID_OPENAI_SIGNATURES}. Please log the model again with one of these supported signatures."
                 f"Found signatures: {signatures_dict}. "
-                "Please log the model again with: signatures=openai_signatures.OPENAI_CHAT_SIGNATURE or "
-                "signatures=openai_signatures.OPENAI_CHAT_SIGNATURE_WITH_CONTENT_FORMAT_STRING"
             )

     @overload
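
When the check above fails, the model has to be re-logged with one of the accepted OpenAI chat signatures. A minimal sketch, assuming an existing registry handle reg and a Hugging Face pipeline wrapper pipe (both placeholders):

    from snowflake.ml.model import openai_signatures

    mv = reg.log_model(
        pipe,
        model_name="my_llm",
        version_name="v2",
        signatures=openai_signatures.OPENAI_CHAT_SIGNATURE,
    )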
@@ -1144,6 +1162,7 @@ class ModelVersion(lineage_node.LineageNode):
         service_compute_pool: str,
         image_repo: Optional[str] = None,
         ingress_enabled: bool = False,
+        min_instances: int = 0,
         max_instances: int = 1,
         cpu_requests: Optional[str] = None,
         memory_requests: Optional[str] = None,
@@ -1170,8 +1189,10 @@ class ModelVersion(lineage_node.LineageNode):
                 will be used.
             ingress_enabled: If true, creates an service endpoint associated with the service. User must have
                 BIND SERVICE ENDPOINT privilege on the account.
-            max_instances: The maximum number of inference service instances to run. The same value it set to
-                MIN_INSTANCES property of the service.
+            min_instances: The minimum number of instances for the inference service. The service will automatically
+                scale between min_instances and max_instances based on traffic and hardware utilization. If set to
+                0 (default), the service will automatically suspend after a period of inactivity.
+            max_instances: The maximum number of instances for the inference service.
             cpu_requests: The cpu limit for CPU based inference. Can be an integer, fractional or string values. If
                 None, we attempt to utilize all the vCPU of the node.
             memory_requests: The memory limit with for CPU based inference. Can be an integer or a fractional value, but
@@ -1207,6 +1228,7 @@ class ModelVersion(lineage_node.LineageNode):
         service_compute_pool: str,
         image_repo: Optional[str] = None,
         ingress_enabled: bool = False,
+        min_instances: int = 0,
         max_instances: int = 1,
         cpu_requests: Optional[str] = None,
         memory_requests: Optional[str] = None,
@@ -1233,8 +1255,10 @@ class ModelVersion(lineage_node.LineageNode):
                 will be used.
             ingress_enabled: If true, creates an service endpoint associated with the service. User must have
                 BIND SERVICE ENDPOINT privilege on the account.
-            max_instances: The maximum number of inference service instances to run. The same value it set to
-                MIN_INSTANCES property of the service.
+            min_instances: The minimum number of instances for the inference service. The service will automatically
+                scale between min_instances and max_instances based on traffic and hardware utilization. If set to
+                0 (default), the service will automatically suspend after a period of inactivity.
+            max_instances: The maximum number of instances for the inference service.
             cpu_requests: The cpu limit for CPU based inference. Can be an integer, fractional or string values. If
                 None, we attempt to utilize all the vCPU of the node.
             memory_requests: The memory limit with for CPU based inference. Can be an integer or a fractional value, but
@@ -1284,6 +1308,7 @@ class ModelVersion(lineage_node.LineageNode):
         service_compute_pool: str,
         image_repo: Optional[str] = None,
         ingress_enabled: bool = False,
+        min_instances: int = 0,
         max_instances: int = 1,
         cpu_requests: Optional[str] = None,
         memory_requests: Optional[str] = None,
@@ -1311,8 +1336,10 @@ class ModelVersion(lineage_node.LineageNode):
                 will be used.
             ingress_enabled: If true, creates an service endpoint associated with the service. User must have
                 BIND SERVICE ENDPOINT privilege on the account.
-            max_instances: The maximum number of inference service instances to run. The same value it set to
-                MIN_INSTANCES property of the service.
+            min_instances: The minimum number of instances for the inference service. The service will automatically
+                scale between min_instances and max_instances based on traffic and hardware utilization. If set to
+                0 (default), the service will automatically suspend after a period of inactivity.
+            max_instances: The maximum number of instances for the inference service.
             cpu_requests: The cpu limit for CPU based inference. Can be an integer, fractional or string values. If
                 None, we attempt to utilize all the vCPU of the node.
             memory_requests: The memory limit with for CPU based inference. Can be an integer or a fractional value, but
@@ -1402,6 +1429,7 @@ class ModelVersion(lineage_node.LineageNode):
             service_compute_pool_name=sql_identifier.SqlIdentifier(service_compute_pool),
             image_repo_name=image_repo,
             ingress_enabled=ingress_enabled,
+            min_instances=min_instances,
             max_instances=max_instances,
             cpu_requests=cpu_requests,
             memory_requests=memory_requests,
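
A minimal sketch of the new autoscaling knobs (assumes an existing ModelVersion handle mv; the service, pool, and instance counts are placeholders):

    mv.create_service(
        service_name="my_service",
        service_compute_pool="my_gpu_pool",
        ingress_enabled=True,
        min_instances=0,   # 0 (the default) lets the service suspend when idle
        max_instances=3,   # upper bound for traffic-based scale-out
    )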
snowflake/ml/model/_client/ops/model_ops.py

@@ -10,7 +10,6 @@ from typing import Any, Literal, Optional, TypedDict, Union, cast, overload
 import yaml
 from typing_extensions import NotRequired

-from snowflake.ml._internal import platform_capabilities
 from snowflake.ml._internal.exceptions import error_codes, exceptions
 from snowflake.ml._internal.utils import formatting, identifier, sql_identifier, url
 from snowflake.ml.model import model_signature, type_hints
@@ -698,9 +697,6 @@ class ModelOperator:

         result: list[ServiceInfo] = []
         is_privatelink_connection = self._is_privatelink_connection()
-        is_autocapture_param_enabled = (
-            platform_capabilities.PlatformCapabilities.get_instance().is_inference_autocapture_enabled()
-        )

         for fully_qualified_service_name in fully_qualified_service_names:
             port: Optional[int] = None
@@ -742,10 +738,8 @@ class ModelOperator:
                inference_endpoint=inference_endpoint,
                internal_endpoint=f"http://{internal_dns}:{port}" if port is not None else None,
            )
-            if is_autocapture_param_enabled and self._service_client.DESC_SERVICE_SPEC_COL_NAME in service_description:
-                # Include column only if parameter is enabled and spec exists for service owner caller
-                autocapture_enabled = self._service_client.get_proxy_container_autocapture(service_description)
-                service_info["autocapture_enabled"] = autocapture_enabled
+            autocapture_enabled = self._service_client.is_autocapture_enabled(service_description)
+            service_info["autocapture_enabled"] = autocapture_enabled

            result.append(service_info)

snowflake/ml/model/_client/ops/service_ops.py

@@ -155,7 +155,6 @@ class ServiceOperator:
         self._model_deployment_spec = model_deployment_spec.ModelDeploymentSpec(
             workspace_path=pathlib.Path(self._workspace.name)
         )
-        self._inference_autocapture_enabled = pc.PlatformCapabilities.get_instance().is_inference_autocapture_enabled()

     def __eq__(self, __value: object) -> bool:
         if not isinstance(__value, ServiceOperator):
@@ -176,6 +175,7 @@ class ServiceOperator:
         service_compute_pool_name: sql_identifier.SqlIdentifier,
         image_repo_name: Optional[str],
         ingress_enabled: bool,
+        min_instances: int,
         max_instances: int,
         cpu_requests: Optional[str],
         memory_requests: Optional[str],
@@ -216,10 +216,6 @@ class ServiceOperator:
         progress_status.update("preparing deployment artifacts...")
         progress_status.increment()

-        # If autocapture param is disabled, don't allow create service with autocapture
-        if not self._inference_autocapture_enabled and autocapture:
-            raise ValueError("Invalid Argument: Autocapture feature is not supported.")
-
         if self._workspace:
             stage_path = self._create_temp_stage(database_name, schema_name, statement_params)
         else:
@@ -246,6 +242,7 @@ class ServiceOperator:
             service_name=service_name,
             inference_compute_pool_name=service_compute_pool_name,
             ingress_enabled=ingress_enabled,
+            min_instances=min_instances,
             max_instances=max_instances,
             cpu=cpu_requests,
             memory=memory_requests,
@@ -834,15 +831,13 @@ class ServiceOperator:
         service_seen_before = False

         while True:
-            # Check if async job has failed (but don't return on success - we need specific service status)
+            # Check if async job has completed
             if async_job.is_done():
                 try:
                     async_job.result()
-                    # Async job completed successfully, but we're waiting for a specific service status
-                    # This might mean the service completed and was cleaned up
-                    module_logger.debug(
-                        f"Async job completed but we're still waiting for {service_name} to reach {target_status.value}"
-                    )
+                    # Async job completed successfully - deployment is done
+                    module_logger.debug(f"Async job completed successfully, returning from wait for {service_name}")
+                    return
                 except Exception as e:
                     raise RuntimeError(f"Service deployment failed: {e}")
snowflake/ml/model/_client/service/model_deployment_spec.py

@@ -140,6 +140,7 @@ class ModelDeploymentSpec:
         service_database_name: Optional[sql_identifier.SqlIdentifier] = None,
         service_schema_name: Optional[sql_identifier.SqlIdentifier] = None,
         ingress_enabled: bool = True,
+        min_instances: int = 0,
         max_instances: int = 1,
         cpu: Optional[str] = None,
         memory: Optional[str] = None,
@@ -156,6 +157,7 @@ class ModelDeploymentSpec:
             service_database_name: Database name for the service.
             service_schema_name: Schema name for the service.
             ingress_enabled: Whether ingress is enabled.
+            min_instances: Minimum number of service instances.
             max_instances: Maximum number of service instances.
             cpu: CPU requirement.
             memory: Memory requirement.
@@ -187,6 +189,7 @@ class ModelDeploymentSpec:
             name=fq_service_name,
             compute_pool=inference_compute_pool_name.identifier(),
             ingress_enabled=ingress_enabled,
+            min_instances=min_instances,
             max_instances=max_instances,
             autocapture=autocapture,
             **self._inference_spec,
snowflake/ml/model/_client/service/model_deployment_spec_schema.py

@@ -26,6 +26,7 @@ class Service(BaseModel):
     name: str
     compute_pool: str
     ingress_enabled: bool
+    min_instances: int
     max_instances: int
     cpu: Optional[str] = None
     memory: Optional[str] = None