PyPI - snowflake-ml-python - Versions diffs - 1.15.0__py3-none-any.whl → 1.16.0__py3-none-any.whl - Mend

snowflake-ml-python 1.15.0__py3-none-any.whl → 1.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (178) hide show

snowflake/ml/_internal/platform_capabilities.py CHANGED Viewed

@@ -17,6 +17,7 @@ logger = logging.getLogger(__name__)
 LIVE_COMMIT_PARAMETER = "ENABLE_LIVE_VERSION_IN_SDK"
 INLINE_DEPLOYMENT_SPEC_PARAMETER = "ENABLE_INLINE_DEPLOYMENT_SPEC_FROM_CLIENT_VERSION"
+SET_MODULE_FUNCTIONS_VOLATILITY_FROM_MANIFEST = "SET_MODULE_FUNCTIONS_VOLATILITY_FROM_MANIFEST"
 class PlatformCapabilities:
@@ -73,6 +74,9 @@ class PlatformCapabilities:
     def is_inlined_deployment_spec_enabled(self) -> bool:
         return self._is_version_feature_enabled(INLINE_DEPLOYMENT_SPEC_PARAMETER)
+    def is_set_module_functions_volatility_from_manifest(self) -> bool:
+        return self._get_bool_feature(SET_MODULE_FUNCTIONS_VOLATILITY_FROM_MANIFEST, False)
     def is_live_commit_enabled(self) -> bool:
         return self._get_bool_feature(LIVE_COMMIT_PARAMETER, False)

snowflake/ml/_internal/utils/mixins.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from dataclasses import dataclass
 from typing import Any, Optional
 from snowflake.ml._internal.utils import identifier
@@ -16,6 +17,14 @@ def _identifiers_match(saved: Optional[str], current: Optional[str]) -> bool:
     return saved_resolved == current_resolved
+@dataclass(frozen=True)
+class _SessionState:
+    account: Optional[str]
+    role: Optional[str]
+    database: Optional[str]
+    schema: Optional[str]
 class SerializableSessionMixin:
     """Mixin that provides pickling capabilities for objects with Snowpark sessions."""
@@ -40,17 +49,23 @@ class SerializableSessionMixin:
     def __setstate__(self, state: dict[str, Any]) -> None:
         """Restore session from context during unpickling."""
-        saved_account = state.pop(_SESSION_ACCOUNT_KEY, None)
-        saved_role = state.pop(_SESSION_ROLE_KEY, None)
-        saved_database = state.pop(_SESSION_DATABASE_KEY, None)
-        saved_schema = state.pop(_SESSION_SCHEMA_KEY, None)
+        session_state = _SessionState(
+            account=state.pop(_SESSION_ACCOUNT_KEY, None),
+            role=state.pop(_SESSION_ROLE_KEY, None),
+            database=state.pop(_SESSION_DATABASE_KEY, None),
+            schema=state.pop(_SESSION_SCHEMA_KEY, None),
+        )
         if hasattr(super(), "__setstate__"):
             super().__setstate__(state)  # type: ignore[misc]
         else:
             self.__dict__.update(state)
-        if saved_account is not None:
+        self._set_session(session_state)
+    def _set_session(self, session_state: _SessionState) -> None:
+        if session_state.account is not None:
             active_sessions = snowpark_session._get_active_sessions()
             if len(active_sessions) == 0:
                 raise RuntimeError("No active Snowpark session available. Please create a session.")
@@ -63,10 +78,10 @@ class SerializableSessionMixin:
                     active_sessions,
                     key=lambda s: sum(
                         (
-                            _identifiers_match(saved_account, s.get_current_account()),
-                            _identifiers_match(saved_role, s.get_current_role()),
-                            _identifiers_match(saved_database, s.get_current_database()),
-                            _identifiers_match(saved_schema, s.get_current_schema()),
+                            _identifiers_match(session_state.account, s.get_current_account()),
+                            _identifiers_match(session_state.role, s.get_current_role()),
+                            _identifiers_match(session_state.database, s.get_current_database()),
+                            _identifiers_match(session_state.schema, s.get_current_schema()),
                         )
                     ),
                 ),

snowflake/ml/experiment/experiment_tracking.py CHANGED Viewed

@@ -1,10 +1,10 @@
 import functools
 import json
 import sys
-from typing import Any, Optional, Union
+from typing import Any, Callable, Concatenate, Optional, ParamSpec, TypeVar, Union
 from urllib.parse import quote
-import snowflake.snowpark._internal.utils as snowpark_utils
+from snowflake import snowpark
 from snowflake.ml import model as ml_model, registry
 from snowflake.ml._internal.human_readable_id import hrid_generator
 from snowflake.ml._internal.utils import mixins, sql_identifier
@@ -18,20 +18,40 @@ from snowflake.ml.experiment._client import (
 )
 from snowflake.ml.model import type_hints
 from snowflake.ml.utils import sql_client as sql_client_utils
-from snowflake.snowpark import session
 DEFAULT_EXPERIMENT_NAME = sql_identifier.SqlIdentifier("DEFAULT")
+P = ParamSpec("P")
+T = TypeVar("T")
+def _restore_session(
+    func: Callable[Concatenate["ExperimentTracking", P], T],
+) -> Callable[Concatenate["ExperimentTracking", P], T]:
+    @functools.wraps(func)
+    def wrapper(self: "ExperimentTracking", /, *args: P.args, **kwargs: P.kwargs) -> T:
+        if self._session is None:
+            if self._session_state is None:
+                raise RuntimeError(
+                    f"Session is not set before calling {func.__name__}, and there is no session state to restore from"
+                )
+            self._set_session(self._session_state)
+            if self._session is None:
+                raise RuntimeError(f"Failed to restore session before calling {func.__name__}")
+        return func(self, *args, **kwargs)
+    return wrapper
 class ExperimentTracking(mixins.SerializableSessionMixin):
     """
     Class to manage experiments in Snowflake.
     """
-    @snowpark_utils.private_preview(version="1.9.1")
+    @snowpark._internal.utils.private_preview(version="1.9.1")
     def __init__(
         self,
-        session: session.Session,
+        session: snowpark.Session,
         *,
         database_name: Optional[str] = None,
         schema_name: Optional[str] = None,
@@ -73,7 +93,10 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
             database_name=self._database_name,
             schema_name=self._schema_name,
         )
-        self._session = session
+        self._session: Optional[snowpark.Session] = session
+        # Used to store information about the session if the session could not be restored during unpickling
+        # _session_state is None if and only if _session is not None
+        self._session_state: Optional[mixins._SessionState] = None
         # The experiment in context
         self._experiment: Optional[entities.Experiment] = None
@@ -87,20 +110,29 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
         state["_registry"] = None
         return state
-    def __setstate__(self, state: dict[str, Any]) -> None:
-        super().__setstate__(state)
-        # Restore unpicklable attributes
-        self._sql_client = sql_client.ExperimentTrackingSQLClient(
-            session=self._session,
-            database_name=self._database_name,
-            schema_name=self._schema_name,
-        )
-        self._registry = registry.Registry(
-            session=self._session,
-            database_name=self._database_name,
-            schema_name=self._schema_name,
-        )
+    def _set_session(self, session_state: mixins._SessionState) -> None:
+        try:
+            super()._set_session(session_state)
+            assert self._session is not None
+        except (snowpark.exceptions.SnowparkSessionException, AssertionError):
+            # If session was not set, store the session state
+            self._session = None
+            self._session_state = session_state
+        else:
+            # If session was set, clear the session state, and reinitialize the SQL client and registry
+            self._session_state = None
+            self._sql_client = sql_client.ExperimentTrackingSQLClient(
+                session=self._session,
+                database_name=self._database_name,
+                schema_name=self._schema_name,
+            )
+            self._registry = registry.Registry(
+                session=self._session,
+                database_name=self._database_name,
+                schema_name=self._schema_name,
+            )
+    @_restore_session
     def set_experiment(
         self,
         experiment_name: str,
@@ -125,6 +157,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
         self._run = None
         return self._experiment
+    @_restore_session
     def delete_experiment(
         self,
         experiment_name: str,
@@ -141,8 +174,10 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
             self._run = None
     @functools.wraps(registry.Registry.log_model)
+    @_restore_session
     def log_model(
         self,
+        /,  # self needs to be a positional argument to stop mypy from complaining
         model: Union[type_hints.SupportedModelType, ml_model.ModelVersion],
         *,
         model_name: str,
@@ -152,6 +187,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
         with experiment_info.ExperimentInfoPatcher(experiment_info=run._get_experiment_info()):
             return self._registry.log_model(model, model_name=model_name, **kwargs)
+    @_restore_session
     def start_run(
         self,
         run_name: Optional[str] = None,
@@ -181,6 +217,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
         self._run = entities.Run(experiment_tracking=self, experiment_name=experiment.name, run_name=run_name)
         return self._run
+    @_restore_session
     def end_run(self, run_name: Optional[str] = None) -> None:
         """
         End the current run if no run name is provided. Otherwise, the specified run is ended.
@@ -210,6 +247,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
             self._run = None
         self._print_urls(experiment_name=experiment_name, run_name=run_name)
+    @_restore_session
     def delete_run(
         self,
         run_name: str,
@@ -248,6 +286,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
         """
         self.log_metrics(metrics={key: value}, step=step)
+    @_restore_session
     def log_metrics(
         self,
         metrics: dict[str, float],
@@ -284,6 +323,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
         """
         self.log_params({key: value})
+    @_restore_session
     def log_params(
         self,
         params: dict[str, Any],
@@ -305,6 +345,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
             params=json.dumps([param.to_dict() for param in params_list]),
         )
+    @_restore_session
     def log_artifact(
         self,
         local_path: str,
@@ -328,6 +369,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
                 file_path=file_path,
             )
+    @_restore_session
     def list_artifacts(
         self,
         run_name: str,
@@ -356,6 +398,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
             artifact_path=artifact_path or "",
         )
+    @_restore_session
     def download_artifacts(
         self,
         run_name: str,
@@ -397,6 +440,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
             return self._run
         return self.start_run()
+    @_restore_session
     def _generate_run_name(self, experiment: entities.Experiment) -> sql_identifier.SqlIdentifier:
         generator = hrid_generator.HRID16()
         existing_runs = self._sql_client.show_runs_in_experiment(experiment_name=experiment.name)

snowflake/ml/jobs/_utils/spec_utils.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import logging
 import os
+import re
 import sys
 from math import ceil
 from pathlib import PurePath
@@ -10,6 +11,8 @@ from snowflake.ml._internal.utils import snowflake_env
 from snowflake.ml.jobs._utils import constants, feature_flags, query_helper, types
 from snowflake.ml.jobs._utils.runtime_env_utils import RuntimeEnvironmentsDict
+_OCI_TAG_REGEX = re.compile("^[a-zA-Z0-9._-]{1,128}$")
 def _get_node_resources(session: snowpark.Session, compute_pool: str) -> types.ComputeResources:
     """Extract resource information for the specified compute pool"""
@@ -56,22 +59,55 @@ def _get_runtime_image(session: snowpark.Session, target_hardware: Literal["CPU"
     return selected_runtime.runtime_container_image if selected_runtime else None
-def _get_image_spec(session: snowpark.Session, compute_pool: str) -> types.ImageSpec:
+def _check_image_tag_valid(tag: Optional[str]) -> bool:
+    if tag is None:
+        return False
+    return _OCI_TAG_REGEX.fullmatch(tag) is not None
+def _get_image_spec(
+    session: snowpark.Session, compute_pool: str, runtime_environment: Optional[str] = None
+) -> types.ImageSpec:
+    """
+    Resolve image specification (container image and resources) for the job.
+    Behavior:
+    - If `runtime_environment` is empty or the feature flag is disabled, use the
+      default image tag and image name.
+    - If `runtime_environment` is a valid image tag, use that tag with the default
+      repository/name.
+    - If `runtime_environment` is a full image URL, use it directly.
+    - If the feature flag is enabled and `runtime_environment` is not provided,
+      select an ML Runtime image matching the local Python major.minor
+    - When multiple inputs are provided, `runtime_environment` takes priority.
+    Args:
+        session: Snowflake session.
+        compute_pool: Compute pool used to infer CPU/GPU resources.
+        runtime_environment: Optional image tag or full image URL to override.
+    Returns:
+        Image spec including container image and resource requests/limits.
+    """
     # Retrieve compute pool node resources
     resources = _get_node_resources(session, compute_pool=compute_pool)
+    hardware = "GPU" if resources.gpu > 0 else "CPU"
+    image_tag = _get_runtime_image_tag()
+    image_repo = constants.DEFAULT_IMAGE_REPO
+    image_name = constants.DEFAULT_IMAGE_GPU if resources.gpu > 0 else constants.DEFAULT_IMAGE_CPU
     # Use MLRuntime image
-    hardware = "GPU" if resources.gpu > 0 else "CPU"
     container_image = None
-    if feature_flags.FeatureFlags.ENABLE_IMAGE_VERSION_ENV_VAR.is_enabled():
+    if runtime_environment:
+        if _check_image_tag_valid(runtime_environment):
+            image_tag = runtime_environment
+        else:
+            container_image = runtime_environment
+    elif feature_flags.FeatureFlags.ENABLE_IMAGE_VERSION_ENV_VAR.is_enabled():
         container_image = _get_runtime_image(session, hardware)  # type: ignore[arg-type]
-    if not container_image:
-        image_repo = constants.DEFAULT_IMAGE_REPO
-        image_name = constants.DEFAULT_IMAGE_GPU if resources.gpu > 0 else constants.DEFAULT_IMAGE_CPU
-        image_tag = _get_runtime_image_tag()
-        container_image = f"{image_repo}/{image_name}:{image_tag}"
+    container_image = container_image or f"{image_repo}/{image_name}:{image_tag}"
     # TODO: Should each instance consume the entire pod?
     return types.ImageSpec(
         resource_requests=resources,
@@ -127,6 +163,7 @@ def generate_service_spec(
     target_instances: int = 1,
     min_instances: int = 1,
     enable_metrics: bool = False,
+    runtime_environment: Optional[str] = None,
 ) -> dict[str, Any]:
     """
     Generate a service specification for a job.
@@ -139,11 +176,12 @@ def generate_service_spec(
         target_instances: Number of instances for multi-node job
         enable_metrics: Enable platform metrics for the job
         min_instances: Minimum number of instances required to start the job
+        runtime_environment: The runtime image to use. Only support image tag or full image URL.
     Returns:
         Job service specification
     """
-    image_spec = _get_image_spec(session, compute_pool)
+    image_spec = _get_image_spec(session, compute_pool, runtime_environment)
     # Set resource requests/limits, including nvidia.com/gpu quantity if applicable
     resource_requests: dict[str, Union[str, int]] = {
@@ -317,7 +355,7 @@ def merge_patch(base: Any, patch: Any, display_name: str = "") -> Any:
     Returns:
         The patched object.
     """
-    if not type(base) is type(patch):
+    if type(base) is not type(patch):
         if base is not None:
             logging.warning(f"Type mismatch while merging {display_name} (base={type(base)}, patch={type(patch)})")
         return patch

snowflake/ml/jobs/manager.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import json
 import logging
 import pathlib
+import sys
 import textwrap
 from pathlib import PurePath
 from typing import Any, Callable, Optional, TypeVar, Union, cast, overload
@@ -344,6 +345,9 @@ def submit_from_stage(
             query_warehouse (str): The query warehouse to use. Defaults to session warehouse.
             spec_overrides (dict): A dictionary of overrides for the service spec.
             imports (list[Union[tuple[str, str], tuple[str]]]): A list of additional payloads used in the job.
+            runtime_environment (str): The runtime image to use. Only support image tag or full image URL,
+                e.g. "1.7.1" or "image_repo/image_name:image_tag". When it refers to a full image URL,
+                it should contain image repository, image name and image tag.
     Returns:
         An object representing the submitted job.
@@ -409,6 +413,7 @@ def _submit_job(
         "min_instances",
         "enable_metrics",
         "query_warehouse",
+        "runtime_environment",
     ],
 )
 def _submit_job(
@@ -459,6 +464,9 @@ def _submit_job(
         )
         imports = kwargs.pop("additional_payloads")
+    if "runtime_environment" in kwargs:
+        logger.warning("'runtime_environment' is in private preview since 1.15.0, do not use it in production.")
     # Use kwargs for less common optional parameters
     database = kwargs.pop("database", None)
     schema = kwargs.pop("schema", None)
@@ -470,6 +478,7 @@ def _submit_job(
     enable_metrics = kwargs.pop("enable_metrics", True)
     query_warehouse = kwargs.pop("query_warehouse", session.get_current_warehouse())
     imports = kwargs.pop("imports", None) or imports
+    runtime_environment = kwargs.pop("runtime_environment", None)
     # Warn if there are unknown kwargs
     if kwargs:
@@ -544,6 +553,7 @@ def _submit_job(
             min_instances=min_instances,
             enable_metrics=enable_metrics,
             use_async=True,
+            runtime_environment=runtime_environment,
         )
     # Fall back to v1
@@ -556,6 +566,7 @@ def _submit_job(
         target_instances=target_instances,
         min_instances=min_instances,
         enable_metrics=enable_metrics,
+        runtime_environment=runtime_environment,
     )
     # Generate spec overrides
@@ -639,6 +650,7 @@ def _do_submit_job_v2(
     min_instances: int = 1,
     enable_metrics: bool = True,
     use_async: bool = True,
+    runtime_environment: Optional[str] = None,
 ) -> jb.MLJob[Any]:
     """
     Generate the SQL query for job submission.
@@ -657,6 +669,7 @@ def _do_submit_job_v2(
         min_instances: Minimum number of instances required to start the job.
         enable_metrics: Whether to enable platform metrics for the job.
         use_async: Whether to run the job asynchronously.
+        runtime_environment: image tag or full image URL to use for the job.
     Returns:
         The job object.
@@ -672,6 +685,13 @@ def _do_submit_job_v2(
         "ENABLE_METRICS": enable_metrics,
         "SPEC_OVERRIDES": spec_overrides,
     }
+    # for the image tag or full image URL, we use that directly
+    if runtime_environment:
+        spec_options["RUNTIME"] = runtime_environment
+    elif feature_flags.FeatureFlags.ENABLE_IMAGE_VERSION_ENV_VAR.is_enabled():
+        # when feature flag is enabled, we get the local python version and wrap it in a dict
+        # in system function, we can know whether it is python version or image tag or full image URL through the format
+        spec_options["RUNTIME"] = json.dumps({"pythonVersion": f"{sys.version_info.major}.{sys.version_info.minor}"})
     job_options = {
         "EXTERNAL_ACCESS_INTEGRATIONS": external_access_integrations,
         "QUERY_WAREHOUSE": query_warehouse,

snowflake/ml/model/__init__.py CHANGED Viewed

@@ -1,6 +1,20 @@
-from snowflake.ml.model._client.model.batch_inference_specs import JobSpec, OutputSpec
+from snowflake.ml.model._client.model.batch_inference_specs import (
+    JobSpec,
+    OutputSpec,
+    SaveMode,
+)
 from snowflake.ml.model._client.model.model_impl import Model
 from snowflake.ml.model._client.model.model_version_impl import ExportMode, ModelVersion
 from snowflake.ml.model.models.huggingface_pipeline import HuggingFacePipelineModel
+from snowflake.ml.model.volatility import Volatility
-__all__ = ["Model", "ModelVersion", "ExportMode", "HuggingFacePipelineModel", "JobSpec", "OutputSpec"]
+__all__ = [
+    "Model",
+    "ModelVersion",
+    "ExportMode",
+    "HuggingFacePipelineModel",
+    "JobSpec",
+    "OutputSpec",
+    "SaveMode",
+    "Volatility",
+]

snowflake/ml/model/_client/model/batch_inference_specs.py CHANGED Viewed

@@ -1,10 +1,26 @@
-from typing import Optional, Union
+from enum import Enum
+from typing import Optional
 from pydantic import BaseModel
+class SaveMode(str, Enum):
+    """Save mode options for batch inference output.
+    Determines the behavior when files already exist in the output location.
+    OVERWRITE: Remove existing files and write new results.
+    ERROR: Raise an error if files already exist in the output location.
+    """
+    OVERWRITE = "overwrite"
+    ERROR = "error"
 class OutputSpec(BaseModel):
     stage_location: str
+    mode: SaveMode = SaveMode.ERROR
 class JobSpec(BaseModel):
@@ -12,10 +28,10 @@ class JobSpec(BaseModel):
     job_name: Optional[str] = None
     num_workers: Optional[int] = None
     function_name: Optional[str] = None
-    gpu: Optional[Union[str, int]] = None
     force_rebuild: bool = False
     max_batch_rows: int = 1024
     warehouse: Optional[str] = None
     cpu_requests: Optional[str] = None
     memory_requests: Optional[str] = None
+    gpu_requests: Optional[str] = None
     replicas: Optional[int] = None

snowflake/ml/model/_client/model/model_version_impl.py CHANGED Viewed

@@ -551,6 +551,8 @@ class ModelVersion(lineage_node.LineageNode):
         subproject=_TELEMETRY_SUBPROJECT,
         func_params_to_log=[
             "compute_pool",
+            "output_spec",
+            "job_spec",
         ],
     )
     def _run_batch(
@@ -579,6 +581,8 @@ class ModelVersion(lineage_node.LineageNode):
             output_stage_location += "/"
         input_stage_location = f"{output_stage_location}{_BATCH_INFERENCE_TEMPORARY_FOLDER}/"
+        self._service_ops._enforce_save_mode(output_spec.mode, output_stage_location)
         try:
             input_spec.write.copy_into_location(location=input_stage_location, file_format_type="parquet", header=True)
         # todo: be specific about the type of errors to provide better error messages.
@@ -605,6 +609,7 @@ class ModelVersion(lineage_node.LineageNode):
             warehouse=sql_identifier.SqlIdentifier(warehouse),
             cpu_requests=job_spec.cpu_requests,
             memory_requests=job_spec.memory_requests,
+            gpu_requests=job_spec.gpu_requests,
             job_name=job_name,
             replicas=job_spec.replicas,
             # input and output

snowflake/ml/model/_client/ops/service_ops.py CHANGED Viewed

@@ -7,6 +7,7 @@ import re
 import tempfile
 import threading
 import time
+import warnings
 from typing import Any, Optional, Union, cast
 from snowflake import snowpark
@@ -14,6 +15,7 @@ from snowflake.ml import jobs
 from snowflake.ml._internal import file_utils, platform_capabilities as pc
 from snowflake.ml._internal.utils import identifier, service_logger, sql_identifier
 from snowflake.ml.model import inference_engine as inference_engine_module, type_hints
+from snowflake.ml.model._client.model import batch_inference_specs
 from snowflake.ml.model._client.service import model_deployment_spec
 from snowflake.ml.model._client.sql import service as service_sql, stage as stage_sql
 from snowflake.snowpark import async_job, exceptions, row, session
@@ -155,17 +157,17 @@ class ServiceOperator:
             database_name=database_name,
             schema_name=schema_name,
         )
+        self._stage_client = stage_sql.StageSQLClient(
+            session,
+            database_name=database_name,
+            schema_name=schema_name,
+        )
         self._use_inlined_deployment_spec = pc.PlatformCapabilities.get_instance().is_inlined_deployment_spec_enabled()
         if self._use_inlined_deployment_spec:
             self._workspace = None
             self._model_deployment_spec = model_deployment_spec.ModelDeploymentSpec()
         else:
             self._workspace = tempfile.TemporaryDirectory()
-            self._stage_client = stage_sql.StageSQLClient(
-                session,
-                database_name=database_name,
-                schema_name=schema_name,
-            )
             self._model_deployment_spec = model_deployment_spec.ModelDeploymentSpec(
                 workspace_path=pathlib.Path(self._workspace.name)
             )
@@ -651,6 +653,47 @@ class ServiceOperator:
             else:
                 module_logger.warning(f"Service {service.display_service_name} is done, but not transitioning.")
+    def _enforce_save_mode(self, output_mode: batch_inference_specs.SaveMode, output_stage_location: str) -> None:
+        """Enforce the save mode for the output stage location.
+        Args:
+            output_mode: The output mode
+            output_stage_location: The output stage location to check/clean.
+        Raises:
+            FileExistsError: When ERROR mode is specified and files exist in the output location.
+            RuntimeError: When operations fail (checking files or removing files).
+            ValueError: When an invalid SaveMode is specified.
+        """
+        list_results = self._stage_client.list_stage(output_stage_location)
+        if output_mode == batch_inference_specs.SaveMode.ERROR:
+            if len(list_results) > 0:
+                raise FileExistsError(
+                    f"Output stage location '{output_stage_location}' is not empty. "
+                    f"Found {len(list_results)} existing files. When using ERROR mode, the output location "
+                    f"must be empty. Please clear the existing files or use OVERWRITE mode."
+                )
+        elif output_mode == batch_inference_specs.SaveMode.OVERWRITE:
+            if len(list_results) > 0:
+                warnings.warn(
+                    f"Output stage location '{output_stage_location}' is not empty. "
+                    f"Found {len(list_results)} existing files. OVERWRITE mode will remove all existing files "
+                    f"in the output location before running the batch inference job.",
+                    stacklevel=2,
+                )
+                try:
+                    self._session.sql(f"REMOVE {output_stage_location}").collect()
+                except Exception as e:
+                    raise RuntimeError(
+                        f"OVERWRITE was specified. However, failed to remove existing files in output stage "
+                        f"{output_stage_location}: {e}. Please clear up the existing files manually and retry "
+                        f"the operation."
+                    )
+        else:
+            valid_modes = list(batch_inference_specs.SaveMode)
+            raise ValueError(f"Invalid SaveMode: {output_mode}. Must be one of {valid_modes}")
     def _stream_service_logs(
         self,
         async_job: snowpark.AsyncJob,
@@ -927,6 +970,7 @@ class ServiceOperator:
         max_batch_rows: Optional[int],
         cpu_requests: Optional[str],
         memory_requests: Optional[str],
+        gpu_requests: Optional[str],
         replicas: Optional[int],
         statement_params: Optional[dict[str, Any]] = None,
     ) -> jobs.MLJob[Any]:
@@ -961,6 +1005,7 @@ class ServiceOperator:
             warehouse=warehouse,
             cpu=cpu_requests,
             memory=memory_requests,
+            gpu=gpu_requests,
             replicas=replicas,
         )

snowflake/ml/model/_client/service/model_deployment_spec.py CHANGED Viewed

@@ -204,7 +204,7 @@ class ModelDeploymentSpec:
         job_schema_name: Optional[sql_identifier.SqlIdentifier] = None,
         cpu: Optional[str] = None,
         memory: Optional[str] = None,
-        gpu: Optional[Union[str, int]] = None,
+        gpu: Optional[str] = None,
         num_workers: Optional[int] = None,
         max_batch_rows: Optional[int] = None,
         replicas: Optional[int] = None,

snowflake-ml-python 1.15.0__py3-none-any.whl → 1.16.0__py3-none-any.whl

snowflake-ml-python 1.15.0__py3-none-any.whl → 1.16.0__py3-none-any.whl