snowflake-ml-python 1.14.0__py3-none-any.whl → 1.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. snowflake/ml/_internal/platform_capabilities.py +13 -7
  2. snowflake/ml/_internal/utils/connection_params.py +5 -3
  3. snowflake/ml/_internal/utils/jwt_generator.py +3 -2
  4. snowflake/ml/_internal/utils/mixins.py +24 -9
  5. snowflake/ml/_internal/utils/temp_file_utils.py +1 -2
  6. snowflake/ml/experiment/_client/experiment_tracking_sql_client.py +16 -3
  7. snowflake/ml/experiment/_entities/__init__.py +2 -1
  8. snowflake/ml/experiment/_entities/run.py +0 -15
  9. snowflake/ml/experiment/_entities/run_metadata.py +3 -51
  10. snowflake/ml/experiment/experiment_tracking.py +71 -27
  11. snowflake/ml/jobs/_utils/spec_utils.py +49 -11
  12. snowflake/ml/jobs/manager.py +20 -0
  13. snowflake/ml/model/__init__.py +12 -2
  14. snowflake/ml/model/_client/model/batch_inference_specs.py +16 -4
  15. snowflake/ml/model/_client/model/inference_engine_utils.py +55 -0
  16. snowflake/ml/model/_client/model/model_version_impl.py +30 -62
  17. snowflake/ml/model/_client/ops/service_ops.py +68 -7
  18. snowflake/ml/model/_client/service/model_deployment_spec.py +1 -1
  19. snowflake/ml/model/_client/sql/service.py +29 -2
  20. snowflake/ml/model/_client/sql/stage.py +8 -0
  21. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
  22. snowflake/ml/model/_model_composer/model_method/model_method.py +25 -2
  23. snowflake/ml/model/_packager/model_env/model_env.py +26 -16
  24. snowflake/ml/model/_packager/model_handlers/_utils.py +4 -2
  25. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +7 -5
  26. snowflake/ml/model/_packager/model_packager.py +4 -3
  27. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -2
  28. snowflake/ml/model/_signatures/utils.py +0 -21
  29. snowflake/ml/model/models/huggingface_pipeline.py +56 -21
  30. snowflake/ml/model/type_hints.py +13 -0
  31. snowflake/ml/model/volatility.py +34 -0
  32. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -1
  33. snowflake/ml/modeling/cluster/affinity_propagation.py +1 -1
  34. snowflake/ml/modeling/cluster/agglomerative_clustering.py +1 -1
  35. snowflake/ml/modeling/cluster/birch.py +1 -1
  36. snowflake/ml/modeling/cluster/bisecting_k_means.py +1 -1
  37. snowflake/ml/modeling/cluster/dbscan.py +1 -1
  38. snowflake/ml/modeling/cluster/feature_agglomeration.py +1 -1
  39. snowflake/ml/modeling/cluster/k_means.py +1 -1
  40. snowflake/ml/modeling/cluster/mean_shift.py +1 -1
  41. snowflake/ml/modeling/cluster/mini_batch_k_means.py +1 -1
  42. snowflake/ml/modeling/cluster/optics.py +1 -1
  43. snowflake/ml/modeling/cluster/spectral_biclustering.py +1 -1
  44. snowflake/ml/modeling/cluster/spectral_clustering.py +1 -1
  45. snowflake/ml/modeling/cluster/spectral_coclustering.py +1 -1
  46. snowflake/ml/modeling/compose/column_transformer.py +1 -1
  47. snowflake/ml/modeling/compose/transformed_target_regressor.py +1 -1
  48. snowflake/ml/modeling/covariance/elliptic_envelope.py +1 -1
  49. snowflake/ml/modeling/covariance/empirical_covariance.py +1 -1
  50. snowflake/ml/modeling/covariance/graphical_lasso.py +1 -1
  51. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +1 -1
  52. snowflake/ml/modeling/covariance/ledoit_wolf.py +1 -1
  53. snowflake/ml/modeling/covariance/min_cov_det.py +1 -1
  54. snowflake/ml/modeling/covariance/oas.py +1 -1
  55. snowflake/ml/modeling/covariance/shrunk_covariance.py +1 -1
  56. snowflake/ml/modeling/decomposition/dictionary_learning.py +1 -1
  57. snowflake/ml/modeling/decomposition/factor_analysis.py +1 -1
  58. snowflake/ml/modeling/decomposition/fast_ica.py +1 -1
  59. snowflake/ml/modeling/decomposition/incremental_pca.py +1 -1
  60. snowflake/ml/modeling/decomposition/kernel_pca.py +1 -1
  61. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +1 -1
  62. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +1 -1
  63. snowflake/ml/modeling/decomposition/pca.py +1 -1
  64. snowflake/ml/modeling/decomposition/sparse_pca.py +1 -1
  65. snowflake/ml/modeling/decomposition/truncated_svd.py +1 -1
  66. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +1 -1
  67. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +1 -1
  68. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -1
  69. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -1
  70. snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -1
  71. snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -1
  72. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +1 -1
  73. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +1 -1
  74. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +1 -1
  75. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +1 -1
  76. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +1 -1
  77. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +1 -1
  78. snowflake/ml/modeling/ensemble/isolation_forest.py +1 -1
  79. snowflake/ml/modeling/ensemble/random_forest_classifier.py +1 -1
  80. snowflake/ml/modeling/ensemble/random_forest_regressor.py +1 -1
  81. snowflake/ml/modeling/ensemble/stacking_regressor.py +1 -1
  82. snowflake/ml/modeling/ensemble/voting_classifier.py +1 -1
  83. snowflake/ml/modeling/ensemble/voting_regressor.py +1 -1
  84. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +1 -1
  85. snowflake/ml/modeling/feature_selection/select_fdr.py +1 -1
  86. snowflake/ml/modeling/feature_selection/select_fpr.py +1 -1
  87. snowflake/ml/modeling/feature_selection/select_fwe.py +1 -1
  88. snowflake/ml/modeling/feature_selection/select_k_best.py +1 -1
  89. snowflake/ml/modeling/feature_selection/select_percentile.py +1 -1
  90. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +1 -1
  91. snowflake/ml/modeling/feature_selection/variance_threshold.py +1 -1
  92. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +1 -1
  93. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +1 -1
  94. snowflake/ml/modeling/impute/iterative_imputer.py +1 -1
  95. snowflake/ml/modeling/impute/knn_imputer.py +1 -1
  96. snowflake/ml/modeling/impute/missing_indicator.py +1 -1
  97. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +1 -1
  98. snowflake/ml/modeling/kernel_approximation/nystroem.py +1 -1
  99. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +1 -1
  100. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +1 -1
  101. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +1 -1
  102. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +1 -1
  103. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +1 -1
  104. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +1 -1
  105. snowflake/ml/modeling/linear_model/ard_regression.py +1 -1
  106. snowflake/ml/modeling/linear_model/bayesian_ridge.py +1 -1
  107. snowflake/ml/modeling/linear_model/elastic_net.py +1 -1
  108. snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
  109. snowflake/ml/modeling/linear_model/gamma_regressor.py +1 -1
  110. snowflake/ml/modeling/linear_model/huber_regressor.py +1 -1
  111. snowflake/ml/modeling/linear_model/lars.py +1 -1
  112. snowflake/ml/modeling/linear_model/lars_cv.py +1 -1
  113. snowflake/ml/modeling/linear_model/lasso.py +1 -1
  114. snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
  115. snowflake/ml/modeling/linear_model/lasso_lars.py +1 -1
  116. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -1
  117. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +1 -1
  118. snowflake/ml/modeling/linear_model/linear_regression.py +1 -1
  119. snowflake/ml/modeling/linear_model/logistic_regression.py +1 -1
  120. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +1 -1
  121. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +1 -1
  122. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
  123. snowflake/ml/modeling/linear_model/multi_task_lasso.py +1 -1
  124. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
  125. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +1 -1
  126. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +1 -1
  127. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
  128. snowflake/ml/modeling/linear_model/perceptron.py +1 -1
  129. snowflake/ml/modeling/linear_model/poisson_regressor.py +1 -1
  130. snowflake/ml/modeling/linear_model/ransac_regressor.py +1 -1
  131. snowflake/ml/modeling/linear_model/ridge.py +1 -1
  132. snowflake/ml/modeling/linear_model/ridge_classifier.py +1 -1
  133. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +1 -1
  134. snowflake/ml/modeling/linear_model/ridge_cv.py +1 -1
  135. snowflake/ml/modeling/linear_model/sgd_classifier.py +1 -1
  136. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +1 -1
  137. snowflake/ml/modeling/linear_model/sgd_regressor.py +1 -1
  138. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +1 -1
  139. snowflake/ml/modeling/linear_model/tweedie_regressor.py +1 -1
  140. snowflake/ml/modeling/manifold/isomap.py +1 -1
  141. snowflake/ml/modeling/manifold/mds.py +1 -1
  142. snowflake/ml/modeling/manifold/spectral_embedding.py +1 -1
  143. snowflake/ml/modeling/manifold/tsne.py +1 -1
  144. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +1 -1
  145. snowflake/ml/modeling/mixture/gaussian_mixture.py +1 -1
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +1 -1
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +1 -1
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +1 -1
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +1 -1
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +1 -1
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +1 -1
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +1 -1
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +1 -1
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +1 -1
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +1 -1
  156. snowflake/ml/modeling/neighbors/kernel_density.py +1 -1
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +1 -1
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +1 -1
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +1 -1
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +1 -1
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +1 -1
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +1 -1
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +1 -1
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +1 -1
  166. snowflake/ml/modeling/preprocessing/polynomial_features.py +1 -1
  167. snowflake/ml/modeling/semi_supervised/label_propagation.py +1 -1
  168. snowflake/ml/modeling/semi_supervised/label_spreading.py +1 -1
  169. snowflake/ml/modeling/svm/linear_svc.py +1 -1
  170. snowflake/ml/modeling/svm/linear_svr.py +1 -1
  171. snowflake/ml/modeling/svm/nu_svc.py +1 -1
  172. snowflake/ml/modeling/svm/nu_svr.py +1 -1
  173. snowflake/ml/modeling/svm/svc.py +1 -1
  174. snowflake/ml/modeling/svm/svr.py +1 -1
  175. snowflake/ml/modeling/tree/decision_tree_classifier.py +1 -1
  176. snowflake/ml/modeling/tree/decision_tree_regressor.py +1 -1
  177. snowflake/ml/modeling/tree/extra_tree_classifier.py +1 -1
  178. snowflake/ml/modeling/tree/extra_tree_regressor.py +1 -1
  179. snowflake/ml/modeling/xgboost/xgb_classifier.py +1 -1
  180. snowflake/ml/modeling/xgboost/xgb_regressor.py +1 -1
  181. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +1 -1
  182. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +1 -1
  183. snowflake/ml/registry/_manager/model_manager.py +2 -1
  184. snowflake/ml/registry/_manager/model_parameter_reconciler.py +29 -2
  185. snowflake/ml/registry/registry.py +15 -0
  186. snowflake/ml/utils/authentication.py +16 -0
  187. snowflake/ml/utils/connection_params.py +5 -3
  188. snowflake/ml/version.py +1 -1
  189. {snowflake_ml_python-1.14.0.dist-info → snowflake_ml_python-1.16.0.dist-info}/METADATA +81 -36
  190. {snowflake_ml_python-1.14.0.dist-info → snowflake_ml_python-1.16.0.dist-info}/RECORD +193 -191
  191. {snowflake_ml_python-1.14.0.dist-info → snowflake_ml_python-1.16.0.dist-info}/WHEEL +0 -0
  192. {snowflake_ml_python-1.14.0.dist-info → snowflake_ml_python-1.16.0.dist-info}/licenses/LICENSE.txt +0 -0
  193. {snowflake_ml_python-1.14.0.dist-info → snowflake_ml_python-1.16.0.dist-info}/top_level.txt +0 -0

snowflake/ml/jobs/_utils/spec_utils.py

@@ -1,5 +1,6 @@
  import logging
  import os
+ import re
  import sys
  from math import ceil
  from pathlib import PurePath
@@ -10,6 +11,8 @@ from snowflake.ml._internal.utils import snowflake_env
  from snowflake.ml.jobs._utils import constants, feature_flags, query_helper, types
  from snowflake.ml.jobs._utils.runtime_env_utils import RuntimeEnvironmentsDict

+ _OCI_TAG_REGEX = re.compile("^[a-zA-Z0-9._-]{1,128}$")
+

  def _get_node_resources(session: snowpark.Session, compute_pool: str) -> types.ComputeResources:
      """Extract resource information for the specified compute pool"""
@@ -56,22 +59,55 @@ def _get_runtime_image(session: snowpark.Session, target_hardware: Literal["CPU"
      return selected_runtime.runtime_container_image if selected_runtime else None


- def _get_image_spec(session: snowpark.Session, compute_pool: str) -> types.ImageSpec:
+ def _check_image_tag_valid(tag: Optional[str]) -> bool:
+     if tag is None:
+         return False
+
+     return _OCI_TAG_REGEX.fullmatch(tag) is not None
+
+
+ def _get_image_spec(
+     session: snowpark.Session, compute_pool: str, runtime_environment: Optional[str] = None
+ ) -> types.ImageSpec:
+     """
+     Resolve image specification (container image and resources) for the job.
+
+     Behavior:
+     - If `runtime_environment` is empty or the feature flag is disabled, use the
+       default image tag and image name.
+     - If `runtime_environment` is a valid image tag, use that tag with the default
+       repository/name.
+     - If `runtime_environment` is a full image URL, use it directly.
+     - If the feature flag is enabled and `runtime_environment` is not provided,
+       select an ML Runtime image matching the local Python major.minor
+     - When multiple inputs are provided, `runtime_environment` takes priority.
+
+     Args:
+         session: Snowflake session.
+         compute_pool: Compute pool used to infer CPU/GPU resources.
+         runtime_environment: Optional image tag or full image URL to override.
+
+     Returns:
+         Image spec including container image and resource requests/limits.
+     """
      # Retrieve compute pool node resources
      resources = _get_node_resources(session, compute_pool=compute_pool)
+     hardware = "GPU" if resources.gpu > 0 else "CPU"
+     image_tag = _get_runtime_image_tag()
+     image_repo = constants.DEFAULT_IMAGE_REPO
+     image_name = constants.DEFAULT_IMAGE_GPU if resources.gpu > 0 else constants.DEFAULT_IMAGE_CPU

      # Use MLRuntime image
-     hardware = "GPU" if resources.gpu > 0 else "CPU"
      container_image = None
-     if feature_flags.FeatureFlags.ENABLE_IMAGE_VERSION_ENV_VAR.is_enabled():
+     if runtime_environment:
+         if _check_image_tag_valid(runtime_environment):
+             image_tag = runtime_environment
+         else:
+             container_image = runtime_environment
+     elif feature_flags.FeatureFlags.ENABLE_IMAGE_VERSION_ENV_VAR.is_enabled():
          container_image = _get_runtime_image(session, hardware)  # type: ignore[arg-type]

-     if not container_image:
-         image_repo = constants.DEFAULT_IMAGE_REPO
-         image_name = constants.DEFAULT_IMAGE_GPU if resources.gpu > 0 else constants.DEFAULT_IMAGE_CPU
-         image_tag = _get_runtime_image_tag()
-         container_image = f"{image_repo}/{image_name}:{image_tag}"
-
+     container_image = container_image or f"{image_repo}/{image_name}:{image_tag}"
      # TODO: Should each instance consume the entire pod?
      return types.ImageSpec(
          resource_requests=resources,
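
For orientation, a minimal standalone sketch of the new tag-vs-URL decision, reusing the `_OCI_TAG_REGEX` pattern added above (the sample values are hypothetical):

    import re
    from typing import Optional

    _OCI_TAG_REGEX = re.compile("^[a-zA-Z0-9._-]{1,128}$")

    def check_image_tag_valid(tag: Optional[str]) -> bool:
        # Mirrors _check_image_tag_valid above: a bare OCI tag never contains "/" or ":".
        if tag is None:
            return False
        return _OCI_TAG_REGEX.fullmatch(tag) is not None

    print(check_image_tag_valid("1.7.1"))               # True  -> used as the image tag
    print(check_image_tag_valid("repo/runtime:1.7.1"))  # False -> used as a full image URL
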
@@ -127,6 +163,7 @@ def generate_service_spec(
      target_instances: int = 1,
      min_instances: int = 1,
      enable_metrics: bool = False,
+     runtime_environment: Optional[str] = None,
  ) -> dict[str, Any]:
      """
      Generate a service specification for a job.
@@ -139,11 +176,12 @@
          target_instances: Number of instances for multi-node job
          enable_metrics: Enable platform metrics for the job
          min_instances: Minimum number of instances required to start the job
+         runtime_environment: The runtime image to use. Only support image tag or full image URL.

      Returns:
          Job service specification
      """
-     image_spec = _get_image_spec(session, compute_pool)
+     image_spec = _get_image_spec(session, compute_pool, runtime_environment)

      # Set resource requests/limits, including nvidia.com/gpu quantity if applicable
      resource_requests: dict[str, Union[str, int]] = {
@@ -317,7 +355,7 @@ def merge_patch(base: Any, patch: Any, display_name: str = "") -> Any:
      Returns:
          The patched object.
      """
-     if not type(base) is type(patch):
+     if type(base) is not type(patch):
          if base is not None:
              logging.warning(f"Type mismatch while merging {display_name} (base={type(base)}, patch={type(patch)})")
          return patch

snowflake/ml/jobs/manager.py

@@ -1,6 +1,7 @@
  import json
  import logging
  import pathlib
+ import sys
  import textwrap
  from pathlib import PurePath
  from typing import Any, Callable, Optional, TypeVar, Union, cast, overload
@@ -344,6 +345,9 @@ def submit_from_stage(
          query_warehouse (str): The query warehouse to use. Defaults to session warehouse.
          spec_overrides (dict): A dictionary of overrides for the service spec.
          imports (list[Union[tuple[str, str], tuple[str]]]): A list of additional payloads used in the job.
+         runtime_environment (str): The runtime image to use. Only support image tag or full image URL,
+             e.g. "1.7.1" or "image_repo/image_name:image_tag". When it refers to a full image URL,
+             it should contain image repository, image name and image tag.

      Returns:
          An object representing the submitted job.
@@ -409,6 +413,7 @@ def _submit_job(
          "min_instances",
          "enable_metrics",
          "query_warehouse",
+         "runtime_environment",
      ],
  )
  def _submit_job(
@@ -459,6 +464,9 @@ def _submit_job(
      )
      imports = kwargs.pop("additional_payloads")

+     if "runtime_environment" in kwargs:
+         logger.warning("'runtime_environment' is in private preview since 1.15.0, do not use it in production.")
+
      # Use kwargs for less common optional parameters
      database = kwargs.pop("database", None)
      schema = kwargs.pop("schema", None)
@@ -470,6 +478,7 @@ def _submit_job(
      enable_metrics = kwargs.pop("enable_metrics", True)
      query_warehouse = kwargs.pop("query_warehouse", session.get_current_warehouse())
      imports = kwargs.pop("imports", None) or imports
+     runtime_environment = kwargs.pop("runtime_environment", None)

      # Warn if there are unknown kwargs
      if kwargs:
@@ -544,6 +553,7 @@ def _submit_job(
              min_instances=min_instances,
              enable_metrics=enable_metrics,
              use_async=True,
+             runtime_environment=runtime_environment,
          )

      # Fall back to v1
@@ -556,6 +566,7 @@
          target_instances=target_instances,
          min_instances=min_instances,
          enable_metrics=enable_metrics,
+         runtime_environment=runtime_environment,
      )

      # Generate spec overrides
@@ -639,6 +650,7 @@ def _do_submit_job_v2(
      min_instances: int = 1,
      enable_metrics: bool = True,
      use_async: bool = True,
+     runtime_environment: Optional[str] = None,
  ) -> jb.MLJob[Any]:
      """
      Generate the SQL query for job submission.
@@ -657,6 +669,7 @@ def _do_submit_job_v2(
          min_instances: Minimum number of instances required to start the job.
          enable_metrics: Whether to enable platform metrics for the job.
          use_async: Whether to run the job asynchronously.
+         runtime_environment: image tag or full image URL to use for the job.

      Returns:
          The job object.
@@ -672,6 +685,13 @@ def _do_submit_job_v2(
          "ENABLE_METRICS": enable_metrics,
          "SPEC_OVERRIDES": spec_overrides,
      }
+     # for the image tag or full image URL, we use that directly
+     if runtime_environment:
+         spec_options["RUNTIME"] = runtime_environment
+     elif feature_flags.FeatureFlags.ENABLE_IMAGE_VERSION_ENV_VAR.is_enabled():
+         # when feature flag is enabled, we get the local python version and wrap it in a dict
+         # in system function, we can know whether it is python version or image tag or full image URL through the format
+         spec_options["RUNTIME"] = json.dumps({"pythonVersion": f"{sys.version_info.major}.{sys.version_info.minor}"})
      job_options = {
          "EXTERNAL_ACCESS_INTEGRATIONS": external_access_integrations,
          "QUERY_WAREHOUSE": query_warehouse,

snowflake/ml/model/__init__.py

@@ -1,10 +1,20 @@
  from snowflake.ml.model._client.model.batch_inference_specs import (
-     InputSpec,
      JobSpec,
      OutputSpec,
+     SaveMode,
  )
  from snowflake.ml.model._client.model.model_impl import Model
  from snowflake.ml.model._client.model.model_version_impl import ExportMode, ModelVersion
  from snowflake.ml.model.models.huggingface_pipeline import HuggingFacePipelineModel
+ from snowflake.ml.model.volatility import Volatility

- __all__ = ["Model", "ModelVersion", "ExportMode", "HuggingFacePipelineModel", "InputSpec", "JobSpec", "OutputSpec"]
+ __all__ = [
+     "Model",
+     "ModelVersion",
+     "ExportMode",
+     "HuggingFacePipelineModel",
+     "JobSpec",
+     "OutputSpec",
+     "SaveMode",
+     "Volatility",
+ ]
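
In other words, InputSpec is no longer exported (batch inference now takes a Snowpark DataFrame directly, as the model_version_impl.py hunks further below show), while SaveMode and Volatility become public. Assuming snowflake-ml-python >= 1.16.0 is installed, the new surface imports as:

    from snowflake.ml.model import JobSpec, OutputSpec, SaveMode, Volatility
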

snowflake/ml/model/_client/model/batch_inference_specs.py

@@ -1,14 +1,26 @@
- from typing import Optional, Union
+ from enum import Enum
+ from typing import Optional

  from pydantic import BaseModel


- class InputSpec(BaseModel):
-     stage_location: str
+ class SaveMode(str, Enum):
+     """Save mode options for batch inference output.
+
+     Determines the behavior when files already exist in the output location.
+
+     OVERWRITE: Remove existing files and write new results.
+
+     ERROR: Raise an error if files already exist in the output location.
+     """
+
+     OVERWRITE = "overwrite"
+     ERROR = "error"


  class OutputSpec(BaseModel):
      stage_location: str
+     mode: SaveMode = SaveMode.ERROR


  class JobSpec(BaseModel):
@@ -16,10 +28,10 @@ class JobSpec(BaseModel):
      job_name: Optional[str] = None
      num_workers: Optional[int] = None
      function_name: Optional[str] = None
-     gpu: Optional[Union[str, int]] = None
      force_rebuild: bool = False
      max_batch_rows: int = 1024
      warehouse: Optional[str] = None
      cpu_requests: Optional[str] = None
      memory_requests: Optional[str] = None
+     gpu_requests: Optional[str] = None
      replicas: Optional[int] = None
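
JobSpec's loosely typed gpu field is replaced by the string-typed gpu_requests, and OutputSpec gains a SaveMode. A hedged construction example (the stage path is a placeholder; JobSpec fields not shown in this hunk are left untouched):

    from snowflake.ml.model import OutputSpec, SaveMode

    # OVERWRITE clears any existing files under the output stage before writing;
    # the default, SaveMode.ERROR, refuses to run if the location is not empty.
    output_spec = OutputSpec(
        stage_location="@my_db.my_schema.my_stage/batch_out/",  # placeholder stage path
        mode=SaveMode.OVERWRITE,
    )
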

snowflake/ml/model/_client/model/inference_engine_utils.py (new file)

@@ -0,0 +1,55 @@
+ from typing import Any, Optional, Union
+
+ from snowflake.ml.model._client.ops import service_ops
+
+
+ def _get_inference_engine_args(
+     experimental_options: Optional[dict[str, Any]],
+ ) -> Optional[service_ops.InferenceEngineArgs]:
+
+     if not experimental_options:
+         return None
+
+     if "inference_engine" not in experimental_options:
+         raise ValueError("inference_engine is required in experimental_options")
+
+     return service_ops.InferenceEngineArgs(
+         inference_engine=experimental_options["inference_engine"],
+         inference_engine_args_override=experimental_options.get("inference_engine_args_override"),
+     )
+
+
+ def _enrich_inference_engine_args(
+     inference_engine_args: service_ops.InferenceEngineArgs,
+     gpu_requests: Optional[Union[str, int]] = None,
+ ) -> Optional[service_ops.InferenceEngineArgs]:
+     """Enrich inference engine args with model path and tensor parallelism settings.
+
+     Args:
+         inference_engine_args: The original inference engine args
+         gpu_requests: The number of GPUs requested
+
+     Returns:
+         Enriched inference engine args
+
+     Raises:
+         ValueError: Invalid gpu_requests
+     """
+     if inference_engine_args.inference_engine_args_override is None:
+         inference_engine_args.inference_engine_args_override = []
+
+     gpu_count = None
+
+     # Set tensor-parallelism if gpu_requests is specified
+     if gpu_requests is not None:
+         # assert gpu_requests is a string or an integer before casting to int
+         try:
+             gpu_count = int(gpu_requests)
+             if gpu_count > 0:
+                 inference_engine_args.inference_engine_args_override.append(f"--tensor-parallel-size={gpu_count}")
+             else:
+                 raise ValueError(f"GPU count must be greater than 0, got {gpu_count}")
+         except ValueError:
+             raise ValueError(f"Invalid gpu_requests: {gpu_requests} with type {type(gpu_requests).__name__}")
+
+     return inference_engine_args
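
A hypothetical walk-through of the two helpers in this new module (the "vllm" engine name is illustrative only, not a value mandated by the module):

    from snowflake.ml.model._client.model import inference_engine_utils

    # Build the args from the caller-facing experimental_options dict.
    args = inference_engine_utils._get_inference_engine_args({"inference_engine": "vllm"})

    # With gpu_requests set, a tensor-parallelism flag is appended to the overrides.
    args = inference_engine_utils._enrich_inference_engine_args(args, gpu_requests=4)
    print(args.inference_engine_args_override)  # ['--tensor-parallel-size=4']

    # gpu_requests=0 or a non-numeric string both raise ValueError.
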

snowflake/ml/model/_client/model/model_version_impl.py

@@ -12,7 +12,10 @@ from snowflake.ml._internal import telemetry
  from snowflake.ml._internal.utils import sql_identifier
  from snowflake.ml.lineage import lineage_node
  from snowflake.ml.model import task, type_hints
- from snowflake.ml.model._client.model import batch_inference_specs
+ from snowflake.ml.model._client.model import (
+     batch_inference_specs,
+     inference_engine_utils,
+ )
  from snowflake.ml.model._client.ops import metadata_ops, model_ops, service_ops
  from snowflake.ml.model._model_composer import model_composer
  from snowflake.ml.model._model_composer.model_manifest import model_manifest_schema
@@ -22,6 +25,7 @@ from snowflake.snowpark import Session, async_job, dataframe
  _TELEMETRY_PROJECT = "MLOps"
  _TELEMETRY_SUBPROJECT = "ModelManagement"
  _BATCH_INFERENCE_JOB_ID_PREFIX = "BATCH_INFERENCE_"
+ _BATCH_INFERENCE_TEMPORARY_FOLDER = "_temporary"


  class ExportMode(enum.Enum):
@@ -547,13 +551,15 @@ class ModelVersion(lineage_node.LineageNode):
          subproject=_TELEMETRY_SUBPROJECT,
          func_params_to_log=[
              "compute_pool",
+             "output_spec",
+             "job_spec",
          ],
      )
      def _run_batch(
          self,
          *,
          compute_pool: str,
-         input_spec: batch_inference_specs.InputSpec,
+         input_spec: dataframe.DataFrame,
          output_spec: batch_inference_specs.OutputSpec,
          job_spec: Optional[batch_inference_specs.JobSpec] = None,
      ) -> jobs.MLJob[Any]:
@@ -569,6 +575,20 @@ class ModelVersion(lineage_node.LineageNode):
          if warehouse is None:
              raise ValueError("Warehouse is not set. Please set the warehouse field in the JobSpec.")

+         # use a temporary folder in the output stage to store the intermediate output from the dataframe
+         output_stage_location = output_spec.stage_location
+         if not output_stage_location.endswith("/"):
+             output_stage_location += "/"
+         input_stage_location = f"{output_stage_location}{_BATCH_INFERENCE_TEMPORARY_FOLDER}/"
+
+         self._service_ops._enforce_save_mode(output_spec.mode, output_stage_location)
+
+         try:
+             input_spec.write.copy_into_location(location=input_stage_location, file_format_type="parquet", header=True)
+         # todo: be specific about the type of errors to provide better error messages.
+         except Exception as e:
+             raise RuntimeError(f"Failed to process input_spec: {e}")
+
          if job_spec.job_name is None:
              # Same as the MLJob ID generation logic with a different prefix
              job_name = f"{_BATCH_INFERENCE_JOB_ID_PREFIX}{str(uuid.uuid4()).replace('-', '_').upper()}"
@@ -589,12 +609,13 @@ class ModelVersion(lineage_node.LineageNode):
              warehouse=sql_identifier.SqlIdentifier(warehouse),
              cpu_requests=job_spec.cpu_requests,
              memory_requests=job_spec.memory_requests,
+             gpu_requests=job_spec.gpu_requests,
              job_name=job_name,
              replicas=job_spec.replicas,
              # input and output
-             input_stage_location=input_spec.stage_location,
+             input_stage_location=input_stage_location,
              input_file_pattern="*",
-             output_stage_location=output_spec.stage_location,
+             output_stage_location=output_stage_location,
              completion_filename="_SUCCESS",
              # misc
              statement_params=statement_params,
@@ -768,60 +789,6 @@ class ModelVersion(lineage_node.LineageNode):
              version_name=sql_identifier.SqlIdentifier(version),
          )

-     def _get_inference_engine_args(
-         self, experimental_options: Optional[dict[str, Any]]
-     ) -> Optional[service_ops.InferenceEngineArgs]:
-
-         if not experimental_options:
-             return None
-
-         if "inference_engine" not in experimental_options:
-             raise ValueError("inference_engine is required in experimental_options")
-
-         return service_ops.InferenceEngineArgs(
-             inference_engine=experimental_options["inference_engine"],
-             inference_engine_args_override=experimental_options.get("inference_engine_args_override"),
-         )
-
-     def _enrich_inference_engine_args(
-         self,
-         inference_engine_args: service_ops.InferenceEngineArgs,
-         gpu_requests: Optional[Union[str, int]] = None,
-     ) -> Optional[service_ops.InferenceEngineArgs]:
-         """Enrich inference engine args with tensor parallelism settings.
-
-         Args:
-             inference_engine_args: The original inference engine args
-             gpu_requests: The number of GPUs requested
-
-         Returns:
-             Enriched inference engine args
-
-         Raises:
-             ValueError: Invalid gpu_requests
-         """
-         if inference_engine_args.inference_engine_args_override is None:
-             inference_engine_args.inference_engine_args_override = []
-
-         gpu_count = None
-
-         # Set tensor-parallelism if gpu_requests is specified
-         if gpu_requests is not None:
-             # assert gpu_requests is a string or an integer before casting to int
-             if isinstance(gpu_requests, str) or isinstance(gpu_requests, int):
-                 try:
-                     gpu_count = int(gpu_requests)
-                 except ValueError:
-                     raise ValueError(f"Invalid gpu_requests: {gpu_requests}")
-
-             if gpu_count is not None:
-                 if gpu_count > 0:
-                     inference_engine_args.inference_engine_args_override.append(f"--tensor-parallel-size={gpu_count}")
-                 else:
-                     raise ValueError(f"Invalid gpu_requests: {gpu_requests}")
-
-         return inference_engine_args
-
      def _check_huggingface_text_generation_model(
          self,
          statement_params: Optional[dict[str, Any]] = None,
@@ -1101,13 +1068,14 @@ class ModelVersion(lineage_node.LineageNode):
          if experimental_options:
              self._check_huggingface_text_generation_model(statement_params)

-             inference_engine_args: Optional[service_ops.InferenceEngineArgs] = self._get_inference_engine_args(
-                 experimental_options
-             )
+             inference_engine_args = inference_engine_utils._get_inference_engine_args(experimental_options)

              # Enrich inference engine args if inference engine is specified
              if inference_engine_args is not None:
-                 inference_engine_args = self._enrich_inference_engine_args(inference_engine_args, gpu_requests)
+                 inference_engine_args = inference_engine_utils._enrich_inference_engine_args(
+                     inference_engine_args,
+                     gpu_requests,
+                 )

          from snowflake.ml.model import event_handler
          from snowflake.snowpark import exceptions
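
A standalone restatement of the staging logic added to _run_batch above: the input DataFrame is written as parquet into a "_temporary/" folder inside the caller's output stage, and that folder becomes the job's input location (the stage path below is a placeholder):

    _BATCH_INFERENCE_TEMPORARY_FOLDER = "_temporary"

    def derive_stage_locations(output_stage_location: str) -> tuple[str, str]:
        # Normalize the output path and carve out the temporary input folder under it.
        if not output_stage_location.endswith("/"):
            output_stage_location += "/"
        input_stage_location = f"{output_stage_location}{_BATCH_INFERENCE_TEMPORARY_FOLDER}/"
        return input_stage_location, output_stage_location

    print(derive_stage_locations("@my_stage/results"))
    # ('@my_stage/results/_temporary/', '@my_stage/results/')
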

snowflake/ml/model/_client/ops/service_ops.py

@@ -7,6 +7,7 @@ import re
  import tempfile
  import threading
  import time
+ import warnings
  from typing import Any, Optional, Union, cast

  from snowflake import snowpark
@@ -14,6 +15,7 @@ from snowflake.ml import jobs
  from snowflake.ml._internal import file_utils, platform_capabilities as pc
  from snowflake.ml._internal.utils import identifier, service_logger, sql_identifier
  from snowflake.ml.model import inference_engine as inference_engine_module, type_hints
+ from snowflake.ml.model._client.model import batch_inference_specs
  from snowflake.ml.model._client.service import model_deployment_spec
  from snowflake.ml.model._client.sql import service as service_sql, stage as stage_sql
  from snowflake.snowpark import async_job, exceptions, row, session
@@ -155,16 +157,17 @@ class ServiceOperator:
              database_name=database_name,
              schema_name=schema_name,
          )
-         if pc.PlatformCapabilities.get_instance().is_inlined_deployment_spec_enabled():
+         self._stage_client = stage_sql.StageSQLClient(
+             session,
+             database_name=database_name,
+             schema_name=schema_name,
+         )
+         self._use_inlined_deployment_spec = pc.PlatformCapabilities.get_instance().is_inlined_deployment_spec_enabled()
+         if self._use_inlined_deployment_spec:
              self._workspace = None
              self._model_deployment_spec = model_deployment_spec.ModelDeploymentSpec()
          else:
              self._workspace = tempfile.TemporaryDirectory()
-             self._stage_client = stage_sql.StageSQLClient(
-                 session,
-                 database_name=database_name,
-                 schema_name=schema_name,
-             )
              self._model_deployment_spec = model_deployment_spec.ModelDeploymentSpec(
                  workspace_path=pathlib.Path(self._workspace.name)
              )
@@ -264,7 +267,14 @@
              self._model_deployment_spec.add_hf_logger_spec(
                  hf_model_name=hf_model_args.hf_model_name,
                  hf_task=hf_model_args.hf_task,
-                 hf_token=hf_model_args.hf_token,
+                 hf_token=(
+                     # when using inlined deployment spec, we need to use QMARK_RESERVED_TOKEN
+                     # to avoid revealing the token while calling the SYSTEM$DEPLOY_MODEL function
+                     # noop if using file-based deployment spec or token is not provided
+                     service_sql.QMARK_RESERVED_TOKEN
+                     if hf_model_args.hf_token and self._use_inlined_deployment_spec
+                     else hf_model_args.hf_token
+                 ),
                  hf_tokenizer=hf_model_args.hf_tokenizer,
                  hf_revision=hf_model_args.hf_revision,
                  hf_trust_remote_code=hf_model_args.hf_trust_remote_code,
@@ -320,6 +330,14 @@
                  model_deployment_spec.ModelDeploymentSpec.DEPLOY_SPEC_FILE_REL_PATH if self._workspace else None
              ),
              model_deployment_spec_yaml_str=None if self._workspace else spec_yaml_str_or_path,
+             query_params=(
+                 # when using inlined deployment spec, we need to add the token to the query params
+                 # to avoid revealing the token while calling the SYSTEM$DEPLOY_MODEL function
+                 # noop if using file-based deployment spec or token is not provided
+                 [hf_model_args.hf_token]
+                 if (self._use_inlined_deployment_spec and hf_model_args and hf_model_args.hf_token)
+                 else []
+             ),
              statement_params=statement_params,
          )

@@ -635,6 +653,47 @@
          else:
              module_logger.warning(f"Service {service.display_service_name} is done, but not transitioning.")

+     def _enforce_save_mode(self, output_mode: batch_inference_specs.SaveMode, output_stage_location: str) -> None:
+         """Enforce the save mode for the output stage location.
+
+         Args:
+             output_mode: The output mode
+             output_stage_location: The output stage location to check/clean.
+
+         Raises:
+             FileExistsError: When ERROR mode is specified and files exist in the output location.
+             RuntimeError: When operations fail (checking files or removing files).
+             ValueError: When an invalid SaveMode is specified.
+         """
+         list_results = self._stage_client.list_stage(output_stage_location)
+
+         if output_mode == batch_inference_specs.SaveMode.ERROR:
+             if len(list_results) > 0:
+                 raise FileExistsError(
+                     f"Output stage location '{output_stage_location}' is not empty. "
+                     f"Found {len(list_results)} existing files. When using ERROR mode, the output location "
+                     f"must be empty. Please clear the existing files or use OVERWRITE mode."
+                 )
+         elif output_mode == batch_inference_specs.SaveMode.OVERWRITE:
+             if len(list_results) > 0:
+                 warnings.warn(
+                     f"Output stage location '{output_stage_location}' is not empty. "
+                     f"Found {len(list_results)} existing files. OVERWRITE mode will remove all existing files "
+                     f"in the output location before running the batch inference job.",
+                     stacklevel=2,
+                 )
+             try:
+                 self._session.sql(f"REMOVE {output_stage_location}").collect()
+             except Exception as e:
+                 raise RuntimeError(
+                     f"OVERWRITE was specified. However, failed to remove existing files in output stage "
+                     f"{output_stage_location}: {e}. Please clear up the existing files manually and retry "
+                     f"the operation."
+                 )
+         else:
+             valid_modes = list(batch_inference_specs.SaveMode)
+             raise ValueError(f"Invalid SaveMode: {output_mode}. Must be one of {valid_modes}")
+
      def _stream_service_logs(
          self,
          async_job: snowpark.AsyncJob,
@@ -911,6 +970,7 @@
          max_batch_rows: Optional[int],
          cpu_requests: Optional[str],
          memory_requests: Optional[str],
+         gpu_requests: Optional[str],
          replicas: Optional[int],
          statement_params: Optional[dict[str, Any]] = None,
      ) -> jobs.MLJob[Any]:
@@ -945,6 +1005,7 @@
              warehouse=warehouse,
              cpu=cpu_requests,
              memory=memory_requests,
+             gpu=gpu_requests,
              replicas=replicas,
          )


snowflake/ml/model/_client/service/model_deployment_spec.py

@@ -204,7 +204,7 @@ class ModelDeploymentSpec:
          job_schema_name: Optional[sql_identifier.SqlIdentifier] = None,
          cpu: Optional[str] = None,
          memory: Optional[str] = None,
-         gpu: Optional[Union[str, int]] = None,
+         gpu: Optional[str] = None,
          num_workers: Optional[int] = None,
          max_batch_rows: Optional[int] = None,
          replicas: Optional[int] = None,