snowflake-ml-python 1.10.0__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff shows the publicly released contents of two package versions as they appear in their respective public registries. It is provided for informational purposes only.
- snowflake/ml/experiment/callback/keras.py +63 -0
- snowflake/ml/experiment/callback/lightgbm.py +5 -1
- snowflake/ml/experiment/callback/xgboost.py +5 -1
- snowflake/ml/jobs/_utils/__init__.py +0 -0
- snowflake/ml/jobs/_utils/constants.py +4 -1
- snowflake/ml/jobs/_utils/payload_utils.py +42 -14
- snowflake/ml/jobs/_utils/query_helper.py +5 -1
- snowflake/ml/jobs/_utils/runtime_env_utils.py +63 -0
- snowflake/ml/jobs/_utils/scripts/get_instance_ip.py +2 -2
- snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +3 -3
- snowflake/ml/jobs/_utils/spec_utils.py +41 -8
- snowflake/ml/jobs/_utils/stage_utils.py +22 -9
- snowflake/ml/jobs/_utils/types.py +5 -7
- snowflake/ml/jobs/job.py +1 -1
- snowflake/ml/jobs/manager.py +1 -13
- snowflake/ml/model/_client/model/model_version_impl.py +166 -10
- snowflake/ml/model/_client/ops/service_ops.py +63 -28
- snowflake/ml/model/_client/service/model_deployment_spec.py +103 -27
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +11 -5
- snowflake/ml/model/_model_composer/model_composer.py +1 -70
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +2 -43
- snowflake/ml/model/inference_engine.py +5 -0
- snowflake/ml/model/models/huggingface_pipeline.py +4 -3
- snowflake/ml/registry/_manager/model_manager.py +7 -35
- snowflake/ml/registry/_manager/model_parameter_reconciler.py +194 -5
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.10.0.dist-info → snowflake_ml_python-1.11.0.dist-info}/METADATA +23 -4
- {snowflake_ml_python-1.10.0.dist-info → snowflake_ml_python-1.11.0.dist-info}/RECORD +31 -27
- {snowflake_ml_python-1.10.0.dist-info → snowflake_ml_python-1.11.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.10.0.dist-info → snowflake_ml_python-1.11.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.10.0.dist-info → snowflake_ml_python-1.11.0.dist-info}/top_level.txt +0 -0
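Taken together, the changes below introduce an experimental path for serving HuggingFace text-generation models through a custom inference engine (vLLM) and make the inference image repository optional. A minimal usage sketch, under stated assumptions: `session` is an existing Snowpark session, the model, version, pool, and argument values are illustrative, and the `InferenceEngine` import path follows the new `snowflake/ml/model/inference_engine.py` module listed above:

```python
from snowflake.ml.registry import Registry
from snowflake.ml.model.inference_engine import InferenceEngine  # new in 1.11.0 (assumed import path)

reg = Registry(session=session)  # `session`: an assumed, already-created Snowpark session
mv = reg.get_model("MY_TEXT_GEN_MODEL").version("V1")  # must be a HuggingFace text-generation model

mv.create_service(
    service_name="TEXT_GEN_SERVICE",
    service_compute_pool="MY_GPU_POOL",
    gpu_requests=2,  # surfaced to the engine as --tensor-parallel-size=2
    experimental_options={
        "inference_engine": InferenceEngine.VLLM,
        # Optional pass-through arguments; each must carry a "--" prefix.
        "inference_engine_args_override": ["--max-model-len=8192"],
    },
)
```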
snowflake/ml/model/_client/model/model_version_impl.py

```diff
@@ -707,6 +707,128 @@ class ModelVersion(lineage_node.LineageNode):
             version_name=sql_identifier.SqlIdentifier(version),
         )
 
+    def _get_inference_engine_args(
+        self, experimental_options: Optional[dict[str, Any]]
+    ) -> Optional[service_ops.InferenceEngineArgs]:
+
+        if not experimental_options:
+            return None
+
+        if "inference_engine" not in experimental_options:
+            raise ValueError("inference_engine is required in experimental_options")
+
+        return service_ops.InferenceEngineArgs(
+            inference_engine=experimental_options["inference_engine"],
+            inference_engine_args_override=experimental_options.get("inference_engine_args_override"),
+        )
+
+    def _enrich_inference_engine_args(
+        self,
+        inference_engine_args: service_ops.InferenceEngineArgs,
+        gpu_requests: Optional[Union[str, int]] = None,
+    ) -> Optional[service_ops.InferenceEngineArgs]:
+        """Enrich inference engine args with model path and tensor parallelism settings.
+
+        Args:
+            inference_engine_args: The original inference engine args
+            gpu_requests: The number of GPUs requested
+
+        Returns:
+            Enriched inference engine args
+
+        Raises:
+            ValueError: Invalid gpu_requests
+        """
+        if inference_engine_args.inference_engine_args_override is None:
+            inference_engine_args.inference_engine_args_override = []
+
+        # Get model stage path and strip off "snow://" prefix
+        model_stage_path = self._model_ops.get_model_version_stage_path(
+            database_name=None,
+            schema_name=None,
+            model_name=self._model_name,
+            version_name=self._version_name,
+        )
+
+        # Strip "snow://" prefix
+        if model_stage_path.startswith("snow://"):
+            model_stage_path = model_stage_path.replace("snow://", "", 1)
+
+        # Always overwrite the model key by appending
+        inference_engine_args.inference_engine_args_override.append(f"--model={model_stage_path}")
+
+        gpu_count = None
+
+        # Set tensor-parallelism if gpu_requests is specified
+        if gpu_requests is not None:
+            # assert gpu_requests is a string or an integer before casting to int
+            if isinstance(gpu_requests, str) or isinstance(gpu_requests, int):
+                try:
+                    gpu_count = int(gpu_requests)
+                except ValueError:
+                    raise ValueError(f"Invalid gpu_requests: {gpu_requests}")
+
+        if gpu_count is not None:
+            if gpu_count > 0:
+                inference_engine_args.inference_engine_args_override.append(f"--tensor-parallel-size={gpu_count}")
+            else:
+                raise ValueError(f"Invalid gpu_requests: {gpu_requests}")
+
+        return inference_engine_args
+
+    def _check_huggingface_text_generation_model(
+        self,
+        statement_params: Optional[dict[str, Any]] = None,
+    ) -> None:
+        """Check if the model is a HuggingFace pipeline with text-generation task.
+
+        Args:
+            statement_params: Optional dictionary of statement parameters to include
+                in the SQL command to fetch model spec.
+
+        Raises:
+            ValueError: If the model is not a HuggingFace text-generation model.
+        """
+        # Fetch model spec
+        model_spec = self._model_ops._fetch_model_spec(
+            database_name=None,
+            schema_name=None,
+            model_name=self._model_name,
+            version_name=self._version_name,
+            statement_params=statement_params,
+        )
+
+        # Check if model_type is huggingface_pipeline
+        model_type = model_spec.get("model_type")
+        if model_type != "huggingface_pipeline":
+            raise ValueError(
+                f"Inference engine is only supported for HuggingFace text-generation models. "
+                f"Found model_type: {model_type}"
+            )
+
+        # Check if model supports text-generation task
+        # There should only be one model in the list because we don't support multiple models in a single model spec
+        models = model_spec.get("models", {})
+        is_text_generation = False
+        found_tasks: list[str] = []
+
+        # As long as the model supports text-generation task, we can use it
+        for _, model_info in models.items():
+            options = model_info.get("options", {})
+            task = options.get("task")
+            if task:
+                found_tasks.append(str(task))
+                if task == "text-generation":
+                    is_text_generation = True
+                    break
+
+        if not is_text_generation:
+            tasks_str = ", ".join(found_tasks)
+            found_tasks_str = (
+                f"Found task(s): {tasks_str} in model spec." if found_tasks else "No task found in model spec."
+            )
+            raise ValueError(f"Inference engine is only supported for task 'text-generation'. {found_tasks_str}")
+
     @overload
     def create_service(
         self,
```
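For reference, a condensed, self-contained sketch of the enrichment logic above (an illustrative re-implementation, not the library code; it skips the type guard on `gpu_requests`):

```python
def enrich(args: list, model_stage_path: str, gpu_requests) -> list:
    """Mirror of _enrich_inference_engine_args: append --model, then --tensor-parallel-size."""
    args = list(args)
    args.append(f"--model={model_stage_path.removeprefix('snow://')}")
    if gpu_requests is not None:
        gpu_count = int(gpu_requests)  # raises ValueError on non-numeric input, as above
        if gpu_count <= 0:
            raise ValueError(f"Invalid gpu_requests: {gpu_requests}")
        args.append(f"--tensor-parallel-size={gpu_count}")
    return args

print(enrich([], "snow://some/model/stage/path", "2"))
# ['--model=some/model/stage/path', '--tensor-parallel-size=2']
```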
snowflake/ml/model/_client/model/model_version_impl.py (continued)

```diff
@@ -714,7 +836,7 @@ class ModelVersion(lineage_node.LineageNode):
         service_name: str,
         image_build_compute_pool: Optional[str] = None,
         service_compute_pool: str,
-        image_repo: str,
+        image_repo: Optional[str] = None,
         ingress_enabled: bool = False,
         max_instances: int = 1,
         cpu_requests: Optional[str] = None,
@@ -725,6 +847,7 @@ class ModelVersion(lineage_node.LineageNode):
         force_rebuild: bool = False,
         build_external_access_integration: Optional[str] = None,
         block: bool = True,
+        experimental_options: Optional[dict[str, Any]] = None,
     ) -> Union[str, async_job.AsyncJob]:
         """Create an inference service with the given spec.
 
@@ -735,7 +858,8 @@ class ModelVersion(lineage_node.LineageNode):
                 the service compute pool if None.
             service_compute_pool: The name of the compute pool used to run the inference service.
             image_repo: The name of the image repository, can be fully qualified. If not fully qualified, the database
-                or schema of the model will be used.
+                or schema of the model will be used. This can be None, in that case a default hidden image repository
+                will be used.
             ingress_enabled: If true, creates an service endpoint associated with the service. User must have
                 BIND SERVICE ENDPOINT privilege on the account.
             max_instances: The maximum number of inference service instances to run. The same value it set to
@@ -756,6 +880,10 @@ class ModelVersion(lineage_node.LineageNode):
             block: A bool value indicating whether this function will wait until the service is available.
                 When it is ``False``, this function executes the underlying service creation asynchronously
                 and returns an :class:`AsyncJob`.
+            experimental_options: Experimental options for the service creation with custom inference engine.
+                Currently, only `inference_engine` and `inference_engine_args_override` are supported.
+                `inference_engine` is the name of the inference engine to use.
+                `inference_engine_args_override` is a list of string arguments to pass to the inference engine.
         """
         ...
 
@@ -766,7 +894,7 @@ class ModelVersion(lineage_node.LineageNode):
         service_name: str,
         image_build_compute_pool: Optional[str] = None,
         service_compute_pool: str,
-        image_repo: str,
+        image_repo: Optional[str] = None,
         ingress_enabled: bool = False,
         max_instances: int = 1,
         cpu_requests: Optional[str] = None,
@@ -777,6 +905,7 @@ class ModelVersion(lineage_node.LineageNode):
         force_rebuild: bool = False,
         build_external_access_integrations: Optional[list[str]] = None,
         block: bool = True,
+        experimental_options: Optional[dict[str, Any]] = None,
     ) -> Union[str, async_job.AsyncJob]:
         """Create an inference service with the given spec.
 
@@ -787,7 +916,8 @@ class ModelVersion(lineage_node.LineageNode):
                 the service compute pool if None.
             service_compute_pool: The name of the compute pool used to run the inference service.
             image_repo: The name of the image repository, can be fully qualified. If not fully qualified, the database
-                or schema of the model will be used.
+                or schema of the model will be used. This can be None, in that case a default hidden image repository
+                will be used.
             ingress_enabled: If true, creates an service endpoint associated with the service. User must have
                 BIND SERVICE ENDPOINT privilege on the account.
             max_instances: The maximum number of inference service instances to run. The same value it set to
@@ -808,6 +938,10 @@ class ModelVersion(lineage_node.LineageNode):
             block: A bool value indicating whether this function will wait until the service is available.
                 When it is ``False``, this function executes the underlying service creation asynchronously
                 and returns an :class:`AsyncJob`.
+            experimental_options: Experimental options for the service creation with custom inference engine.
+                Currently, only `inference_engine` and `inference_engine_args_override` are supported.
+                `inference_engine` is the name of the inference engine to use.
+                `inference_engine_args_override` is a list of string arguments to pass to the inference engine.
         """
         ...
 
@@ -832,7 +966,7 @@ class ModelVersion(lineage_node.LineageNode):
         service_name: str,
         image_build_compute_pool: Optional[str] = None,
         service_compute_pool: str,
-        image_repo: str,
+        image_repo: Optional[str] = None,
         ingress_enabled: bool = False,
         max_instances: int = 1,
         cpu_requests: Optional[str] = None,
@@ -844,6 +978,7 @@ class ModelVersion(lineage_node.LineageNode):
         build_external_access_integration: Optional[str] = None,
         build_external_access_integrations: Optional[list[str]] = None,
         block: bool = True,
+        experimental_options: Optional[dict[str, Any]] = None,
    ) -> Union[str, async_job.AsyncJob]:
         """Create an inference service with the given spec.
 
@@ -854,7 +989,8 @@ class ModelVersion(lineage_node.LineageNode):
                 the service compute pool if None.
             service_compute_pool: The name of the compute pool used to run the inference service.
             image_repo: The name of the image repository, can be fully qualified. If not fully qualified, the database
-                or schema of the model will be used.
+                or schema of the model will be used. This can be None, in that case a default hidden image repository
+                will be used.
             ingress_enabled: If true, creates an service endpoint associated with the service. User must have
                 BIND SERVICE ENDPOINT privilege on the account.
             max_instances: The maximum number of inference service instances to run. The same value it set to
@@ -877,6 +1013,11 @@ class ModelVersion(lineage_node.LineageNode):
             block: A bool value indicating whether this function will wait until the service is available.
                 When it is False, this function executes the underlying service creation asynchronously
                 and returns an AsyncJob.
+            experimental_options: Experimental options for the service creation with custom inference engine.
+                Currently, only `inference_engine` and `inference_engine_args_override` are supported.
+                `inference_engine` is the name of the inference engine to use.
+                `inference_engine_args_override` is a list of string arguments to pass to the inference engine.
+
 
         Raises:
             ValueError: Illegal external access integration arguments.
@@ -885,6 +1026,9 @@ class ModelVersion(lineage_node.LineageNode):
         Returns:
             If `block=True`, return result information about service creation from server.
             Otherwise, return the service creation AsyncJob.
+
+        Raises:
+            ValueError: Illegal external access integration arguments.
         """
         statement_params = telemetry.get_statement_params(
             project=_TELEMETRY_PROJECT,
@@ -906,7 +1050,18 @@ class ModelVersion(lineage_node.LineageNode):
             build_external_access_integrations = [build_external_access_integration]
 
         service_db_id, service_schema_id, service_id = sql_identifier.parse_fully_qualified_name(service_name)
-
+
+        # Check if model is HuggingFace text-generation before doing inference engine checks
+        if experimental_options:
+            self._check_huggingface_text_generation_model(statement_params)
+
+        inference_engine_args: Optional[service_ops.InferenceEngineArgs] = self._get_inference_engine_args(
+            experimental_options
+        )
+
+        # Enrich inference engine args if inference engine is specified
+        if inference_engine_args is not None:
+            inference_engine_args = self._enrich_inference_engine_args(inference_engine_args, gpu_requests)
 
         from snowflake.ml.model import event_handler
         from snowflake.snowpark import exceptions
@@ -929,7 +1084,7 @@ class ModelVersion(lineage_node.LineageNode):
                 else sql_identifier.SqlIdentifier(service_compute_pool)
             ),
             service_compute_pool_name=sql_identifier.SqlIdentifier(service_compute_pool),
-
+            image_repo_name=image_repo,
             ingress_enabled=ingress_enabled,
             max_instances=max_instances,
             cpu_requests=cpu_requests,
@@ -946,6 +1101,7 @@ class ModelVersion(lineage_node.LineageNode):
             block=block,
             statement_params=statement_params,
             progress_status=status,
+            inference_engine_args=inference_engine_args,
         )
         status.update(label="Model service created successfully", state="complete", expanded=False)
         return result
@@ -1039,7 +1195,7 @@ class ModelVersion(lineage_node.LineageNode):
         *,
         job_name: str,
         compute_pool: str,
-        image_repo: str,
+        image_repo: Optional[str] = None,
         output_table_name: str,
         function_name: Optional[str] = None,
         cpu_requests: Optional[str] = None,
@@ -1074,7 +1230,7 @@ class ModelVersion(lineage_node.LineageNode):
             job_name=job_id,
             compute_pool_name=sql_identifier.SqlIdentifier(compute_pool),
             warehouse_name=sql_identifier.SqlIdentifier(warehouse),
-
+            image_repo_name=image_repo,
             output_table_database_name=output_table_db_id,
             output_table_schema_name=output_table_schema_id,
             output_table_name=output_table_id,
```
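The other change running through the `create_service` and batch-job hunks above is that `image_repo` is now `Optional[str] = None`. A short sketch, reusing the assumed `mv` handle from the first example:

```python
# As of 1.11.0 the image repository may be omitted; per the updated docstring,
# a default hidden image repository is then used.
mv.create_service(
    service_name="TEXT_GEN_SERVICE",          # illustrative name
    service_compute_pool="MY_GPU_POOL",       # illustrative name
    # image_repo="MY_DB.MY_SCHEMA.MY_REPO",   # now optional; fully qualified or not
)
```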
snowflake/ml/model/_client/ops/service_ops.py

```diff
@@ -12,7 +12,11 @@ from typing import Any, Optional, Union, cast
 from snowflake import snowpark
 from snowflake.ml._internal import file_utils, platform_capabilities as pc
 from snowflake.ml._internal.utils import identifier, service_logger, sql_identifier
-from snowflake.ml.model import
+from snowflake.ml.model import (
+    inference_engine as inference_engine_module,
+    model_signature,
+    type_hints,
+)
 from snowflake.ml.model._client.service import model_deployment_spec
 from snowflake.ml.model._client.sql import service as service_sql, stage as stage_sql
 from snowflake.ml.model._signatures import snowpark_handler
@@ -131,6 +135,12 @@ class HFModelArgs:
     warehouse: Optional[str] = None
 
 
+@dataclasses.dataclass
+class InferenceEngineArgs:
+    inference_engine: inference_engine_module.InferenceEngine
+    inference_engine_args_override: Optional[list[str]] = None
+
+
 class ServiceOperator:
     """Service operator for container services logic."""
 
@@ -180,7 +190,7 @@ class ServiceOperator:
         service_name: sql_identifier.SqlIdentifier,
         image_build_compute_pool_name: sql_identifier.SqlIdentifier,
         service_compute_pool_name: sql_identifier.SqlIdentifier,
-
+        image_repo_name: Optional[str],
         ingress_enabled: bool,
         max_instances: int,
         cpu_requests: Optional[str],
@@ -195,6 +205,8 @@ class ServiceOperator:
         statement_params: Optional[dict[str, Any]] = None,
         # hf model
         hf_model_args: Optional[HFModelArgs] = None,
+        # inference engine model
+        inference_engine_args: Optional[InferenceEngineArgs] = None,
     ) -> Union[str, async_job.AsyncJob]:
 
         # Generate operation ID for this deployment
@@ -205,15 +217,14 @@ class ServiceOperator:
         schema_name = schema_name or self._schema_name
 
         # Fall back to the model's database and schema if not provided then to the registry's database and schema
-        service_database_name = service_database_name or database_name
-        service_schema_name = service_schema_name or schema_name
+        service_database_name = service_database_name or database_name
+        service_schema_name = service_schema_name or schema_name
 
-
-
-
-
-
-        image_repo_schema_name = image_repo_schema_name or schema_name or self._schema_name
+        image_repo_fqn = ServiceOperator._get_image_repo_fqn(image_repo_name, database_name, schema_name)
+
+        # There may be more conditions to enable image build in the future
+        # For now, we only enable image build if inference engine is not specified
+        is_enable_image_build = inference_engine_args is None
 
         # Step 1: Preparing deployment artifacts
         progress_status.update("preparing deployment artifacts...")
@@ -230,14 +241,15 @@ class ServiceOperator:
             model_name=model_name,
             version_name=version_name,
         )
-
-
-
-
-
-
-
-
+
+        if is_enable_image_build:
+            self._model_deployment_spec.add_image_build_spec(
+                image_build_compute_pool_name=image_build_compute_pool_name,
+                fully_qualified_image_repo_name=image_repo_fqn,
+                force_rebuild=force_rebuild,
+                external_access_integrations=build_external_access_integrations,
+            )
+
         self._model_deployment_spec.add_service_spec(
             service_database_name=service_database_name,
             service_schema_name=service_schema_name,
@@ -266,6 +278,13 @@ class ServiceOperator:
                 warehouse=hf_model_args.warehouse,
                 **(hf_model_args.hf_model_kwargs if hf_model_args.hf_model_kwargs else {}),
             )
+
+        if inference_engine_args:
+            self._model_deployment_spec.add_inference_engine_spec(
+                inference_engine=inference_engine_args.inference_engine,
+                inference_engine_args=inference_engine_args.inference_engine_args_override,
+            )
+
         spec_yaml_str_or_path = self._model_deployment_spec.save()
 
         # Step 2: Uploading deployment artifacts
@@ -412,6 +431,29 @@ class ServiceOperator:
 
         return async_job
 
+    @staticmethod
+    def _get_image_repo_fqn(
+        image_repo_name: Optional[str],
+        database_name: sql_identifier.SqlIdentifier,
+        schema_name: sql_identifier.SqlIdentifier,
+    ) -> Optional[str]:
+        """Get the fully qualified name of the image repository."""
+        if image_repo_name is None or image_repo_name.strip() == "":
+            return None
+        # Parse image repo
+        (
+            image_repo_database_name,
+            image_repo_schema_name,
+            image_repo_name,
+        ) = sql_identifier.parse_fully_qualified_name(image_repo_name)
+        image_repo_database_name = image_repo_database_name or database_name
+        image_repo_schema_name = image_repo_schema_name or schema_name
+        return identifier.get_schema_level_object_identifier(
+            db=image_repo_database_name.identifier(),
+            schema=image_repo_schema_name.identifier(),
+            object_name=image_repo_name.identifier(),
+        )
+
     def _start_service_log_streaming(
         self,
         async_job: snowpark.AsyncJob,
@@ -838,7 +880,7 @@ class ServiceOperator:
         job_name: sql_identifier.SqlIdentifier,
         compute_pool_name: sql_identifier.SqlIdentifier,
         warehouse_name: sql_identifier.SqlIdentifier,
-
+        image_repo_name: Optional[str],
         output_table_database_name: Optional[sql_identifier.SqlIdentifier],
         output_table_schema_name: Optional[sql_identifier.SqlIdentifier],
         output_table_name: sql_identifier.SqlIdentifier,
@@ -859,12 +901,7 @@ class ServiceOperator:
         job_database_name = job_database_name or database_name or self._database_name
         job_schema_name = job_schema_name or schema_name or self._schema_name
 
-
-        image_repo_database_name, image_repo_schema_name, image_repo_name = sql_identifier.parse_fully_qualified_name(
-            image_repo
-        )
-        image_repo_database_name = image_repo_database_name or database_name or self._database_name
-        image_repo_schema_name = image_repo_schema_name or schema_name or self._schema_name
+        image_repo_fqn = self._get_image_repo_fqn(image_repo_name, database_name, schema_name)
 
         input_table_database_name = job_database_name
         input_table_schema_name = job_schema_name
@@ -948,9 +985,7 @@ class ServiceOperator:
 
         self._model_deployment_spec.add_image_build_spec(
             image_build_compute_pool_name=compute_pool_name,
-
-            image_repo_schema_name=image_repo_schema_name,
-            image_repo_name=image_repo_name,
+            fully_qualified_image_repo_name=image_repo_fqn,
             force_rebuild=force_rebuild,
             external_access_integrations=build_external_access_integrations,
         )
```
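The name resolution performed by the new `_get_image_repo_fqn` helper can be summarized with a simplified analogue (illustrative only; the real helper goes through `sql_identifier.parse_fully_qualified_name` and handles quoted identifiers):

```python
def image_repo_fqn(repo, db: str, schema: str):
    """Simplified analogue of ServiceOperator._get_image_repo_fqn."""
    if repo is None or repo.strip() == "":
        return None  # caller then falls back to the default hidden image repository
    parts = repo.split(".")
    if len(parts) == 1:
        parts = [db, schema, parts[0]]    # unqualified: inherit the model's database and schema
    elif len(parts) == 2:
        parts = [db, parts[0], parts[1]]  # schema-qualified: inherit the database
    return ".".join(parts)

assert image_repo_fqn(None, "DB", "SCH") is None
assert image_repo_fqn("REPO", "DB", "SCH") == "DB.SCH.REPO"
assert image_repo_fqn("OTHER_DB.S2.REPO", "DB", "SCH") == "OTHER_DB.S2.REPO"
```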
snowflake/ml/model/_client/service/model_deployment_spec.py

```diff
@@ -1,10 +1,12 @@
 import json
 import pathlib
+import warnings
 from typing import Any, Optional, Union
 
 import yaml
 
 from snowflake.ml._internal.utils import identifier, sql_identifier
+from snowflake.ml.model import inference_engine as inference_engine_module
 from snowflake.ml.model._client.service import model_deployment_spec_schema
 
 
@@ -24,6 +26,8 @@ class ModelDeploymentSpec:
         self._service: Optional[model_deployment_spec_schema.Service] = None
         self._job: Optional[model_deployment_spec_schema.Job] = None
         self._model_loggings: Optional[list[model_deployment_spec_schema.ModelLogging]] = None
+        # this is referring to custom inference engine spec (vllm, sglang, etc)
+        self._inference_engine_spec: Optional[model_deployment_spec_schema.InferenceEngineSpec] = None
         self._inference_spec: dict[str, Any] = {}  # Common inference spec for service/job
 
         self.database: Optional[sql_identifier.SqlIdentifier] = None
@@ -71,10 +75,8 @@ class ModelDeploymentSpec:
 
     def add_image_build_spec(
         self,
-        image_build_compute_pool_name: sql_identifier.SqlIdentifier,
-
-        image_repo_database_name: Optional[sql_identifier.SqlIdentifier] = None,
-        image_repo_schema_name: Optional[sql_identifier.SqlIdentifier] = None,
+        image_build_compute_pool_name: Optional[sql_identifier.SqlIdentifier] = None,
+        fully_qualified_image_repo_name: Optional[str] = None,
         force_rebuild: bool = False,
         external_access_integrations: Optional[list[sql_identifier.SqlIdentifier]] = None,
     ) -> "ModelDeploymentSpec":
@@ -82,33 +84,29 @@ class ModelDeploymentSpec:
 
         Args:
             image_build_compute_pool_name: Compute pool for image building.
-
-            image_repo_database_name: Database name for the image repository.
-            image_repo_schema_name: Schema name for the image repository.
+            fully_qualified_image_repo_name: Fully qualified name of the image repository.
             force_rebuild: Whether to force rebuilding the image.
             external_access_integrations: List of external access integrations.
 
         Returns:
             Self for chaining.
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            ),
-        )
+        if (
+            image_build_compute_pool_name is not None
+            or fully_qualified_image_repo_name is not None
+            or force_rebuild is True
+            or external_access_integrations is not None
+        ):
+            self._image_build = model_deployment_spec_schema.ImageBuild(
+                compute_pool=(
+                    None if image_build_compute_pool_name is None else image_build_compute_pool_name.identifier()
+                ),
+                image_repo=fully_qualified_image_repo_name,
+                force_rebuild=force_rebuild,
+                external_access_integrations=(
+                    [eai.identifier() for eai in external_access_integrations] if external_access_integrations else None
+                ),
+            )
         return self
 
     def _add_inference_spec(
@@ -363,6 +361,86 @@ class ModelDeploymentSpec:
         self._model_loggings.append(model_logging)
         return self
 
+    def add_inference_engine_spec(
+        self,
+        inference_engine: inference_engine_module.InferenceEngine,
+        inference_engine_args: Optional[list[str]] = None,
+    ) -> "ModelDeploymentSpec":
+        """Add inference engine specification. This must be called after self.add_service_spec().
+
+        Args:
+            inference_engine: Inference engine.
+            inference_engine_args: Inference engine arguments.
+
+        Returns:
+            Self for chaining.
+
+        Raises:
+            ValueError: If inference engine specification is called before add_service_spec().
+            ValueError: If the argument does not have a '--' prefix.
+        """
+        # TODO: needs to eventually support job deployment spec
+        if self._service is None:
+            raise ValueError("Inference engine specification must be called after add_service_spec().")
+
+        if inference_engine_args is None:
+            inference_engine_args = []
+
+        # Validate inference engine
+        if inference_engine == inference_engine_module.InferenceEngine.VLLM:
+            # Block list for VLLM args that should not be user-configurable
+            # make this a set for faster lookup
+            block_list = {
+                "--host",
+                "--port",
+                "--allowed-headers",
+                "--api-key",
+                "--lora-modules",
+                "--prompt-adapter",
+                "--ssl-keyfile",
+                "--ssl-certfile",
+                "--ssl-ca-certs",
+                "--enable-ssl-refresh",
+                "--ssl-cert-reqs",
+                "--root-path",
+                "--middleware",
+                "--disable-frontend-multiprocessing",
+                "--enable-request-id-headers",
+                "--enable-auto-tool-choice",
+                "--tool-call-parser",
+                "--tool-parser-plugin",
+                "--log-config-file",
+            }
+
+            filtered_args = []
+            for arg in inference_engine_args:
+                # Check if the argument has a '--' prefix
+                if not arg.startswith("--"):
+                    raise ValueError(
+                        f"""The argument {arg} is not allowed for configuration in Snowflake ML's
+                        {inference_engine.value} inference engine. Maybe you forgot to add '--' prefix?""",
+                    )
+
+                # Filter out blocked args and warn user
+                if arg.split("=")[0] in block_list:
+                    warnings.warn(
+                        f"""The argument {arg} is not allowed for configuration in Snowflake ML's
+                        {inference_engine.value} inference engine. It will be ignored.""",
+                        UserWarning,
+                        stacklevel=2,
+                    )
+                else:
+                    filtered_args.append(arg)
+
+            inference_engine_args = filtered_args
+
+        self._service.inference_engine_spec = model_deployment_spec_schema.InferenceEngineSpec(
+            # convert to string to be saved in the deployment spec
+            inference_engine_name=inference_engine.value,
+            inference_engine_args=inference_engine_args,
+        )
+        return self
+
     def save(self) -> str:
         """Constructs the final deployment spec from added components and saves it.
 
```
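The vLLM argument validation in `add_inference_engine_spec` above reduces to a filter that rejects un-prefixed arguments and drops blocked ones with a warning. A runnable sketch (the block list here is a small subset of the one in the diff):

```python
import warnings

BLOCKED = {"--host", "--port", "--api-key"}  # subset of the vLLM block list above

def filter_engine_args(args):
    """Illustrative analogue of the validation in add_inference_engine_spec."""
    kept = []
    for arg in args:
        if not arg.startswith("--"):
            raise ValueError(f"{arg!r} must carry a '--' prefix")
        if arg.split("=")[0] in BLOCKED:
            warnings.warn(f"{arg!r} is not user-configurable; it will be ignored", UserWarning)
        else:
            kept.append(arg)
    return kept

print(filter_engine_args(["--max-model-len=8192", "--port=9000"]))
# ['--max-model-len=8192'], plus a UserWarning for --port=9000
```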
snowflake/ml/model/_client/service/model_deployment_spec.py (continued)

```diff
@@ -377,8 +455,6 @@ class ModelDeploymentSpec:
         # Validations
         if not self._models:
             raise ValueError("Model specification is required. Call add_model_spec().")
-        if not self._image_build:
-            raise ValueError("Image build specification is required. Call add_image_build_spec().")
         if not self._service and not self._job:
             raise ValueError(
                 "Either service or job specification is required. Call add_service_spec() or add_job_spec()."
```