snowflake-ml-python 1.21.0__py3-none-any.whl → 1.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. snowflake/ml/_internal/utils/url.py +42 -0
  2. snowflake/ml/jobs/__init__.py +2 -0
  3. snowflake/ml/jobs/_utils/constants.py +2 -0
  4. snowflake/ml/jobs/_utils/payload_utils.py +38 -18
  5. snowflake/ml/jobs/_utils/query_helper.py +8 -1
  6. snowflake/ml/jobs/_utils/runtime_env_utils.py +58 -4
  7. snowflake/ml/jobs/_utils/spec_utils.py +0 -31
  8. snowflake/ml/jobs/_utils/stage_utils.py +2 -2
  9. snowflake/ml/jobs/_utils/types.py +22 -2
  10. snowflake/ml/jobs/job_definition.py +232 -0
  11. snowflake/ml/jobs/manager.py +16 -177
  12. snowflake/ml/lineage/lineage_node.py +1 -1
  13. snowflake/ml/model/__init__.py +6 -0
  14. snowflake/ml/model/_client/model/batch_inference_specs.py +16 -1
  15. snowflake/ml/model/_client/model/model_version_impl.py +109 -32
  16. snowflake/ml/model/_client/ops/deployment_step.py +36 -0
  17. snowflake/ml/model/_client/ops/model_ops.py +45 -2
  18. snowflake/ml/model/_client/ops/param_utils.py +124 -0
  19. snowflake/ml/model/_client/ops/service_ops.py +81 -61
  20. snowflake/ml/model/_client/service/import_model_spec_schema.py +23 -0
  21. snowflake/ml/model/_client/service/model_deployment_spec.py +24 -9
  22. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +4 -0
  23. snowflake/ml/model/_client/sql/model_version.py +30 -6
  24. snowflake/ml/model/_client/sql/service.py +30 -29
  25. snowflake/ml/model/_model_composer/model_composer.py +1 -1
  26. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +5 -0
  27. snowflake/ml/model/_model_composer/model_method/infer_function.py_template +21 -3
  28. snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +21 -3
  29. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +21 -3
  30. snowflake/ml/model/_model_composer/model_method/model_method.py +62 -2
  31. snowflake/ml/model/_packager/model_handlers/custom.py +52 -0
  32. snowflake/ml/model/_packager/model_handlers/huggingface.py +54 -10
  33. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +52 -16
  34. snowflake/ml/model/_packager/model_handlers/xgboost.py +26 -1
  35. snowflake/ml/model/_packager/model_meta/model_meta.py +40 -7
  36. snowflake/ml/model/_packager/model_packager.py +1 -1
  37. snowflake/ml/model/_signatures/core.py +85 -0
  38. snowflake/ml/model/_signatures/utils.py +55 -0
  39. snowflake/ml/model/code_path.py +104 -0
  40. snowflake/ml/model/custom_model.py +55 -13
  41. snowflake/ml/model/model_signature.py +13 -1
  42. snowflake/ml/model/openai_signatures.py +97 -0
  43. snowflake/ml/model/type_hints.py +2 -0
  44. snowflake/ml/registry/_manager/model_manager.py +230 -15
  45. snowflake/ml/registry/_manager/model_parameter_reconciler.py +1 -1
  46. snowflake/ml/registry/registry.py +4 -4
  47. snowflake/ml/version.py +1 -1
  48. {snowflake_ml_python-1.21.0.dist-info → snowflake_ml_python-1.23.0.dist-info}/METADATA +95 -1
  49. {snowflake_ml_python-1.21.0.dist-info → snowflake_ml_python-1.23.0.dist-info}/RECORD +52 -46
  50. {snowflake_ml_python-1.21.0.dist-info → snowflake_ml_python-1.23.0.dist-info}/WHEEL +0 -0
  51. {snowflake_ml_python-1.21.0.dist-info → snowflake_ml_python-1.23.0.dist-info}/licenses/LICENSE.txt +0 -0
  52. {snowflake_ml_python-1.21.0.dist-info → snowflake_ml_python-1.23.0.dist-info}/top_level.txt +0 -0
snowflake/ml/jobs/manager.py

@@ -1,11 +1,5 @@
- import json
  import logging
- import os
- import pathlib
- import sys
- from pathlib import PurePath
  from typing import Any, Callable, Optional, TypeVar, Union, cast, overload
- from uuid import uuid4

  import pandas as pd

@@ -13,13 +7,8 @@ from snowflake import snowpark
  from snowflake.ml._internal import telemetry
  from snowflake.ml._internal.utils import identifier
  from snowflake.ml.jobs import job as jb
- from snowflake.ml.jobs._utils import (
-     constants,
-     feature_flags,
-     payload_utils,
-     query_helper,
-     types,
- )
+ from snowflake.ml.jobs._utils import query_helper
+ from snowflake.ml.jobs.job_definition import MLJobDefinition
  from snowflake.snowpark.context import get_active_session
  from snowflake.snowpark.exceptions import SnowparkSQLException
  from snowflake.snowpark.functions import coalesce, col, lit, when
@@ -457,7 +446,6 @@ def _submit_job(
          An object representing the submitted job.

      Raises:
-         ValueError: If database or schema value(s) are invalid
          RuntimeError: If schema is not specified in session context or job submission
      """
      session = _ensure_session(session)
@@ -469,94 +457,30 @@ def _submit_job(
      )
      target_instances = max(target_instances, kwargs.pop("num_instances"))

-     imports = None
      if "additional_payloads" in kwargs:
          logger.warning(
              "'additional_payloads' is deprecated and will be removed in a future release. Use 'imports' instead."
          )
-         imports = kwargs.pop("additional_payloads")
+         if "imports" not in kwargs:
+             imports = kwargs.pop("additional_payloads", None)
+             kwargs.update({"imports": imports})

      if "runtime_environment" in kwargs:
          logger.warning("'runtime_environment' is in private preview since 1.15.0, do not use it in production.")

-     # Use kwargs for less common optional parameters
-     database = kwargs.pop("database", None)
-     schema = kwargs.pop("schema", None)
-     min_instances = kwargs.pop("min_instances", target_instances)
-     pip_requirements = kwargs.pop("pip_requirements", None)
-     external_access_integrations = kwargs.pop("external_access_integrations", None)
-     env_vars = kwargs.pop("env_vars", None)
-     spec_overrides = kwargs.pop("spec_overrides", None)
-     enable_metrics = kwargs.pop("enable_metrics", True)
-     query_warehouse = kwargs.pop("query_warehouse", session.get_current_warehouse())
-     imports = kwargs.pop("imports", None) or imports
-     # if the mljob is submitted from a notebook, we use the same image tag as the notebook
-     runtime_environment = kwargs.pop("runtime_environment", os.environ.get(constants.RUNTIME_IMAGE_TAG_ENV_VAR, None))
-
-     # Warn if there are unknown kwargs
-     if kwargs:
-         logger.warning(f"Ignoring unknown kwargs: {kwargs.keys()}")
-
-     # Validate parameters
-     if database and not schema:
-         raise ValueError("Schema must be specified if database is specified.")
-     if target_instances < 1:
-         raise ValueError("target_instances must be greater than 0.")
-     if not (0 < min_instances <= target_instances):
-         raise ValueError("min_instances must be greater than 0 and less than or equal to target_instances.")
-     if min_instances > 1:
-         # Validate min_instances against compute pool max_nodes
-         pool_info = jb._get_compute_pool_info(session, compute_pool)
-         max_nodes = int(pool_info["max_nodes"])
-         if min_instances > max_nodes:
-             raise ValueError(
-                 f"The requested min_instances ({min_instances}) exceeds the max_nodes ({max_nodes}) "
-                 f"of compute pool '{compute_pool}'. Reduce min_instances or increase max_nodes."
-             )
-
-     job_name = f"{JOB_ID_PREFIX}{str(uuid4()).replace('-', '_').upper()}"
-     job_id = identifier.get_schema_level_object_identifier(database, schema, job_name)
-     stage_path_parts = identifier.parse_snowflake_stage_path(stage_name.lstrip("@"))
-     stage_name = f"@{'.'.join(filter(None, stage_path_parts[:3]))}"
-     stage_path = pathlib.PurePosixPath(f"{stage_name}{stage_path_parts[-1].rstrip('/')}/{job_name}")
-
-     try:
-         # Upload payload
-         uploaded_payload = payload_utils.JobPayload(
-             source, entrypoint=entrypoint, pip_requirements=pip_requirements, imports=imports
-         ).upload(session, stage_path)
-     except SnowparkSQLException as e:
-         if e.sql_error_code == 90106:
-             raise RuntimeError(
-                 "Please specify a schema, either in the session context or as a parameter in the job submission"
-             )
-         elif e.sql_error_code == 3001 and "schema" in str(e).lower():
-             raise RuntimeError(
-                 "please grant privileges on schema before submitting a job, see",
-                 "https://docs.snowflake.com/en/developer-guide/snowflake-ml/ml-jobs/access-control-requirements",
-                 " for more details",
-             ) from e
-         raise
-
-     combined_env_vars = {**uploaded_payload.env_vars, **(env_vars or {})}
+     job_definition = MLJobDefinition.register(
+         source,
+         compute_pool,
+         stage_name,
+         session or get_active_session(),
+         entrypoint,
+         target_instances,
+         generate_suffix=True,
+         **kwargs,
+     )

      try:
-         return _do_submit_job(
-             session=session,
-             payload=uploaded_payload,
-             args=args,
-             env_vars=combined_env_vars,
-             spec_overrides=spec_overrides,
-             compute_pool=compute_pool,
-             job_id=job_id,
-             external_access_integrations=external_access_integrations,
-             query_warehouse=query_warehouse,
-             target_instances=target_instances,
-             min_instances=min_instances,
-             enable_metrics=enable_metrics,
-             use_async=True,
-             runtime_environment=runtime_environment,
-         )
+         return job_definition(*(args or []))
      except SnowparkSQLException as e:
          if e.sql_error_code == 3001 and "schema" in str(e).lower():
              raise RuntimeError(
@@ -567,91 +491,6 @@ def _submit_job(
          raise


- def _do_submit_job(
-     session: snowpark.Session,
-     payload: types.UploadedPayload,
-     args: Optional[list[str]],
-     env_vars: dict[str, str],
-     spec_overrides: dict[str, Any],
-     compute_pool: str,
-     job_id: Optional[str] = None,
-     external_access_integrations: Optional[list[str]] = None,
-     query_warehouse: Optional[str] = None,
-     target_instances: int = 1,
-     min_instances: int = 1,
-     enable_metrics: bool = True,
-     use_async: bool = True,
-     runtime_environment: Optional[str] = None,
- ) -> jb.MLJob[Any]:
-     """
-     Generate the SQL query for job submission.
-
-     Args:
-         session: The Snowpark session to use.
-         payload: The uploaded job payload.
-         args: Arguments to pass to the entrypoint script.
-         env_vars: Environment variables to set in the job container.
-         spec_overrides: Custom service specification overrides.
-         compute_pool: The compute pool to use for job execution.
-         job_id: The ID of the job.
-         external_access_integrations: Optional list of external access integrations.
-         query_warehouse: Optional query warehouse to use.
-         target_instances: Number of instances for multi-node job.
-         min_instances: Minimum number of instances required to start the job.
-         enable_metrics: Whether to enable platform metrics for the job.
-         use_async: Whether to run the job asynchronously.
-         runtime_environment: image tag or full image URL to use for the job.
-
-     Returns:
-         The job object.
-     """
-     args = [(v.as_posix() if isinstance(v, PurePath) else v) for v in payload.entrypoint] + (args or [])
-     spec_options = {
-         "STAGE_PATH": payload.stage_path.as_posix(),
-         "ENTRYPOINT": ["/usr/local/bin/_entrypoint.sh"],
-         "ARGS": args,
-         "ENV_VARS": env_vars,
-         "ENABLE_METRICS": enable_metrics,
-         "SPEC_OVERRIDES": spec_overrides,
-     }
-     if runtime_environment:
-         # for the image tag or full image URL, we use that directly
-         spec_options["RUNTIME"] = runtime_environment
-     elif feature_flags.FeatureFlags.ENABLE_RUNTIME_VERSIONS.is_enabled():
-         # when feature flag is enabled, we get the local python version and wrap it in a dict
-         # in system function, we can know whether it is python version or image tag or full image URL through the format
-         spec_options["RUNTIME"] = json.dumps({"pythonVersion": f"{sys.version_info.major}.{sys.version_info.minor}"})
-
-     job_options = {
-         "EXTERNAL_ACCESS_INTEGRATIONS": external_access_integrations,
-         "QUERY_WAREHOUSE": query_warehouse,
-         "TARGET_INSTANCES": target_instances,
-         "MIN_INSTANCES": min_instances,
-         "ASYNC": use_async,
-     }
-
-     if feature_flags.FeatureFlags.ENABLE_STAGE_MOUNT_V2.is_enabled(default=True):
-         spec_options["ENABLE_STAGE_MOUNT_V2"] = True
-     if payload.payload_name:
-         job_options["GENERATE_SUFFIX"] = True
-     job_options = {k: v for k, v in job_options.items() if v is not None}
-
-     query_template = "CALL SYSTEM$EXECUTE_ML_JOB(?, ?, ?, ?)"
-     if job_id:
-         database, schema, _ = identifier.parse_schema_level_object_identifier(job_id)
-     params = [
-         job_id
-         if payload.payload_name is None
-         else identifier.get_schema_level_object_identifier(database, schema, payload.payload_name) + "_",
-         compute_pool,
-         json.dumps(spec_options),
-         json.dumps(job_options),
-     ]
-     actual_job_id = query_helper.run_query(session, query_template, params=params)[0][0]
-
-     return get_job(actual_job_id, session=session)
-
-
  def _ensure_session(session: Optional[snowpark.Session]) -> snowpark.Session:
      try:
          session = session or get_active_session()
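Note on the refactor above: payload upload, parameter validation, and the SYSTEM$EXECUTE_ML_JOB call that previously lived in _submit_job/_do_submit_job now sit behind MLJobDefinition. A minimal sketch of the new flow, using only the call shape visible in this hunk (the full MLJobDefinition.register signature lives in the new snowflake/ml/jobs/job_definition.py; all argument values below are placeholders):

    # Sketch of the refactored submission path; values are placeholders.
    from snowflake.ml.jobs.job_definition import MLJobDefinition
    from snowflake.snowpark.context import get_active_session

    session = get_active_session()

    # Register a reusable job definition (same argument order as in the hunk).
    job_definition = MLJobDefinition.register(
        "./src",                 # source: payload directory or file (placeholder)
        "MY_COMPUTE_POOL",       # compute pool (placeholder)
        "@my_stage/payloads",    # stage for the uploaded payload (placeholder)
        session,
        "./src/train.py",        # entrypoint (placeholder)
        1,                       # target_instances
        generate_suffix=True,    # append a unique suffix to the job name
    )

    # Invoking the definition submits one run; positional args reach the entrypoint.
    job = job_definition("--epochs", "5")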
snowflake/ml/lineage/lineage_node.py

@@ -7,7 +7,7 @@ from snowflake.ml._internal import telemetry
  from snowflake.ml._internal.utils import identifier, mixins

  if TYPE_CHECKING:
-     from snowflake.ml import dataset
+     from snowflake.ml.dataset import dataset
      from snowflake.ml.feature_store import feature_view
      from snowflake.ml.model._client.model import model_version_impl

snowflake/ml/model/__init__.py

@@ -2,16 +2,20 @@ import sys
  import warnings

  from snowflake.ml.model._client.model.batch_inference_specs import (
+     ColumnHandlingOptions,
+     FileEncoding,
      JobSpec,
      OutputSpec,
      SaveMode,
  )
  from snowflake.ml.model._client.model.model_impl import Model
  from snowflake.ml.model._client.model.model_version_impl import ExportMode, ModelVersion
+ from snowflake.ml.model.code_path import CodePath
  from snowflake.ml.model.models.huggingface_pipeline import HuggingFacePipelineModel
  from snowflake.ml.model.volatility import Volatility

  __all__ = [
+     "CodePath",
      "Model",
      "ModelVersion",
      "ExportMode",
@@ -20,6 +24,8 @@ __all__ = [
      "OutputSpec",
      "SaveMode",
      "Volatility",
+     "FileEncoding",
+     "ColumnHandlingOptions",
  ]

  _deprecation_warning_msg_for_3_9 = (
snowflake/ml/model/_client/model/batch_inference_specs.py

@@ -2,6 +2,7 @@ from enum import Enum
  from typing import Optional

  from pydantic import BaseModel
+ from typing_extensions import TypedDict


  class SaveMode(str, Enum):
@@ -18,6 +19,20 @@ class SaveMode(str, Enum):
      ERROR = "error"


+ class FileEncoding(str, Enum):
+     """The encoding of the file content that will be passed to the custom model."""
+
+     RAW_BYTES = "raw_bytes"
+     BASE64 = "base64"
+     BASE64_DATA_URL = "base64_data_url"
+
+
+ class ColumnHandlingOptions(TypedDict):
+     """Options for handling specific columns during run_batch for file I/O."""
+
+     encoding: FileEncoding
+
+
  class OutputSpec(BaseModel):
      """Specification for batch inference output.

@@ -74,7 +89,7 @@ class JobSpec(BaseModel):
              the memory of the node.
          gpu_requests (Optional[str]): The gpu limit for GPU based inference. Can be integer or
              string values. Use CPU if None.
-         replicas (Optional[int]): Number of job replicas to run for high availability.
+         replicas (Optional[int]): Number of SPCS job nodes used for distributed inference.
              If not specified, defaults to 1 replica.

      Example:
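Usage note on the two new types above: together with run_batch's new column_handling parameter (shown later in this diff), they let a caller say how a file column should be decoded before it reaches a custom model. A minimal sketch, assuming a hypothetical column name; the types and enum values are exactly those added here:

    # Sketch: map a staged-file column to an encoding policy for run_batch.
    from snowflake.ml.model import ColumnHandlingOptions, FileEncoding

    # "IMAGE_PATH" is a hypothetical column name.
    column_handling: dict[str, ColumnHandlingOptions] = {
        "IMAGE_PATH": {"encoding": FileEncoding.BASE64_DATA_URL},
    }

Since ColumnHandlingOptions is a TypedDict, a plain dict literal like this type-checks without instantiating a class.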
snowflake/ml/model/_client/model/model_version_impl.py

@@ -30,6 +30,10 @@ _TELEMETRY_PROJECT = "MLOps"
  _TELEMETRY_SUBPROJECT = "ModelManagement"
  _BATCH_INFERENCE_JOB_ID_PREFIX = "BATCH_INFERENCE_"
  _BATCH_INFERENCE_TEMPORARY_FOLDER = "_temporary"
+ VLLM_SUPPORTED_TASKS = [
+     "text-generation",
+     "image-text-to-text",
+ ]


  class ExportMode(enum.Enum):
@@ -495,6 +499,7 @@ class ModelVersion(lineage_node.LineageNode):
          function_name: Optional[str] = None,
          partition_column: Optional[str] = None,
          strict_input_validation: bool = False,
+         params: Optional[dict[str, Any]] = None,
      ) -> Union[pd.DataFrame, dataframe.DataFrame]:
          """Invoke a method in a model version object.

@@ -505,6 +510,8 @@ class ModelVersion(lineage_node.LineageNode):
              partition_column: The partition column name to partition by.
              strict_input_validation: Enable stricter validation for the input data. This will result value range based
                  type validation to make sure your input data won't overflow when providing to the model.
+             params: Optional dictionary of model inference parameters (e.g., temperature, top_k for LLMs).
+                 These are passed as keyword arguments to the model's inference method. Defaults to None.
          """
          ...

@@ -516,6 +523,7 @@ class ModelVersion(lineage_node.LineageNode):
          service_name: str,
          function_name: Optional[str] = None,
          strict_input_validation: bool = False,
+         params: Optional[dict[str, Any]] = None,
      ) -> Union[pd.DataFrame, dataframe.DataFrame]:
          """Invoke a method in a model version object via a service.

@@ -525,6 +533,8 @@ class ModelVersion(lineage_node.LineageNode):
              function_name: The function name to run. It is the name used to call a function in SQL.
              strict_input_validation: Enable stricter validation for the input data. This will result value range based
                  type validation to make sure your input data won't overflow when providing to the model.
+             params: Optional dictionary of model inference parameters (e.g., temperature, top_k for LLMs).
+                 These are passed as keyword arguments to the model's inference method. Defaults to None.
          """
          ...

@@ -541,6 +551,7 @@ class ModelVersion(lineage_node.LineageNode):
          function_name: Optional[str] = None,
          partition_column: Optional[str] = None,
          strict_input_validation: bool = False,
+         params: Optional[dict[str, Any]] = None,
      ) -> Union[pd.DataFrame, "dataframe.DataFrame"]:
          """Invoke a method in a model version object via the warehouse or a service.

@@ -552,6 +563,8 @@ class ModelVersion(lineage_node.LineageNode):
              partition_column: The partition column name to partition by.
              strict_input_validation: Enable stricter validation for the input data. This will result value range based
                  type validation to make sure your input data won't overflow when providing to the model.
+             params: Optional dictionary of model inference parameters (e.g., temperature, top_k for LLMs).
+                 These are passed as keyword arguments to the model's inference method. Defaults to None.

          Returns:
              The prediction data. It would be the same type dataframe as your input.
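Usage note: because params is forwarded verbatim as keyword arguments to the model's inference method, call sites stay simple. A minimal sketch, assuming mv is a ModelVersion fetched from the registry and that the underlying model accepts these particular parameters:

    # Sketch: passing inference-time parameters through ModelVersion.run.
    result_df = mv.run(
        input_df,                 # pandas or Snowpark DataFrame (assumed)
        function_name="predict",  # placeholder method name
        params={"temperature": 0.7, "top_k": 50},  # forwarded as keyword args
    )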
@@ -582,6 +595,7 @@ class ModelVersion(lineage_node.LineageNode):
                  service_name=service_name_id,
                  strict_input_validation=strict_input_validation,
                  statement_params=statement_params,
+                 params=params,
              )
          else:
              manifest = self._get_model_manifest(statement_params=statement_params)
@@ -621,6 +635,7 @@ class ModelVersion(lineage_node.LineageNode):
              statement_params=statement_params,
              is_partitioned=target_function_info["is_partitioned"],
              explain_case_sensitive=explain_case_sensitive,
+             params=params,
          )

      def _determine_explain_case_sensitivity(
@@ -651,6 +666,9 @@ class ModelVersion(lineage_node.LineageNode):
          input_spec: dataframe.DataFrame,
          output_spec: batch_inference_specs.OutputSpec,
          job_spec: Optional[batch_inference_specs.JobSpec] = None,
+         params: Optional[dict[str, Any]] = None,
+         column_handling: Optional[dict[str, batch_inference_specs.ColumnHandlingOptions]] = None,
+         inference_engine_options: Optional[dict[str, Any]] = None,
      ) -> job.MLJob[Any]:
          """Execute batch inference on datasets as an SPCS job.

@@ -664,6 +682,16 @@ class ModelVersion(lineage_node.LineageNode):
              job_spec (Optional[batch_inference_specs.JobSpec]): Optional configuration for job
                  execution parameters such as compute resources, worker counts, and job naming.
                  If None, default values will be used.
+             params (Optional[dict[str, Any]]): Optional dictionary of model inference parameters
+                 (e.g., temperature, top_k for LLMs). These are passed as keyword arguments to the
+                 model's inference method. Defaults to None.
+             column_handling (Optional[dict[str, batch_inference_specs.FileEncoding]]): Optional dictionary
+                 specifying how to handle specific columns during file I/O. Maps column names to their
+                 file encoding configuration.
+             inference_engine_options: Options for the service creation with custom inference engine.
+                 Supports `engine` and `engine_args_override`.
+                 `engine` is the type of the inference engine to use.
+                 `engine_args_override` is a list of string arguments to pass to the inference engine.

          Returns:
              job.MLJob[Any]: A batch inference job object that can be used to monitor progress and manage the job
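Usage note: the three new run_batch parameters compose. A hedged sketch, assuming mv, input_df, and output_spec already exist; the "vllm" engine string is an assumption suggested by VLLM_SUPPORTED_TASKS (the accepted identifiers are not shown in this diff), and the override flags are illustrative only:

    # Sketch: run_batch with the options added in this diff.
    from snowflake.ml.model import FileEncoding

    batch_job = mv.run_batch(
        input_spec=input_df,          # Snowpark DataFrame over staged files (assumed)
        output_spec=output_spec,      # an OutputSpec; its fields are not shown in this diff
        params={"temperature": 0.2},  # forwarded to the model's inference method
        column_handling={"IMAGE_PATH": {"encoding": FileEncoding.BASE64_DATA_URL}},
        inference_engine_options={
            "engine": "vllm",                                     # assumed identifier
            "engine_args_override": ["--max-model-len", "4096"],  # illustrative flags
        },
    )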
@@ -722,6 +750,15 @@ class ModelVersion(lineage_node.LineageNode):
          if job_spec is None:
              job_spec = batch_inference_specs.JobSpec()

+         # Validate GPU support if GPU resources are requested
+         self._throw_error_if_gpu_is_not_supported(job_spec.gpu_requests, statement_params)
+
+         inference_engine_args = self._prepare_inference_engine_args(
+             inference_engine_options,
+             job_spec.gpu_requests,
+             statement_params,
+         )
+
          warehouse = job_spec.warehouse or self._service_ops._session.get_current_warehouse()
          if warehouse is None:
              raise ValueError("Warehouse is not set. Please set the warehouse field in the JobSpec.")
@@ -746,12 +783,14 @@ class ModelVersion(lineage_node.LineageNode):
          else:
              job_name = job_spec.job_name

+         target_function_info = self._get_function_info(function_name=job_spec.function_name)
+
          return self._service_ops.invoke_batch_job_method(
              # model version info
              model_name=self._model_name,
              version_name=self._version_name,
              # job spec
-             function_name=self._get_function_info(function_name=job_spec.function_name)["target_method"],
+             function_name=target_function_info["target_method"],
              compute_pool_name=sql_identifier.SqlIdentifier(compute_pool),
              force_rebuild=job_spec.force_rebuild,
              image_repo_name=job_spec.image_repo,
@@ -766,10 +805,14 @@ class ModelVersion(lineage_node.LineageNode):
              # input and output
              input_stage_location=input_stage_location,
              input_file_pattern="*",
+             column_handling=column_handling,
+             params=params,
+             signature_params=target_function_info["signature"].params,
              output_stage_location=output_stage_location,
              completion_filename="_SUCCESS",
              # misc
              statement_params=statement_params,
+             inference_engine_args=inference_engine_args,
          )

      def _get_function_info(self, function_name: Optional[str]) -> model_manifest_schema.ModelFunctionInfo:
@@ -985,20 +1028,55 @@ class ModelVersion(lineage_node.LineageNode):
              " the `log_model` function."
          )

-     def _check_huggingface_text_generation_model(
+     def _prepare_inference_engine_args(
+         self,
+         inference_engine_options: Optional[dict[str, Any]],
+         gpu_requests: Optional[Union[str, int]],
+         statement_params: Optional[dict[str, Any]] = None,
+     ) -> Optional[service_ops.InferenceEngineArgs]:
+         """Prepare and validate inference engine arguments.
+
+         This method handles the common logic for processing inference engine options:
+         1. Parse inference engine options into InferenceEngineArgs
+         2. Validate that the model is a HuggingFace text-generation model (if inference engine is specified)
+         3. Enrich inference engine args
+
+         Args:
+             inference_engine_options: Optional dictionary containing inference engine configuration.
+             gpu_requests: GPU resource request string (e.g., "4").
+             statement_params: Optional dictionary of statement parameters for SQL commands.
+
+         Returns:
+             Prepared InferenceEngineArgs or None if no inference engine is specified.
+         """
+         inference_engine_args = inference_engine_utils._get_inference_engine_args(inference_engine_options)
+
+         if inference_engine_args is not None:
+             # Validate that model is HuggingFace vLLM supported model and is logged with
+             # OpenAI compatible signature.
+             self._check_huggingface_vllm_supported_model(statement_params)
+             # Enrich with GPU configuration
+             inference_engine_args = inference_engine_utils._enrich_inference_engine_args(
+                 inference_engine_args,
+                 gpu_requests,
+             )
+
+         return inference_engine_args
+
+     def _check_huggingface_vllm_supported_model(
          self,
          statement_params: Optional[dict[str, Any]] = None,
      ) -> None:
-         """Check if the model is a HuggingFace pipeline with text-generation task
-         and is logged with OPENAI_CHAT_SIGNATURE.
+         """Check if the model is a HuggingFace pipeline with vLLM supported task
+         and is logged with OpenAI compatible signature.

          Args:
              statement_params: Optional dictionary of statement parameters to include
                  in the SQL command to fetch model spec.

          Raises:
-             ValueError: If the model is not a HuggingFace text-generation model or
-                 if the model is not logged with OPENAI_CHAT_SIGNATURE.
+             ValueError: If the model is not a HuggingFace vLLM supported model or
+                 if the model is not logged with OpenAI compatible signature.
          """
          # Fetch model spec
          model_spec = self._get_model_spec(statement_params)
@@ -1007,34 +1085,37 @@ class ModelVersion(lineage_node.LineageNode):
          model_type = model_spec.get("model_type")
          if model_type != "huggingface_pipeline":
              raise ValueError(
-                 f"Inference engine is only supported for HuggingFace text-generation models. "
+                 f"Inference engine is only supported for HuggingFace vLLM supported models. "
                  f"Found model_type: {model_type}"
              )

-         # Check if model supports text-generation task
+         # Check if model supports vLLM supported task
          # There should only be one model in the list because we don't support multiple models in a single model spec
          models = model_spec.get("models", {})
-         is_text_generation = False
+         is_vllm_supported_task = False
          found_tasks: list[str] = []

-         # As long as the model supports text-generation task, we can use it
+         # As long as the model supports vLLM supported task, we can use it
          for _, model_info in models.items():
              options = model_info.get("options", {})
              task = options.get("task")
              if task:
                  found_tasks.append(str(task))
-                 if task == "text-generation":
-                     is_text_generation = True
+                 if task in VLLM_SUPPORTED_TASKS:
+                     is_vllm_supported_task = True
                      break

-         if not is_text_generation:
+         if not is_vllm_supported_task:
              tasks_str = ", ".join(found_tasks)
              found_tasks_str = (
                  f"Found task(s): {tasks_str} in model spec." if found_tasks else "No task found in model spec."
              )
-             raise ValueError(f"Inference engine is only supported for task 'text-generation'. {found_tasks_str}")
+             supported_tasks_str = ", ".join(VLLM_SUPPORTED_TASKS)
+             raise ValueError(
+                 f"Inference engine is only supported for vLLM supported tasks. {supported_tasks_str}. {found_tasks_str}"
+             )

-         # Check if the model is logged with OPENAI_CHAT_SIGNATURE
+         # Check if the model is logged with OpenAI compatible signature.
          signatures_dict = model_spec.get("signatures", {})

          # Deserialize signatures from model spec to ModelSignature objects for proper semantic comparison.
@@ -1042,11 +1123,16 @@ class ModelVersion(lineage_node.LineageNode):
              func_name: core.ModelSignature.from_dict(sig_dict) for func_name, sig_dict in signatures_dict.items()
          }

-         if deserialized_signatures != openai_signatures.OPENAI_CHAT_SIGNATURE:
+         if deserialized_signatures not in [
+             openai_signatures.OPENAI_CHAT_SIGNATURE,
+             openai_signatures.OPENAI_CHAT_SIGNATURE_WITH_CONTENT_FORMAT_STRING,
+         ]:
              raise ValueError(
-                 "Inference engine requires the model to be logged with OPENAI_CHAT_SIGNATURE. "
+                 "Inference engine requires the model to be logged with openai_signatures.OPENAI_CHAT_SIGNATURE or "
+                 "openai_signatures.OPENAI_CHAT_SIGNATURE_WITH_CONTENT_FORMAT_STRING. "
                  f"Found signatures: {signatures_dict}. "
-                 "Please log the model with: signatures=openai_signatures.OPENAI_CHAT_SIGNATURE"
+                 "Please log the model again with: signatures=openai_signatures.OPENAI_CHAT_SIGNATURE or "
+                 "signatures=openai_signatures.OPENAI_CHAT_SIGNATURE_WITH_CONTENT_FORMAT_STRING"
              )

      @overload
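Usage note: to satisfy the check above ahead of time, log the HuggingFace pipeline with one of the two accepted signatures. A minimal sketch; the registry handle, model object, and names are placeholders:

    # Sketch: logging a model so it passes _check_huggingface_vllm_supported_model.
    from snowflake.ml.model import openai_signatures
    from snowflake.ml.registry import Registry

    reg = Registry(session=session)
    mv = reg.log_model(
        text_generation_pipeline,     # a transformers pipeline (placeholder)
        model_name="MY_LLM",
        version_name="V1",
        signatures=openai_signatures.OPENAI_CHAT_SIGNATURE,
    )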
@@ -1287,20 +1373,11 @@ class ModelVersion(lineage_node.LineageNode):
          # Validate GPU support if GPU resources are requested
          self._throw_error_if_gpu_is_not_supported(gpu_requests, statement_params)

-         inference_engine_args = inference_engine_utils._get_inference_engine_args(inference_engine_options)
-
-         # Check if model is HuggingFace text-generation and is logged with
-         # OPENAI_CHAT_SIGNATURE before doing inference engine checks
-         # Only validate if inference engine is actually specified
-         if inference_engine_args is not None:
-             self._check_huggingface_text_generation_model(statement_params)
-
-         # Enrich inference engine args if inference engine is specified
-         if inference_engine_args is not None:
-             inference_engine_args = inference_engine_utils._enrich_inference_engine_args(
-                 inference_engine_args,
-                 gpu_requests,
-             )
+         inference_engine_args = self._prepare_inference_engine_args(
+             inference_engine_options,
+             gpu_requests,
+             statement_params,
+         )

          from snowflake.ml.model import event_handler
          from snowflake.snowpark import exceptions
snowflake/ml/model/_client/ops/deployment_step.py (new file)

@@ -0,0 +1,36 @@
+ import enum
+ import hashlib
+ from typing import Optional
+
+
+ class DeploymentStep(enum.Enum):
+     MODEL_BUILD = ("model-build", "model_build_")
+     MODEL_INFERENCE = ("model-inference", None)
+     MODEL_LOGGING = ("model-logging", "model_logging_")
+
+     def __init__(self, container_name: str, service_name_prefix: Optional[str]) -> None:
+         self._container_name = container_name
+         self._service_name_prefix = service_name_prefix
+
+     @property
+     def container_name(self) -> str:
+         """Get the container name for the deployment step."""
+         return self._container_name
+
+     @property
+     def service_name_prefix(self) -> Optional[str]:
+         """Get the service name prefix for the deployment step."""
+         return self._service_name_prefix
+
+
+ def get_service_id_from_deployment_step(query_id: str, deployment_step: DeploymentStep) -> str:
+     """Get the service ID through the server-side logic."""
+     uuid = query_id.replace("-", "")
+     big_int = int(uuid, 16)
+     md5_hash = hashlib.md5(str(big_int).encode(), usedforsecurity=False).hexdigest()
+     identifier = md5_hash[:8]
+     service_name_prefix = deployment_step.service_name_prefix
+     if service_name_prefix is None:
+         # raise an exception if the service name prefix is None
+         raise ValueError(f"Service name prefix is {service_name_prefix} for deployment step {deployment_step}.")
+     return (service_name_prefix + identifier).upper()
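The derivation above mirrors server-side naming, so a worked example helps: strip dashes from the query id, parse the hex digits as an integer, MD5 the decimal string, and keep the first 8 hex characters. A runnable sketch with a made-up query id (dropping usedforsecurity=False, which only matters on FIPS builds):

    # Worked example of get_service_id_from_deployment_step's hashing steps.
    import hashlib

    query_id = "01b2c3d4-0000-1111-2222-333344445555"  # made-up query id
    uuid = query_id.replace("-", "")       # hex digits only
    big_int = int(uuid, 16)                # hex string -> integer
    md5_hash = hashlib.md5(str(big_int).encode()).hexdigest()
    identifier = md5_hash[:8]              # first 8 hex chars

    # DeploymentStep.MODEL_BUILD's prefix is "model_build_":
    service_id = ("model_build_" + identifier).upper()
    print(service_id)  # e.g. MODEL_BUILD_1A2B3C4D (actual digits vary)

Note that MODEL_INFERENCE has no service name prefix, so passing it raises ValueError.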