snowflake-ml-python 1.19.0__py3-none-any.whl → 1.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. snowflake/ml/_internal/env_utils.py +16 -0
  2. snowflake/ml/_internal/platform_capabilities.py +36 -0
  3. snowflake/ml/_internal/telemetry.py +56 -7
  4. snowflake/ml/data/_internal/arrow_ingestor.py +67 -2
  5. snowflake/ml/data/data_connector.py +103 -1
  6. snowflake/ml/experiment/_client/experiment_tracking_sql_client.py +8 -2
  7. snowflake/ml/experiment/_entities/run.py +15 -0
  8. snowflake/ml/experiment/callback/keras.py +25 -2
  9. snowflake/ml/experiment/callback/lightgbm.py +27 -2
  10. snowflake/ml/experiment/callback/xgboost.py +25 -2
  11. snowflake/ml/experiment/experiment_tracking.py +123 -13
  12. snowflake/ml/experiment/utils.py +6 -0
  13. snowflake/ml/feature_store/access_manager.py +1 -0
  14. snowflake/ml/feature_store/feature_store.py +1 -1
  15. snowflake/ml/feature_store/feature_view.py +34 -24
  16. snowflake/ml/jobs/_interop/protocols.py +3 -0
  17. snowflake/ml/jobs/_utils/feature_flags.py +1 -0
  18. snowflake/ml/jobs/_utils/payload_utils.py +360 -357
  19. snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +95 -8
  20. snowflake/ml/jobs/_utils/scripts/start_mlruntime.sh +92 -0
  21. snowflake/ml/jobs/_utils/scripts/startup.sh +112 -0
  22. snowflake/ml/jobs/_utils/spec_utils.py +2 -406
  23. snowflake/ml/jobs/_utils/stage_utils.py +22 -1
  24. snowflake/ml/jobs/_utils/types.py +14 -7
  25. snowflake/ml/jobs/job.py +8 -9
  26. snowflake/ml/jobs/manager.py +64 -129
  27. snowflake/ml/model/_client/model/inference_engine_utils.py +8 -4
  28. snowflake/ml/model/_client/model/model_version_impl.py +109 -28
  29. snowflake/ml/model/_client/ops/model_ops.py +32 -6
  30. snowflake/ml/model/_client/ops/service_ops.py +9 -4
  31. snowflake/ml/model/_client/sql/service.py +69 -2
  32. snowflake/ml/model/_packager/model_handler.py +8 -2
  33. snowflake/ml/model/_packager/model_handlers/{huggingface_pipeline.py → huggingface.py} +203 -76
  34. snowflake/ml/model/_packager/model_handlers/mlflow.py +6 -1
  35. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
  36. snowflake/ml/model/_signatures/core.py +305 -8
  37. snowflake/ml/model/_signatures/utils.py +13 -4
  38. snowflake/ml/model/compute_pool.py +2 -0
  39. snowflake/ml/model/models/huggingface.py +285 -0
  40. snowflake/ml/model/models/huggingface_pipeline.py +25 -215
  41. snowflake/ml/model/type_hints.py +5 -1
  42. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -2
  43. snowflake/ml/monitoring/_client/model_monitor_sql_client.py +12 -0
  44. snowflake/ml/monitoring/_manager/model_monitor_manager.py +12 -0
  45. snowflake/ml/monitoring/entities/model_monitor_config.py +5 -0
  46. snowflake/ml/utils/html_utils.py +67 -1
  47. snowflake/ml/version.py +1 -1
  48. {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/METADATA +94 -7
  49. {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/RECORD +52 -48
  50. {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/WHEEL +0 -0
  51. {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/licenses/LICENSE.txt +0 -0
  52. {snowflake_ml_python-1.19.0.dist-info → snowflake_ml_python-1.21.0.dist-info}/top_level.txt +0 -0
snowflake/ml/jobs/manager.py

@@ -1,27 +1,25 @@
 import json
 import logging
+import os
 import pathlib
 import sys
-import textwrap
 from pathlib import PurePath
 from typing import Any, Callable, Optional, TypeVar, Union, cast, overload
 from uuid import uuid4
 
 import pandas as pd
-import yaml
 
 from snowflake import snowpark
 from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.utils import identifier
 from snowflake.ml.jobs import job as jb
 from snowflake.ml.jobs._utils import (
+    constants,
     feature_flags,
     payload_utils,
     query_helper,
-    spec_utils,
     types,
 )
-from snowflake.snowpark._internal import utils as sp_utils
 from snowflake.snowpark.context import get_active_session
 from snowflake.snowpark.exceptions import SnowparkSQLException
 from snowflake.snowpark.functions import coalesce, col, lit, when
@@ -259,7 +257,7 @@ def submit_directory(
     dir_path: str,
     compute_pool: str,
     *,
-    entrypoint: str,
+    entrypoint: Union[str, list[str]],
     stage_name: str,
     args: Optional[list[str]] = None,
     target_instances: int = 1,
@@ -274,7 +272,11 @@ def submit_directory(
     Args:
         dir_path: The path to the directory containing the job payload.
         compute_pool: The compute pool to use for the job.
-        entrypoint: The relative path to the entry point script inside the source directory.
+        entrypoint: The entry point for job execution. Can be:
+            - A string path to the entry point script inside the source directory.
+            - A list of strings representing a custom command (e.g., ["arctic_training"])
+              which is passed through as-is without local resolution or validation.
+              This is useful for entrypoints that are installed via pip_requirements.
         stage_name: The name of the stage where the job payload will be uploaded.
         args: A list of arguments to pass to the job.
         target_instances: The number of nodes in the job. If none specified, create a single node job.
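A usage sketch of the two accepted entrypoint forms (the pool, stage, and package names below are placeholders, not values taken from this diff):

```python
from snowflake.ml import jobs

# 1. String entrypoint: a script path resolved relative to the source directory.
job1 = jobs.submit_directory(
    "./train_src",
    "MY_COMPUTE_POOL",
    entrypoint="train.py",
    stage_name="payload_stage",
)

# 2. List entrypoint: a custom command passed through as-is, e.g. a console
#    script installed via pip_requirements (no local resolution or validation).
job2 = jobs.submit_directory(
    "./train_src",
    "MY_COMPUTE_POOL",
    entrypoint=["arctic_training"],
    stage_name="payload_stage",
    pip_requirements=["arctic_training"],
)
```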
@@ -315,7 +317,7 @@ def submit_from_stage(
     source: str,
     compute_pool: str,
     *,
-    entrypoint: str,
+    entrypoint: Union[str, list[str]],
     stage_name: str,
     args: Optional[list[str]] = None,
     target_instances: int = 1,
@@ -330,7 +332,11 @@ def submit_from_stage(
     Args:
         source: a stage path or a stage containing the job payload.
        compute_pool: The compute pool to use for the job.
-        entrypoint: a stage path containing the entry point script inside the source directory.
+        entrypoint: The entry point for job execution. Can be:
+            - A string path to the entry point script inside the source directory.
+            - A list of strings representing a custom command (e.g., ["arctic_training"])
+              which is passed through as-is without local resolution or validation.
+              This is useful for entrypoints that are installed via pip_requirements.
         stage_name: The name of the stage where the job payload will be uploaded.
         args: A list of arguments to pass to the job.
         target_instances: The number of nodes in the job. If none specified, create a single node job.
@@ -375,7 +381,7 @@ def _submit_job(
     compute_pool: str,
     *,
     stage_name: str,
-    entrypoint: Optional[str] = None,
+    entrypoint: Optional[Union[str, list[str]]] = None,
     args: Optional[list[str]] = None,
     target_instances: int = 1,
     pip_requirements: Optional[list[str]] = None,
@@ -392,7 +398,7 @@ def _submit_job(
     compute_pool: str,
     *,
     stage_name: str,
-    entrypoint: Optional[str] = None,
+    entrypoint: Optional[Union[str, list[str]]] = None,
     args: Optional[list[str]] = None,
     target_instances: int = 1,
     pip_requirements: Optional[list[str]] = None,
@@ -424,7 +430,7 @@ def _submit_job(
     compute_pool: str,
     *,
     stage_name: str,
-    entrypoint: Optional[str] = None,
+    entrypoint: Optional[Union[str, list[str]]] = None,
     args: Optional[list[str]] = None,
     target_instances: int = 1,
     session: Optional[snowpark.Session] = None,
@@ -437,7 +443,11 @@ def _submit_job(
         source: The file/directory path containing payload source code or a serializable Python callable.
         compute_pool: The compute pool to use for the job.
         stage_name: The name of the stage where the job payload will be uploaded.
-        entrypoint: The entry point for the job execution. Required if source is a directory.
+        entrypoint: The entry point for the job execution. Can be:
+            - A string path to a Python script (required if source is a directory).
+            - A list of strings representing a custom command (e.g., ["arctic_training"])
+              which is passed through as-is without local resolution or validation.
+              This is useful for entrypoints that are installed via pip_requirements.
         args: A list of arguments to pass to the job.
         target_instances: The number of instances to use for the job. If none specified, single node job is created.
         session: The Snowpark session to use. If none specified, uses active session.
@@ -449,7 +459,6 @@ def _submit_job(
     Raises:
         ValueError: If database or schema value(s) are invalid
         RuntimeError: If schema is not specified in session context or job submission
-        SnowparkSQLException: if failed to upload payload
     """
     session = _ensure_session(session)
 
@@ -481,7 +490,8 @@ def _submit_job(
     enable_metrics = kwargs.pop("enable_metrics", True)
     query_warehouse = kwargs.pop("query_warehouse", session.get_current_warehouse())
     imports = kwargs.pop("imports", None) or imports
-    runtime_environment = kwargs.pop("runtime_environment", None)
+    # if the ML job is submitted from a notebook, we use the same image tag as the notebook
+    runtime_environment = kwargs.pop("runtime_environment", os.environ.get(constants.RUNTIME_IMAGE_TAG_ENV_VAR, None))
 
     # Warn if there are unknown kwargs
     if kwargs:
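The net effect: a job submitted from a Container Runtime notebook now inherits the notebook's runtime image tag by default, while an explicit `runtime_environment` kwarg still wins. A hedged sketch (the tag value is illustrative, and the env var name behind `constants.RUNTIME_IMAGE_TAG_ENV_VAR` is internal):

```python
from snowflake.ml import jobs

# Default: submitted from a notebook, the job reuses the notebook's image tag,
# read from the environment variable behind constants.RUNTIME_IMAGE_TAG_ENV_VAR.
job = jobs.submit_directory(
    "./src",
    "MY_COMPUTE_POOL",
    entrypoint="train.py",
    stage_name="payload_stage",
)

# Explicit override: an image tag or full image URL passed as a kwarg takes
# precedence over the notebook-inherited value.
job = jobs.submit_directory(
    "./src",
    "MY_COMPUTE_POOL",
    entrypoint="train.py",
    stage_name="payload_stage",
    runtime_environment="1.8.0",  # illustrative tag, not from this diff
)
```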
@@ -513,128 +523,51 @@ def _submit_job(
     try:
         # Upload payload
         uploaded_payload = payload_utils.JobPayload(
-            source, entrypoint=entrypoint, pip_requirements=pip_requirements, additional_payloads=imports
+            source, entrypoint=entrypoint, pip_requirements=pip_requirements, imports=imports
         ).upload(session, stage_path)
     except SnowparkSQLException as e:
         if e.sql_error_code == 90106:
             raise RuntimeError(
                 "Please specify a schema, either in the session context or as a parameter in the job submission"
             )
+        elif e.sql_error_code == 3001 and "schema" in str(e).lower():
+            raise RuntimeError(
+                "Please grant privileges on the schema before submitting a job, see "
+                "https://docs.snowflake.com/en/developer-guide/snowflake-ml/ml-jobs/access-control-requirements"
+                " for more details"
+            ) from e
         raise
 
-    if feature_flags.FeatureFlags.USE_SUBMIT_JOB_V2.is_enabled(default=True):
-        # Add default env vars (extracted from spec_utils.generate_service_spec)
-        combined_env_vars = {**uploaded_payload.env_vars, **(env_vars or {})}
-
-        try:
-            return _do_submit_job_v2(
-                session=session,
-                payload=uploaded_payload,
-                args=args,
-                env_vars=combined_env_vars,
-                spec_overrides=spec_overrides,
-                compute_pool=compute_pool,
-                job_id=job_id,
-                external_access_integrations=external_access_integrations,
-                query_warehouse=query_warehouse,
-                target_instances=target_instances,
-                min_instances=min_instances,
-                enable_metrics=enable_metrics,
-                use_async=True,
-                runtime_environment=runtime_environment,
-            )
-        except SnowparkSQLException as e:
-            if not (e.sql_error_code == 90237 and sp_utils.is_in_stored_procedure()):  # type: ignore[no-untyped-call]
-                raise
-            # SNOW-2390287: SYSTEM$EXECUTE_ML_JOB() is erroneously blocked in owner's rights
-            # stored procedures. This will be fixed in an upcoming release.
-            logger.warning(
-                "Job submission using V2 failed with error {}. Falling back to V1.".format(
-                    str(e).split("\n", 1)[0],
-                )
-            )
-
-    # Fall back to v1
-    # Generate service spec
-    spec = spec_utils.generate_service_spec(
-        session,
-        compute_pool=compute_pool,
-        payload=uploaded_payload,
-        args=args,
-        target_instances=target_instances,
-        min_instances=min_instances,
-        enable_metrics=enable_metrics,
-        runtime_environment=runtime_environment,
-    )
-
-    # Generate spec overrides
-    spec_overrides = spec_utils.generate_spec_overrides(
-        environment_vars=env_vars,
-        custom_overrides=spec_overrides,
-    )
-    if spec_overrides:
-        spec = spec_utils.merge_patch(spec, spec_overrides, display_name="spec_overrides")
-
-    return _do_submit_job_v1(
-        session, spec, external_access_integrations, query_warehouse, target_instances, compute_pool, job_id
-    )
-
-
-def _do_submit_job_v1(
-    session: snowpark.Session,
-    spec: dict[str, Any],
-    external_access_integrations: list[str],
-    query_warehouse: Optional[str],
-    target_instances: int,
-    compute_pool: str,
-    job_id: str,
-) -> jb.MLJob[Any]:
-    """
-    Generate the SQL query for job submission.
-
-    Args:
-        session: The Snowpark session to use.
-        spec: The service spec for the job.
-        external_access_integrations: The external access integrations for the job.
-        query_warehouse: The query warehouse for the job.
-        target_instances: The number of instances for the job.
-        session: The Snowpark session to use.
-        compute_pool: The compute pool to use for the job.
-        job_id: The ID of the job.
-
-    Returns:
-        The job object.
-    """
-    query_template = textwrap.dedent(
-        """\
-        EXECUTE JOB SERVICE
-        IN COMPUTE POOL IDENTIFIER(?)
-        FROM SPECIFICATION $$
-        {}
-        $$
-        NAME = IDENTIFIER(?)
-        ASYNC = TRUE
-        """
-    )
-    params: list[Any] = [compute_pool, job_id]
-    query = query_template.format(yaml.dump(spec)).splitlines()
-    if external_access_integrations:
-        external_access_integration_list = ",".join(f"{e}" for e in external_access_integrations)
-        query.append(f"EXTERNAL_ACCESS_INTEGRATIONS = ({external_access_integration_list})")
-    if query_warehouse:
-        query.append("QUERY_WAREHOUSE = IDENTIFIER(?)")
-        params.append(query_warehouse)
-    if target_instances > 1:
-        query.append("REPLICAS = ?")
-        params.append(target_instances)
-
-    query_text = "\n".join(line for line in query if line)
-    _ = query_helper.run_query(session, query_text, params=params)
+    combined_env_vars = {**uploaded_payload.env_vars, **(env_vars or {})}
 
-    return get_job(job_id, session=session)
+    try:
+        return _do_submit_job(
+            session=session,
+            payload=uploaded_payload,
+            args=args,
+            env_vars=combined_env_vars,
+            spec_overrides=spec_overrides,
+            compute_pool=compute_pool,
+            job_id=job_id,
+            external_access_integrations=external_access_integrations,
+            query_warehouse=query_warehouse,
+            target_instances=target_instances,
+            min_instances=min_instances,
+            enable_metrics=enable_metrics,
+            use_async=True,
+            runtime_environment=runtime_environment,
+        )
+    except SnowparkSQLException as e:
+        if e.sql_error_code == 3001 and "schema" in str(e).lower():
+            raise RuntimeError(
+                "Please grant privileges on the schema before submitting a job, see "
+                "https://docs.snowflake.com/en/developer-guide/snowflake-ml/ml-jobs/access-control-requirements"
+                " for more details"
+            ) from e
+        raise
 
 
-def _do_submit_job_v2(
+def _do_submit_job(
     session: snowpark.Session,
     payload: types.UploadedPayload,
     args: Optional[list[str]],
@@ -672,9 +605,7 @@ def _do_submit_job_v2(
     Returns:
         The job object.
     """
-    args = [
-        (payload.stage_path.joinpath(v).as_posix() if isinstance(v, PurePath) else v) for v in payload.entrypoint
-    ] + (args or [])
+    args = [(v.as_posix() if isinstance(v, PurePath) else v) for v in payload.entrypoint] + (args or [])
     spec_options = {
         "STAGE_PATH": payload.stage_path.as_posix(),
         "ENTRYPOINT": ["/usr/local/bin/_entrypoint.sh"],
@@ -683,13 +614,14 @@ def _do_submit_job_v2(
         "ENABLE_METRICS": enable_metrics,
         "SPEC_OVERRIDES": spec_overrides,
     }
-    # for the image tag or full image URL, we use that directly
     if runtime_environment:
+        # for the image tag or full image URL, we use that directly
        spec_options["RUNTIME"] = runtime_environment
     elif feature_flags.FeatureFlags.ENABLE_RUNTIME_VERSIONS.is_enabled():
         # when feature flag is enabled, we get the local python version and wrap it in a dict
         # in system function, we can know whether it is python version or image tag or full image URL through the format
         spec_options["RUNTIME"] = json.dumps({"pythonVersion": f"{sys.version_info.major}.{sys.version_info.minor}"})
+
     job_options = {
         "EXTERNAL_ACCESS_INTEGRATIONS": external_access_integrations,
         "QUERY_WAREHOUSE": query_warehouse,
@@ -697,6 +629,9 @@ def _do_submit_job_v2(
         "MIN_INSTANCES": min_instances,
         "ASYNC": use_async,
     }
+
+    if feature_flags.FeatureFlags.ENABLE_STAGE_MOUNT_V2.is_enabled(default=True):
+        spec_options["ENABLE_STAGE_MOUNT_V2"] = True
     if payload.payload_name:
         job_options["GENERATE_SUFFIX"] = True
     job_options = {k: v for k, v in job_options.items() if v is not None}
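The `args` change above is what makes list entrypoints work end to end: `PurePath` elements (script payloads) are rendered as stage-relative POSIX paths, while plain strings (custom commands) pass through untouched. A self-contained sketch of just that expression:

```python
from pathlib import PurePath

def build_args(entrypoint: list, extra_args: list[str]) -> list[str]:
    # Mirrors the list comprehension in _do_submit_job: PurePath entries become
    # POSIX-style relative paths; string entries (custom commands) stay as-is.
    return [(v.as_posix() if isinstance(v, PurePath) else v) for v in entrypoint] + extra_args

print(build_args([PurePath("src/train.py")], ["--epochs", "10"]))
# ['src/train.py', '--epochs', '10']
print(build_args(["arctic_training"], []))
# ['arctic_training']
```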
snowflake/ml/model/_client/model/inference_engine_utils.py

@@ -4,14 +4,18 @@ from snowflake.ml.model._client.ops import service_ops
 
 
 def _get_inference_engine_args(
-    experimental_options: Optional[dict[str, Any]],
+    inference_engine_options: Optional[dict[str, Any]],
 ) -> Optional[service_ops.InferenceEngineArgs]:
-    if not experimental_options or "inference_engine" not in experimental_options:
+
+    if not inference_engine_options:
         return None
 
+    if "engine" not in inference_engine_options:
+        raise ValueError("'engine' field is required in inference_engine_options")
+
     return service_ops.InferenceEngineArgs(
-        inference_engine=experimental_options["inference_engine"],
-        inference_engine_args_override=experimental_options.get("inference_engine_args_override"),
+        inference_engine=inference_engine_options["engine"],
+        inference_engine_args_override=inference_engine_options.get("engine_args_override"),
     )
 
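In short, the option keys were renamed (`inference_engine` → `engine`, `inference_engine_args_override` → `engine_args_override`) and a missing `engine` key is now a hard error rather than a silent no-op. A sketch of the new contract (the `"vllm"` engine name and override flags are illustrative, not values confirmed by this diff):

```python
# Valid: "engine" is present; "engine_args_override" is optional.
inference_engine_options = {
    "engine": "vllm",                                     # illustrative engine name
    "engine_args_override": ["--max-model-len", "4096"],  # illustrative flags
}

# Invalid: omitting the required "engine" key now raises
#   ValueError: 'engine' field is required in inference_engine_options
# instead of silently returning None as the old experimental_options path did.
bad_options = {"engine_args_override": ["--foo"]}
```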
snowflake/ml/model/_client/model/model_version_impl.py

@@ -8,11 +8,11 @@ from typing import Any, Callable, Optional, Union, overload
 import pandas as pd
 
 from snowflake import snowpark
-from snowflake.ml import jobs
 from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.utils import sql_identifier
+from snowflake.ml.jobs import job
 from snowflake.ml.lineage import lineage_node
-from snowflake.ml.model import task, type_hints
+from snowflake.ml.model import openai_signatures, task, type_hints
 from snowflake.ml.model._client.model import (
     batch_inference_specs,
     inference_engine_utils,
@@ -23,6 +23,7 @@ from snowflake.ml.model._model_composer.model_manifest import model_manifest_sch
 from snowflake.ml.model._model_composer.model_method import utils as model_method_utils
 from snowflake.ml.model._packager.model_handlers import snowmlmodel
 from snowflake.ml.model._packager.model_meta import model_meta_schema
+from snowflake.ml.model._signatures import core
 from snowflake.snowpark import Session, async_job, dataframe
 
 _TELEMETRY_PROJECT = "MLOps"
@@ -45,6 +46,7 @@ class ModelVersion(lineage_node.LineageNode):
     _version_name: sql_identifier.SqlIdentifier
     _functions: list[model_manifest_schema.ModelFunctionInfo]
     _model_spec: Optional[model_meta_schema.ModelMetadataDict]
+    _model_manifest: Optional[model_manifest_schema.ModelManifestDict]
 
     def __init__(self) -> None:
         raise RuntimeError("ModelVersion's initializer is not meant to be used. Use `version` from model instead.")
@@ -155,6 +157,7 @@ class ModelVersion(lineage_node.LineageNode):
         self._version_name = version_name
         self._functions = self._get_functions()
         self._model_spec = None
+        self._model_manifest = None
         super(cls, cls).__init__(
             self,
             session=model_ops._session,
@@ -462,6 +465,28 @@ class ModelVersion(lineage_node.LineageNode):
         )
         return self._model_spec
 
+    def _get_model_manifest(
+        self, statement_params: Optional[dict[str, Any]] = None
+    ) -> model_manifest_schema.ModelManifestDict:
+        """Fetch and cache the model manifest for this model version.
+
+        Args:
+            statement_params: Optional dictionary of statement parameters to include
+                in the SQL command to fetch the model manifest.
+
+        Returns:
+            The model manifest as a dictionary for this model version.
+        """
+        if self._model_manifest is None:
+            self._model_manifest = self._model_ops.get_model_version_manifest(
+                database_name=None,
+                schema_name=None,
+                model_name=self._model_name,
+                version_name=self._version_name,
+                statement_params=statement_params,
+            )
+        return self._model_manifest
+
     @overload
     def run(
         self,
@@ -530,6 +555,9 @@ class ModelVersion(lineage_node.LineageNode):
 
         Returns:
             The prediction data. It would be the same type dataframe as your input.
+
+        Raises:
+            ValueError: When the model does not support running on warehouse and no service name is provided.
         """
         statement_params = telemetry.get_statement_params(
             project=_TELEMETRY_PROJECT,
@@ -556,6 +584,27 @@ class ModelVersion(lineage_node.LineageNode):
                 statement_params=statement_params,
             )
         else:
+            manifest = self._get_model_manifest(statement_params=statement_params)
+            target_platforms = manifest.get("target_platforms", None)
+            if (
+                target_platforms is not None
+                and len(target_platforms) > 0
+                and type_hints.TargetPlatform.WAREHOUSE.value not in target_platforms
+            ):
+                raise ValueError(
+                    f"The model {self.fully_qualified_model_name} version {self.version_name} "
+                    "is not logged for inference in Warehouse. "
+                    "To run the model in Warehouse, please log the model again using `log_model` API with "
+                    '`target_platforms=["WAREHOUSE"]` or '
+                    '`target_platforms=["WAREHOUSE", "SNOWPARK_CONTAINER_SERVICES"]` and rerun the command. '
+                    "To run the model in Snowpark Container Services, the `service_name` argument must be provided. "
+                    "You can create a service using the `create_service` API. "
+                    "For inference in Warehouse, see https://docs.snowflake.com/en/developer-guide/"
+                    "snowflake-ml/model-registry/warehouse#inference-from-python. "
+                    "For inference in Snowpark Container Services, see https://docs.snowflake.com/en/developer-guide/"
+                    "snowflake-ml/model-registry/container#python."
+                )
+
             explain_case_sensitive = self._determine_explain_case_sensitivity(target_function_info, statement_params)
 
             return self._model_ops.invoke_method(
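This check means warehouse inference now fails fast when a model version was logged for SPCS only. A hedged sketch of logging so the warehouse path stays available (the registry handle, model object, and input dataframe are placeholders):

```python
from snowflake.ml.registry import Registry

reg = Registry(session=session)  # assumes an existing Snowpark session
mv = reg.log_model(
    my_model,  # placeholder model object
    model_name="MY_MODEL",
    version_name="V1",
    # Include WAREHOUSE so mv.run(...) without service_name keeps working.
    target_platforms=["WAREHOUSE", "SNOWPARK_CONTAINER_SERVICES"],
)
mv.run(input_df, function_name="PREDICT")  # warehouse path; no service_name
```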
@@ -602,7 +651,7 @@ class ModelVersion(lineage_node.LineageNode):
         input_spec: dataframe.DataFrame,
         output_spec: batch_inference_specs.OutputSpec,
         job_spec: Optional[batch_inference_specs.JobSpec] = None,
-    ) -> jobs.MLJob[Any]:
+    ) -> job.MLJob[Any]:
         """Execute batch inference on datasets as an SPCS job.
 
         Args:
@@ -617,7 +666,7 @@ class ModelVersion(lineage_node.LineageNode):
                 If None, default values will be used.
 
         Returns:
-            jobs.MLJob[Any]: A batch inference job object that can be used to monitor progress and manage the job
+            job.MLJob[Any]: A batch inference job object that can be used to monitor progress and manage the job
                 lifecycle.
 
         Raises:
@@ -940,14 +989,16 @@ class ModelVersion(lineage_node.LineageNode):
         self,
         statement_params: Optional[dict[str, Any]] = None,
     ) -> None:
-        """Check if the model is a HuggingFace pipeline with text-generation task.
+        """Check if the model is a HuggingFace pipeline with text-generation task
+        and is logged with OPENAI_CHAT_SIGNATURE.
 
         Args:
             statement_params: Optional dictionary of statement parameters to include
                 in the SQL command to fetch model spec.
 
         Raises:
-            ValueError: If the model is not a HuggingFace text-generation model.
+            ValueError: If the model is not a HuggingFace text-generation model or
+                if the model is not logged with OPENAI_CHAT_SIGNATURE.
         """
         # Fetch model spec
         model_spec = self._get_model_spec(statement_params)
@@ -983,6 +1034,21 @@ class ModelVersion(lineage_node.LineageNode):
             )
             raise ValueError(f"Inference engine is only supported for task 'text-generation'. {found_tasks_str}")
 
+        # Check if the model is logged with OPENAI_CHAT_SIGNATURE
+        signatures_dict = model_spec.get("signatures", {})
+
+        # Deserialize signatures from model spec to ModelSignature objects for proper semantic comparison.
+        deserialized_signatures = {
+            func_name: core.ModelSignature.from_dict(sig_dict) for func_name, sig_dict in signatures_dict.items()
+        }
+
+        if deserialized_signatures != openai_signatures.OPENAI_CHAT_SIGNATURE:
+            raise ValueError(
+                "Inference engine requires the model to be logged with OPENAI_CHAT_SIGNATURE. "
+                f"Found signatures: {signatures_dict}. "
+                "Please log the model with: signatures=openai_signatures.OPENAI_CHAT_SIGNATURE"
+            )
+
     @overload
     def create_service(
         self,
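Practically, a model destined for a custom inference engine must be logged with the OpenAI-style chat signature up front, as the error text above instructs. A minimal sketch (the pipeline object and registry handle are placeholders):

```python
from snowflake.ml.model import openai_signatures

mv = reg.log_model(
    text_generation_pipeline,  # placeholder HuggingFace text-generation pipeline
    model_name="MY_LLM",
    version_name="V1",
    signatures=openai_signatures.OPENAI_CHAT_SIGNATURE,
)
```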
@@ -1001,6 +1067,8 @@ class ModelVersion(lineage_node.LineageNode):
         force_rebuild: bool = False,
         build_external_access_integration: Optional[str] = None,
         block: bool = True,
+        autocapture: bool = False,
+        inference_engine_options: Optional[dict[str, Any]] = None,
         experimental_options: Optional[dict[str, Any]] = None,
     ) -> Union[str, async_job.AsyncJob]:
         """Create an inference service with the given spec.
@@ -1034,11 +1102,13 @@ class ModelVersion(lineage_node.LineageNode):
             block: A bool value indicating whether this function will wait until the service is available.
                 When it is ``False``, this function executes the underlying service creation asynchronously
                 and returns an :class:`AsyncJob`.
-            experimental_options: Experimental options for the service creation with custom inference engine.
-                Currently, `inference_engine`, `inference_engine_args_override`, and `autocapture` are supported.
-                `inference_engine` is the name of the inference engine to use.
-                `inference_engine_args_override` is a list of string arguments to pass to the inference engine.
-                `autocapture` is a boolean to enable/disable inference table.
+            autocapture: Whether inference autocapture is enabled on the service. If true, inference data will be
+                captured in the model inference table.
+            inference_engine_options: Options for the service creation with custom inference engine.
+                Supports `engine` and `engine_args_override`.
+                `engine` is the type of the inference engine to use.
+                `engine_args_override` is a list of string arguments to pass to the inference engine.
+            experimental_options: Experimental options for the service creation.
         """
         ...
 
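Putting the new keywords together, a hedged `create_service` sketch (service, pool, and repo names are placeholders; the engine name is illustrative):

```python
service = mv.create_service(
    service_name="MY_LLM_SERVICE",
    service_compute_pool="MY_GPU_POOL",
    image_repo="MY_IMAGE_REPO",
    gpu_requests="1",
    autocapture=True,  # first-class flag, no longer under experimental_options
    inference_engine_options={
        "engine": "vllm",  # illustrative engine name
        "engine_args_override": ["--max-model-len", "4096"],
    },
)
```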
@@ -1060,6 +1130,8 @@ class ModelVersion(lineage_node.LineageNode):
         force_rebuild: bool = False,
         build_external_access_integrations: Optional[list[str]] = None,
         block: bool = True,
+        autocapture: bool = False,
+        inference_engine_options: Optional[dict[str, Any]] = None,
         experimental_options: Optional[dict[str, Any]] = None,
     ) -> Union[str, async_job.AsyncJob]:
         """Create an inference service with the given spec.
@@ -1093,11 +1165,13 @@ class ModelVersion(lineage_node.LineageNode):
             block: A bool value indicating whether this function will wait until the service is available.
                 When it is ``False``, this function executes the underlying service creation asynchronously
                 and returns an :class:`AsyncJob`.
-            experimental_options: Experimental options for the service creation with custom inference engine.
-                Currently, `inference_engine`, `inference_engine_args_override`, and `autocapture` are supported.
-                `inference_engine` is the name of the inference engine to use.
-                `inference_engine_args_override` is a list of string arguments to pass to the inference engine.
-                `autocapture` is a boolean to enable/disable inference table.
+            autocapture: Whether inference autocapture is enabled on the service. If true, inference data will be
+                captured in the model inference table.
+            inference_engine_options: Options for the service creation with custom inference engine.
+                Supports `engine` and `engine_args_override`.
+                `engine` is the type of the inference engine to use.
+                `engine_args_override` is a list of string arguments to pass to the inference engine.
+            experimental_options: Experimental options for the service creation.
         """
         ...
 
@@ -1134,6 +1208,8 @@ class ModelVersion(lineage_node.LineageNode):
         build_external_access_integration: Optional[str] = None,
         build_external_access_integrations: Optional[list[str]] = None,
         block: bool = True,
+        autocapture: bool = False,
+        inference_engine_options: Optional[dict[str, Any]] = None,
         experimental_options: Optional[dict[str, Any]] = None,
     ) -> Union[str, async_job.AsyncJob]:
         """Create an inference service with the given spec.
@@ -1169,11 +1245,13 @@ class ModelVersion(lineage_node.LineageNode):
             block: A bool value indicating whether this function will wait until the service is available.
                 When it is False, this function executes the underlying service creation asynchronously
                 and returns an AsyncJob.
-            experimental_options: Experimental options for the service creation with custom inference engine.
-                Currently, `inference_engine`, `inference_engine_args_override`, and `autocapture` are supported.
-                `inference_engine` is the name of the inference engine to use.
-                `inference_engine_args_override` is a list of string arguments to pass to the inference engine.
-                `autocapture` is a boolean to enable/disable inference table.
+            autocapture: Whether inference autocapture is enabled on the service. If true, inference data will be
+                captured in the model inference table.
+            inference_engine_options: Options for the service creation with custom inference engine.
+                Supports `engine` and `engine_args_override`.
+                `engine` is the type of the inference engine to use.
+                `engine_args_override` is a list of string arguments to pass to the inference engine.
+            experimental_options: Experimental options for the service creation.
 
 
         Raises:
@@ -1209,9 +1287,10 @@ class ModelVersion(lineage_node.LineageNode):
         # Validate GPU support if GPU resources are requested
         self._throw_error_if_gpu_is_not_supported(gpu_requests, statement_params)
 
-        inference_engine_args = inference_engine_utils._get_inference_engine_args(experimental_options)
+        inference_engine_args = inference_engine_utils._get_inference_engine_args(inference_engine_options)
 
-        # Check if model is HuggingFace text-generation before doing inference engine checks
+        # Check if model is HuggingFace text-generation and is logged with
+        # OPENAI_CHAT_SIGNATURE before doing inference engine checks
         # Only validate if inference engine is actually specified
         if inference_engine_args is not None:
             self._check_huggingface_text_generation_model(statement_params)
@@ -1223,9 +1302,6 @@ class ModelVersion(lineage_node.LineageNode):
             gpu_requests,
         )
 
-        # Extract autocapture from experimental_options
-        autocapture = experimental_options.get("autocapture") if experimental_options else None
-
         from snowflake.ml.model import event_handler
         from snowflake.snowpark import exceptions
 
@@ -1292,8 +1368,13 @@ class ModelVersion(lineage_node.LineageNode):
         """List all the service names using this model version.
 
         Returns:
-            List of service_names: The name of the service, can be fully qualified. If not fully qualified, the database
-                or schema of the model will be used.
+            List of details about all the services associated with this model version. The details include:
+                name: The name of the service.
+                status: The status of the service.
+                inference_endpoint: The public endpoint of the service, if enabled and the service is not PENDING.
+                    This will be the privatelink endpoint if the session is created with a privatelink connection.
+                internal_endpoint: The internal endpoint of the service, if the service is not in PENDING state.
+                autocapture_enabled: Whether the service has autocapture enabled, if set in the service proxy spec.
         """
         statement_params = telemetry.get_statement_params(
             project=_TELEMETRY_PROJECT,
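A consumption sketch for the richer return value (this assumes the returned rows are dict-like; the exact container type is not shown in this diff):

```python
for svc in mv.list_services():
    # Each row now carries status and endpoint details, not just a name.
    print(svc["name"], svc["status"], svc.get("inference_endpoint"), svc.get("autocapture_enabled"))
```

If `list_services()` instead returns a DataFrame, iterate with `.iterrows()` and index columns by the same field names.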