PyPI - snowflake-ml-python - Versions diffs - 1.10.0__py3-none-any.whl → 1.12.0__py3-none-any.whl - Mend

snowflake-ml-python 1.10.0__py3-none-any.whl → 1.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (205) hide show

snowflake/ml/model/_client/model/model_version_impl.py CHANGED Viewed

@@ -1,16 +1,18 @@
 import enum
 import pathlib
 import tempfile
+import uuid
 import warnings
 from typing import Any, Callable, Optional, Union, overload
 import pandas as pd
-from snowflake import snowpark
+from snowflake.ml import jobs
 from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.utils import sql_identifier
 from snowflake.ml.lineage import lineage_node
 from snowflake.ml.model import task, type_hints
+from snowflake.ml.model._client.model import batch_inference_specs
 from snowflake.ml.model._client.ops import metadata_ops, model_ops, service_ops
 from snowflake.ml.model._model_composer import model_composer
 from snowflake.ml.model._model_composer.model_manifest import model_manifest_schema
@@ -19,6 +21,7 @@ from snowflake.snowpark import Session, async_job, dataframe
 _TELEMETRY_PROJECT = "MLOps"
 _TELEMETRY_SUBPROJECT = "ModelManagement"
+_BATCH_INFERENCE_JOB_ID_PREFIX = "BATCH_INFERENCE_"
 class ExportMode(enum.Enum):
@@ -539,6 +542,63 @@ class ModelVersion(lineage_node.LineageNode):
                 is_partitioned=target_function_info["is_partitioned"],
             )
+    @telemetry.send_api_usage_telemetry(
+        project=_TELEMETRY_PROJECT,
+        subproject=_TELEMETRY_SUBPROJECT,
+        func_params_to_log=["compute_pool"],
+    )
+    def _run_batch(
+        self,
+        *,
+        compute_pool: str,
+        input_spec: batch_inference_specs.InputSpec,
+        output_spec: batch_inference_specs.OutputSpec,
+        job_spec: Optional[batch_inference_specs.JobSpec] = None,
+    ) -> jobs.MLJob[Any]:
+        """Submit a batch inference job for this model version.
+        Args:
+            compute_pool: Name of the compute pool; wrapped in a ``SqlIdentifier`` and forwarded as
+                ``compute_pool_name``.
+            input_spec: Supplies ``input_stage_location`` and ``input_file_pattern``.
+            output_spec: Supplies ``output_stage_location`` and ``completion_filename``.
+            job_spec: Optional job settings. A default ``JobSpec()`` is used when this is None.
+        Returns:
+            Whatever ``invoke_batch_job_method`` on the service ops object returns.
+        Raises:
+            ValueError: Neither the job spec nor the current session provides a warehouse.
+        """
+        statement_params = telemetry.get_statement_params(
+            project=_TELEMETRY_PROJECT,
+            subproject=_TELEMETRY_SUBPROJECT,
+        )
+        spec = batch_inference_specs.JobSpec() if job_spec is None else job_spec
+        # A warehouse set on the job spec wins; otherwise fall back to the session's current warehouse.
+        resolved_warehouse = spec.warehouse or self._service_ops._session.get_current_warehouse()
+        if resolved_warehouse is None:
+            raise ValueError("Warehouse is not set. Please set the warehouse field in the JobSpec.")
+        resolved_job_name = spec.job_name
+        if resolved_job_name is None:
+            # Same as the MLJob ID generation logic with a different prefix
+            random_suffix = str(uuid.uuid4()).replace("-", "_").upper()
+            resolved_job_name = f"{_BATCH_INFERENCE_JOB_ID_PREFIX}{random_suffix}"
+        target_method = self._get_function_info(function_name=spec.function_name)["target_method"]
+        return self._service_ops.invoke_batch_job_method(
+            # model version info
+            model_name=self._model_name,
+            version_name=self._version_name,
+            # job spec
+            function_name=target_method,
+            compute_pool_name=sql_identifier.SqlIdentifier(compute_pool),
+            force_rebuild=spec.force_rebuild,
+            image_repo_name=spec.image_repo,
+            num_workers=spec.num_workers,
+            max_batch_rows=spec.max_batch_rows,
+            warehouse=sql_identifier.SqlIdentifier(resolved_warehouse),
+            cpu_requests=spec.cpu_requests,
+            memory_requests=spec.memory_requests,
+            job_name=resolved_job_name,
+            # input and output
+            input_stage_location=input_spec.input_stage_location,
+            input_file_pattern=input_spec.input_file_pattern,
+            output_stage_location=output_spec.output_stage_location,
+            completion_filename=output_spec.completion_filename,
+            # misc
+            statement_params=statement_params,
+        )
     def _get_function_info(self, function_name: Optional[str]) -> model_manifest_schema.ModelFunctionInfo:
         functions: list[model_manifest_schema.ModelFunctionInfo] = self._functions
@@ -707,6 +767,128 @@ class ModelVersion(lineage_node.LineageNode):
             version_name=sql_identifier.SqlIdentifier(version),
         )
+    def _get_inference_engine_args(
+        self, experimental_options: Optional[dict[str, Any]]
+    ) -> Optional[service_ops.InferenceEngineArgs]:
+        """Build inference engine args from the ``experimental_options`` mapping.
+        Args:
+            experimental_options: Mapping that may carry ``inference_engine`` and
+                ``inference_engine_args_override``. None or an empty mapping means no inference engine.
+        Returns:
+            None when ``experimental_options`` is None or empty, otherwise an ``InferenceEngineArgs``
+            holding the two values read from the mapping.
+        Raises:
+            ValueError: The mapping is non-empty but has no ``inference_engine`` key.
+        """
+        if not experimental_options:
+            return None
+        engine_key = "inference_engine"
+        if engine_key not in experimental_options:
+            raise ValueError("inference_engine is required in experimental_options")
+        engine = experimental_options[engine_key]
+        # The override list is optional; a missing key is passed through as None.
+        overrides = experimental_options.get("inference_engine_args_override")
+        return service_ops.InferenceEngineArgs(
+            inference_engine=engine,
+            inference_engine_args_override=overrides,
+        )
+    def _enrich_inference_engine_args(
+        self,
+        inference_engine_args: service_ops.InferenceEngineArgs,
+        gpu_requests: Optional[Union[str, int]] = None,
+    ) -> Optional[service_ops.InferenceEngineArgs]:
+        """Enrich inference engine args with model path and tensor parallelism settings.
+        The override list is copied before it is extended, so a list object shared with the caller
+        is never mutated, and ``inference_engine_args`` is left unchanged when ``gpu_requests``
+        fails validation.
+        Args:
+            inference_engine_args: The original inference engine args. Its
+                ``inference_engine_args_override`` attribute is replaced with the enriched list.
+            gpu_requests: The number of GPUs requested, as an int or a string that parses as an int.
+                Values of any other type are ignored, as is None.
+        Returns:
+            The same ``inference_engine_args`` object, with ``--model=<stage path>`` and, when a GPU
+            count is given, ``--tensor-parallel-size=<count>`` appended to its override list.
+        Raises:
+            ValueError: ``gpu_requests`` cannot be parsed as an integer, or is not positive.
+        """
+        # Work on a copy: the incoming list may be owned by the caller, and appending to it in place
+        # would accumulate duplicate flags across repeated calls.
+        overrides = list(inference_engine_args.inference_engine_args_override or [])
+        # Get model stage path and strip off "snow://" prefix
+        model_stage_path = self._model_ops.get_model_version_stage_path(
+            database_name=None,
+            schema_name=None,
+            model_name=self._model_name,
+            version_name=self._version_name,
+        )
+        model_stage_path = model_stage_path.removeprefix("snow://")
+        # The model flag is always appended after any user-supplied overrides.
+        overrides.append(f"--model={model_stage_path}")
+        # Set tensor-parallelism if gpu_requests is specified
+        if isinstance(gpu_requests, (str, int)):
+            try:
+                gpu_count = int(gpu_requests)
+            except ValueError as err:
+                raise ValueError(f"Invalid gpu_requests: {gpu_requests}") from err
+            if gpu_count <= 0:
+                raise ValueError(f"Invalid gpu_requests: {gpu_requests}")
+            overrides.append(f"--tensor-parallel-size={gpu_count}")
+        # Commit only after every validation step has passed.
+        inference_engine_args.inference_engine_args_override = overrides
+        return inference_engine_args
+    def _check_huggingface_text_generation_model(
+        self,
+        statement_params: Optional[dict[str, Any]] = None,
+    ) -> None:
+        """Validate that this model version is a HuggingFace text-generation pipeline.
+        Args:
+            statement_params: Optional statement parameters forwarded to the call that fetches
+                the model spec.
+        Raises:
+            ValueError: The spec's ``model_type`` is not ``huggingface_pipeline``, or no model entry
+                in the spec declares the ``text-generation`` task.
+        """
+        spec = self._model_ops._fetch_model_spec(
+            database_name=None,
+            schema_name=None,
+            model_name=self._model_name,
+            version_name=self._version_name,
+            statement_params=statement_params,
+        )
+        model_type = spec.get("model_type")
+        if model_type != "huggingface_pipeline":
+            raise ValueError(
+                f"Inference engine is only supported for HuggingFace text-generation models. "
+                f"Found model_type: {model_type}"
+            )
+        # A single model entry is expected, but any entry declaring text-generation is accepted.
+        seen_tasks: list[str] = []
+        for entry in spec.get("models", {}).values():
+            declared_task = entry.get("options", {}).get("task")
+            if not declared_task:
+                continue
+            seen_tasks.append(str(declared_task))
+            if declared_task == "text-generation":
+                return
+        # No entry matched: report whichever tasks were seen, if any.
+        if seen_tasks:
+            tasks_str = ", ".join(seen_tasks)
+            found_tasks_str = f"Found task(s): {tasks_str} in model spec."
+        else:
+            found_tasks_str = "No task found in model spec."
+        raise ValueError(f"Inference engine is only supported for task 'text-generation'. {found_tasks_str}")
     @overload
     def create_service(
         self,
@@ -714,7 +896,7 @@ class ModelVersion(lineage_node.LineageNode):
         service_name: str,
         image_build_compute_pool: Optional[str] = None,
         service_compute_pool: str,
-        image_repo: str,
+        image_repo: Optional[str] = None,
         ingress_enabled: bool = False,
         max_instances: int = 1,
         cpu_requests: Optional[str] = None,
@@ -725,6 +907,7 @@ class ModelVersion(lineage_node.LineageNode):
         force_rebuild: bool = False,
         build_external_access_integration: Optional[str] = None,
         block: bool = True,
+        experimental_options: Optional[dict[str, Any]] = None,
     ) -> Union[str, async_job.AsyncJob]:
         """Create an inference service with the given spec.
@@ -735,7 +918,8 @@ class ModelVersion(lineage_node.LineageNode):
                 the service compute pool if None.
             service_compute_pool: The name of the compute pool used to run the inference service.
             image_repo: The name of the image repository, can be fully qualified. If not fully qualified, the database
-                or schema of the model will be used.
+                or schema of the model will be used. This can be None, in that case a default hidden image repository
+                will be used.
             ingress_enabled: If true, creates an service endpoint associated with the service. User must have
                 BIND SERVICE ENDPOINT privilege on the account.
             max_instances: The maximum number of inference service instances to run. The same value it set to
@@ -756,6 +940,10 @@ class ModelVersion(lineage_node.LineageNode):
             block: A bool value indicating whether this function will wait until the service is available.
                 When it is ``False``, this function executes the underlying service creation asynchronously
                 and returns an :class:`AsyncJob`.
+            experimental_options: Experimental options for the service creation with custom inference engine.
+                Currently, only `inference_engine` and `inference_engine_args_override` are supported.
+                `inference_engine` is the name of the inference engine to use.
+                `inference_engine_args_override` is a list of string arguments to pass to the inference engine.
         """
         ...
@@ -766,7 +954,7 @@ class ModelVersion(lineage_node.LineageNode):
         service_name: str,
         image_build_compute_pool: Optional[str] = None,
         service_compute_pool: str,
-        image_repo: str,
+        image_repo: Optional[str] = None,
         ingress_enabled: bool = False,
         max_instances: int = 1,
         cpu_requests: Optional[str] = None,
@@ -777,6 +965,7 @@ class ModelVersion(lineage_node.LineageNode):
         force_rebuild: bool = False,
         build_external_access_integrations: Optional[list[str]] = None,
         block: bool = True,
+        experimental_options: Optional[dict[str, Any]] = None,
     ) -> Union[str, async_job.AsyncJob]:
         """Create an inference service with the given spec.
@@ -787,7 +976,8 @@ class ModelVersion(lineage_node.LineageNode):
                 the service compute pool if None.
             service_compute_pool: The name of the compute pool used to run the inference service.
             image_repo: The name of the image repository, can be fully qualified. If not fully qualified, the database
-                or schema of the model will be used.
+                or schema of the model will be used. This can be None, in that case a default hidden image repository
+                will be used.
             ingress_enabled: If true, creates an service endpoint associated with the service. User must have
                 BIND SERVICE ENDPOINT privilege on the account.
             max_instances: The maximum number of inference service instances to run. The same value it set to
@@ -808,6 +998,10 @@ class ModelVersion(lineage_node.LineageNode):
             block: A bool value indicating whether this function will wait until the service is available.
                 When it is ``False``, this function executes the underlying service creation asynchronously
                 and returns an :class:`AsyncJob`.
+            experimental_options: Experimental options for the service creation with custom inference engine.
+                Currently, only `inference_engine` and `inference_engine_args_override` are supported.
+                `inference_engine` is the name of the inference engine to use.
+                `inference_engine_args_override` is a list of string arguments to pass to the inference engine.
         """
         ...
@@ -832,7 +1026,7 @@ class ModelVersion(lineage_node.LineageNode):
         service_name: str,
         image_build_compute_pool: Optional[str] = None,
         service_compute_pool: str,
-        image_repo: str,
+        image_repo: Optional[str] = None,
         ingress_enabled: bool = False,
         max_instances: int = 1,
         cpu_requests: Optional[str] = None,
@@ -844,6 +1038,7 @@ class ModelVersion(lineage_node.LineageNode):
         build_external_access_integration: Optional[str] = None,
         build_external_access_integrations: Optional[list[str]] = None,
         block: bool = True,
+        experimental_options: Optional[dict[str, Any]] = None,
     ) -> Union[str, async_job.AsyncJob]:
         """Create an inference service with the given spec.
@@ -854,7 +1049,8 @@ class ModelVersion(lineage_node.LineageNode):
                 the service compute pool if None.
             service_compute_pool: The name of the compute pool used to run the inference service.
             image_repo: The name of the image repository, can be fully qualified. If not fully qualified, the database
-                or schema of the model will be used.
+                or schema of the model will be used. This can be None, in that case a default hidden image repository
+                will be used.
             ingress_enabled: If true, creates an service endpoint associated with the service. User must have
                 BIND SERVICE ENDPOINT privilege on the account.
             max_instances: The maximum number of inference service instances to run. The same value it set to
@@ -877,6 +1073,11 @@ class ModelVersion(lineage_node.LineageNode):
             block: A bool value indicating whether this function will wait until the service is available.
                 When it is False, this function executes the underlying service creation asynchronously
                 and returns an AsyncJob.
+            experimental_options: Experimental options for the service creation with custom inference engine.
+                Currently, only `inference_engine` and `inference_engine_args_override` are supported.
+                `inference_engine` is the name of the inference engine to use.
+                `inference_engine_args_override` is a list of string arguments to pass to the inference engine.
         Raises:
             ValueError: Illegal external access integration arguments.
@@ -885,6 +1086,9 @@ class ModelVersion(lineage_node.LineageNode):
         Returns:
             If `block=True`, return result information about service creation from server.
             Otherwise, return the service creation AsyncJob.
+        Raises:
+            ValueError: Illegal external access integration arguments.
         """
         statement_params = telemetry.get_statement_params(
             project=_TELEMETRY_PROJECT,
@@ -906,7 +1110,18 @@ class ModelVersion(lineage_node.LineageNode):
             build_external_access_integrations = [build_external_access_integration]
         service_db_id, service_schema_id, service_id = sql_identifier.parse_fully_qualified_name(service_name)
-        image_repo_db_id, image_repo_schema_id, image_repo_id = sql_identifier.parse_fully_qualified_name(image_repo)
+        # Check if model is HuggingFace text-generation before doing inference engine checks
+        if experimental_options:
+            self._check_huggingface_text_generation_model(statement_params)
+        inference_engine_args: Optional[service_ops.InferenceEngineArgs] = self._get_inference_engine_args(
+            experimental_options
+        )
+        # Enrich inference engine args if inference engine is specified
+        if inference_engine_args is not None:
+            inference_engine_args = self._enrich_inference_engine_args(inference_engine_args, gpu_requests)
         from snowflake.ml.model import event_handler
         from snowflake.snowpark import exceptions
@@ -929,7 +1144,7 @@ class ModelVersion(lineage_node.LineageNode):
                         else sql_identifier.SqlIdentifier(service_compute_pool)
                     ),
                     service_compute_pool_name=sql_identifier.SqlIdentifier(service_compute_pool),
-                    image_repo=image_repo,
+                    image_repo_name=image_repo,
                     ingress_enabled=ingress_enabled,
                     max_instances=max_instances,
                     cpu_requests=cpu_requests,
@@ -946,6 +1161,7 @@ class ModelVersion(lineage_node.LineageNode):
                     block=block,
                     statement_params=statement_params,
                     progress_status=status,
+                    inference_engine_args=inference_engine_args,
                 )
                 status.update(label="Model service created successfully", state="complete", expanded=False)
                 return result
@@ -1028,69 +1244,5 @@ class ModelVersion(lineage_node.LineageNode):
             statement_params=statement_params,
         )
-    @snowpark._internal.utils.private_preview(version="1.8.3")
-    @telemetry.send_api_usage_telemetry(
-        project=_TELEMETRY_PROJECT,
-        subproject=_TELEMETRY_SUBPROJECT,
-    )
-    def _run_job(
-        self,
-        X: Union[pd.DataFrame, "dataframe.DataFrame"],
-        *,
-        job_name: str,
-        compute_pool: str,
-        image_repo: str,
-        output_table_name: str,
-        function_name: Optional[str] = None,
-        cpu_requests: Optional[str] = None,
-        memory_requests: Optional[str] = None,
-        gpu_requests: Optional[Union[str, int]] = None,
-        num_workers: Optional[int] = None,
-        max_batch_rows: Optional[int] = None,
-        force_rebuild: bool = False,
-        build_external_access_integrations: Optional[list[str]] = None,
-    ) -> Union[pd.DataFrame, dataframe.DataFrame]:
-        statement_params = telemetry.get_statement_params(
-            project=_TELEMETRY_PROJECT,
-            subproject=_TELEMETRY_SUBPROJECT,
-        )
-        target_function_info = self._get_function_info(function_name=function_name)
-        job_db_id, job_schema_id, job_id = sql_identifier.parse_fully_qualified_name(job_name)
-        output_table_db_id, output_table_schema_id, output_table_id = sql_identifier.parse_fully_qualified_name(
-            output_table_name
-        )
-        warehouse = self._service_ops._session.get_current_warehouse()
-        assert warehouse, "No active warehouse selected in the current session."
-        return self._service_ops.invoke_job_method(
-            target_method=target_function_info["target_method"],
-            signature=target_function_info["signature"],
-            X=X,
-            database_name=None,
-            schema_name=None,
-            model_name=self._model_name,
-            version_name=self._version_name,
-            job_database_name=job_db_id,
-            job_schema_name=job_schema_id,
-            job_name=job_id,
-            compute_pool_name=sql_identifier.SqlIdentifier(compute_pool),
-            warehouse_name=sql_identifier.SqlIdentifier(warehouse),
-            image_repo=image_repo,
-            output_table_database_name=output_table_db_id,
-            output_table_schema_name=output_table_schema_id,
-            output_table_name=output_table_id,
-            cpu_requests=cpu_requests,
-            memory_requests=memory_requests,
-            gpu_requests=gpu_requests,
-            num_workers=num_workers,
-            max_batch_rows=max_batch_rows,
-            force_rebuild=force_rebuild,
-            build_external_access_integrations=(
-                None
-                if build_external_access_integrations is None
-                else [sql_identifier.SqlIdentifier(eai) for eai in build_external_access_integrations]
-            ),
-            statement_params=statement_params,
-        )
 lineage_node.DOMAIN_LINEAGE_REGISTRY["model"] = ModelVersion

snowflake-ml-python 1.10.0__py3-none-any.whl → 1.12.0__py3-none-any.whl

snowflake-ml-python 1.10.0__py3-none-any.whl → 1.12.0__py3-none-any.whl