PyPI - snowflake-ml-python - Versions diffs - 1.9.2__py3-none-any.whl → 1.11.0__py3-none-any.whl - Mend

snowflake-ml-python 1.9.2__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

snowflake/ml/_internal/utils/service_logger.py +31 -17
snowflake/ml/experiment/callback/keras.py +63 -0
snowflake/ml/experiment/callback/lightgbm.py +59 -0
snowflake/ml/experiment/callback/xgboost.py +67 -0
snowflake/ml/experiment/utils.py +14 -0
snowflake/ml/jobs/_utils/__init__.py +0 -0
snowflake/ml/jobs/_utils/constants.py +4 -1
snowflake/ml/jobs/_utils/payload_utils.py +55 -21
snowflake/ml/jobs/_utils/query_helper.py +5 -1
snowflake/ml/jobs/_utils/runtime_env_utils.py +63 -0
snowflake/ml/jobs/_utils/scripts/get_instance_ip.py +2 -2
snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +5 -5
snowflake/ml/jobs/_utils/spec_utils.py +41 -8
snowflake/ml/jobs/_utils/stage_utils.py +22 -9
snowflake/ml/jobs/_utils/types.py +5 -7
snowflake/ml/jobs/job.py +1 -1
snowflake/ml/jobs/manager.py +1 -13
snowflake/ml/model/_client/model/model_version_impl.py +219 -55
snowflake/ml/model/_client/ops/service_ops.py +230 -30
snowflake/ml/model/_client/service/model_deployment_spec.py +103 -27
snowflake/ml/model/_client/service/model_deployment_spec_schema.py +11 -5
snowflake/ml/model/_model_composer/model_composer.py +1 -70
snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +2 -43
snowflake/ml/model/event_handler.py +87 -18
snowflake/ml/model/inference_engine.py +5 -0
snowflake/ml/model/models/huggingface_pipeline.py +74 -51
snowflake/ml/model/type_hints.py +26 -1
snowflake/ml/registry/_manager/model_manager.py +37 -70
snowflake/ml/registry/_manager/model_parameter_reconciler.py +294 -0
snowflake/ml/registry/registry.py +0 -19
snowflake/ml/version.py +1 -1
{snowflake_ml_python-1.9.2.dist-info → snowflake_ml_python-1.11.0.dist-info}/METADATA +523 -491
{snowflake_ml_python-1.9.2.dist-info → snowflake_ml_python-1.11.0.dist-info}/RECORD +36 -29
snowflake/ml/experiment/callback.py +0 -121
{snowflake_ml_python-1.9.2.dist-info → snowflake_ml_python-1.11.0.dist-info}/WHEEL +0 -0
{snowflake_ml_python-1.9.2.dist-info → snowflake_ml_python-1.11.0.dist-info}/licenses/LICENSE.txt +0 -0
{snowflake_ml_python-1.9.2.dist-info → snowflake_ml_python-1.11.0.dist-info}/top_level.txt +0 -0

snowflake/ml/jobs/_utils/stage_utils.py CHANGED Viewed

@@ -121,15 +121,28 @@ class StagePath:
         return self._compose_path(self._path)
     def joinpath(self, *args: Union[str, PathLike[str]]) -> "StagePath":
+        """
+        Joins the given path arguments to the current path,
+        mimicking the behavior of pathlib.Path.joinpath.
+        If the argument is a stage path (i.e., an absolute path),
+        it overrides the current path and is returned as the final path.
+        If the argument is a normal path, it is joined with the current relative path
+        using self._path.joinpath(arg).
+        Args:
+            *args: Path components to join.
+        Returns:
+            A new StagePath with the joined path.
+        Raises:
+            NotImplementedError: the argument is a stage path.
+        """
         path = self
         for arg in args:
-            path = path._make_child(arg)
+            if isinstance(arg, StagePath):
+                raise NotImplementedError
+            else:
+                # the arg might be an absolute path, so we need to remove the leading '/'
+                path = StagePath(f"{path.root}/{path._path.joinpath(arg).as_posix().lstrip('/')}")
         return path
-    def _make_child(self, path: Union[str, PathLike[str]]) -> "StagePath":
-        stage_path = path if isinstance(path, StagePath) else StagePath(os.fspath(path))
-        if self.root == stage_path.root:
-            child_path = self._path.joinpath(stage_path._path)
-            return StagePath(self._compose_path(child_path))
-        else:
-            return stage_path

snowflake/ml/jobs/_utils/types.py CHANGED Viewed

@@ -30,6 +30,10 @@ class PayloadPath(Protocol):
     def parent(self) -> "PayloadPath":
         ...
+    @property
+    def root(self) -> str:
+        ...
     def exists(self) -> bool:
         ...
@@ -98,12 +102,6 @@ class ComputeResources:
 @dataclass(frozen=True)
 class ImageSpec:
-    repo: str
-    image_name: str
-    image_tag: str
     resource_requests: ComputeResources
     resource_limits: ComputeResources
-    @property
-    def full_name(self) -> str:
-        return f"{self.repo}/{self.image_name}:{self.image_tag}"
+    container_image: str

snowflake/ml/jobs/job.py CHANGED Viewed

@@ -199,7 +199,7 @@ class MLJob(Generic[T], SerializableSessionMixin):
             elapsed = time.monotonic() - start_time
             if elapsed >= timeout >= 0:
                 raise TimeoutError(f"Job {self.name} did not complete within {timeout} seconds")
-            elif status == "PENDING" and not warning_shown and elapsed >= 2:  # Only show warning after 2s
+            elif status == "PENDING" and not warning_shown and elapsed >= 5:  # Only show warning after 5s
                 pool_info = _get_compute_pool_info(self._session, self._compute_pool)
                 if (pool_info.max_nodes - pool_info.active_nodes) < self.min_instances:
                     logger.warning(

snowflake/ml/jobs/manager.py CHANGED Viewed

@@ -426,7 +426,6 @@ def _submit_job(
     Raises:
         ValueError: If database or schema value(s) are invalid
-        SnowparkSQLException: If there is an error submitting the job.
     """
     session = session or get_active_session()
@@ -504,18 +503,7 @@ def _submit_job(
     query_text, params = _generate_submission_query(
         spec, external_access_integrations, query_warehouse, target_instances, session, compute_pool, job_id
     )
-    try:
-        _ = query_helper.run_query(session, query_text, params=params)
-    except SnowparkSQLException as e:
-        if "Invalid spec: unknown option 'resourceManagement' for 'spec'." in e.message:
-            logger.warning("Dropping 'resourceManagement' from spec because control policy is not enabled.")
-            spec["spec"].pop("resourceManagement", None)
-            query_text, params = _generate_submission_query(
-                spec, external_access_integrations, query_warehouse, target_instances, session, compute_pool, job_id
-            )
-            _ = query_helper.run_query(session, query_text, params=params)
-        else:
-            raise
+    _ = query_helper.run_query(session, query_text, params=params)
     return get_job(job_id, session=session)

snowflake/ml/model/_client/model/model_version_impl.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import enum
-import logging
 import pathlib
 import tempfile
 import warnings
@@ -708,6 +707,128 @@ class ModelVersion(lineage_node.LineageNode):
             version_name=sql_identifier.SqlIdentifier(version),
         )
+    def _get_inference_engine_args(
+        self, experimental_options: Optional[dict[str, Any]]
+    ) -> Optional[service_ops.InferenceEngineArgs]:
+        if not experimental_options:
+            return None
+        if "inference_engine" not in experimental_options:
+            raise ValueError("inference_engine is required in experimental_options")
+        return service_ops.InferenceEngineArgs(
+            inference_engine=experimental_options["inference_engine"],
+            inference_engine_args_override=experimental_options.get("inference_engine_args_override"),
+        )
+    def _enrich_inference_engine_args(
+        self,
+        inference_engine_args: service_ops.InferenceEngineArgs,
+        gpu_requests: Optional[Union[str, int]] = None,
+    ) -> Optional[service_ops.InferenceEngineArgs]:
+        """Enrich inference engine args with model path and tensor parallelism settings.
+        Args:
+            inference_engine_args: The original inference engine args
+            gpu_requests: The number of GPUs requested
+        Returns:
+            Enriched inference engine args
+        Raises:
+            ValueError: Invalid gpu_requests
+        """
+        if inference_engine_args.inference_engine_args_override is None:
+            inference_engine_args.inference_engine_args_override = []
+        # Get model stage path and strip off "snow://" prefix
+        model_stage_path = self._model_ops.get_model_version_stage_path(
+            database_name=None,
+            schema_name=None,
+            model_name=self._model_name,
+            version_name=self._version_name,
+        )
+        # Strip "snow://" prefix
+        if model_stage_path.startswith("snow://"):
+            model_stage_path = model_stage_path.replace("snow://", "", 1)
+        # Always overwrite the model key by appending
+        inference_engine_args.inference_engine_args_override.append(f"--model={model_stage_path}")
+        gpu_count = None
+        # Set tensor-parallelism if gpu_requests is specified
+        if gpu_requests is not None:
+            # assert gpu_requests is a string or an integer before casting to int
+            if isinstance(gpu_requests, str) or isinstance(gpu_requests, int):
+                try:
+                    gpu_count = int(gpu_requests)
+                except ValueError:
+                    raise ValueError(f"Invalid gpu_requests: {gpu_requests}")
+        if gpu_count is not None:
+            if gpu_count > 0:
+                inference_engine_args.inference_engine_args_override.append(f"--tensor-parallel-size={gpu_count}")
+            else:
+                raise ValueError(f"Invalid gpu_requests: {gpu_requests}")
+        return inference_engine_args
+    def _check_huggingface_text_generation_model(
+        self,
+        statement_params: Optional[dict[str, Any]] = None,
+    ) -> None:
+        """Check if the model is a HuggingFace pipeline with text-generation task.
+        Args:
+            statement_params: Optional dictionary of statement parameters to include
+                in the SQL command to fetch model spec.
+        Raises:
+            ValueError: If the model is not a HuggingFace text-generation model.
+        """
+        # Fetch model spec
+        model_spec = self._model_ops._fetch_model_spec(
+            database_name=None,
+            schema_name=None,
+            model_name=self._model_name,
+            version_name=self._version_name,
+            statement_params=statement_params,
+        )
+        # Check if model_type is huggingface_pipeline
+        model_type = model_spec.get("model_type")
+        if model_type != "huggingface_pipeline":
+            raise ValueError(
+                f"Inference engine is only supported for HuggingFace text-generation models. "
+                f"Found model_type: {model_type}"
+            )
+        # Check if model supports text-generation task
+        # There should only be one model in the list because we don't support multiple models in a single model spec
+        models = model_spec.get("models", {})
+        is_text_generation = False
+        found_tasks: list[str] = []
+        # As long as the model supports text-generation task, we can use it
+        for _, model_info in models.items():
+            options = model_info.get("options", {})
+            task = options.get("task")
+            if task:
+                found_tasks.append(str(task))
+                if task == "text-generation":
+                    is_text_generation = True
+                    break
+        if not is_text_generation:
+            tasks_str = ", ".join(found_tasks)
+            found_tasks_str = (
+                f"Found task(s): {tasks_str} in model spec." if found_tasks else "No task found in model spec."
+            )
+            raise ValueError(f"Inference engine is only supported for task 'text-generation'. {found_tasks_str}")
     @overload
     def create_service(
         self,
@@ -715,7 +836,7 @@ class ModelVersion(lineage_node.LineageNode):
         service_name: str,
         image_build_compute_pool: Optional[str] = None,
         service_compute_pool: str,
-        image_repo: str,
+        image_repo: Optional[str] = None,
         ingress_enabled: bool = False,
         max_instances: int = 1,
         cpu_requests: Optional[str] = None,
@@ -726,6 +847,7 @@ class ModelVersion(lineage_node.LineageNode):
         force_rebuild: bool = False,
         build_external_access_integration: Optional[str] = None,
         block: bool = True,
+        experimental_options: Optional[dict[str, Any]] = None,
     ) -> Union[str, async_job.AsyncJob]:
         """Create an inference service with the given spec.
@@ -736,7 +858,8 @@ class ModelVersion(lineage_node.LineageNode):
                 the service compute pool if None.
             service_compute_pool: The name of the compute pool used to run the inference service.
             image_repo: The name of the image repository, can be fully qualified. If not fully qualified, the database
-                or schema of the model will be used.
+                or schema of the model will be used. This can be None, in that case a default hidden image repository
+                will be used.
             ingress_enabled: If true, creates an service endpoint associated with the service. User must have
                 BIND SERVICE ENDPOINT privilege on the account.
             max_instances: The maximum number of inference service instances to run. The same value it set to
@@ -757,6 +880,10 @@ class ModelVersion(lineage_node.LineageNode):
             block: A bool value indicating whether this function will wait until the service is available.
                 When it is ``False``, this function executes the underlying service creation asynchronously
                 and returns an :class:`AsyncJob`.
+            experimental_options: Experimental options for the service creation with custom inference engine.
+                Currently, only `inference_engine` and `inference_engine_args_override` are supported.
+                `inference_engine` is the name of the inference engine to use.
+                `inference_engine_args_override` is a list of string arguments to pass to the inference engine.
         """
         ...
@@ -767,7 +894,7 @@ class ModelVersion(lineage_node.LineageNode):
         service_name: str,
         image_build_compute_pool: Optional[str] = None,
         service_compute_pool: str,
-        image_repo: str,
+        image_repo: Optional[str] = None,
         ingress_enabled: bool = False,
         max_instances: int = 1,
         cpu_requests: Optional[str] = None,
@@ -778,6 +905,7 @@ class ModelVersion(lineage_node.LineageNode):
         force_rebuild: bool = False,
         build_external_access_integrations: Optional[list[str]] = None,
         block: bool = True,
+        experimental_options: Optional[dict[str, Any]] = None,
     ) -> Union[str, async_job.AsyncJob]:
         """Create an inference service with the given spec.
@@ -788,7 +916,8 @@ class ModelVersion(lineage_node.LineageNode):
                 the service compute pool if None.
             service_compute_pool: The name of the compute pool used to run the inference service.
             image_repo: The name of the image repository, can be fully qualified. If not fully qualified, the database
-                or schema of the model will be used.
+                or schema of the model will be used. This can be None, in that case a default hidden image repository
+                will be used.
             ingress_enabled: If true, creates an service endpoint associated with the service. User must have
                 BIND SERVICE ENDPOINT privilege on the account.
             max_instances: The maximum number of inference service instances to run. The same value it set to
@@ -809,6 +938,10 @@ class ModelVersion(lineage_node.LineageNode):
             block: A bool value indicating whether this function will wait until the service is available.
                 When it is ``False``, this function executes the underlying service creation asynchronously
                 and returns an :class:`AsyncJob`.
+            experimental_options: Experimental options for the service creation with custom inference engine.
+                Currently, only `inference_engine` and `inference_engine_args_override` are supported.
+                `inference_engine` is the name of the inference engine to use.
+                `inference_engine_args_override` is a list of string arguments to pass to the inference engine.
         """
         ...
@@ -833,7 +966,7 @@ class ModelVersion(lineage_node.LineageNode):
         service_name: str,
         image_build_compute_pool: Optional[str] = None,
         service_compute_pool: str,
-        image_repo: str,
+        image_repo: Optional[str] = None,
         ingress_enabled: bool = False,
         max_instances: int = 1,
         cpu_requests: Optional[str] = None,
@@ -845,6 +978,7 @@ class ModelVersion(lineage_node.LineageNode):
         build_external_access_integration: Optional[str] = None,
         build_external_access_integrations: Optional[list[str]] = None,
         block: bool = True,
+        experimental_options: Optional[dict[str, Any]] = None,
     ) -> Union[str, async_job.AsyncJob]:
         """Create an inference service with the given spec.
@@ -855,7 +989,8 @@ class ModelVersion(lineage_node.LineageNode):
                 the service compute pool if None.
             service_compute_pool: The name of the compute pool used to run the inference service.
             image_repo: The name of the image repository, can be fully qualified. If not fully qualified, the database
-                or schema of the model will be used.
+                or schema of the model will be used. This can be None, in that case a default hidden image repository
+                will be used.
             ingress_enabled: If true, creates an service endpoint associated with the service. User must have
                 BIND SERVICE ENDPOINT privilege on the account.
             max_instances: The maximum number of inference service instances to run. The same value it set to
@@ -878,29 +1013,28 @@ class ModelVersion(lineage_node.LineageNode):
             block: A bool value indicating whether this function will wait until the service is available.
                 When it is False, this function executes the underlying service creation asynchronously
                 and returns an AsyncJob.
+            experimental_options: Experimental options for the service creation with custom inference engine.
+                Currently, only `inference_engine` and `inference_engine_args_override` are supported.
+                `inference_engine` is the name of the inference engine to use.
+                `inference_engine_args_override` is a list of string arguments to pass to the inference engine.
         Raises:
             ValueError: Illegal external access integration arguments.
+            exceptions.SnowparkSQLException: if service already exists.
         Returns:
             If `block=True`, return result information about service creation from server.
             Otherwise, return the service creation AsyncJob.
+        Raises:
+            ValueError: Illegal external access integration arguments.
         """
         statement_params = telemetry.get_statement_params(
             project=_TELEMETRY_PROJECT,
             subproject=_TELEMETRY_SUBPROJECT,
         )
-        # Check root logger level and emit warning if needed
-        root_logger = logging.getLogger()
-        if root_logger.level in (logging.WARNING, logging.ERROR):
-            warnings.warn(
-                "Suppressing service logs. Set the log level to INFO if you would like "
-                "verbose service logs (e.g., logging.getLogger().setLevel(logging.INFO)).",
-                UserWarning,
-                stacklevel=2,
-            )
         if build_external_access_integration is not None:
             msg = (
                 "`build_external_access_integration` is deprecated. "
@@ -916,41 +1050,74 @@ class ModelVersion(lineage_node.LineageNode):
             build_external_access_integrations = [build_external_access_integration]
         service_db_id, service_schema_id, service_id = sql_identifier.parse_fully_qualified_name(service_name)
-        image_repo_db_id, image_repo_schema_id, image_repo_id = sql_identifier.parse_fully_qualified_name(image_repo)
-        return self._service_ops.create_service(
-            database_name=None,
-            schema_name=None,
-            model_name=self._model_name,
-            version_name=self._version_name,
-            service_database_name=service_db_id,
-            service_schema_name=service_schema_id,
-            service_name=service_id,
-            image_build_compute_pool_name=(
-                sql_identifier.SqlIdentifier(image_build_compute_pool)
-                if image_build_compute_pool
-                else sql_identifier.SqlIdentifier(service_compute_pool)
-            ),
-            service_compute_pool_name=sql_identifier.SqlIdentifier(service_compute_pool),
-            image_repo_database_name=image_repo_db_id,
-            image_repo_schema_name=image_repo_schema_id,
-            image_repo_name=image_repo_id,
-            ingress_enabled=ingress_enabled,
-            max_instances=max_instances,
-            cpu_requests=cpu_requests,
-            memory_requests=memory_requests,
-            gpu_requests=gpu_requests,
-            num_workers=num_workers,
-            max_batch_rows=max_batch_rows,
-            force_rebuild=force_rebuild,
-            build_external_access_integrations=(
-                None
-                if build_external_access_integrations is None
-                else [sql_identifier.SqlIdentifier(eai) for eai in build_external_access_integrations]
-            ),
-            block=block,
-            statement_params=statement_params,
+        # Check if model is HuggingFace text-generation before doing inference engine checks
+        if experimental_options:
+            self._check_huggingface_text_generation_model(statement_params)
+        inference_engine_args: Optional[service_ops.InferenceEngineArgs] = self._get_inference_engine_args(
+            experimental_options
         )
+        # Enrich inference engine args if inference engine is specified
+        if inference_engine_args is not None:
+            inference_engine_args = self._enrich_inference_engine_args(inference_engine_args, gpu_requests)
+        from snowflake.ml.model import event_handler
+        from snowflake.snowpark import exceptions
+        model_event_handler = event_handler.ModelEventHandler()
+        with model_event_handler.status("Creating model inference service", total=6, block=block) as status:
+            try:
+                result = self._service_ops.create_service(
+                    database_name=None,
+                    schema_name=None,
+                    model_name=self._model_name,
+                    version_name=self._version_name,
+                    service_database_name=service_db_id,
+                    service_schema_name=service_schema_id,
+                    service_name=service_id,
+                    image_build_compute_pool_name=(
+                        sql_identifier.SqlIdentifier(image_build_compute_pool)
+                        if image_build_compute_pool
+                        else sql_identifier.SqlIdentifier(service_compute_pool)
+                    ),
+                    service_compute_pool_name=sql_identifier.SqlIdentifier(service_compute_pool),
+                    image_repo_name=image_repo,
+                    ingress_enabled=ingress_enabled,
+                    max_instances=max_instances,
+                    cpu_requests=cpu_requests,
+                    memory_requests=memory_requests,
+                    gpu_requests=gpu_requests,
+                    num_workers=num_workers,
+                    max_batch_rows=max_batch_rows,
+                    force_rebuild=force_rebuild,
+                    build_external_access_integrations=(
+                        None
+                        if build_external_access_integrations is None
+                        else [sql_identifier.SqlIdentifier(eai) for eai in build_external_access_integrations]
+                    ),
+                    block=block,
+                    statement_params=statement_params,
+                    progress_status=status,
+                    inference_engine_args=inference_engine_args,
+                )
+                status.update(label="Model service created successfully", state="complete", expanded=False)
+                return result
+            except exceptions.SnowparkSQLException as e:
+                # Check if the error is because the service already exists
+                if "already exists" in str(e).lower() or "100132" in str(
+                    e
+                ):  # 100132 is Snowflake error code for object already exists
+                    status.update("service already exists")
+                    status.complete()
+                    status.update(label="Service already exists", state="error", expanded=False)
+                    raise
+                else:
+                    status.update(label="Service creation failed", state="error", expanded=False)
+                    raise
     @telemetry.send_api_usage_telemetry(
         project=_TELEMETRY_PROJECT,
         subproject=_TELEMETRY_SUBPROJECT,
@@ -1028,7 +1195,7 @@ class ModelVersion(lineage_node.LineageNode):
         *,
         job_name: str,
         compute_pool: str,
-        image_repo: str,
+        image_repo: Optional[str] = None,
         output_table_name: str,
         function_name: Optional[str] = None,
         cpu_requests: Optional[str] = None,
@@ -1045,7 +1212,6 @@ class ModelVersion(lineage_node.LineageNode):
         )
         target_function_info = self._get_function_info(function_name=function_name)
         job_db_id, job_schema_id, job_id = sql_identifier.parse_fully_qualified_name(job_name)
-        image_repo_db_id, image_repo_schema_id, image_repo_id = sql_identifier.parse_fully_qualified_name(image_repo)
         output_table_db_id, output_table_schema_id, output_table_id = sql_identifier.parse_fully_qualified_name(
             output_table_name
         )
@@ -1064,9 +1230,7 @@ class ModelVersion(lineage_node.LineageNode):
             job_name=job_id,
             compute_pool_name=sql_identifier.SqlIdentifier(compute_pool),
             warehouse_name=sql_identifier.SqlIdentifier(warehouse),
-            image_repo_database_name=image_repo_db_id,
-            image_repo_schema_name=image_repo_schema_id,
-            image_repo_name=image_repo_id,
+            image_repo_name=image_repo,
             output_table_database_name=output_table_db_id,
             output_table_schema_name=output_table_schema_id,
             output_table_name=output_table_id,

snowflake-ml-python 1.9.2__py3-none-any.whl → 1.11.0__py3-none-any.whl

snowflake-ml-python 1.9.2__py3-none-any.whl → 1.11.0__py3-none-any.whl