PyPI - zenml-nightly - Versions diffs - 0.68.1.dev20241102__py3-none-any.whl → 0.68.1.dev20241106__py3-none-any.whl - Mend

zenml-nightly 0.68.1.dev20241102__py3-none-any.whl → 0.68.1.dev20241106__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

zenml/integrations/vllm/model_deployers/vllm_model_deployer.py ADDED Viewed

@@ -0,0 +1,263 @@
+#  Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at:
+#
+#       https://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+#  or implied. See the License for the specific language governing
+#  permissions and limitations under the License.
+"""Implementation of the vLLM Model Deployer."""
+import os
+import shutil
+from typing import ClassVar, Dict, Optional, Type, cast
+from uuid import UUID
+from zenml.config.global_config import GlobalConfiguration
+from zenml.constants import DEFAULT_SERVICE_START_STOP_TIMEOUT
+from zenml.integrations.vllm.flavors.vllm_model_deployer_flavor import (
+    VLLMModelDeployerConfig,
+    VLLMModelDeployerFlavor,
+)
+from zenml.integrations.vllm.services.vllm_deployment import (
+    VLLMDeploymentService,
+    VLLMServiceConfig,
+)
+from zenml.logger import get_logger
+from zenml.model_deployers import BaseModelDeployer, BaseModelDeployerFlavor
+from zenml.services.service import BaseService, ServiceConfig
+from zenml.utils.io_utils import create_dir_recursive_if_not_exists
+logger = get_logger(__name__)
+class VLLMModelDeployer(BaseModelDeployer):
+    """vLLM Inference Server."""
+    NAME: ClassVar[str] = "VLLM"
+    FLAVOR: ClassVar[Type[BaseModelDeployerFlavor]] = VLLMModelDeployerFlavor
+    _service_path: Optional[str] = None
+    @property
+    def config(self) -> VLLMModelDeployerConfig:
+        """Return this deployer's configuration as a `VLLMModelDeployerConfig`.
+        Returns:
+            The configuration held in `self._config`.
+        """
+        # `cast` only narrows the static type for type checkers; it performs
+        # no conversion or validation at runtime.
+        return cast(VLLMModelDeployerConfig, self._config)
+    @staticmethod
+    def get_service_path(id_: UUID) -> str:
+        """Get the path where local vLLM service information is stored.
+        This is where the deployment service configuration, PID and log
+        files are stored.
+        Args:
+            id_: The ID of the vLLM model deployer.
+        Returns:
+            The service path.
+        """
+        # The path is `<local stores path>/<id_>`.
+        service_path = os.path.join(
+            GlobalConfiguration().local_stores_path,
+            str(id_),
+        )
+        # Make sure the directory is present before handing the path out.
+        create_dir_recursive_if_not_exists(service_path)
+        return service_path
+    @property
+    def local_path(self) -> str:
+        """Returns the path to the root directory.
+        This is where all configurations for vLLM deployment daemon processes
+        are stored.
+        If the service path is not set in the config by the user, the path is
+        set to a local default path according to the component ID.
+        The resolved path is remembered in `self._service_path`, so later
+        accesses return it directly.
+        Returns:
+            The path to the local service root directory.
+        """
+        # Already resolved on an earlier access.
+        if self._service_path is not None:
+            return self._service_path
+        # A user-configured path takes precedence over the default location.
+        if self.config.service_path:
+            self._service_path = self.config.service_path
+        else:
+            self._service_path = self.get_service_path(self.id)
+        create_dir_recursive_if_not_exists(self._service_path)
+        return self._service_path
+    @staticmethod
+    def get_model_server_info(  # type: ignore[override]
+        service_instance: "VLLMDeploymentService",
+    ) -> Dict[str, Optional[str]]:
+        """Return implementation specific information on the model server.
+        Args:
+            service_instance: vLLM deployment service object
+        Returns:
+            A dictionary with the keys `HEALTH_CHECK_URL`, `PREDICTION_URL`,
+            `SERVICE_PATH` and `DAEMON_PID`.
+        """
+        return {
+            "HEALTH_CHECK_URL": service_instance.get_healthcheck_url(),
+            "PREDICTION_URL": service_instance.get_prediction_url(),
+            "SERVICE_PATH": service_instance.status.runtime_path,
+            # `str()` is applied unconditionally, so an unset PID (None) would
+            # show up as the string "None" rather than as None.
+            "DAEMON_PID": str(service_instance.status.pid),
+        }
+    def perform_deploy_model(
+        self,
+        id: UUID,
+        config: ServiceConfig,
+        timeout: int = DEFAULT_SERVICE_START_STOP_TIMEOUT,
+    ) -> BaseService:
+        """Create and start a new vLLM deployment service.
+        This should serve the supplied model and deployment configuration.
+        NOTE(review): this method has no `replace` argument and does not look
+        for, update or delete existing deployments. Every call builds a fresh
+        service through `_create_new_service`.
+        Args:
+            id: the UUID forwarded to `_create_new_service` as the ID of the
+                vLLM deployment service.
+            config: the configuration of the model to be deployed with vLLM.
+            timeout: the timeout in seconds to wait for the vLLM server
+                to be provisioned and successfully started. If set
+                to 0, the method will return immediately after the vLLM
+                server is provisioned, without waiting for it to fully start.
+        Returns:
+            The ZenML vLLM deployment service object that can be used to
+            interact with the vLLM model http server.
+        """
+        # Static type narrowing only; `config` is not converted at runtime.
+        config = cast(VLLMServiceConfig, config)
+        service = self._create_new_service(
+            id=id, timeout=timeout, config=config
+        )
+        logger.info(f"Created a new vLLM deployment service: {service}")
+        return service
+    def _clean_up_existing_service(
+        self,
+        timeout: int,
+        force: bool,
+        existing_service: VLLMDeploymentService,
+    ) -> None:
+        """Stop a vLLM deployment service and remove its runtime directory.
+        Args:
+            timeout: the timeout in seconds passed to the service's `stop`.
+            force: the `force` flag passed to the service's `stop`.
+            existing_service: the service to stop and clean up.
+        """
+        # stop the older service
+        existing_service.stop(timeout=timeout, force=force)
+        # delete the old configuration file
+        # NOTE(review): `shutil.rmtree` raises if the directory no longer
+        # exists; confirm whether a repeated delete should be tolerated.
+        if existing_service.status.runtime_path:
+            shutil.rmtree(existing_service.status.runtime_path)
+    # The step will receive a config from the user that mentions the number
+    # of workers etc. The step implementation will create a new config using
+    # all values from the user and add values like pipeline name, model_uri.
+    def _create_new_service(
+        self, id: UUID, timeout: int, config: VLLMServiceConfig
+    ) -> VLLMDeploymentService:
+        """Creates and starts a new VLLMDeploymentService.
+        Note that `config` is modified in place: its `root_runtime_path` is
+        overwritten with this deployer's `local_path`.
+        Args:
+            id: the ID of the vLLM deployment service to be created.
+            timeout: the timeout in seconds to wait for the vLLM server
+                to be provisioned and successfully started.
+            config: the configuration of the model to be deployed with vLLM.
+        Returns:
+            The VLLMDeploymentService object that can be used to interact
+            with the vLLM model server.
+        """
+        # set the root runtime path with the stack component's UUID
+        config.root_runtime_path = self.local_path
+        # create a new service for the new model
+        service = VLLMDeploymentService(uuid=id, config=config)
+        service.start(timeout=timeout)
+        return service
+    def perform_stop_model(
+        self,
+        service: BaseService,
+        timeout: int = DEFAULT_SERVICE_START_STOP_TIMEOUT,
+        force: bool = False,
+    ) -> BaseService:
+        """Method to stop a model server.
+        Args:
+            service: The service to stop.
+            timeout: Timeout in seconds to wait for the service to stop.
+            force: If True, force the service to stop.
+        Returns:
+            The same service object that was passed in, after `stop` was
+            called on it.
+        """
+        # Thin delegation: all stop logic lives in the service itself.
+        service.stop(timeout=timeout, force=force)
+        return service
+    def perform_start_model(
+        self,
+        service: BaseService,
+        timeout: int = DEFAULT_SERVICE_START_STOP_TIMEOUT,
+    ) -> BaseService:
+        """Method to start a model server.
+        Args:
+            service: The service to start.
+            timeout: Timeout in seconds to wait for the service to start.
+        Returns:
+            The same service object that was passed in, after `start` was
+            called on it.
+        """
+        # Thin delegation: all start logic lives in the service itself.
+        service.start(timeout=timeout)
+        return service
+    def perform_delete_model(
+        self,
+        service: BaseService,
+        timeout: int = DEFAULT_SERVICE_START_STOP_TIMEOUT,
+        force: bool = False,
+    ) -> None:
+        """Method to delete all configuration of a model server.
+        The work is delegated to `_clean_up_existing_service`.
+        Args:
+            service: The service to delete.
+            timeout: Timeout in seconds to wait for the service to stop.
+            force: If True, force the service to stop.
+        """
+        # Static type narrowing only; no runtime check that `service` is
+        # actually a VLLMDeploymentService.
+        service = cast(VLLMDeploymentService, service)
+        self._clean_up_existing_service(
+            existing_service=service, timeout=timeout, force=force
+        )

zenml/integrations/vllm/services/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+#  Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at:
+#
+#       https://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+#  or implied. See the License for the specific language governing
+#  permissions and limitations under the License.
+"""Initialization of the vLLM Inference Server."""
+from zenml.integrations.vllm.services.vllm_deployment import (  # noqa
+    VLLMDeploymentService,
+    VLLMServiceConfig,
+)

zenml/integrations/vllm/services/vllm_deployment.py ADDED Viewed

@@ -0,0 +1,197 @@
+#  Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at:
+#
+#       https://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+#  or implied. See the License for the specific language governing
+#  permissions and limitations under the License.
+"""Implementation of the vLLM Inference Server Service."""
+import os
+from typing import Any, List, Optional, Union
+from zenml.constants import DEFAULT_LOCAL_SERVICE_IP_ADDRESS
+from zenml.logger import get_logger
+from zenml.services import (
+    HTTPEndpointHealthMonitor,
+    HTTPEndpointHealthMonitorConfig,
+    LocalDaemonService,
+    LocalDaemonServiceConfig,
+    LocalDaemonServiceEndpoint,
+    LocalDaemonServiceEndpointConfig,
+    ServiceEndpointProtocol,
+    ServiceType,
+)
+from zenml.services.service import BaseDeploymentService
+logger = get_logger(__name__)
+VLLM_PREDICTION_URL_PATH = "v1"
+VLLM_HEALTHCHECK_URL_PATH = "health"
+class VLLMDeploymentEndpointConfig(LocalDaemonServiceEndpointConfig):
+    """vLLM deployment service endpoint configuration.
+    Attributes:
+        prediction_url_path: URI subpath for prediction requests
+    """
+    # Required field: no default is provided.
+    prediction_url_path: str
+class VLLMDeploymentEndpoint(LocalDaemonServiceEndpoint):
+    """A service endpoint exposed by the vLLM deployment daemon.
+    Attributes:
+        config: service endpoint configuration
+    """
+    config: VLLMDeploymentEndpointConfig
+    monitor: HTTPEndpointHealthMonitor
+    @property
+    def prediction_url(self) -> Optional[str]:
+        """Gets the prediction URL for the endpoint.
+        Returns:
+            the prediction URL for the endpoint
+        """
+        uri = self.status.uri
+        if not uri:
+            return None
+        return os.path.join(uri, self.config.prediction_url_path)
+class VLLMServiceConfig(LocalDaemonServiceConfig):
+    """vLLM service configurations.
+    `VLLMDeploymentService.run` copies every field of this config onto the
+    argument namespace handed to the vLLM server, so the field names
+    presumably mirror vLLM server options of the same name (verify against
+    the vLLM CLI arguments).
+    """
+    # Required. Presumably the model name or path to serve (the tokenizer
+    # note below refers to it as "model name or path").
+    model: str
+    # Required. Port used for the service endpoint.
+    port: int
+    # Endpoint address. When unset, the service falls back to
+    # DEFAULT_LOCAL_SERVICE_IP_ADDRESS.
+    host: Optional[str] = None
+    # NOTE(review): where `blocking` is consumed is not visible in this
+    # file; confirm its meaning against LocalDaemonServiceConfig.
+    blocking: bool = True
+    # If unspecified, model name or path will be used.
+    tokenizer: Optional[str] = None
+    served_model_name: Optional[Union[str, List[str]]] = None
+    # Trust remote code from huggingface.
+    trust_remote_code: Optional[bool] = False
+    # ['auto', 'slow', 'mistral']
+    tokenizer_mode: Optional[str] = "auto"
+    # ['auto', 'half', 'float16', 'bfloat16', 'float', 'float32']
+    dtype: Optional[str] = "auto"
+    # The specific model version to use. It can be a branch name, a tag name, or a commit id.
+    # If unspecified, will use the default version.
+    revision: Optional[str] = None
+class VLLMDeploymentService(LocalDaemonService, BaseDeploymentService):
+    """vLLM Inference Server Deployment Service."""
+    SERVICE_TYPE = ServiceType(
+        name="vllm-deployment",
+        type="model-serving",
+        flavor="vllm",
+        description="vLLM Inference prediction service",
+    )
+    config: VLLMServiceConfig
+    endpoint: VLLMDeploymentEndpoint
+    def __init__(self, config: VLLMServiceConfig, **attrs: Any):
+        """Initialize the vLLM deployment service.
+        When no `endpoint` is passed in `attrs`, a default HTTP endpoint is
+        built from the port and host in `config`, together with a health
+        monitor that targets `VLLM_HEALTHCHECK_URL_PATH`.
+        Args:
+            config: service configuration
+            attrs: additional attributes to set on the service
+        """
+        # Only build the default endpoint when the caller did not supply one
+        # (an explicit "endpoint" in attrs is left untouched).
+        if isinstance(config, VLLMServiceConfig) and "endpoint" not in attrs:
+            endpoint = VLLMDeploymentEndpoint(
+                config=VLLMDeploymentEndpointConfig(
+                    protocol=ServiceEndpointProtocol.HTTP,
+                    port=config.port,
+                    # Fall back to the default local address if no host is set.
+                    ip_address=config.host or DEFAULT_LOCAL_SERVICE_IP_ADDRESS,
+                    prediction_url_path=VLLM_PREDICTION_URL_PATH,
+                ),
+                monitor=HTTPEndpointHealthMonitor(
+                    config=HTTPEndpointHealthMonitorConfig(
+                        healthcheck_uri_path=VLLM_HEALTHCHECK_URL_PATH,
+                    )
+                ),
+            )
+            attrs["endpoint"] = endpoint
+        super().__init__(config=config, **attrs)
+    def run(self) -> None:
+        """Start the service."""
+        logger.info(
+            "Starting vLLM inference server service as blocking "
+            "process... press CTRL+C once to stop it."
+        )
+        self.endpoint.prepare_for_start()
+        import uvloop
+        from vllm.entrypoints.openai.api_server import run_server
+        from vllm.entrypoints.openai.cli_args import make_arg_parser
+        from vllm.utils import FlexibleArgumentParser
+        try:
+            parser = make_arg_parser(FlexibleArgumentParser())
+            args = parser.parse_args()
+            # Override port with the available port
+            self.config.port = self.endpoint.status.port
+            # Update the arguments in place
+            args.__dict__.update(self.config.model_dump())
+            uvloop.run(run_server(args=args))
+        except KeyboardInterrupt:
+            logger.info("Stopping vLLM prediction service...")
+    @property
+    def prediction_url(self) -> Optional[str]:
+        """Gets the prediction URL for the endpoint.
+        Returns:
+            the prediction URL for the endpoint
+        """
+        if not self.is_running:
+            return None
+        return self.endpoint.prediction_url_path
+    def predict(self, data: "Any") -> "Any":
+        """Make a prediction using the service.
+        Sends `data` as the prompt of a completions request to the endpoint's
+        prediction URL, using the first model the server lists.
+        Args:
+            data: data to make a prediction on; passed as the `prompt` of the
+                completions request
+        Returns:
+            The prediction result, as returned by the OpenAI client's
+            `completions.create` call.
+        Raises:
+            Exception: if the service is not running
+            ValueError: if the prediction endpoint is unknown.
+        """
+        if not self.is_running:
+            raise Exception(
+                "vLLM Inference service is not running. "
+                "Please start the service before making predictions."
+            )
+        if self.endpoint.prediction_url is not None:
+            # Imported lazily so the module loads without `openai` installed.
+            from openai import OpenAI
+            client = OpenAI(
+                # Placeholder key; presumably the local vLLM server does not
+                # check API keys (confirm against the server configuration).
+                api_key="EMPTY",
+                base_url=self.endpoint.prediction_url,
+            )
+            models = client.models.list()
+            # Use the first model reported by the server.
+            # NOTE(review): this raises IndexError if the list is empty.
+            model = models.data[0].id
+            result = client.completions.create(model=model, prompt=data)
+            # TODO: We can add support for client.chat.completions.create
+        else:
+            raise ValueError("No endpoint known for prediction.")
+        return result

zenml/integrations/whylogs/materializers/whylogs_materializer.py CHANGED Viewed

@@ -14,7 +14,6 @@
 """Implementation of the whylogs materializer."""
 import os
-import tempfile
 from typing import Any, ClassVar, Dict, Tuple, Type, cast
 from whylogs.core import DatasetProfileView  # type: ignore
@@ -51,18 +50,14 @@ class WhylogsMaterializer(BaseMaterializer):
         """
         filepath = os.path.join(self.uri, PROFILE_FILENAME)
-        # Create a temporary folder
-        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
-        temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)
-        # Copy from artifact store to temporary file
-        fileio.copy(filepath, temp_file)
-        profile_view = DatasetProfileView.read(temp_file)
+            # Copy from artifact store to temporary file
+            fileio.copy(filepath, temp_file)
+            profile_view = DatasetProfileView.read(temp_file)
-        # Cleanup and return
-        fileio.rmtree(temp_dir)
-        return profile_view
+            return profile_view
     def save(self, profile_view: DatasetProfileView) -> None:
         """Writes a whylogs dataset profile view.
@@ -72,15 +67,13 @@ class WhylogsMaterializer(BaseMaterializer):
         """
         filepath = os.path.join(self.uri, PROFILE_FILENAME)
-        # Create a temporary folder
-        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
-        temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)
-        profile_view.write(temp_file)
+            profile_view.write(temp_file)
-        # Copy it into artifact store
-        fileio.copy(temp_file, filepath)
-        fileio.rmtree(temp_dir)
+            # Copy it into artifact store
+            fileio.copy(temp_file, filepath)
         try:
             self._upload_to_whylabs(profile_view)

zenml/integrations/xgboost/materializers/xgboost_booster_materializer.py CHANGED Viewed

@@ -14,7 +14,6 @@
 """Implementation of an XGBoost booster materializer."""
 import os
-import tempfile
 from typing import Any, ClassVar, Tuple, Type
 import xgboost as xgb
@@ -43,18 +42,15 @@ class XgboostBoosterMaterializer(BaseMaterializer):
         """
         filepath = os.path.join(self.uri, DEFAULT_FILENAME)
-        # Create a temporary folder
-        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
-        temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
-        # Copy from artifact store to temporary file
-        fileio.copy(filepath, temp_file)
-        booster = xgb.Booster()
-        booster.load_model(temp_file)
+            # Copy from artifact store to temporary file
+            fileio.copy(filepath, temp_file)
+            booster = xgb.Booster()
+            booster.load_model(temp_file)
-        # Cleanup and return
-        fileio.rmtree(temp_dir)
-        return booster
+            return booster
     def save(self, booster: xgb.Booster) -> None:
         """Creates a JSON serialization for a xgboost Booster model.
@@ -64,14 +60,7 @@ class XgboostBoosterMaterializer(BaseMaterializer):
         """
         filepath = os.path.join(self.uri, DEFAULT_FILENAME)
-        # Make a temporary phantom artifact
-        with tempfile.NamedTemporaryFile(
-            mode="w", suffix=".json", delete=False
-        ) as f:
-            booster.save_model(f.name)
-            # Copy it into artifact store
-            fileio.copy(f.name, filepath)
-        # Close and remove the temporary file
-        f.close()
-        fileio.remove(f.name)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
+            booster.save_model(temp_file)
+            fileio.copy(temp_file, filepath)

zenml/integrations/xgboost/materializers/xgboost_dmatrix_materializer.py CHANGED Viewed

@@ -14,7 +14,6 @@
 """Implementation of the XGBoost dmatrix materializer."""
 import os
-import tempfile
 from typing import TYPE_CHECKING, Any, ClassVar, Dict, Tuple, Type
 import xgboost as xgb
@@ -46,17 +45,14 @@ class XgboostDMatrixMaterializer(BaseMaterializer):
         """
         filepath = os.path.join(self.uri, DEFAULT_FILENAME)
-        # Create a temporary folder
-        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
-        temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
-        # Copy from artifact store to temporary file
-        fileio.copy(filepath, temp_file)
-        matrix = xgb.DMatrix(temp_file)
+            # Copy from artifact store to temporary file
+            fileio.copy(filepath, temp_file)
+            matrix = xgb.DMatrix(temp_file)
-        # Cleanup and return
-        fileio.rmtree(temp_dir)
-        return matrix
+            return matrix
     def save(self, matrix: xgb.DMatrix) -> None:
         """Creates a binary serialization for a xgboost.DMatrix object.
@@ -66,15 +62,10 @@ class XgboostDMatrixMaterializer(BaseMaterializer):
         """
         filepath = os.path.join(self.uri, DEFAULT_FILENAME)
-        # Make a temporary phantom artifact
-        with tempfile.NamedTemporaryFile(mode="wb", delete=False) as f:
-            matrix.save_binary(f.name)
-            # Copy it into artifact store
-            fileio.copy(f.name, filepath)
-        # Close and remove the temporary file
-        f.close()
-        fileio.remove(f.name)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
+            matrix.save_binary(temp_file)
+            fileio.copy(temp_file, filepath)
     def extract_metadata(
         self, dataset: xgb.DMatrix

zenml-nightly 0.68.1.dev20241102__py3-none-any.whl → 0.68.1.dev20241106__py3-none-any.whl

zenml-nightly 0.68.1.dev20241102__py3-none-any.whl → 0.68.1.dev20241106__py3-none-any.whl