zenml-nightly 0.68.1.dev20241103__py3-none-any.whl → 0.68.1.dev20241105__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zenml/VERSION +1 -1
- zenml/integrations/__init__.py +3 -1
- zenml/integrations/constants.py +1 -0
- zenml/integrations/langchain/__init__.py +2 -1
- zenml/integrations/langchain/materializers/openai_embedding_materializer.py +28 -2
- zenml/integrations/openai/__init__.py +1 -1
- zenml/integrations/openai/hooks/open_ai_failure_hook.py +39 -14
- zenml/integrations/vllm/__init__.py +50 -0
- zenml/integrations/vllm/flavors/__init__.py +21 -0
- zenml/integrations/vllm/flavors/vllm_model_deployer_flavor.py +91 -0
- zenml/integrations/vllm/model_deployers/__init__.py +19 -0
- zenml/integrations/vllm/model_deployers/vllm_model_deployer.py +263 -0
- zenml/integrations/vllm/services/__init__.py +19 -0
- zenml/integrations/vllm/services/vllm_deployment.py +197 -0
- {zenml_nightly-0.68.1.dev20241103.dist-info → zenml_nightly-0.68.1.dev20241105.dist-info}/METADATA +1 -1
- {zenml_nightly-0.68.1.dev20241103.dist-info → zenml_nightly-0.68.1.dev20241105.dist-info}/RECORD +19 -12
- {zenml_nightly-0.68.1.dev20241103.dist-info → zenml_nightly-0.68.1.dev20241105.dist-info}/LICENSE +0 -0
- {zenml_nightly-0.68.1.dev20241103.dist-info → zenml_nightly-0.68.1.dev20241105.dist-info}/WHEEL +0 -0
- {zenml_nightly-0.68.1.dev20241103.dist-info → zenml_nightly-0.68.1.dev20241105.dist-info}/entry_points.txt +0 -0
zenml/VERSION
CHANGED
@@ -1 +1 @@
-0.68.1.dev20241103
+0.68.1.dev20241105
zenml/integrations/__init__.py
CHANGED
@@ -45,6 +45,7 @@ from zenml.integrations.kubernetes import KubernetesIntegration  # noqa
 from zenml.integrations.label_studio import LabelStudioIntegration  # noqa
 from zenml.integrations.langchain import LangchainIntegration  # noqa
 from zenml.integrations.lightgbm import LightGBMIntegration  # noqa
+
 # from zenml.integrations.llama_index import LlamaIndexIntegration  # noqa
 from zenml.integrations.mlflow import MlflowIntegration  # noqa
 from zenml.integrations.neptune import NeptuneIntegration  # noqa
@@ -52,7 +53,7 @@ from zenml.integrations.neural_prophet import NeuralProphetIntegration  # noqa
 from zenml.integrations.numpy import NumpyIntegration  # noqa
 from zenml.integrations.openai import OpenAIIntegration  # noqa
 from zenml.integrations.pandas import PandasIntegration  # noqa
-from zenml.integrations.pigeon import PigeonIntegration
+from zenml.integrations.pigeon import PigeonIntegration  # noqa
 from zenml.integrations.pillow import PillowIntegration  # noqa
 from zenml.integrations.polars import PolarsIntegration  # noqa
 from zenml.integrations.prodigy import ProdigyIntegration  # noqa
@@ -78,3 +79,4 @@ from zenml.integrations.tensorflow import TensorflowIntegration  # noqa
 from zenml.integrations.wandb import WandbIntegration  # noqa
 from zenml.integrations.whylogs import WhylogsIntegration  # noqa
 from zenml.integrations.xgboost import XgboostIntegration  # noqa
+from zenml.integrations.vllm import VLLMIntegration  # noqa
zenml/integrations/langchain/materializers/openai_embedding_materializer.py
CHANGED
@@ -24,11 +24,37 @@ from zenml.materializers.cloudpickle_materializer import (
 if TYPE_CHECKING and sys.version_info < (3, 8):
     OpenAIEmbeddings = Any
 else:
-    from
+    from langchain_community.embeddings import (
+        OpenAIEmbeddings,
+    )
 
 
 class LangchainOpenaiEmbeddingMaterializer(CloudpickleMaterializer):
-    """
+    """Materializer for Langchain OpenAI Embeddings."""
 
     ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.MODEL
     ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (OpenAIEmbeddings,)
+
+    def save(self, embeddings: Any) -> None:
+        """Saves the embeddings model after clearing non-picklable clients.
+
+        Args:
+            embeddings: The embeddings model to save.
+        """
+        # Clear the clients which will be recreated on load
+        embeddings.client = None
+        embeddings.async_client = None
+
+        # Use the parent class's save implementation which uses cloudpickle
+        super().save(embeddings)
+
+    def load(self, data_type: Type[Any]) -> Any:
+        """Loads the embeddings model and lets it recreate clients when needed.
+
+        Args:
+            data_type: The type of the data to load.
+
+        Returns:
+            The loaded embeddings model.
+        """
+        return super().load(data_type)
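The new `save` exists because the OpenAI SDK clients held by `OpenAIEmbeddings` wrap httpx transports, locks and sockets that cloudpickle cannot serialize; clearing them is safe since the object recreates them lazily on first use after loading. A minimal sketch of the same idea outside ZenML (the API key is illustrative, and the import assumes the `langchain_community` package used above):

    import cloudpickle
    from langchain_community.embeddings import OpenAIEmbeddings

    embeddings = OpenAIEmbeddings(openai_api_key="sk-...")  # illustrative key

    # Pickling with live clients attached can fail; drop them first.
    embeddings.client = None
    embeddings.async_client = None

    blob = cloudpickle.dumps(embeddings)  # succeeds once the clients are cleared
    restored = cloudpickle.loads(blob)    # clients are rebuilt on the next embed call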
zenml/integrations/openai/hooks/open_ai_failure_hook.py
CHANGED
@@ -15,8 +15,9 @@
 
 import io
 import sys
+from typing import Optional
 
-import
+from openai import OpenAI
 from rich.console import Console
 
 from zenml import get_step_context
@@ -38,6 +39,8 @@ def openai_alerter_failure_hook_helper(
     Args:
         exception: The exception that was raised.
         model_name: The OpenAI model to use for the chatbot.
+
+    This implementation uses the OpenAI v1 SDK with automatic retries and backoff.
     """
     client = Client()
     context = get_step_context()
@@ -47,12 +50,15 @@ def openai_alerter_failure_hook_helper(
         openai_secret = client.get_secret(
             "openai", allow_partial_name_match=False
         )
-        openai_api_key = openai_secret.secret_values.get(
+        openai_api_key: Optional[str] = openai_secret.secret_values.get(
+            "api_key"
+        )
     except (KeyError, NotImplementedError):
         openai_api_key = None
 
     alerter = client.active_stack.alerter
     if alerter and openai_api_key:
+        # Capture rich traceback
        output_captured = io.StringIO()
        original_stdout = sys.stdout
        sys.stdout = output_captured
@@ -62,25 +68,44 @@ def openai_alerter_failure_hook_helper(
        sys.stdout = original_stdout
        rich_traceback = output_captured.getvalue()
 
-
+        # Initialize OpenAI client with timeout and retry settings
+        openai_client = OpenAI(
+            api_key=openai_api_key,
+            max_retries=3,  # Will retry 3 times with exponential backoff
+            timeout=60.0,  # 60 second timeout
+        )
+
+        # Create chat completion using the new client pattern
+        response = openai_client.chat.completions.create(
             model=model_name,
             messages=[
                 {
                     "role": "user",
-                    "content": f"This is an error message (following an exception of type '{type(exception)}')
+                    "content": f"This is an error message (following an exception of type '{type(exception)}') "
+                    f"I encountered while executing a ZenML step. Please suggest ways I might fix the problem. "
+                    f"Feel free to give code snippets as examples, and note that your response will be piped "
+                    f"to a Slack bot so make sure the formatting is appropriate: {exception} -- {rich_traceback}. "
+                    f"Thank you!",
                 }
             ],
         )
-
-
-
-
-        message
-
-
-
-
+
+        suggestion = response.choices[0].message.content
+
+        # Format the alert message
+        message = "\n".join(
+            [
+                "*Failure Hook Notification! Step failed!*",
+                "",
+                f"Run name: `{context.pipeline_run.name}`",
+                f"Step name: `{context.step_run.name}`",
+                f"Parameters: `{context.step_run.config.parameters}`",
+                f"Exception: `({type(exception)}) {exception}`",
+                "",
+                f"*OpenAI ChatGPT's suggestion (model = `{model_name}`) on how to fix it:*\n `{suggestion}`",
+            ]
        )
+
        alerter.post(message)
    elif not openai_api_key:
        logger.warning(
@@ -111,4 +136,4 @@ def openai_gpt4_alerter_failure_hook(
     Args:
         exception: The exception that was raised.
     """
-    openai_alerter_failure_hook_helper(exception, "gpt-
+    openai_alerter_failure_hook_helper(exception, "gpt-4o")
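A hedged usage sketch for these hooks: `on_failure` is the standard ZenML step parameter, the step and pipeline names are illustrative, and the import assumes the hook is re-exported from `zenml.integrations.openai.hooks`:

    from zenml import pipeline, step
    from zenml.integrations.openai.hooks import openai_gpt4_alerter_failure_hook


    @step(on_failure=openai_gpt4_alerter_failure_hook)
    def risky_step() -> None:
        raise ValueError("something broke")  # any exception triggers the hook


    @pipeline
    def demo_pipeline() -> None:
        risky_step()

Note that the helper only posts a message when the active stack has an alerter and an `openai` secret with an `api_key` value is registered; otherwise it logs a warning.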
zenml/integrations/vllm/__init__.py
ADDED
@@ -0,0 +1,50 @@
+# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#       https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+"""Initialization for the ZenML vLLM integration."""
+from typing import List, Type
+from zenml.integrations.integration import Integration
+from zenml.stack import Flavor
+from zenml.logger import get_logger
+from zenml.integrations.constants import VLLM
+
+VLLM_MODEL_DEPLOYER = "vllm"
+
+logger = get_logger(__name__)
+
+
+class VLLMIntegration(Integration):
+    """Definition of vLLM integration for ZenML."""
+
+    NAME = VLLM
+
+    REQUIREMENTS = ["vllm>=0.6.0,<0.7.0", "openai>=1.0.0"]
+
+    @classmethod
+    def activate(cls) -> None:
+        """Activates the integration."""
+        from zenml.integrations.vllm import services
+
+    @classmethod
+    def flavors(cls) -> List[Type[Flavor]]:
+        """Declare the stack component flavors for the vLLM integration.
+
+        Returns:
+            List of stack component flavors for this integration.
+        """
+        from zenml.integrations.vllm.flavors import VLLMModelDeployerFlavor
+
+        return [VLLMModelDeployerFlavor]
+
+
+VLLMIntegration.check_installation()
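The module-level `check_installation()` call is how ZenML knows whether the pinned requirements are importable. A hedged sketch of invoking the same check directly (installation itself is usually done with the `zenml integration install vllm` CLI command):

    from zenml.integrations.vllm import VLLMIntegration

    # True only when every requirement (vllm>=0.6.0,<0.7.0, openai>=1.0.0)
    # resolves to an installed, importable package.
    if not VLLMIntegration.check_installation():
        print("Missing requirements; try: zenml integration install vllm")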
zenml/integrations/vllm/flavors/__init__.py
ADDED
@@ -0,0 +1,21 @@
+# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#       https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+"""vLLM integration flavors."""
+
+from zenml.integrations.vllm.flavors.vllm_model_deployer_flavor import (  # noqa
+    VLLMModelDeployerConfig,
+    VLLMModelDeployerFlavor,
+)
+
+__all__ = ["VLLMModelDeployerConfig", "VLLMModelDeployerFlavor"]
zenml/integrations/vllm/flavors/vllm_model_deployer_flavor.py
ADDED
@@ -0,0 +1,91 @@
+# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#       https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+"""vLLM model deployer flavor."""
+
+from typing import TYPE_CHECKING, Optional, Type
+
+from zenml.integrations.vllm import VLLM_MODEL_DEPLOYER
+from zenml.model_deployers.base_model_deployer import (
+    BaseModelDeployerConfig,
+    BaseModelDeployerFlavor,
+)
+
+if TYPE_CHECKING:
+    from zenml.integrations.vllm.model_deployers import VLLMModelDeployer
+
+
+class VLLMModelDeployerConfig(BaseModelDeployerConfig):
+    """Configuration for vLLM Inference model deployer."""
+
+    service_path: str = ""
+
+
+class VLLMModelDeployerFlavor(BaseModelDeployerFlavor):
+    """vLLM model deployer flavor."""
+
+    @property
+    def name(self) -> str:
+        """Name of the flavor.
+
+        Returns:
+            The name of the flavor.
+        """
+        return VLLM_MODEL_DEPLOYER
+
+    @property
+    def docs_url(self) -> Optional[str]:
+        """A url to point at docs explaining this flavor.
+
+        Returns:
+            A flavor docs url.
+        """
+        return self.generate_default_docs_url()
+
+    @property
+    def sdk_docs_url(self) -> Optional[str]:
+        """A url to point at SDK docs explaining this flavor.
+
+        Returns:
+            A flavor SDK docs url.
+        """
+        return self.generate_default_sdk_docs_url()
+
+    @property
+    def logo_url(self) -> str:
+        """A url to represent the flavor in the dashboard.
+
+        Returns:
+            The flavor logo.
+        """
+        return "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/model_deployer/vllm.png"
+
+    @property
+    def config_class(self) -> Type[VLLMModelDeployerConfig]:
+        """Returns `VLLMModelDeployerConfig` config class.
+
+        Returns:
+            The config class.
+        """
+        return VLLMModelDeployerConfig
+
+    @property
+    def implementation_class(self) -> Type["VLLMModelDeployer"]:
+        """Implementation class for this flavor.
+
+        Returns:
+            The implementation class.
+        """
+        from zenml.integrations.vllm.model_deployers import VLLMModelDeployer
+
+        return VLLMModelDeployer
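A hedged sketch of what the flavor wires together: registering a stack component with this flavor (typically `zenml model-deployer register vllm_deployer --flavor=vllm`) pairs `VLLMModelDeployerConfig` with the `VLLMModelDeployer` implementation; the service path below is illustrative:

    from zenml.integrations.vllm.flavors import (
        VLLMModelDeployerConfig,
        VLLMModelDeployerFlavor,
    )

    flavor = VLLMModelDeployerFlavor()
    assert flavor.name == "vllm"  # the VLLM_MODEL_DEPLOYER constant

    # service_path defaults to "", in which case the deployer falls back to
    # a per-component directory under the local stores path (see local_path
    # in the deployer below).
    config = flavor.config_class(service_path="/tmp/vllm-services")
    print(flavor.implementation_class.__name__, config.service_path)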
zenml/integrations/vllm/model_deployers/__init__.py
ADDED
@@ -0,0 +1,19 @@
+# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#       https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+"""Initialization of the vLLM model deployers."""
+from zenml.integrations.vllm.model_deployers.vllm_model_deployer import (  # noqa
+    VLLMModelDeployer,
+)
+
+__all__ = ["VLLMModelDeployer"]
zenml/integrations/vllm/model_deployers/vllm_model_deployer.py
ADDED
@@ -0,0 +1,263 @@
+# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#       https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+"""Implementation of the vLLM Model Deployer."""
+
+import os
+import shutil
+from typing import ClassVar, Dict, Optional, Type, cast
+from uuid import UUID
+
+from zenml.config.global_config import GlobalConfiguration
+from zenml.constants import DEFAULT_SERVICE_START_STOP_TIMEOUT
+from zenml.integrations.vllm.flavors.vllm_model_deployer_flavor import (
+    VLLMModelDeployerConfig,
+    VLLMModelDeployerFlavor,
+)
+from zenml.integrations.vllm.services.vllm_deployment import (
+    VLLMDeploymentService,
+    VLLMServiceConfig,
+)
+from zenml.logger import get_logger
+from zenml.model_deployers import BaseModelDeployer, BaseModelDeployerFlavor
+from zenml.services.service import BaseService, ServiceConfig
+from zenml.utils.io_utils import create_dir_recursive_if_not_exists
+
+logger = get_logger(__name__)
+
+
+class VLLMModelDeployer(BaseModelDeployer):
+    """vLLM Inference Server."""
+
+    NAME: ClassVar[str] = "VLLM"
+    FLAVOR: ClassVar[Type[BaseModelDeployerFlavor]] = VLLMModelDeployerFlavor
+
+    _service_path: Optional[str] = None
+
+    @property
+    def config(self) -> VLLMModelDeployerConfig:
+        """Returns the `VLLMModelDeployerConfig` config.
+
+        Returns:
+            The configuration.
+        """
+        return cast(VLLMModelDeployerConfig, self._config)
+
+    @staticmethod
+    def get_service_path(id_: UUID) -> str:
+        """Get the path where local vLLM service information is stored.
+
+        This includes the deployment service configuration, PID and log files
+        are stored.
+
+        Args:
+            id_: The ID of the vLLM model deployer.
+
+        Returns:
+            The service path.
+        """
+        service_path = os.path.join(
+            GlobalConfiguration().local_stores_path,
+            str(id_),
+        )
+        create_dir_recursive_if_not_exists(service_path)
+        return service_path
+
+    @property
+    def local_path(self) -> str:
+        """Returns the path to the root directory.
+
+        This is where all configurations for vLLM deployment daemon processes
+        are stored.
+
+        If the service path is not set in the config by the user, the path is
+        set to a local default path according to the component ID.
+
+        Returns:
+            The path to the local service root directory.
+        """
+        if self._service_path is not None:
+            return self._service_path
+
+        if self.config.service_path:
+            self._service_path = self.config.service_path
+        else:
+            self._service_path = self.get_service_path(self.id)
+
+        create_dir_recursive_if_not_exists(self._service_path)
+        return self._service_path
+
+    @staticmethod
+    def get_model_server_info(  # type: ignore[override]
+        service_instance: "VLLMDeploymentService",
+    ) -> Dict[str, Optional[str]]:
+        """Return implementation specific information on the model server.
+
+        Args:
+            service_instance: vLLM deployment service object
+
+        Returns:
+            A dictionary containing the model server information.
+        """
+        return {
+            "HEALTH_CHECK_URL": service_instance.get_healthcheck_url(),
+            "PREDICTION_URL": service_instance.get_prediction_url(),
+            "SERVICE_PATH": service_instance.status.runtime_path,
+            "DAEMON_PID": str(service_instance.status.pid),
+        }
+
+    def perform_deploy_model(
+        self,
+        id: UUID,
+        config: ServiceConfig,
+        timeout: int = DEFAULT_SERVICE_START_STOP_TIMEOUT,
+    ) -> BaseService:
+        """Create a new vLLM deployment service or update an existing one.
+
+        This should serve the supplied model and deployment configuration.
+
+        This method has two modes of operation, depending on the `replace`
+        argument value:
+
+          * if `replace` is False, calling this method will create a new vLLM
+            deployment server to reflect the model and other configuration
+            parameters specified in the supplied vLLM service `config`.
+
+          * if `replace` is True, this method will first attempt to find an
+            existing vLLM deployment service that is *equivalent* to the
+            supplied configuration parameters. Two or more vLLM deployment
+            services are considered equivalent if they have the same
+            `pipeline_name`, `pipeline_step_name` and `model_name` configuration
+            parameters. To put it differently, two vLLM deployment services
+            are equivalent if they serve versions of the same model deployed by
+            the same pipeline step. If an equivalent vLLM deployment is found,
+            it will be updated in place to reflect the new configuration
+            parameters.
+
+        Callers should set `replace` to True if they want a continuous model
+        deployment workflow that doesn't spin up a new vLLM deployment
+        server for each new model version. If multiple equivalent vLLM
+        deployment servers are found, one is selected at random to be updated
+        and the others are deleted.
+
+        Args:
+            id: the UUID of the vLLM model deployer.
+            config: the configuration of the model to be deployed with vLLM.
+            timeout: the timeout in seconds to wait for the vLLM server
+                to be provisioned and successfully started or updated. If set
+                to 0, the method will return immediately after the vLLM
+                server is provisioned, without waiting for it to fully start.
+
+        Returns:
+            The ZenML vLLM deployment service object that can be used to
+            interact with the vLLM model http server.
+        """
+        config = cast(VLLMServiceConfig, config)
+        service = self._create_new_service(
+            id=id, timeout=timeout, config=config
+        )
+        logger.info(f"Created a new vLLM deployment service: {service}")
+        return service
+
+    def _clean_up_existing_service(
+        self,
+        timeout: int,
+        force: bool,
+        existing_service: VLLMDeploymentService,
+    ) -> None:
+        # stop the older service
+        existing_service.stop(timeout=timeout, force=force)
+
+        # delete the old configuration file
+        if existing_service.status.runtime_path:
+            shutil.rmtree(existing_service.status.runtime_path)
+
+    # the step will receive a config from the user that mentions the number
+    # of workers etc.the step implementation will create a new config using
+    # all values from the user and add values like pipeline name, model_uri
+    def _create_new_service(
+        self, id: UUID, timeout: int, config: VLLMServiceConfig
+    ) -> VLLMDeploymentService:
+        """Creates a new VLLMDeploymentService.
+
+        Args:
+            id: the ID of the vLLM deployment service to be created or updated.
+            timeout: the timeout in seconds to wait for the vLLM server
+                to be provisioned and successfully started or updated.
+            config: the configuration of the model to be deployed with vLLM.
+
+        Returns:
+            The VLLMDeploymentService object that can be used to interact
+            with the vLLM model server.
+        """
+        # set the root runtime path with the stack component's UUID
+        config.root_runtime_path = self.local_path
+        # create a new service for the new model
+        service = VLLMDeploymentService(uuid=id, config=config)
+        service.start(timeout=timeout)
+
+        return service
+
+    def perform_stop_model(
+        self,
+        service: BaseService,
+        timeout: int = DEFAULT_SERVICE_START_STOP_TIMEOUT,
+        force: bool = False,
+    ) -> BaseService:
+        """Method to stop a model server.
+
+        Args:
+            service: The service to stop.
+            timeout: Timeout in seconds to wait for the service to stop.
+            force: If True, force the service to stop.
+
+        Returns:
+            The stopped service.
+        """
+        service.stop(timeout=timeout, force=force)
+        return service
+
+    def perform_start_model(
+        self,
+        service: BaseService,
+        timeout: int = DEFAULT_SERVICE_START_STOP_TIMEOUT,
+    ) -> BaseService:
+        """Method to start a model server.
+
+        Args:
+            service: The service to start.
+            timeout: Timeout in seconds to wait for the service to start.
+
+        Returns:
+            The started service.
+        """
+        service.start(timeout=timeout)
+        return service
+
+    def perform_delete_model(
+        self,
+        service: BaseService,
+        timeout: int = DEFAULT_SERVICE_START_STOP_TIMEOUT,
+        force: bool = False,
+    ) -> None:
+        """Method to delete all configuration of a model server.
+
+        Args:
+            service: The service to delete.
+            timeout: Timeout in seconds to wait for the service to stop.
+            force: If True, force the service to stop.
+        """
+        service = cast(VLLMDeploymentService, service)
+        self._clean_up_existing_service(
+            existing_service=service, timeout=timeout, force=force
+        )
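A hedged sketch of driving the deployer from a pipeline step: the model name, port and timeout are illustrative, and `get_active_model_deployer()` is the base-class helper that resolves the deployer from the active stack:

    from uuid import uuid4

    from zenml import step
    from zenml.integrations.vllm.model_deployers import VLLMModelDeployer
    from zenml.integrations.vllm.services.vllm_deployment import VLLMServiceConfig


    @step
    def deploy_vllm_model() -> None:
        deployer = VLLMModelDeployer.get_active_model_deployer()
        # Starts a local daemon running the OpenAI-compatible vLLM server;
        # its config, PID and log files live under deployer.local_path.
        service = deployer.perform_deploy_model(
            id=uuid4(),
            config=VLLMServiceConfig(model="facebook/opt-125m", port=8000),
            timeout=300,
        )
        print(VLLMModelDeployer.get_model_server_info(service))

In practice `perform_deploy_model` is typically invoked for you through the base class's `deploy_model` entry point; calling it directly as above is just the shortest path through the code added here.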
zenml/integrations/vllm/services/__init__.py
ADDED
@@ -0,0 +1,19 @@
+# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#       https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+"""Initialization of the vLLM Inference Server."""
+
+from zenml.integrations.vllm.services.vllm_deployment import (  # noqa
+    VLLMDeploymentService,
+    VLLMServiceConfig,
+)
zenml/integrations/vllm/services/vllm_deployment.py
ADDED
@@ -0,0 +1,197 @@
+# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#       https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+"""Implementation of the vLLM Inference Server Service."""
+
+import os
+from typing import Any, List, Optional, Union
+
+from zenml.constants import DEFAULT_LOCAL_SERVICE_IP_ADDRESS
+from zenml.logger import get_logger
+from zenml.services import (
+    HTTPEndpointHealthMonitor,
+    HTTPEndpointHealthMonitorConfig,
+    LocalDaemonService,
+    LocalDaemonServiceConfig,
+    LocalDaemonServiceEndpoint,
+    LocalDaemonServiceEndpointConfig,
+    ServiceEndpointProtocol,
+    ServiceType,
+)
+from zenml.services.service import BaseDeploymentService
+
+logger = get_logger(__name__)
+
+
+VLLM_PREDICTION_URL_PATH = "v1"
+VLLM_HEALTHCHECK_URL_PATH = "health"
+
+
+class VLLMDeploymentEndpointConfig(LocalDaemonServiceEndpointConfig):
+    """vLLM deployment service configuration.
+
+    Attributes:
+        prediction_url_path: URI subpath for prediction requests
+    """
+
+    prediction_url_path: str
+
+
+class VLLMDeploymentEndpoint(LocalDaemonServiceEndpoint):
+    """A service endpoint exposed by the vLLM deployment daemon.
+
+    Attributes:
+        config: service endpoint configuration
+    """
+
+    config: VLLMDeploymentEndpointConfig
+    monitor: HTTPEndpointHealthMonitor
+
+    @property
+    def prediction_url(self) -> Optional[str]:
+        """Gets the prediction URL for the endpoint.
+
+        Returns:
+            the prediction URL for the endpoint
+        """
+        uri = self.status.uri
+        if not uri:
+            return None
+        return os.path.join(uri, self.config.prediction_url_path)
+
+
+class VLLMServiceConfig(LocalDaemonServiceConfig):
+    """vLLM service configurations."""
+
+    model: str
+    port: int
+    host: Optional[str] = None
+    blocking: bool = True
+    # If unspecified, model name or path will be used.
+    tokenizer: Optional[str] = None
+    served_model_name: Optional[Union[str, List[str]]] = None
+    # Trust remote code from huggingface.
+    trust_remote_code: Optional[bool] = False
+    # ['auto', 'slow', 'mistral']
+    tokenizer_mode: Optional[str] = "auto"
+    # ['auto', 'half', 'float16', 'bfloat16', 'float', 'float32']
+    dtype: Optional[str] = "auto"
+    # The specific model version to use. It can be a branch name, a tag name, or a commit id.
+    # If unspecified, will use the default version.
+    revision: Optional[str] = None
+
+
+class VLLMDeploymentService(LocalDaemonService, BaseDeploymentService):
+    """vLLM Inference Server Deployment Service."""
+
+    SERVICE_TYPE = ServiceType(
+        name="vllm-deployment",
+        type="model-serving",
+        flavor="vllm",
+        description="vLLM Inference prediction service",
+    )
+    config: VLLMServiceConfig
+    endpoint: VLLMDeploymentEndpoint
+
+    def __init__(self, config: VLLMServiceConfig, **attrs: Any):
+        """Initialize the vLLM deployment service.
+
+        Args:
+            config: service configuration
+            attrs: additional attributes to set on the service
+        """
+        if isinstance(config, VLLMServiceConfig) and "endpoint" not in attrs:
+            endpoint = VLLMDeploymentEndpoint(
+                config=VLLMDeploymentEndpointConfig(
+                    protocol=ServiceEndpointProtocol.HTTP,
+                    port=config.port,
+                    ip_address=config.host or DEFAULT_LOCAL_SERVICE_IP_ADDRESS,
+                    prediction_url_path=VLLM_PREDICTION_URL_PATH,
+                ),
+                monitor=HTTPEndpointHealthMonitor(
+                    config=HTTPEndpointHealthMonitorConfig(
+                        healthcheck_uri_path=VLLM_HEALTHCHECK_URL_PATH,
+                    )
+                ),
+            )
+            attrs["endpoint"] = endpoint
+        super().__init__(config=config, **attrs)
+
+    def run(self) -> None:
+        """Start the service."""
+        logger.info(
+            "Starting vLLM inference server service as blocking "
+            "process... press CTRL+C once to stop it."
+        )
+
+        self.endpoint.prepare_for_start()
+
+        import uvloop
+        from vllm.entrypoints.openai.api_server import run_server
+        from vllm.entrypoints.openai.cli_args import make_arg_parser
+        from vllm.utils import FlexibleArgumentParser
+
+        try:
+            parser = make_arg_parser(FlexibleArgumentParser())
+            args = parser.parse_args()
+            # Override port with the available port
+            self.config.port = self.endpoint.status.port
+            # Update the arguments in place
+            args.__dict__.update(self.config.model_dump())
+            uvloop.run(run_server(args=args))
+        except KeyboardInterrupt:
+            logger.info("Stopping vLLM prediction service...")
+
+    @property
+    def prediction_url(self) -> Optional[str]:
+        """Gets the prediction URL for the endpoint.
+
+        Returns:
+            the prediction URL for the endpoint
+        """
+        if not self.is_running:
+            return None
+        return self.endpoint.prediction_url_path
+
+    def predict(self, data: "Any") -> "Any":
+        """Make a prediction using the service.
+
+        Args:
+            data: data to make a prediction on
+
+        Returns:
+            The prediction result.
+
+        Raises:
+            Exception: if the service is not running
+            ValueError: if the prediction endpoint is unknown.
+        """
+        if not self.is_running:
+            raise Exception(
+                "vLLM Inference service is not running. "
+                "Please start the service before making predictions."
+            )
+        if self.endpoint.prediction_url is not None:
+            from openai import OpenAI
+
+            client = OpenAI(
+                api_key="EMPTY",
+                base_url=self.endpoint.prediction_url,
+            )
+            models = client.models.list()
+            model = models.data[0].id
+            result = client.completions.create(model=model, prompt=data)
+            # TODO: We can add support for client.chat.completions.create
+        else:
+            raise ValueError("No endpoint known for prediction.")
+        return result
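Since `run()` launches vLLM's OpenAI-compatible API server, `predict()` above is just an OpenAI client pointed at the service's `/v1` endpoint. A hedged sketch of querying a deployed server directly, with an illustrative base URL:

    from openai import OpenAI

    # The service's prediction URL resolves to http://<host>:<port>/v1;
    # vLLM does not check the API key, so any placeholder works.
    client = OpenAI(api_key="EMPTY", base_url="http://127.0.0.1:8000/v1")

    model = client.models.list().data[0].id  # the single served model
    completion = client.completions.create(model=model, prompt="ZenML is")
    print(completion.choices[0].text)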
{zenml_nightly-0.68.1.dev20241103.dist-info → zenml_nightly-0.68.1.dev20241105.dist-info}/RECORD
RENAMED
@@ -6,7 +6,7 @@ RELEASE_NOTES.md,sha256=oShLQurhMKncKnc_y7tiasEfgy1aCOOjxdpax-MlGI8,381641
 ROADMAP.md,sha256=hiLSmr16BH8Dfx7SaQM4JcXCGCVl6mFZPFAwJeDTrJU,407
 SECURITY.md,sha256=9DepA8y03yvCZLHEfcXLTDH4lUyKHquAdukBsccNN7c,682
 zenml/README.md,sha256=827dekbOWAs1BpW7VF1a4d7EbwPbjwccX-2zdXBENZo,1777
-zenml/VERSION,sha256=
+zenml/VERSION,sha256=96VhSJ-qEwmgM7eMLpR1roireLTaxnyn3N4rKKXZBSE,19
 zenml/__init__.py,sha256=XhLh9kV87ErcivCctQJaTtUOjl6kugT3pVyqqLKzBP8,2058
 zenml/actions/__init__.py,sha256=mrt6wPo73iKRxK754_NqsGyJ3buW7RnVeIGXr1xEw8Y,681
 zenml/actions/base_action.py,sha256=UcaHev6BTuLDwuswnyaPjdA8AgUqB5xPZ-lRtuvf2FU,25553
@@ -130,7 +130,7 @@ zenml/image_builders/base_image_builder.py,sha256=-Y5N3zFZsMJvVuzm1M3tU-r38fT9KC
 zenml/image_builders/build_context.py,sha256=TTY5T8aG4epeKOOpLItr8PDjmDijfcGaY3zFzmGV1II,6157
 zenml/image_builders/local_image_builder.py,sha256=nxwzPGgB2ePE51HcvT6hM6w37j9gn2ITEJuPMrx_SKw,5709
 zenml/integrations/README.md,sha256=hFIZwjsAItHjvDWVBqGSF-ZAeMsFR2GKX1Axl2g1Bz0,6190
-zenml/integrations/__init__.py,sha256=
+zenml/integrations/__init__.py,sha256=ciJbNsqNPTHpWeMbFfLNa8fJ0jg8AxJUjOPnqrYPl9M,4843
 zenml/integrations/airflow/__init__.py,sha256=7ffV98vlrdH1RfWHkv8TXNd3hjtXSx4z2U7MZin-87I,1483
 zenml/integrations/airflow/flavors/__init__.py,sha256=Y48mn5OxERPPaXDBd5CFAIn6yhLPsgN5ZMk26hLXiNM,800
 zenml/integrations/airflow/flavors/airflow_orchestrator_flavor.py,sha256=VfZQD2H-WwIgVD1Fi7uewdnkvRoSykY0YCfROFDadXg,6189
@@ -198,7 +198,7 @@ zenml/integrations/comet/experiment_trackers/__init__.py,sha256=reGygyAEgMrlc-9Q
 zenml/integrations/comet/experiment_trackers/comet_experiment_tracker.py,sha256=JnB_TqiCD8t9t6cVxWoomxvBuhA4jIJHYFZ-gKdGXf8,5767
 zenml/integrations/comet/flavors/__init__.py,sha256=x-XK-YwHMxz3zZPoIXo3X5vq_5VYUJAnsIoEX_ZooOU,883
 zenml/integrations/comet/flavors/comet_experiment_tracker_flavor.py,sha256=Rkk1UtEVY2MQBKbUHKxYQpDTWndkOYF8KuKuMGZAb24,3706
-zenml/integrations/constants.py,sha256=
+zenml/integrations/constants.py,sha256=Qi3uwS9jIxGY1v4nES-5npWuQTS2uOj6IEUKyOzLehM,2055
 zenml/integrations/databricks/__init__.py,sha256=dkyTxfwIete7mRBlDzIfsTmllYgrd4DB2P4brXHPMUs,2414
 zenml/integrations/databricks/flavors/__init__.py,sha256=S-BZ3R9iKGOw-aUltR8I0ULEe2-LKGTIZhQv9TlnXfk,1122
 zenml/integrations/databricks/flavors/databricks_model_deployer_flavor.py,sha256=eDyYVqO2x1A9qgGICKJx5Z3qiUuTMfW9R3NZUO8OiRk,3591
@@ -358,10 +358,10 @@ zenml/integrations/label_studio/label_config_generators/label_config_generators.
 zenml/integrations/label_studio/label_studio_utils.py,sha256=NelKDXCoEIF37-xh7rffHeuHEwWvkfshR5w5f6HuBII,3316
 zenml/integrations/label_studio/steps/__init__.py,sha256=SQ-6oyRtqHDsU-QjOdvtd-cD8plsW40Dwl5SZnWtbbA,895
 zenml/integrations/label_studio/steps/label_studio_standard_steps.py,sha256=k7UTFzDZBTdV0NbVtRKMqQo-gURvdSMtjtHoFfiIWgs,8695
-zenml/integrations/langchain/__init__.py,sha256=
+zenml/integrations/langchain/__init__.py,sha256=Qzsw8brka_N2QFQj3iUKEClHVVH-UMsHCCLAq1tpk24,1411
 zenml/integrations/langchain/materializers/__init__.py,sha256=ouU6MDX_gZc0FVgNK8xO6F7B2XOEikrevQEZpdYyaOM,1037
 zenml/integrations/langchain/materializers/document_materializer.py,sha256=86-V8ADkT0laE8ZvQyj8v9EbxHeeQ9PbiQq06OhMmdo,2287
-zenml/integrations/langchain/materializers/openai_embedding_materializer.py,sha256=
+zenml/integrations/langchain/materializers/openai_embedding_materializer.py,sha256=LXqsU4X-t6NKed7Y8BSVZY2IU7wu0fkO8NlVEM2kibc,2077
 zenml/integrations/langchain/materializers/vector_store_materializer.py,sha256=HQZxrJLtm_dCNZH5FeF6_4YfQRKu-mais6_uzSIEaLs,1273
 zenml/integrations/lightgbm/__init__.py,sha256=6WwTSY7teUMj4Ru0e7xLCl6MR3CtelW7RHgtLdWadag,1162
 zenml/integrations/lightgbm/materializers/__init__.py,sha256=9tUTAisuFmR2-B4E-3l23Ab_sy8Jw6AAKUkG3pnd6ZI,929
@@ -409,9 +409,9 @@ zenml/integrations/neural_prophet/materializers/neural_prophet_materializer.py,s
 zenml/integrations/numpy/__init__.py,sha256=McWmP5C0LNd03GtTrq5KOiKh9JUBhwhX8rmJlUnyR34,1105
 zenml/integrations/numpy/materializers/__init__.py,sha256=txwv8We-dLehTWqY-eDYx40njg4Ld8eQrs1O0MZiiIk,766
 zenml/integrations/numpy/materializers/numpy_materializer.py,sha256=rNIcoZkU6JZcqEc6yt3x3yHvmmWnLAfKy74hLxKXbM8,8477
-zenml/integrations/openai/__init__.py,sha256=
+zenml/integrations/openai/__init__.py,sha256=cKPFCz_cTnJLQ-crdgpWQlHEZnVVeK5_SyRue2bvCXY,958
 zenml/integrations/openai/hooks/__init__.py,sha256=8VfiVOyIrjya9G_VK5GPEqq9G5i9w5u4ngf-Oo_oHT4,811
-zenml/integrations/openai/hooks/open_ai_failure_hook.py,sha256=
+zenml/integrations/openai/hooks/open_ai_failure_hook.py,sha256=tQe-dUO7_w24ABfN96TZ7Zc2inJMI5u9sdE8gBxrDyM,4702
 zenml/integrations/pandas/__init__.py,sha256=Rt4BJUlZk-Td2m9l7cLkbAv4vL2Z2YULTqWoICGoU6s,1114
 zenml/integrations/pandas/materializers/__init__.py,sha256=LcN6iO4vZKTMFp1eRF5njIu-UwqMsonms3T4ObFTtbk,770
 zenml/integrations/pandas/materializers/pandas_materializer.py,sha256=kr4kVLluvytiqvYMS8JtwmcGQW6cMJxnsSbX1XMKo6c,7077
@@ -537,6 +537,13 @@ zenml/integrations/tensorflow/materializers/__init__.py,sha256=iQVlAHAqdD6ItJlJy
 zenml/integrations/tensorflow/materializers/keras_materializer.py,sha256=BRXo3w1nB7eujOfFVez79kjhtJjm42Lc498tW4Hx0AY,3281
 zenml/integrations/tensorflow/materializers/tf_dataset_materializer.py,sha256=ozgJzZ8OBP0dv87hfUa7-8DNPYlQdaf8jKRVNvqLR6A,2810
 zenml/integrations/utils.py,sha256=Pw3f7x_nuhpfq-TmYaTqF-bcIYCBIUChcwQtyVaTyY8,2698
+zenml/integrations/vllm/__init__.py,sha256=3ZvUoWUGvYRGg-F_My9Vx4q2_ywDeWcKciyv9E1DFAU,1623
+zenml/integrations/vllm/flavors/__init__.py,sha256=oyOnp9JXWXCYPBvcQkkNrkFAboypx-li-Pyd0YAxb9A,853
+zenml/integrations/vllm/flavors/vllm_model_deployer_flavor.py,sha256=_3P0-qyjdsVzoUftaotT57mtc2EWJe7DljltogdHpoY,2646
+zenml/integrations/vllm/model_deployers/__init__.py,sha256=Z38oWIfkArNsxCm3rQkTdYK4dbtx2BpTUw1gw_kl6Do,803
+zenml/integrations/vllm/model_deployers/vllm_model_deployer.py,sha256=OYPNSkB-I5r4eQ_7kr4F7GDwNj6efcsio8WRteQ5cYI,9665
+zenml/integrations/vllm/services/__init__.py,sha256=Id28GEfHECI0RnGAGGNioD9eZ6aJxdNebe112VgC59g,788
+zenml/integrations/vllm/services/vllm_deployment.py,sha256=jPVKstcJ2AFmEG7R0Q6CcNUz0EEybBZok56F0QSgdTI,6619
 zenml/integrations/wandb/__init__.py,sha256=LBlnX4chpaB3atIsxkF0RSz2AJs9gHQWRptkgkqF6lw,1711
 zenml/integrations/wandb/experiment_trackers/__init__.py,sha256=8nFyyvh-PTF5d9ZfjS7xFSWTWSpreRB1azePv-Ex2sc,771
 zenml/integrations/wandb/experiment_trackers/wandb_experiment_tracker.py,sha256=xNkF-3-WwpC8OV38T5evV35t6rH5o3O6uBlX4cimsKs,5092
@@ -1245,8 +1252,8 @@ zenml/zen_stores/secrets_stores/sql_secrets_store.py,sha256=Bq1djrUP9saoD7vECjS7
 zenml/zen_stores/sql_zen_store.py,sha256=n5LWV-VBX2cfLDNQDk1F_xBCIklEs8Tug54Iafr7_YU,402789
 zenml/zen_stores/template_utils.py,sha256=EKYBgmDLTS_PSMWaIO5yvHPLiQvMqHcsAe6NUCrv-i4,9068
 zenml/zen_stores/zen_store_interface.py,sha256=kzR_i8vHjULld3MquSaMorcab8lJk1e9RZquw1VXjHY,93510
-zenml_nightly-0.68.1.
-zenml_nightly-0.68.1.
-zenml_nightly-0.68.1.
-zenml_nightly-0.68.1.
-zenml_nightly-0.68.1.
+zenml_nightly-0.68.1.dev20241105.dist-info/LICENSE,sha256=wbnfEnXnafPbqwANHkV6LUsPKOtdpsd-SNw37rogLtc,11359
+zenml_nightly-0.68.1.dev20241105.dist-info/METADATA,sha256=DqsZKui96Zfhm9sNSEH6-vT66FATfUjIuOHEYsTn0T0,21208
+zenml_nightly-0.68.1.dev20241105.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+zenml_nightly-0.68.1.dev20241105.dist-info/entry_points.txt,sha256=QK3ETQE0YswAM2mWypNMOv8TLtr7EjnqAFq1br_jEFE,43
+zenml_nightly-0.68.1.dev20241105.dist-info/RECORD,,
{zenml_nightly-0.68.1.dev20241103.dist-info → zenml_nightly-0.68.1.dev20241105.dist-info}/LICENSE
RENAMED
File without changes
{zenml_nightly-0.68.1.dev20241103.dist-info → zenml_nightly-0.68.1.dev20241105.dist-info}/WHEEL
RENAMED
File without changes
{zenml_nightly-0.68.1.dev20241103.dist-info → zenml_nightly-0.68.1.dev20241105.dist-info}/entry_points.txt
RENAMED
File without changes