zenml-nightly 0.68.1.dev20241104__py3-none-any.whl → 0.68.1.dev20241105__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zenml/VERSION CHANGED
@@ -1 +1 @@
1
- 0.68.1.dev20241104
1
+ 0.68.1.dev20241105
@@ -45,6 +45,7 @@ from zenml.integrations.kubernetes import KubernetesIntegration # noqa
45
45
  from zenml.integrations.label_studio import LabelStudioIntegration # noqa
46
46
  from zenml.integrations.langchain import LangchainIntegration # noqa
47
47
  from zenml.integrations.lightgbm import LightGBMIntegration # noqa
48
+
48
49
  # from zenml.integrations.llama_index import LlamaIndexIntegration # noqa
49
50
  from zenml.integrations.mlflow import MlflowIntegration # noqa
50
51
  from zenml.integrations.neptune import NeptuneIntegration # noqa
@@ -52,7 +53,7 @@ from zenml.integrations.neural_prophet import NeuralProphetIntegration # noqa
52
53
  from zenml.integrations.numpy import NumpyIntegration # noqa
53
54
  from zenml.integrations.openai import OpenAIIntegration # noqa
54
55
  from zenml.integrations.pandas import PandasIntegration # noqa
55
- from zenml.integrations.pigeon import PigeonIntegration # noqa
56
+ from zenml.integrations.pigeon import PigeonIntegration # noqa
56
57
  from zenml.integrations.pillow import PillowIntegration # noqa
57
58
  from zenml.integrations.polars import PolarsIntegration # noqa
58
59
  from zenml.integrations.prodigy import ProdigyIntegration # noqa
@@ -78,3 +79,4 @@ from zenml.integrations.tensorflow import TensorflowIntegration # noqa
78
79
  from zenml.integrations.wandb import WandbIntegration # noqa
79
80
  from zenml.integrations.whylogs import WhylogsIntegration # noqa
80
81
  from zenml.integrations.xgboost import XgboostIntegration # noqa
82
+ from zenml.integrations.vllm import VLLMIntegration # noqa
@@ -76,4 +76,5 @@ WANDB = "wandb"
76
76
  VERTEX = "vertex"
77
77
  XGBOOST = "xgboost"
78
78
  VAULT = "vault"
79
+ VLLM = "vllm"
79
80
  LIGHTNING = "lightning"
@@ -0,0 +1,50 @@
1
+ # Copyright (c) ZenML GmbH 2024. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12
+ # or implied. See the License for the specific language governing
13
+ # permissions and limitations under the License.
14
+ """Initialization for the ZenML vLLM integration."""
15
+ from typing import List, Type
16
+ from zenml.integrations.integration import Integration
17
+ from zenml.stack import Flavor
18
+ from zenml.logger import get_logger
19
+ from zenml.integrations.constants import VLLM
20
+
21
+ VLLM_MODEL_DEPLOYER = "vllm"
22
+
23
+ logger = get_logger(__name__)
24
+
25
+
26
+ class VLLMIntegration(Integration):
27
+ """Definition of vLLM integration for ZenML."""
28
+
29
+ NAME = VLLM
30
+
31
+ REQUIREMENTS = ["vllm>=0.6.0,<0.7.0", "openai>=1.0.0"]
32
+
33
+ @classmethod
34
+ def activate(cls) -> None:
35
+ """Activates the integration."""
36
+ from zenml.integrations.vllm import services
37
+
38
+ @classmethod
39
+ def flavors(cls) -> List[Type[Flavor]]:
40
+ """Declare the stack component flavors for the vLLM integration.
41
+
42
+ Returns:
43
+ List of stack component flavors for this integration.
44
+ """
45
+ from zenml.integrations.vllm.flavors import VLLMModelDeployerFlavor
46
+
47
+ return [VLLMModelDeployerFlavor]
48
+
49
+
50
+ VLLMIntegration.check_installation()
@@ -0,0 +1,21 @@
1
+ # Copyright (c) ZenML GmbH 2024. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12
+ # or implied. See the License for the specific language governing
13
+ # permissions and limitations under the License.
14
+ """vLLM integration flavors."""
15
+
16
+ from zenml.integrations.vllm.flavors.vllm_model_deployer_flavor import ( # noqa
17
+ VLLMModelDeployerConfig,
18
+ VLLMModelDeployerFlavor,
19
+ )
20
+
21
+ __all__ = ["VLLMModelDeployerConfig", "VLLMModelDeployerFlavor"]
@@ -0,0 +1,91 @@
1
+ # Copyright (c) ZenML GmbH 2024. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12
+ # or implied. See the License for the specific language governing
13
+ # permissions and limitations under the License.
14
+ """vLLM model deployer flavor."""
15
+
16
+ from typing import TYPE_CHECKING, Optional, Type
17
+
18
+ from zenml.integrations.vllm import VLLM_MODEL_DEPLOYER
19
+ from zenml.model_deployers.base_model_deployer import (
20
+ BaseModelDeployerConfig,
21
+ BaseModelDeployerFlavor,
22
+ )
23
+
24
+ if TYPE_CHECKING:
25
+ from zenml.integrations.vllm.model_deployers import VLLMModelDeployer
26
+
27
+
28
+ class VLLMModelDeployerConfig(BaseModelDeployerConfig):
29
+ """Configuration for vLLM Inference model deployer."""
30
+
31
+ service_path: str = ""
32
+
33
+
34
+ class VLLMModelDeployerFlavor(BaseModelDeployerFlavor):
35
+ """vLLM model deployer flavor."""
36
+
37
+ @property
38
+ def name(self) -> str:
39
+ """Name of the flavor.
40
+
41
+ Returns:
42
+ The name of the flavor.
43
+ """
44
+ return VLLM_MODEL_DEPLOYER
45
+
46
+ @property
47
+ def docs_url(self) -> Optional[str]:
48
+ """A url to point at docs explaining this flavor.
49
+
50
+ Returns:
51
+ A flavor docs url.
52
+ """
53
+ return self.generate_default_docs_url()
54
+
55
+ @property
56
+ def sdk_docs_url(self) -> Optional[str]:
57
+ """A url to point at SDK docs explaining this flavor.
58
+
59
+ Returns:
60
+ A flavor SDK docs url.
61
+ """
62
+ return self.generate_default_sdk_docs_url()
63
+
64
+ @property
65
+ def logo_url(self) -> str:
66
+ """A url to represent the flavor in the dashboard.
67
+
68
+ Returns:
69
+ The flavor logo.
70
+ """
71
+ return "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/model_deployer/vllm.png"
72
+
73
+ @property
74
+ def config_class(self) -> Type[VLLMModelDeployerConfig]:
75
+ """Returns `VLLMModelDeployerConfig` config class.
76
+
77
+ Returns:
78
+ The config class.
79
+ """
80
+ return VLLMModelDeployerConfig
81
+
82
+ @property
83
+ def implementation_class(self) -> Type["VLLMModelDeployer"]:
84
+ """Implementation class for this flavor.
85
+
86
+ Returns:
87
+ The implementation class.
88
+ """
89
+ from zenml.integrations.vllm.model_deployers import VLLMModelDeployer
90
+
91
+ return VLLMModelDeployer
@@ -0,0 +1,19 @@
1
+ # Copyright (c) ZenML GmbH 2024. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12
+ # or implied. See the License for the specific language governing
13
+ # permissions and limitations under the License.
14
+ """Initialization of the vLLM model deployers."""
15
+ from zenml.integrations.vllm.model_deployers.vllm_model_deployer import ( # noqa
16
+ VLLMModelDeployer,
17
+ )
18
+
19
+ __all__ = ["VLLMModelDeployer"]
@@ -0,0 +1,263 @@
1
+ # Copyright (c) ZenML GmbH 2024. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12
+ # or implied. See the License for the specific language governing
13
+ # permissions and limitations under the License.
14
+ """Implementation of the vLLM Model Deployer."""
15
+
16
+ import os
17
+ import shutil
18
+ from typing import ClassVar, Dict, Optional, Type, cast
19
+ from uuid import UUID
20
+
21
+ from zenml.config.global_config import GlobalConfiguration
22
+ from zenml.constants import DEFAULT_SERVICE_START_STOP_TIMEOUT
23
+ from zenml.integrations.vllm.flavors.vllm_model_deployer_flavor import (
24
+ VLLMModelDeployerConfig,
25
+ VLLMModelDeployerFlavor,
26
+ )
27
+ from zenml.integrations.vllm.services.vllm_deployment import (
28
+ VLLMDeploymentService,
29
+ VLLMServiceConfig,
30
+ )
31
+ from zenml.logger import get_logger
32
+ from zenml.model_deployers import BaseModelDeployer, BaseModelDeployerFlavor
33
+ from zenml.services.service import BaseService, ServiceConfig
34
+ from zenml.utils.io_utils import create_dir_recursive_if_not_exists
35
+
36
+ logger = get_logger(__name__)
37
+
38
+
39
+ class VLLMModelDeployer(BaseModelDeployer):
40
+ """vLLM Inference Server."""
41
+
42
+ NAME: ClassVar[str] = "VLLM"
43
+ FLAVOR: ClassVar[Type[BaseModelDeployerFlavor]] = VLLMModelDeployerFlavor
44
+
45
+ _service_path: Optional[str] = None
46
+
47
+ @property
48
+ def config(self) -> VLLMModelDeployerConfig:
49
+ """Returns the `VLLMModelDeployerConfig` config.
50
+
51
+ Returns:
52
+ The configuration.
53
+ """
54
+ return cast(VLLMModelDeployerConfig, self._config)
55
+
56
+ @staticmethod
57
+ def get_service_path(id_: UUID) -> str:
58
+ """Get the path where local vLLM service information is stored.
59
+
60
+ This includes the deployment service configuration, PID and log files
61
+ are stored.
62
+
63
+ Args:
64
+ id_: The ID of the vLLM model deployer.
65
+
66
+ Returns:
67
+ The service path.
68
+ """
69
+ service_path = os.path.join(
70
+ GlobalConfiguration().local_stores_path,
71
+ str(id_),
72
+ )
73
+ create_dir_recursive_if_not_exists(service_path)
74
+ return service_path
75
+
76
+ @property
77
+ def local_path(self) -> str:
78
+ """Returns the path to the root directory.
79
+
80
+ This is where all configurations for vLLM deployment daemon processes
81
+ are stored.
82
+
83
+ If the service path is not set in the config by the user, the path is
84
+ set to a local default path according to the component ID.
85
+
86
+ Returns:
87
+ The path to the local service root directory.
88
+ """
89
+ if self._service_path is not None:
90
+ return self._service_path
91
+
92
+ if self.config.service_path:
93
+ self._service_path = self.config.service_path
94
+ else:
95
+ self._service_path = self.get_service_path(self.id)
96
+
97
+ create_dir_recursive_if_not_exists(self._service_path)
98
+ return self._service_path
99
+
100
+ @staticmethod
101
+ def get_model_server_info( # type: ignore[override]
102
+ service_instance: "VLLMDeploymentService",
103
+ ) -> Dict[str, Optional[str]]:
104
+ """Return implementation specific information on the model server.
105
+
106
+ Args:
107
+ service_instance: vLLM deployment service object
108
+
109
+ Returns:
110
+ A dictionary containing the model server information.
111
+ """
112
+ return {
113
+ "HEALTH_CHECK_URL": service_instance.get_healthcheck_url(),
114
+ "PREDICTION_URL": service_instance.get_prediction_url(),
115
+ "SERVICE_PATH": service_instance.status.runtime_path,
116
+ "DAEMON_PID": str(service_instance.status.pid),
117
+ }
118
+
119
+ def perform_deploy_model(
120
+ self,
121
+ id: UUID,
122
+ config: ServiceConfig,
123
+ timeout: int = DEFAULT_SERVICE_START_STOP_TIMEOUT,
124
+ ) -> BaseService:
125
+ """Create a new vLLM deployment service or update an existing one.
126
+
127
+ This should serve the supplied model and deployment configuration.
128
+
129
+ This method has two modes of operation, depending on the `replace`
130
+ argument value:
131
+
132
+ * if `replace` is False, calling this method will create a new vLLM
133
+ deployment server to reflect the model and other configuration
134
+ parameters specified in the supplied vLLM service `config`.
135
+
136
+ * if `replace` is True, this method will first attempt to find an
137
+ existing vLLM deployment service that is *equivalent* to the
138
+ supplied configuration parameters. Two or more vLLM deployment
139
+ services are considered equivalent if they have the same
140
+ `pipeline_name`, `pipeline_step_name` and `model_name` configuration
141
+ parameters. To put it differently, two vLLM deployment services
142
+ are equivalent if they serve versions of the same model deployed by
143
+ the same pipeline step. If an equivalent vLLM deployment is found,
144
+ it will be updated in place to reflect the new configuration
145
+ parameters.
146
+
147
+ Callers should set `replace` to True if they want a continuous model
148
+ deployment workflow that doesn't spin up a new vLLM deployment
149
+ server for each new model version. If multiple equivalent vLLM
150
+ deployment servers are found, one is selected at random to be updated
151
+ and the others are deleted.
152
+
153
+ Args:
154
+ id: the UUID of the vLLM model deployer.
155
+ config: the configuration of the model to be deployed with vLLM.
156
+ timeout: the timeout in seconds to wait for the vLLM server
157
+ to be provisioned and successfully started or updated. If set
158
+ to 0, the method will return immediately after the vLLM
159
+ server is provisioned, without waiting for it to fully start.
160
+
161
+ Returns:
162
+ The ZenML vLLM deployment service object that can be used to
163
+ interact with the vLLM model http server.
164
+ """
165
+ config = cast(VLLMServiceConfig, config)
166
+ service = self._create_new_service(
167
+ id=id, timeout=timeout, config=config
168
+ )
169
+ logger.info(f"Created a new vLLM deployment service: {service}")
170
+ return service
171
+
172
+ def _clean_up_existing_service(
173
+ self,
174
+ timeout: int,
175
+ force: bool,
176
+ existing_service: VLLMDeploymentService,
177
+ ) -> None:
178
+ # stop the older service
179
+ existing_service.stop(timeout=timeout, force=force)
180
+
181
+ # delete the old configuration file
182
+ if existing_service.status.runtime_path:
183
+ shutil.rmtree(existing_service.status.runtime_path)
184
+
185
+ # the step will receive a config from the user that mentions the number
186
+ # of workers etc.the step implementation will create a new config using
187
+ # all values from the user and add values like pipeline name, model_uri
188
+ def _create_new_service(
189
+ self, id: UUID, timeout: int, config: VLLMServiceConfig
190
+ ) -> VLLMDeploymentService:
191
+ """Creates a new VLLMDeploymentService.
192
+
193
+ Args:
194
+ id: the ID of the vLLM deployment service to be created or updated.
195
+ timeout: the timeout in seconds to wait for the vLLM server
196
+ to be provisioned and successfully started or updated.
197
+ config: the configuration of the model to be deployed with vLLM.
198
+
199
+ Returns:
200
+ The VLLMDeploymentService object that can be used to interact
201
+ with the vLLM model server.
202
+ """
203
+ # set the root runtime path with the stack component's UUID
204
+ config.root_runtime_path = self.local_path
205
+ # create a new service for the new model
206
+ service = VLLMDeploymentService(uuid=id, config=config)
207
+ service.start(timeout=timeout)
208
+
209
+ return service
210
+
211
+ def perform_stop_model(
212
+ self,
213
+ service: BaseService,
214
+ timeout: int = DEFAULT_SERVICE_START_STOP_TIMEOUT,
215
+ force: bool = False,
216
+ ) -> BaseService:
217
+ """Method to stop a model server.
218
+
219
+ Args:
220
+ service: The service to stop.
221
+ timeout: Timeout in seconds to wait for the service to stop.
222
+ force: If True, force the service to stop.
223
+
224
+ Returns:
225
+ The stopped service.
226
+ """
227
+ service.stop(timeout=timeout, force=force)
228
+ return service
229
+
230
+ def perform_start_model(
231
+ self,
232
+ service: BaseService,
233
+ timeout: int = DEFAULT_SERVICE_START_STOP_TIMEOUT,
234
+ ) -> BaseService:
235
+ """Method to start a model server.
236
+
237
+ Args:
238
+ service: The service to start.
239
+ timeout: Timeout in seconds to wait for the service to start.
240
+
241
+ Returns:
242
+ The started service.
243
+ """
244
+ service.start(timeout=timeout)
245
+ return service
246
+
247
+ def perform_delete_model(
248
+ self,
249
+ service: BaseService,
250
+ timeout: int = DEFAULT_SERVICE_START_STOP_TIMEOUT,
251
+ force: bool = False,
252
+ ) -> None:
253
+ """Method to delete all configuration of a model server.
254
+
255
+ Args:
256
+ service: The service to delete.
257
+ timeout: Timeout in seconds to wait for the service to stop.
258
+ force: If True, force the service to stop.
259
+ """
260
+ service = cast(VLLMDeploymentService, service)
261
+ self._clean_up_existing_service(
262
+ existing_service=service, timeout=timeout, force=force
263
+ )
@@ -0,0 +1,19 @@
1
+ # Copyright (c) ZenML GmbH 2024. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12
+ # or implied. See the License for the specific language governing
13
+ # permissions and limitations under the License.
14
+ """Initialization of the vLLM Inference Server."""
15
+
16
+ from zenml.integrations.vllm.services.vllm_deployment import ( # noqa
17
+ VLLMDeploymentService,
18
+ VLLMServiceConfig,
19
+ )
@@ -0,0 +1,197 @@
1
+ # Copyright (c) ZenML GmbH 2024. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12
+ # or implied. See the License for the specific language governing
13
+ # permissions and limitations under the License.
14
+ """Implementation of the vLLM Inference Server Service."""
15
+
16
+ import os
17
+ from typing import Any, List, Optional, Union
18
+
19
+ from zenml.constants import DEFAULT_LOCAL_SERVICE_IP_ADDRESS
20
+ from zenml.logger import get_logger
21
+ from zenml.services import (
22
+ HTTPEndpointHealthMonitor,
23
+ HTTPEndpointHealthMonitorConfig,
24
+ LocalDaemonService,
25
+ LocalDaemonServiceConfig,
26
+ LocalDaemonServiceEndpoint,
27
+ LocalDaemonServiceEndpointConfig,
28
+ ServiceEndpointProtocol,
29
+ ServiceType,
30
+ )
31
+ from zenml.services.service import BaseDeploymentService
32
+
33
+ logger = get_logger(__name__)
34
+
35
+
36
+ VLLM_PREDICTION_URL_PATH = "v1"
37
+ VLLM_HEALTHCHECK_URL_PATH = "health"
38
+
39
+
40
+ class VLLMDeploymentEndpointConfig(LocalDaemonServiceEndpointConfig):
41
+ """vLLM deployment service configuration.
42
+
43
+ Attributes:
44
+ prediction_url_path: URI subpath for prediction requests
45
+ """
46
+
47
+ prediction_url_path: str
48
+
49
+
50
+ class VLLMDeploymentEndpoint(LocalDaemonServiceEndpoint):
51
+ """A service endpoint exposed by the vLLM deployment daemon.
52
+
53
+ Attributes:
54
+ config: service endpoint configuration
55
+ """
56
+
57
+ config: VLLMDeploymentEndpointConfig
58
+ monitor: HTTPEndpointHealthMonitor
59
+
60
+ @property
61
+ def prediction_url(self) -> Optional[str]:
62
+ """Gets the prediction URL for the endpoint.
63
+
64
+ Returns:
65
+ the prediction URL for the endpoint
66
+ """
67
+ uri = self.status.uri
68
+ if not uri:
69
+ return None
70
+ return os.path.join(uri, self.config.prediction_url_path)
71
+
72
+
73
+ class VLLMServiceConfig(LocalDaemonServiceConfig):
74
+ """vLLM service configurations."""
75
+
76
+ model: str
77
+ port: int
78
+ host: Optional[str] = None
79
+ blocking: bool = True
80
+ # If unspecified, model name or path will be used.
81
+ tokenizer: Optional[str] = None
82
+ served_model_name: Optional[Union[str, List[str]]] = None
83
+ # Trust remote code from huggingface.
84
+ trust_remote_code: Optional[bool] = False
85
+ # ['auto', 'slow', 'mistral']
86
+ tokenizer_mode: Optional[str] = "auto"
87
+ # ['auto', 'half', 'float16', 'bfloat16', 'float', 'float32']
88
+ dtype: Optional[str] = "auto"
89
+ # The specific model version to use. It can be a branch name, a tag name, or a commit id.
90
+ # If unspecified, will use the default version.
91
+ revision: Optional[str] = None
92
+
93
+
94
+ class VLLMDeploymentService(LocalDaemonService, BaseDeploymentService):
95
+ """vLLM Inference Server Deployment Service."""
96
+
97
+ SERVICE_TYPE = ServiceType(
98
+ name="vllm-deployment",
99
+ type="model-serving",
100
+ flavor="vllm",
101
+ description="vLLM Inference prediction service",
102
+ )
103
+ config: VLLMServiceConfig
104
+ endpoint: VLLMDeploymentEndpoint
105
+
106
+ def __init__(self, config: VLLMServiceConfig, **attrs: Any):
107
+ """Initialize the vLLM deployment service.
108
+
109
+ Args:
110
+ config: service configuration
111
+ attrs: additional attributes to set on the service
112
+ """
113
+ if isinstance(config, VLLMServiceConfig) and "endpoint" not in attrs:
114
+ endpoint = VLLMDeploymentEndpoint(
115
+ config=VLLMDeploymentEndpointConfig(
116
+ protocol=ServiceEndpointProtocol.HTTP,
117
+ port=config.port,
118
+ ip_address=config.host or DEFAULT_LOCAL_SERVICE_IP_ADDRESS,
119
+ prediction_url_path=VLLM_PREDICTION_URL_PATH,
120
+ ),
121
+ monitor=HTTPEndpointHealthMonitor(
122
+ config=HTTPEndpointHealthMonitorConfig(
123
+ healthcheck_uri_path=VLLM_HEALTHCHECK_URL_PATH,
124
+ )
125
+ ),
126
+ )
127
+ attrs["endpoint"] = endpoint
128
+ super().__init__(config=config, **attrs)
129
+
130
+ def run(self) -> None:
131
+ """Start the service."""
132
+ logger.info(
133
+ "Starting vLLM inference server service as blocking "
134
+ "process... press CTRL+C once to stop it."
135
+ )
136
+
137
+ self.endpoint.prepare_for_start()
138
+
139
+ import uvloop
140
+ from vllm.entrypoints.openai.api_server import run_server
141
+ from vllm.entrypoints.openai.cli_args import make_arg_parser
142
+ from vllm.utils import FlexibleArgumentParser
143
+
144
+ try:
145
+ parser = make_arg_parser(FlexibleArgumentParser())
146
+ args = parser.parse_args()
147
+ # Override port with the available port
148
+ self.config.port = self.endpoint.status.port
149
+ # Update the arguments in place
150
+ args.__dict__.update(self.config.model_dump())
151
+ uvloop.run(run_server(args=args))
152
+ except KeyboardInterrupt:
153
+ logger.info("Stopping vLLM prediction service...")
154
+
155
+ @property
156
+ def prediction_url(self) -> Optional[str]:
157
+ """Gets the prediction URL for the endpoint.
158
+
159
+ Returns:
160
+ the prediction URL for the endpoint
161
+ """
162
+ if not self.is_running:
163
+ return None
164
+ return self.endpoint.prediction_url_path
165
+
166
+ def predict(self, data: "Any") -> "Any":
167
+ """Make a prediction using the service.
168
+
169
+ Args:
170
+ data: data to make a prediction on
171
+
172
+ Returns:
173
+ The prediction result.
174
+
175
+ Raises:
176
+ Exception: if the service is not running
177
+ ValueError: if the prediction endpoint is unknown.
178
+ """
179
+ if not self.is_running:
180
+ raise Exception(
181
+ "vLLM Inference service is not running. "
182
+ "Please start the service before making predictions."
183
+ )
184
+ if self.endpoint.prediction_url is not None:
185
+ from openai import OpenAI
186
+
187
+ client = OpenAI(
188
+ api_key="EMPTY",
189
+ base_url=self.endpoint.prediction_url,
190
+ )
191
+ models = client.models.list()
192
+ model = models.data[0].id
193
+ result = client.completions.create(model=model, prompt=data)
194
+ # TODO: We can add support for client.chat.completions.create
195
+ else:
196
+ raise ValueError("No endpoint known for prediction.")
197
+ return result
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: zenml-nightly
3
- Version: 0.68.1.dev20241104
3
+ Version: 0.68.1.dev20241105
4
4
  Summary: ZenML: Write production-ready ML code.
5
5
  Home-page: https://zenml.io
6
6
  License: Apache-2.0
@@ -6,7 +6,7 @@ RELEASE_NOTES.md,sha256=oShLQurhMKncKnc_y7tiasEfgy1aCOOjxdpax-MlGI8,381641
6
6
  ROADMAP.md,sha256=hiLSmr16BH8Dfx7SaQM4JcXCGCVl6mFZPFAwJeDTrJU,407
7
7
  SECURITY.md,sha256=9DepA8y03yvCZLHEfcXLTDH4lUyKHquAdukBsccNN7c,682
8
8
  zenml/README.md,sha256=827dekbOWAs1BpW7VF1a4d7EbwPbjwccX-2zdXBENZo,1777
9
- zenml/VERSION,sha256=1S2siAIhM1zXNEGpC70GGCJ14D8KOlXLMXluWW4LEQI,19
9
+ zenml/VERSION,sha256=96VhSJ-qEwmgM7eMLpR1roireLTaxnyn3N4rKKXZBSE,19
10
10
  zenml/__init__.py,sha256=XhLh9kV87ErcivCctQJaTtUOjl6kugT3pVyqqLKzBP8,2058
11
11
  zenml/actions/__init__.py,sha256=mrt6wPo73iKRxK754_NqsGyJ3buW7RnVeIGXr1xEw8Y,681
12
12
  zenml/actions/base_action.py,sha256=UcaHev6BTuLDwuswnyaPjdA8AgUqB5xPZ-lRtuvf2FU,25553
@@ -130,7 +130,7 @@ zenml/image_builders/base_image_builder.py,sha256=-Y5N3zFZsMJvVuzm1M3tU-r38fT9KC
130
130
  zenml/image_builders/build_context.py,sha256=TTY5T8aG4epeKOOpLItr8PDjmDijfcGaY3zFzmGV1II,6157
131
131
  zenml/image_builders/local_image_builder.py,sha256=nxwzPGgB2ePE51HcvT6hM6w37j9gn2ITEJuPMrx_SKw,5709
132
132
  zenml/integrations/README.md,sha256=hFIZwjsAItHjvDWVBqGSF-ZAeMsFR2GKX1Axl2g1Bz0,6190
133
- zenml/integrations/__init__.py,sha256=fcBgKyAPokSLnMECABQi3S4P2sLaz4EpKFjb8Gv7DRQ,4781
133
+ zenml/integrations/__init__.py,sha256=ciJbNsqNPTHpWeMbFfLNa8fJ0jg8AxJUjOPnqrYPl9M,4843
134
134
  zenml/integrations/airflow/__init__.py,sha256=7ffV98vlrdH1RfWHkv8TXNd3hjtXSx4z2U7MZin-87I,1483
135
135
  zenml/integrations/airflow/flavors/__init__.py,sha256=Y48mn5OxERPPaXDBd5CFAIn6yhLPsgN5ZMk26hLXiNM,800
136
136
  zenml/integrations/airflow/flavors/airflow_orchestrator_flavor.py,sha256=VfZQD2H-WwIgVD1Fi7uewdnkvRoSykY0YCfROFDadXg,6189
@@ -198,7 +198,7 @@ zenml/integrations/comet/experiment_trackers/__init__.py,sha256=reGygyAEgMrlc-9Q
198
198
  zenml/integrations/comet/experiment_trackers/comet_experiment_tracker.py,sha256=JnB_TqiCD8t9t6cVxWoomxvBuhA4jIJHYFZ-gKdGXf8,5767
199
199
  zenml/integrations/comet/flavors/__init__.py,sha256=x-XK-YwHMxz3zZPoIXo3X5vq_5VYUJAnsIoEX_ZooOU,883
200
200
  zenml/integrations/comet/flavors/comet_experiment_tracker_flavor.py,sha256=Rkk1UtEVY2MQBKbUHKxYQpDTWndkOYF8KuKuMGZAb24,3706
201
- zenml/integrations/constants.py,sha256=zF1MJ6TzxS5gnVORrB1bXGyhH3VIEEeL-M5Fs8fQCBM,2041
201
+ zenml/integrations/constants.py,sha256=Qi3uwS9jIxGY1v4nES-5npWuQTS2uOj6IEUKyOzLehM,2055
202
202
  zenml/integrations/databricks/__init__.py,sha256=dkyTxfwIete7mRBlDzIfsTmllYgrd4DB2P4brXHPMUs,2414
203
203
  zenml/integrations/databricks/flavors/__init__.py,sha256=S-BZ3R9iKGOw-aUltR8I0ULEe2-LKGTIZhQv9TlnXfk,1122
204
204
  zenml/integrations/databricks/flavors/databricks_model_deployer_flavor.py,sha256=eDyYVqO2x1A9qgGICKJx5Z3qiUuTMfW9R3NZUO8OiRk,3591
@@ -537,6 +537,13 @@ zenml/integrations/tensorflow/materializers/__init__.py,sha256=iQVlAHAqdD6ItJlJy
537
537
  zenml/integrations/tensorflow/materializers/keras_materializer.py,sha256=BRXo3w1nB7eujOfFVez79kjhtJjm42Lc498tW4Hx0AY,3281
538
538
  zenml/integrations/tensorflow/materializers/tf_dataset_materializer.py,sha256=ozgJzZ8OBP0dv87hfUa7-8DNPYlQdaf8jKRVNvqLR6A,2810
539
539
  zenml/integrations/utils.py,sha256=Pw3f7x_nuhpfq-TmYaTqF-bcIYCBIUChcwQtyVaTyY8,2698
540
+ zenml/integrations/vllm/__init__.py,sha256=3ZvUoWUGvYRGg-F_My9Vx4q2_ywDeWcKciyv9E1DFAU,1623
541
+ zenml/integrations/vllm/flavors/__init__.py,sha256=oyOnp9JXWXCYPBvcQkkNrkFAboypx-li-Pyd0YAxb9A,853
542
+ zenml/integrations/vllm/flavors/vllm_model_deployer_flavor.py,sha256=_3P0-qyjdsVzoUftaotT57mtc2EWJe7DljltogdHpoY,2646
543
+ zenml/integrations/vllm/model_deployers/__init__.py,sha256=Z38oWIfkArNsxCm3rQkTdYK4dbtx2BpTUw1gw_kl6Do,803
544
+ zenml/integrations/vllm/model_deployers/vllm_model_deployer.py,sha256=OYPNSkB-I5r4eQ_7kr4F7GDwNj6efcsio8WRteQ5cYI,9665
545
+ zenml/integrations/vllm/services/__init__.py,sha256=Id28GEfHECI0RnGAGGNioD9eZ6aJxdNebe112VgC59g,788
546
+ zenml/integrations/vllm/services/vllm_deployment.py,sha256=jPVKstcJ2AFmEG7R0Q6CcNUz0EEybBZok56F0QSgdTI,6619
540
547
  zenml/integrations/wandb/__init__.py,sha256=LBlnX4chpaB3atIsxkF0RSz2AJs9gHQWRptkgkqF6lw,1711
541
548
  zenml/integrations/wandb/experiment_trackers/__init__.py,sha256=8nFyyvh-PTF5d9ZfjS7xFSWTWSpreRB1azePv-Ex2sc,771
542
549
  zenml/integrations/wandb/experiment_trackers/wandb_experiment_tracker.py,sha256=xNkF-3-WwpC8OV38T5evV35t6rH5o3O6uBlX4cimsKs,5092
@@ -1245,8 +1252,8 @@ zenml/zen_stores/secrets_stores/sql_secrets_store.py,sha256=Bq1djrUP9saoD7vECjS7
1245
1252
  zenml/zen_stores/sql_zen_store.py,sha256=n5LWV-VBX2cfLDNQDk1F_xBCIklEs8Tug54Iafr7_YU,402789
1246
1253
  zenml/zen_stores/template_utils.py,sha256=EKYBgmDLTS_PSMWaIO5yvHPLiQvMqHcsAe6NUCrv-i4,9068
1247
1254
  zenml/zen_stores/zen_store_interface.py,sha256=kzR_i8vHjULld3MquSaMorcab8lJk1e9RZquw1VXjHY,93510
1248
- zenml_nightly-0.68.1.dev20241104.dist-info/LICENSE,sha256=wbnfEnXnafPbqwANHkV6LUsPKOtdpsd-SNw37rogLtc,11359
1249
- zenml_nightly-0.68.1.dev20241104.dist-info/METADATA,sha256=YQvhWfYfiU7EdqKTJv97zWsJOhWPmdI-fu92C77Yoc8,21208
1250
- zenml_nightly-0.68.1.dev20241104.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
1251
- zenml_nightly-0.68.1.dev20241104.dist-info/entry_points.txt,sha256=QK3ETQE0YswAM2mWypNMOv8TLtr7EjnqAFq1br_jEFE,43
1252
- zenml_nightly-0.68.1.dev20241104.dist-info/RECORD,,
1255
+ zenml_nightly-0.68.1.dev20241105.dist-info/LICENSE,sha256=wbnfEnXnafPbqwANHkV6LUsPKOtdpsd-SNw37rogLtc,11359
1256
+ zenml_nightly-0.68.1.dev20241105.dist-info/METADATA,sha256=DqsZKui96Zfhm9sNSEH6-vT66FATfUjIuOHEYsTn0T0,21208
1257
+ zenml_nightly-0.68.1.dev20241105.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
1258
+ zenml_nightly-0.68.1.dev20241105.dist-info/entry_points.txt,sha256=QK3ETQE0YswAM2mWypNMOv8TLtr7EjnqAFq1br_jEFE,43
1259
+ zenml_nightly-0.68.1.dev20241105.dist-info/RECORD,,