zenml-nightly 0.68.1.dev20241103__py3-none-any.whl → 0.68.1.dev20241106__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zenml/VERSION +1 -1
- zenml/artifacts/{load_directory_materializer.py → preexisting_data_materializer.py} +8 -9
- zenml/artifacts/utils.py +1 -1
- zenml/integrations/__init__.py +3 -1
- zenml/integrations/bentoml/materializers/bentoml_bento_materializer.py +19 -31
- zenml/integrations/constants.py +1 -0
- zenml/integrations/huggingface/materializers/huggingface_datasets_materializer.py +8 -12
- zenml/integrations/huggingface/materializers/huggingface_pt_model_materializer.py +17 -18
- zenml/integrations/huggingface/materializers/huggingface_t5_materializer.py +2 -5
- zenml/integrations/huggingface/materializers/huggingface_tf_model_materializer.py +17 -18
- zenml/integrations/huggingface/materializers/huggingface_tokenizer_materializer.py +2 -3
- zenml/integrations/langchain/__init__.py +2 -1
- zenml/integrations/langchain/materializers/openai_embedding_materializer.py +28 -2
- zenml/integrations/lightgbm/materializers/lightgbm_booster_materializer.py +8 -15
- zenml/integrations/lightgbm/materializers/lightgbm_dataset_materializer.py +11 -16
- zenml/integrations/openai/__init__.py +1 -1
- zenml/integrations/openai/hooks/open_ai_failure_hook.py +39 -14
- zenml/integrations/pillow/materializers/pillow_image_materializer.py +17 -20
- zenml/integrations/polars/materializers/dataframe_materializer.py +26 -39
- zenml/integrations/pycaret/materializers/model_materializer.py +7 -22
- zenml/integrations/tensorflow/materializers/keras_materializer.py +11 -22
- zenml/integrations/tensorflow/materializers/tf_dataset_materializer.py +8 -15
- zenml/integrations/vllm/__init__.py +50 -0
- zenml/integrations/vllm/flavors/__init__.py +21 -0
- zenml/integrations/vllm/flavors/vllm_model_deployer_flavor.py +91 -0
- zenml/integrations/vllm/model_deployers/__init__.py +19 -0
- zenml/integrations/vllm/model_deployers/vllm_model_deployer.py +263 -0
- zenml/integrations/vllm/services/__init__.py +19 -0
- zenml/integrations/vllm/services/vllm_deployment.py +197 -0
- zenml/integrations/whylogs/materializers/whylogs_materializer.py +11 -18
- zenml/integrations/xgboost/materializers/xgboost_booster_materializer.py +11 -22
- zenml/integrations/xgboost/materializers/xgboost_dmatrix_materializer.py +10 -19
- zenml/materializers/base_materializer.py +68 -1
- zenml/orchestrators/step_runner.py +4 -1
- zenml/stack/flavor.py +9 -5
- zenml/steps/step_context.py +2 -0
- zenml/utils/callback_registry.py +71 -0
- {zenml_nightly-0.68.1.dev20241103.dist-info → zenml_nightly-0.68.1.dev20241106.dist-info}/METADATA +1 -1
- {zenml_nightly-0.68.1.dev20241103.dist-info → zenml_nightly-0.68.1.dev20241106.dist-info}/RECORD +42 -34
- {zenml_nightly-0.68.1.dev20241103.dist-info → zenml_nightly-0.68.1.dev20241106.dist-info}/LICENSE +0 -0
- {zenml_nightly-0.68.1.dev20241103.dist-info → zenml_nightly-0.68.1.dev20241106.dist-info}/WHEEL +0 -0
- {zenml_nightly-0.68.1.dev20241103.dist-info → zenml_nightly-0.68.1.dev20241106.dist-info}/entry_points.txt +0 -0
zenml/VERSION
CHANGED
@@ -1 +1 @@
-0.68.1.dev20241103
+0.68.1.dev20241106
zenml/artifacts/{load_directory_materializer.py → preexisting_data_materializer.py}
CHANGED
@@ -14,7 +14,6 @@
 """Only-load materializer for directories."""
 
 import os
-import tempfile
 from pathlib import Path
 from typing import Any, ClassVar, Tuple, Type
 
@@ -46,14 +45,14 @@ class PreexistingDataMaterializer(BaseMaterializer):
         Returns:
             Path to the local directory that contains the artifact files.
         """
-
-
-
-
-
-
-
-
+        with self.get_temporary_directory(delete_at_exit=False) as temp_dir:
+            if fileio.isdir(self.uri):
+                self._copy_directory(src=self.uri, dst=temp_dir)
+                return Path(temp_dir)
+            else:
+                dst = os.path.join(temp_dir, os.path.split(self.uri)[-1])
+                fileio.copy(src=self.uri, dst=dst)
+                return Path(dst)
 
     def save(self, data: Any) -> None:
         """Store the directory in the artifact store.
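This rename and most of the materializer hunks that follow share the same refactor: ad-hoc `tempfile` handling is replaced by a new `BaseMaterializer.get_temporary_directory(delete_at_exit=...)` context manager (added in `zenml/materializers/base_materializer.py`, +68 lines in the file list above). A rough sketch of how a custom materializer might adopt the helper, assuming only the behaviour visible in these hunks; `MyTextMaterializer` and the `data.txt` filename are hypothetical:

import os
from typing import Any, ClassVar, Tuple, Type

from zenml.enums import ArtifactType
from zenml.io import fileio
from zenml.materializers.base_materializer import BaseMaterializer


class MyTextMaterializer(BaseMaterializer):
    """Hypothetical materializer for plain-text payloads."""

    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (str,)
    ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.DATA

    def save(self, data: str) -> None:
        # Stage locally, then copy into the artifact store; the temporary
        # directory is cleaned up when the context manager exits.
        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
            temp_file = os.path.join(temp_dir, "data.txt")
            with open(temp_file, "w") as f:
                f.write(data)
            fileio.copy(temp_file, os.path.join(self.uri, "data.txt"))

    def load(self, data_type: Type[Any]) -> str:
        # Copy from the artifact store into a local temporary directory first.
        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
            temp_file = os.path.join(temp_dir, "data.txt")
            fileio.copy(os.path.join(self.uri, "data.txt"), temp_file)
            with open(temp_file) as f:
                return f.read()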
zenml/artifacts/utils.py
CHANGED
@@ -31,7 +31,7 @@ from typing import (
 from uuid import UUID, uuid4
 
 from zenml.artifacts.artifact_config import ArtifactConfig
-from zenml.artifacts.load_directory_materializer import (
+from zenml.artifacts.preexisting_data_materializer import (
     PreexistingDataMaterializer,
 )
 from zenml.client import Client
zenml/integrations/__init__.py
CHANGED
@@ -45,6 +45,7 @@ from zenml.integrations.kubernetes import KubernetesIntegration  # noqa
 from zenml.integrations.label_studio import LabelStudioIntegration  # noqa
 from zenml.integrations.langchain import LangchainIntegration  # noqa
 from zenml.integrations.lightgbm import LightGBMIntegration  # noqa
+
 # from zenml.integrations.llama_index import LlamaIndexIntegration  # noqa
 from zenml.integrations.mlflow import MlflowIntegration  # noqa
 from zenml.integrations.neptune import NeptuneIntegration  # noqa
@@ -52,7 +53,7 @@ from zenml.integrations.neural_prophet import NeuralProphetIntegration  # noqa
 from zenml.integrations.numpy import NumpyIntegration  # noqa
 from zenml.integrations.openai import OpenAIIntegration  # noqa
 from zenml.integrations.pandas import PandasIntegration  # noqa
-from zenml.integrations.pigeon import PigeonIntegration
+from zenml.integrations.pigeon import PigeonIntegration  # noqa
 from zenml.integrations.pillow import PillowIntegration  # noqa
 from zenml.integrations.polars import PolarsIntegration  # noqa
 from zenml.integrations.prodigy import ProdigyIntegration  # noqa
@@ -78,3 +79,4 @@ from zenml.integrations.tensorflow import TensorflowIntegration  # noqa
 from zenml.integrations.wandb import WandbIntegration  # noqa
 from zenml.integrations.whylogs import WhylogsIntegration  # noqa
 from zenml.integrations.xgboost import XgboostIntegration  # noqa
+from zenml.integrations.vllm import VLLMIntegration  # noqa
zenml/integrations/bentoml/materializers/bentoml_bento_materializer.py
CHANGED
@@ -14,7 +14,6 @@
 """Materializer for BentoML Bento objects."""
 
 import os
-import tempfile
 from typing import TYPE_CHECKING, Any, ClassVar, Dict, Tuple, Type
 
 import bentoml
@@ -23,7 +22,6 @@ from bentoml.exceptions import BentoMLException
 
 from zenml.enums import ArtifactType
 from zenml.integrations.bentoml.constants import DEFAULT_BENTO_FILENAME
-from zenml.io import fileio
 from zenml.logger import get_logger
 from zenml.materializers.base_materializer import BaseMaterializer
 from zenml.utils import io_utils
@@ -49,23 +47,21 @@ class BentoMaterializer(BaseMaterializer):
         Returns:
             An bento.Bento object.
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        imported_bento.save()
-        return imported_bento
+        with self.get_temporary_directory(delete_at_exit=False) as temp_dir:
+            # Copy from artifact store to temporary directory
+            io_utils.copy_dir(self.uri, temp_dir)
+
+            # Load the Bento from the temporary directory
+            imported_bento = Bento.import_from(
+                os.path.join(temp_dir, DEFAULT_BENTO_FILENAME)
+            )
+
+            # Try save the Bento to the local BentoML store
+            try:
+                _ = bentoml.get(imported_bento.tag)
+            except BentoMLException:
+                imported_bento.save()
+            return imported_bento
 
     def save(self, bento: bento.Bento) -> None:
         """Write to artifact store.
@@ -73,18 +69,10 @@ class BentoMaterializer(BaseMaterializer):
         Args:
             bento: An bento.Bento object.
         """
-
-
-
-
-        # save the image in a temporary directory
-        bentoml.export_bento(bento.tag, temp_bento_path)
-
-        # copy the saved image to the artifact store
-        io_utils.copy_dir(temp_dir.name, self.uri)
-
-        # Remove the temporary directory
-        fileio.rmtree(temp_dir.name)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            temp_bento_path = os.path.join(temp_dir, DEFAULT_BENTO_FILENAME)
+            bentoml.export_bento(bento.tag, temp_bento_path)
+            io_utils.copy_dir(temp_dir, self.uri)
 
     def extract_metadata(
         self, bento: bento.Bento
zenml/integrations/constants.py
CHANGED
zenml/integrations/huggingface/materializers/huggingface_datasets_materializer.py
CHANGED
@@ -15,7 +15,6 @@
 
 import os
 from collections import defaultdict
-from tempfile import TemporaryDirectory, mkdtemp
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -88,12 +87,12 @@ class HFDatasetMaterializer(BaseMaterializer):
         Returns:
             The dataset read from the specified dir.
         """
-
-
-
-
-
-
+        with self.get_temporary_directory(delete_at_exit=False) as temp_dir:
+            io_utils.copy_dir(
+                os.path.join(self.uri, DEFAULT_DATASET_DIR),
+                temp_dir,
+            )
+            return load_from_disk(temp_dir)
 
     def save(self, ds: Union[Dataset, DatasetDict]) -> None:
         """Writes a Dataset to the specified dir.
@@ -101,16 +100,13 @@ class HFDatasetMaterializer(BaseMaterializer):
         Args:
             ds: The Dataset to write.
         """
-
-
-        try:
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            path = os.path.join(temp_dir, DEFAULT_DATASET_DIR)
             ds.save_to_disk(path)
             io_utils.copy_dir(
                 path,
                 os.path.join(self.uri, DEFAULT_DATASET_DIR),
             )
-        finally:
-            fileio.rmtree(temp_dir.name)
 
     def extract_metadata(
         self, ds: Union[Dataset, DatasetDict]
zenml/integrations/huggingface/materializers/huggingface_pt_model_materializer.py
CHANGED
@@ -15,7 +15,6 @@
 
 import importlib
 import os
-from tempfile import TemporaryDirectory
 from typing import Any, ClassVar, Dict, Tuple, Type
 
 from transformers import (
@@ -46,17 +45,17 @@ class HFPTModelMaterializer(BaseMaterializer):
         Returns:
             The model read from the specified dir.
         """
-
-
-
-
-
-
-
-
-
-
-
+        with self.get_temporary_directory(delete_at_exit=False) as temp_dir:
+            io_utils.copy_dir(
+                os.path.join(self.uri, DEFAULT_PT_MODEL_DIR), temp_dir
+            )
+
+            config = AutoConfig.from_pretrained(temp_dir)
+            architecture = config.architectures[0]
+            model_cls = getattr(
+                importlib.import_module("transformers"), architecture
+            )
+            return model_cls.from_pretrained(temp_dir)
 
     def save(self, model: PreTrainedModel) -> None:
         """Writes a Model to the specified dir.
@@ -64,12 +63,12 @@ class HFPTModelMaterializer(BaseMaterializer):
         Args:
             model: The Torch Model to write.
         """
-
-
-
-
-
-
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            model.save_pretrained(temp_dir)
+            io_utils.copy_dir(
+                temp_dir,
+                os.path.join(self.uri, DEFAULT_PT_MODEL_DIR),
+            )
 
     def extract_metadata(
         self, model: PreTrainedModel
zenml/integrations/huggingface/materializers/huggingface_t5_materializer.py
CHANGED
@@ -14,7 +14,6 @@
 """Implementation of the Huggingface t5 materializer."""
 
 import os
-import tempfile
 from typing import Any, ClassVar, Type, Union
 
 from transformers import (
@@ -52,8 +51,7 @@ class HFT5Materializer(BaseMaterializer):
             ValueError: Unsupported data type used
         """
         filepath = self.uri
-
-        with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir:
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
             # Copy files from artifact store to temporary directory
             for file in fileio.listdir(filepath):
                 src = os.path.join(filepath, file)
@@ -86,8 +84,7 @@ class HFT5Materializer(BaseMaterializer):
         Args:
             obj: A T5ForConditionalGeneration model or T5Tokenizer.
         """
-
-        with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir:
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
             # Save the model or tokenizer
             obj.save_pretrained(temp_dir)
 
zenml/integrations/huggingface/materializers/huggingface_tf_model_materializer.py
CHANGED
@@ -15,7 +15,6 @@
 
 import importlib
 import os
-from tempfile import TemporaryDirectory
 from typing import Any, ClassVar, Dict, Tuple, Type
 
 from transformers import (
@@ -46,17 +45,17 @@ class HFTFModelMaterializer(BaseMaterializer):
         Returns:
             The model read from the specified dir.
         """
-
-
-
-
-
-
-
-
-
-
-
+        with self.get_temporary_directory(delete_at_exit=False) as temp_dir:
+            io_utils.copy_dir(
+                os.path.join(self.uri, DEFAULT_TF_MODEL_DIR), temp_dir
+            )
+
+            config = AutoConfig.from_pretrained(temp_dir)
+            architecture = "TF" + config.architectures[0]
+            model_cls = getattr(
+                importlib.import_module("transformers"), architecture
+            )
+            return model_cls.from_pretrained(temp_dir)
 
     def save(self, model: TFPreTrainedModel) -> None:
         """Writes a Model to the specified dir.
@@ -64,12 +63,12 @@ class HFTFModelMaterializer(BaseMaterializer):
         Args:
             model: The TF Model to write.
         """
-
-
-
-
-
-
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            model.save_pretrained(temp_dir)
+            io_utils.copy_dir(
+                temp_dir,
+                os.path.join(self.uri, DEFAULT_TF_MODEL_DIR),
+            )
 
     def extract_metadata(
         self, model: TFPreTrainedModel
zenml/integrations/huggingface/materializers/huggingface_tokenizer_materializer.py
CHANGED
@@ -14,7 +14,6 @@
 """Implementation of the Huggingface tokenizer materializer."""
 
 import os
-from tempfile import TemporaryDirectory
 from typing import Any, ClassVar, Tuple, Type
 
 from transformers import AutoTokenizer
@@ -46,7 +45,7 @@ class HFTokenizerMaterializer(BaseMaterializer):
         Returns:
             The tokenizer read from the specified dir.
         """
-        with
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
             io_utils.copy_dir(
                 os.path.join(self.uri, DEFAULT_TOKENIZER_DIR), temp_dir
             )
@@ -58,7 +57,7 @@ class HFTokenizerMaterializer(BaseMaterializer):
         Args:
             tokenizer: The HFTokenizer to write.
         """
-        with
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
             tokenizer.save_pretrained(temp_dir)
             io_utils.copy_dir(
                 temp_dir,
zenml/integrations/langchain/materializers/openai_embedding_materializer.py
CHANGED
@@ -24,11 +24,37 @@ from zenml.materializers.cloudpickle_materializer import (
 if TYPE_CHECKING and sys.version_info < (3, 8):
     OpenAIEmbeddings = Any
 else:
-    from
+    from langchain_community.embeddings import (
+        OpenAIEmbeddings,
+    )
 
 
 class LangchainOpenaiEmbeddingMaterializer(CloudpickleMaterializer):
-    """
+    """Materializer for Langchain OpenAI Embeddings."""
 
     ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.MODEL
     ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (OpenAIEmbeddings,)
+
+    def save(self, embeddings: Any) -> None:
+        """Saves the embeddings model after clearing non-picklable clients.
+
+        Args:
+            embeddings: The embeddings model to save.
+        """
+        # Clear the clients which will be recreated on load
+        embeddings.client = None
+        embeddings.async_client = None
+
+        # Use the parent class's save implementation which uses cloudpickle
+        super().save(embeddings)
+
+    def load(self, data_type: Type[Any]) -> Any:
+        """Loads the embeddings model and lets it recreate clients when needed.
+
+        Args:
+            data_type: The type of the data to load.
+
+        Returns:
+            The loaded embeddings model.
+        """
+        return super().load(data_type)
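Since the materializer lists `OpenAIEmbeddings` in `ASSOCIATED_TYPES`, a step can return the embeddings object directly and ZenML resolves this materializer by type. A hypothetical step, assuming `langchain_community` is installed and `OPENAI_API_KEY` is set in the environment:

from langchain_community.embeddings import OpenAIEmbeddings

from zenml import step


@step
def build_embeddings() -> OpenAIEmbeddings:
    # The materializer clears `client`/`async_client` before pickling and
    # langchain recreates them lazily once the artifact is loaded again.
    return OpenAIEmbeddings()  # reads OPENAI_API_KEY from the environment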
zenml/integrations/lightgbm/materializers/lightgbm_booster_materializer.py
CHANGED
@@ -14,7 +14,6 @@
 """Implementation of the LightGBM booster materializer."""
 
 import os
-import tempfile
 from typing import Any, ClassVar, Tuple, Type
 
 import lightgbm as lgb
@@ -42,18 +41,13 @@ class LightGBMBoosterMaterializer(BaseMaterializer):
             A lightgbm Booster object.
         """
         filepath = os.path.join(self.uri, DEFAULT_FILENAME)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
 
-
-
-
-
-        # Copy from artifact store to temporary file
-        fileio.copy(filepath, temp_file)
-        booster = lgb.Booster(model_file=temp_file)
-
-        # Cleanup and return
-        fileio.rmtree(temp_dir)
-        return booster
+            # Copy from artifact store to temporary file
+            fileio.copy(filepath, temp_file)
+            booster = lgb.Booster(model_file=temp_file)
+            return booster
 
     def save(self, booster: lgb.Booster) -> None:
         """Creates a JSON serialization for a lightgbm Booster model.
@@ -62,8 +56,7 @@ class LightGBMBoosterMaterializer(BaseMaterializer):
             booster: A lightgbm Booster model.
         """
         filepath = os.path.join(self.uri, DEFAULT_FILENAME)
-
-
-        tmp_path = os.path.join(tmp_dir, "model.txt")
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            tmp_path = os.path.join(temp_dir, "model.txt")
             booster.save_model(tmp_path)
             fileio.copy(tmp_path, filepath)
zenml/integrations/lightgbm/materializers/lightgbm_dataset_materializer.py
CHANGED
@@ -14,7 +14,6 @@
 """Implementation of the LightGBM materializer."""
 
 import os
-import tempfile
 from typing import TYPE_CHECKING, Any, ClassVar, Dict, Tuple, Type
 
 import lightgbm as lgb
@@ -46,16 +45,14 @@ class LightGBMDatasetMaterializer(BaseMaterializer):
         """
         filepath = os.path.join(self.uri, DEFAULT_FILENAME)
 
-
-
-        temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
+        with self.get_temporary_directory(delete_at_exit=False) as temp_dir:
+            temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
 
-
-
-
+            # Copy from artifact store to temporary file
+            fileio.copy(filepath, temp_file)
+            matrix = lgb.Dataset(temp_file, free_raw_data=False)
 
-
-        return matrix
+            return matrix
 
     def save(self, matrix: lgb.Dataset) -> None:
         """Creates a binary serialization for a lightgbm.Dataset object.
@@ -65,14 +62,12 @@ class LightGBMDatasetMaterializer(BaseMaterializer):
         """
         filepath = os.path.join(self.uri, DEFAULT_FILENAME)
 
-
-
-
-        matrix.save_binary(temp_file)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
+            matrix.save_binary(temp_file)
 
-
-
-        fileio.rmtree(temp_dir)
+            # Copy it into artifact store
+            fileio.copy(temp_file, filepath)
 
     def extract_metadata(
         self, matrix: lgb.Dataset
zenml/integrations/openai/hooks/open_ai_failure_hook.py
CHANGED
@@ -15,8 +15,9 @@
 
 import io
 import sys
+from typing import Optional
 
-import
+from openai import OpenAI
 from rich.console import Console
 
 from zenml import get_step_context
@@ -38,6 +39,8 @@ def openai_alerter_failure_hook_helper(
     Args:
         exception: The exception that was raised.
        model_name: The OpenAI model to use for the chatbot.
+
+    This implementation uses the OpenAI v1 SDK with automatic retries and backoff.
     """
     client = Client()
     context = get_step_context()
@@ -47,12 +50,15 @@ def openai_alerter_failure_hook_helper(
         openai_secret = client.get_secret(
             "openai", allow_partial_name_match=False
         )
-        openai_api_key = openai_secret.secret_values.get(
+        openai_api_key: Optional[str] = openai_secret.secret_values.get(
+            "api_key"
+        )
     except (KeyError, NotImplementedError):
         openai_api_key = None
 
     alerter = client.active_stack.alerter
     if alerter and openai_api_key:
+        # Capture rich traceback
         output_captured = io.StringIO()
         original_stdout = sys.stdout
         sys.stdout = output_captured
@@ -62,25 +68,44 @@ def openai_alerter_failure_hook_helper(
         sys.stdout = original_stdout
         rich_traceback = output_captured.getvalue()
 
-
+        # Initialize OpenAI client with timeout and retry settings
+        openai_client = OpenAI(
+            api_key=openai_api_key,
+            max_retries=3,  # Will retry 3 times with exponential backoff
+            timeout=60.0,  # 60 second timeout
+        )
+
+        # Create chat completion using the new client pattern
+        response = openai_client.chat.completions.create(
             model=model_name,
             messages=[
                 {
                     "role": "user",
-                    "content": f"This is an error message (following an exception of type '{type(exception)}')
+                    "content": f"This is an error message (following an exception of type '{type(exception)}') "
+                    f"I encountered while executing a ZenML step. Please suggest ways I might fix the problem. "
+                    f"Feel free to give code snippets as examples, and note that your response will be piped "
+                    f"to a Slack bot so make sure the formatting is appropriate: {exception} -- {rich_traceback}. "
+                    f"Thank you!",
                 }
             ],
         )
-
-
-
-
-        message
-
-
-
-
+
+        suggestion = response.choices[0].message.content
+
+        # Format the alert message
+        message = "\n".join(
+            [
+                "*Failure Hook Notification! Step failed!*",
+                "",
+                f"Run name: `{context.pipeline_run.name}`",
+                f"Step name: `{context.step_run.name}`",
+                f"Parameters: `{context.step_run.config.parameters}`",
+                f"Exception: `({type(exception)}) {exception}`",
+                "",
+                f"*OpenAI ChatGPT's suggestion (model = `{model_name}`) on how to fix it:*\n `{suggestion}`",
+            ]
         )
+
         alerter.post(message)
     elif not openai_api_key:
         logger.warning(
@@ -111,4 +136,4 @@ def openai_gpt4_alerter_failure_hook(
     Args:
         exception: The exception that was raised.
     """
-    openai_alerter_failure_hook_helper(exception, "gpt-
+    openai_alerter_failure_hook_helper(exception, "gpt-4o")
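For reference, this helper is meant to be wired up as a step failure hook. A minimal, hypothetical example (it assumes an alerter in the active stack and a ZenML secret named `openai` containing an `api_key` value, as queried above; `flaky_step` and `demo_pipeline` are made up for illustration):

from zenml import pipeline, step
from zenml.integrations.openai.hooks.open_ai_failure_hook import (
    openai_gpt4_alerter_failure_hook,
)


@step(on_failure=openai_gpt4_alerter_failure_hook)
def flaky_step() -> None:
    # Any uncaught exception triggers the hook, which asks the configured
    # OpenAI model for a fix suggestion and posts it via the stack's alerter.
    raise ValueError("Something went wrong")


@pipeline
def demo_pipeline() -> None:
    flaky_step()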
|