zenml-nightly 0.68.1.dev20241105__py3-none-any.whl → 0.68.1.dev20241107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zenml/VERSION +1 -1
- zenml/artifacts/{load_directory_materializer.py → preexisting_data_materializer.py} +8 -9
- zenml/artifacts/utils.py +121 -59
- zenml/constants.py +1 -0
- zenml/integrations/bentoml/materializers/bentoml_bento_materializer.py +19 -31
- zenml/integrations/evidently/__init__.py +1 -1
- zenml/integrations/huggingface/materializers/huggingface_datasets_materializer.py +8 -12
- zenml/integrations/huggingface/materializers/huggingface_pt_model_materializer.py +17 -18
- zenml/integrations/huggingface/materializers/huggingface_t5_materializer.py +2 -5
- zenml/integrations/huggingface/materializers/huggingface_tf_model_materializer.py +17 -18
- zenml/integrations/huggingface/materializers/huggingface_tokenizer_materializer.py +2 -3
- zenml/integrations/lightgbm/materializers/lightgbm_booster_materializer.py +8 -15
- zenml/integrations/lightgbm/materializers/lightgbm_dataset_materializer.py +11 -16
- zenml/integrations/pillow/materializers/pillow_image_materializer.py +17 -20
- zenml/integrations/polars/materializers/dataframe_materializer.py +26 -39
- zenml/integrations/pycaret/materializers/model_materializer.py +7 -22
- zenml/integrations/tensorflow/materializers/keras_materializer.py +11 -22
- zenml/integrations/tensorflow/materializers/tf_dataset_materializer.py +8 -15
- zenml/integrations/vllm/services/vllm_deployment.py +16 -7
- zenml/integrations/whylogs/materializers/whylogs_materializer.py +11 -18
- zenml/integrations/xgboost/materializers/xgboost_booster_materializer.py +11 -22
- zenml/integrations/xgboost/materializers/xgboost_dmatrix_materializer.py +10 -19
- zenml/materializers/base_materializer.py +68 -1
- zenml/orchestrators/step_runner.py +17 -11
- zenml/stack/flavor.py +9 -5
- zenml/steps/step_context.py +2 -0
- zenml/utils/callback_registry.py +71 -0
- zenml/zen_server/rbac/endpoint_utils.py +43 -1
- zenml/zen_server/routers/artifact_version_endpoints.py +27 -1
- zenml/zen_stores/rest_zen_store.py +52 -0
- zenml/zen_stores/sql_zen_store.py +16 -0
- zenml/zen_stores/zen_store_interface.py +13 -0
- {zenml_nightly-0.68.1.dev20241105.dist-info → zenml_nightly-0.68.1.dev20241107.dist-info}/METADATA +1 -1
- {zenml_nightly-0.68.1.dev20241105.dist-info → zenml_nightly-0.68.1.dev20241107.dist-info}/RECORD +37 -36
- {zenml_nightly-0.68.1.dev20241105.dist-info → zenml_nightly-0.68.1.dev20241107.dist-info}/LICENSE +0 -0
- {zenml_nightly-0.68.1.dev20241105.dist-info → zenml_nightly-0.68.1.dev20241107.dist-info}/WHEEL +0 -0
- {zenml_nightly-0.68.1.dev20241105.dist-info → zenml_nightly-0.68.1.dev20241107.dist-info}/entry_points.txt +0 -0
@@ -14,7 +14,6 @@
|
|
14
14
|
"""Implementation of the LightGBM materializer."""
|
15
15
|
|
16
16
|
import os
|
17
|
-
import tempfile
|
18
17
|
from typing import TYPE_CHECKING, Any, ClassVar, Dict, Tuple, Type
|
19
18
|
|
20
19
|
import lightgbm as lgb
|
@@ -46,16 +45,14 @@ class LightGBMDatasetMaterializer(BaseMaterializer):
|
|
46
45
|
"""
|
47
46
|
filepath = os.path.join(self.uri, DEFAULT_FILENAME)
|
48
47
|
|
49
|
-
|
50
|
-
|
51
|
-
temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
|
48
|
+
with self.get_temporary_directory(delete_at_exit=False) as temp_dir:
|
49
|
+
temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
|
52
50
|
|
53
|
-
|
54
|
-
|
55
|
-
|
51
|
+
# Copy from artifact store to temporary file
|
52
|
+
fileio.copy(filepath, temp_file)
|
53
|
+
matrix = lgb.Dataset(temp_file, free_raw_data=False)
|
56
54
|
|
57
|
-
|
58
|
-
return matrix
|
55
|
+
return matrix
|
59
56
|
|
60
57
|
def save(self, matrix: lgb.Dataset) -> None:
|
61
58
|
"""Creates a binary serialization for a lightgbm.Dataset object.
|
@@ -65,14 +62,12 @@ class LightGBMDatasetMaterializer(BaseMaterializer):
|
|
65
62
|
"""
|
66
63
|
filepath = os.path.join(self.uri, DEFAULT_FILENAME)
|
67
64
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
matrix.save_binary(temp_file)
|
65
|
+
with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
|
66
|
+
temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
|
67
|
+
matrix.save_binary(temp_file)
|
72
68
|
|
73
|
-
|
74
|
-
|
75
|
-
fileio.rmtree(temp_dir)
|
69
|
+
# Copy it into artifact store
|
70
|
+
fileio.copy(temp_file, filepath)
|
76
71
|
|
77
72
|
def extract_metadata(
|
78
73
|
self, matrix: lgb.Dataset
|
@@ -14,7 +14,6 @@
|
|
14
14
|
"""Materializer for Pillow Image objects."""
|
15
15
|
|
16
16
|
import os
|
17
|
-
import tempfile
|
18
17
|
from typing import TYPE_CHECKING, Any, ClassVar, Dict, Tuple, Type
|
19
18
|
|
20
19
|
from PIL import Image
|
@@ -57,16 +56,15 @@ class PillowImageMaterializer(BaseMaterializer):
|
|
57
56
|
files = io_utils.find_files(self.uri, f"{DEFAULT_IMAGE_FILENAME}.*")
|
58
57
|
filepath = [file for file in files if not fileio.isdir(file)][0]
|
59
58
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
)
|
59
|
+
with self.get_temporary_directory(delete_at_exit=False) as temp_dir:
|
60
|
+
temp_file = os.path.join(
|
61
|
+
temp_dir,
|
62
|
+
f"{DEFAULT_IMAGE_FILENAME}{os.path.splitext(filepath)[1]}",
|
63
|
+
)
|
66
64
|
|
67
|
-
|
68
|
-
|
69
|
-
|
65
|
+
# copy from artifact store to temporary file
|
66
|
+
fileio.copy(filepath, temp_file)
|
67
|
+
return Image.open(temp_file)
|
70
68
|
|
71
69
|
def save(self, image: Image.Image) -> None:
|
72
70
|
"""Write to artifact store.
|
@@ -74,18 +72,17 @@ class PillowImageMaterializer(BaseMaterializer):
|
|
74
72
|
Args:
|
75
73
|
image: An Image.Image object.
|
76
74
|
"""
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
75
|
+
with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
|
76
|
+
file_extension = image.format or DEFAULT_IMAGE_EXTENSION
|
77
|
+
full_filename = f"{DEFAULT_IMAGE_FILENAME}.{file_extension}"
|
78
|
+
temp_image_path = os.path.join(temp_dir, full_filename)
|
81
79
|
|
82
|
-
|
83
|
-
|
80
|
+
# save the image in a temporary directory
|
81
|
+
image.save(temp_image_path)
|
84
82
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
temp_dir.cleanup()
|
83
|
+
# copy the saved image to the artifact store
|
84
|
+
artifact_store_path = os.path.join(self.uri, full_filename)
|
85
|
+
io_utils.copy(temp_image_path, artifact_store_path, overwrite=True) # type: ignore[attr-defined]
|
89
86
|
|
90
87
|
def save_visualizations(
|
91
88
|
self, image: Image.Image
|
@@ -14,7 +14,6 @@
|
|
14
14
|
"""Polars materializer."""
|
15
15
|
|
16
16
|
import os
|
17
|
-
import tempfile
|
18
17
|
from typing import Any, ClassVar, Tuple, Type, Union
|
19
18
|
|
20
19
|
import polars as pl
|
@@ -22,7 +21,6 @@ import pyarrow as pa # type: ignore
|
|
22
21
|
import pyarrow.parquet as pq # type: ignore
|
23
22
|
|
24
23
|
from zenml.enums import ArtifactType
|
25
|
-
from zenml.io import fileio
|
26
24
|
from zenml.materializers.base_materializer import BaseMaterializer
|
27
25
|
from zenml.utils import io_utils
|
28
26
|
|
@@ -45,35 +43,29 @@ class PolarsMaterializer(BaseMaterializer):
|
|
45
43
|
Returns:
|
46
44
|
A Polars data frame or series.
|
47
45
|
"""
|
48
|
-
|
49
|
-
|
46
|
+
with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
|
47
|
+
io_utils.copy_dir(self.uri, temp_dir)
|
50
48
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
table = pq.read_table(
|
56
|
-
os.path.join(temp_dir.name, "dataframe.parquet").replace("\\", "/")
|
57
|
-
)
|
58
|
-
|
59
|
-
# If the data is of type pl.Series, convert it back to a pyarrow array
|
60
|
-
# instead of a table.
|
61
|
-
if (
|
62
|
-
table.schema.metadata
|
63
|
-
and b"zenml_is_pl_series" in table.schema.metadata
|
64
|
-
):
|
65
|
-
isinstance_bytes = table.schema.metadata[b"zenml_is_pl_series"]
|
66
|
-
isinstance_series = bool.from_bytes(isinstance_bytes, "big")
|
67
|
-
if isinstance_series:
|
68
|
-
table = table.column(0)
|
49
|
+
# Load the data from the temporary directory
|
50
|
+
table = pq.read_table(
|
51
|
+
os.path.join(temp_dir, "dataframe.parquet").replace("\\", "/")
|
52
|
+
)
|
69
53
|
|
70
|
-
|
71
|
-
|
54
|
+
# If the data is of type pl.Series, convert it back to a pyarrow array
|
55
|
+
# instead of a table.
|
56
|
+
if (
|
57
|
+
table.schema.metadata
|
58
|
+
and b"zenml_is_pl_series" in table.schema.metadata
|
59
|
+
):
|
60
|
+
isinstance_bytes = table.schema.metadata[b"zenml_is_pl_series"]
|
61
|
+
isinstance_series = bool.from_bytes(isinstance_bytes, "big")
|
62
|
+
if isinstance_series:
|
63
|
+
table = table.column(0)
|
72
64
|
|
73
|
-
|
74
|
-
|
65
|
+
# Convert the table to a Polars data frame or series
|
66
|
+
data = pl.from_arrow(table)
|
75
67
|
|
76
|
-
|
68
|
+
return data
|
77
69
|
|
78
70
|
def save(self, data: Union[pl.DataFrame, pl.Series]) -> None:
|
79
71
|
"""Writes Polars data to the artifact store.
|
@@ -107,15 +99,10 @@ class PolarsMaterializer(BaseMaterializer):
|
|
107
99
|
{b"zenml_is_pl_series": isinstance_bytes}
|
108
100
|
)
|
109
101
|
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
pq.write_table(table, path) # Uses lz4 compression by default
|
118
|
-
io_utils.copy_dir(temp_dir.name, self.uri)
|
119
|
-
|
120
|
-
# Remove the temporary directory
|
121
|
-
fileio.rmtree(temp_dir.name)
|
102
|
+
with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
|
103
|
+
# Write the table to a Parquet file
|
104
|
+
path = os.path.join(temp_dir, "dataframe.parquet").replace(
|
105
|
+
"\\", "/"
|
106
|
+
)
|
107
|
+
pq.write_table(table, path) # Uses lz4 compression by default
|
108
|
+
io_utils.copy_dir(temp_dir, self.uri)
|
@@ -13,7 +13,6 @@
|
|
13
13
|
# permissions and limitations under the License.
|
14
14
|
"""PyCaret materializer."""
|
15
15
|
|
16
|
-
import tempfile
|
17
16
|
from typing import (
|
18
17
|
Any,
|
19
18
|
Type,
|
@@ -65,7 +64,6 @@ from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
|
|
65
64
|
from xgboost import XGBClassifier, XGBRegressor
|
66
65
|
|
67
66
|
from zenml.enums import ArtifactType
|
68
|
-
from zenml.io import fileio
|
69
67
|
from zenml.materializers.base_materializer import BaseMaterializer
|
70
68
|
from zenml.utils import io_utils
|
71
69
|
|
@@ -133,19 +131,10 @@ class PyCaretMaterializer(BaseMaterializer):
|
|
133
131
|
Returns:
|
134
132
|
A PyCaret model.
|
135
133
|
"""
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
io_utils.copy_dir(self.uri, temp_dir.name)
|
141
|
-
|
142
|
-
# Load the model from the temporary directory
|
143
|
-
model = load_model(temp_dir.name)
|
144
|
-
|
145
|
-
# Cleanup and return
|
146
|
-
fileio.rmtree(temp_dir.name)
|
147
|
-
|
148
|
-
return model
|
134
|
+
with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
|
135
|
+
io_utils.copy_dir(self.uri, temp_dir)
|
136
|
+
model = load_model(temp_dir)
|
137
|
+
return model
|
149
138
|
|
150
139
|
def save(self, model: Any) -> None:
|
151
140
|
"""Writes a PyCaret model to the artifact store.
|
@@ -153,10 +142,6 @@ class PyCaretMaterializer(BaseMaterializer):
|
|
153
142
|
Args:
|
154
143
|
model: Any of the supported models.
|
155
144
|
"""
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
io_utils.copy_dir(temp_dir.name, self.uri)
|
160
|
-
|
161
|
-
# Remove the temporary directory
|
162
|
-
fileio.rmtree(temp_dir.name)
|
145
|
+
with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
|
146
|
+
save_model(model, temp_dir)
|
147
|
+
io_utils.copy_dir(temp_dir, self.uri)
|
@@ -14,7 +14,6 @@
|
|
14
14
|
"""Implementation of the TensorFlow Keras materializer."""
|
15
15
|
|
16
16
|
import os
|
17
|
-
import tempfile
|
18
17
|
from typing import TYPE_CHECKING, Any, ClassVar, Dict, Tuple, Type
|
19
18
|
|
20
19
|
import tensorflow as tf
|
@@ -22,7 +21,6 @@ from tensorflow.python import keras as tf_keras
|
|
22
21
|
from tensorflow.python.keras.utils.layer_utils import count_params
|
23
22
|
|
24
23
|
from zenml.enums import ArtifactType
|
25
|
-
from zenml.io import fileio
|
26
24
|
from zenml.materializers.base_materializer import BaseMaterializer
|
27
25
|
from zenml.utils import io_utils
|
28
26
|
|
@@ -49,20 +47,15 @@ class KerasMaterializer(BaseMaterializer):
|
|
49
47
|
Returns:
|
50
48
|
A keras.Model model.
|
51
49
|
"""
|
52
|
-
|
53
|
-
|
50
|
+
with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
|
51
|
+
# Copy from artifact store to temporary directory
|
52
|
+
temp_model_file = os.path.join(temp_dir, self.MODEL_FILE_NAME)
|
53
|
+
io_utils.copy_dir(self.uri, temp_dir)
|
54
54
|
|
55
|
-
|
56
|
-
|
57
|
-
io_utils.copy_dir(self.uri, temp_dir.name)
|
55
|
+
# Load the model from the temporary directory
|
56
|
+
model = tf.keras.models.load_model(temp_model_file)
|
58
57
|
|
59
|
-
|
60
|
-
model = tf.keras.models.load_model(temp_model_file)
|
61
|
-
|
62
|
-
# Cleanup and return
|
63
|
-
fileio.rmtree(temp_dir.name)
|
64
|
-
|
65
|
-
return model
|
58
|
+
return model
|
66
59
|
|
67
60
|
def save(self, model: tf_keras.Model) -> None:
|
68
61
|
"""Writes a keras model to the artifact store.
|
@@ -70,14 +63,10 @@ class KerasMaterializer(BaseMaterializer):
|
|
70
63
|
Args:
|
71
64
|
model: A keras.Model model.
|
72
65
|
"""
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
io_utils.copy_dir(temp_dir.name, self.uri)
|
78
|
-
|
79
|
-
# Remove the temporary directory
|
80
|
-
fileio.rmtree(temp_dir.name)
|
66
|
+
with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
|
67
|
+
temp_model_file = os.path.join(temp_dir, self.MODEL_FILE_NAME)
|
68
|
+
model.save(temp_model_file)
|
69
|
+
io_utils.copy_dir(temp_dir, self.uri)
|
81
70
|
|
82
71
|
def extract_metadata(
|
83
72
|
self, model: tf_keras.Model
|
@@ -14,13 +14,11 @@
|
|
14
14
|
"""Implementation of the TensorFlow dataset materializer."""
|
15
15
|
|
16
16
|
import os
|
17
|
-
import tempfile
|
18
17
|
from typing import TYPE_CHECKING, Any, ClassVar, Dict, Tuple, Type
|
19
18
|
|
20
19
|
import tensorflow as tf
|
21
20
|
|
22
21
|
from zenml.enums import ArtifactType
|
23
|
-
from zenml.io import fileio
|
24
22
|
from zenml.materializers.base_materializer import BaseMaterializer
|
25
23
|
from zenml.utils import io_utils
|
26
24
|
|
@@ -45,13 +43,11 @@ class TensorflowDatasetMaterializer(BaseMaterializer):
|
|
45
43
|
Returns:
|
46
44
|
A tf.data.Dataset object.
|
47
45
|
"""
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
# loaded and needs to read it when the object gets used
|
54
|
-
return dataset
|
46
|
+
with self.get_temporary_directory(delete_at_exit=False) as temp_dir:
|
47
|
+
io_utils.copy_dir(self.uri, temp_dir)
|
48
|
+
path = os.path.join(temp_dir, DEFAULT_FILENAME)
|
49
|
+
dataset = tf.data.Dataset.load(path)
|
50
|
+
return dataset
|
55
51
|
|
56
52
|
def save(self, dataset: tf.data.Dataset) -> None:
|
57
53
|
"""Persists a tf.data.Dataset object.
|
@@ -59,15 +55,12 @@ class TensorflowDatasetMaterializer(BaseMaterializer):
|
|
59
55
|
Args:
|
60
56
|
dataset: The dataset to persist.
|
61
57
|
"""
|
62
|
-
|
63
|
-
|
64
|
-
try:
|
58
|
+
with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
|
59
|
+
path = os.path.join(temp_dir, DEFAULT_FILENAME)
|
65
60
|
tf.data.Dataset.save(
|
66
61
|
dataset, path, compression=None, shard_func=None
|
67
62
|
)
|
68
|
-
io_utils.copy_dir(temp_dir
|
69
|
-
finally:
|
70
|
-
fileio.rmtree(temp_dir.name)
|
63
|
+
io_utils.copy_dir(temp_dir, self.uri)
|
71
64
|
|
72
65
|
def extract_metadata(
|
73
66
|
self, dataset: tf.data.Dataset
|
@@ -13,6 +13,7 @@
|
|
13
13
|
# permissions and limitations under the License.
|
14
14
|
"""Implementation of the vLLM Inference Server Service."""
|
15
15
|
|
16
|
+
import argparse
|
16
17
|
import os
|
17
18
|
from typing import Any, List, Optional, Union
|
18
19
|
|
@@ -137,15 +138,23 @@ class VLLMDeploymentService(LocalDaemonService, BaseDeploymentService):
|
|
137
138
|
self.endpoint.prepare_for_start()
|
138
139
|
|
139
140
|
import uvloop
|
140
|
-
from vllm.entrypoints.openai.api_server import
|
141
|
-
|
142
|
-
|
141
|
+
from vllm.entrypoints.openai.api_server import (
|
142
|
+
run_server,
|
143
|
+
)
|
144
|
+
from vllm.entrypoints.openai.cli_args import (
|
145
|
+
make_arg_parser,
|
146
|
+
)
|
147
|
+
from vllm.utils import (
|
148
|
+
FlexibleArgumentParser,
|
149
|
+
)
|
143
150
|
|
144
151
|
try:
|
145
|
-
parser = make_arg_parser(
|
146
|
-
|
152
|
+
parser: argparse.ArgumentParser = make_arg_parser(
|
153
|
+
FlexibleArgumentParser()
|
154
|
+
)
|
155
|
+
args: argparse.Namespace = parser.parse_args()
|
147
156
|
# Override port with the available port
|
148
|
-
self.config.port = self.endpoint.status.port
|
157
|
+
self.config.port = self.endpoint.status.port or self.config.port
|
149
158
|
# Update the arguments in place
|
150
159
|
args.__dict__.update(self.config.model_dump())
|
151
160
|
uvloop.run(run_server(args=args))
|
@@ -161,7 +170,7 @@ class VLLMDeploymentService(LocalDaemonService, BaseDeploymentService):
|
|
161
170
|
"""
|
162
171
|
if not self.is_running:
|
163
172
|
return None
|
164
|
-
return self.endpoint.
|
173
|
+
return self.endpoint.prediction_url
|
165
174
|
|
166
175
|
def predict(self, data: "Any") -> "Any":
|
167
176
|
"""Make a prediction using the service.
|
@@ -14,7 +14,6 @@
|
|
14
14
|
"""Implementation of the whylogs materializer."""
|
15
15
|
|
16
16
|
import os
|
17
|
-
import tempfile
|
18
17
|
from typing import Any, ClassVar, Dict, Tuple, Type, cast
|
19
18
|
|
20
19
|
from whylogs.core import DatasetProfileView # type: ignore
|
@@ -51,18 +50,14 @@ class WhylogsMaterializer(BaseMaterializer):
|
|
51
50
|
"""
|
52
51
|
filepath = os.path.join(self.uri, PROFILE_FILENAME)
|
53
52
|
|
54
|
-
|
55
|
-
|
56
|
-
temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)
|
53
|
+
with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
|
54
|
+
temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)
|
57
55
|
|
58
|
-
|
59
|
-
|
60
|
-
|
56
|
+
# Copy from artifact store to temporary file
|
57
|
+
fileio.copy(filepath, temp_file)
|
58
|
+
profile_view = DatasetProfileView.read(temp_file)
|
61
59
|
|
62
|
-
|
63
|
-
fileio.rmtree(temp_dir)
|
64
|
-
|
65
|
-
return profile_view
|
60
|
+
return profile_view
|
66
61
|
|
67
62
|
def save(self, profile_view: DatasetProfileView) -> None:
|
68
63
|
"""Writes a whylogs dataset profile view.
|
@@ -72,15 +67,13 @@ class WhylogsMaterializer(BaseMaterializer):
|
|
72
67
|
"""
|
73
68
|
filepath = os.path.join(self.uri, PROFILE_FILENAME)
|
74
69
|
|
75
|
-
|
76
|
-
|
77
|
-
temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)
|
70
|
+
with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
|
71
|
+
temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)
|
78
72
|
|
79
|
-
|
73
|
+
profile_view.write(temp_file)
|
80
74
|
|
81
|
-
|
82
|
-
|
83
|
-
fileio.rmtree(temp_dir)
|
75
|
+
# Copy it into artifact store
|
76
|
+
fileio.copy(temp_file, filepath)
|
84
77
|
|
85
78
|
try:
|
86
79
|
self._upload_to_whylabs(profile_view)
|
@@ -14,7 +14,6 @@
|
|
14
14
|
"""Implementation of an XGBoost booster materializer."""
|
15
15
|
|
16
16
|
import os
|
17
|
-
import tempfile
|
18
17
|
from typing import Any, ClassVar, Tuple, Type
|
19
18
|
|
20
19
|
import xgboost as xgb
|
@@ -43,18 +42,15 @@ class XgboostBoosterMaterializer(BaseMaterializer):
|
|
43
42
|
"""
|
44
43
|
filepath = os.path.join(self.uri, DEFAULT_FILENAME)
|
45
44
|
|
46
|
-
|
47
|
-
|
48
|
-
temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
|
45
|
+
with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
|
46
|
+
temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
|
49
47
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
48
|
+
# Copy from artifact store to temporary file
|
49
|
+
fileio.copy(filepath, temp_file)
|
50
|
+
booster = xgb.Booster()
|
51
|
+
booster.load_model(temp_file)
|
54
52
|
|
55
|
-
|
56
|
-
fileio.rmtree(temp_dir)
|
57
|
-
return booster
|
53
|
+
return booster
|
58
54
|
|
59
55
|
def save(self, booster: xgb.Booster) -> None:
|
60
56
|
"""Creates a JSON serialization for a xgboost Booster model.
|
@@ -64,14 +60,7 @@ class XgboostBoosterMaterializer(BaseMaterializer):
|
|
64
60
|
"""
|
65
61
|
filepath = os.path.join(self.uri, DEFAULT_FILENAME)
|
66
62
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
booster.save_model(f.name)
|
72
|
-
# Copy it into artifact store
|
73
|
-
fileio.copy(f.name, filepath)
|
74
|
-
|
75
|
-
# Close and remove the temporary file
|
76
|
-
f.close()
|
77
|
-
fileio.remove(f.name)
|
63
|
+
with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
|
64
|
+
temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
|
65
|
+
booster.save_model(temp_file)
|
66
|
+
fileio.copy(temp_file, filepath)
|
@@ -14,7 +14,6 @@
|
|
14
14
|
"""Implementation of the XGBoost dmatrix materializer."""
|
15
15
|
|
16
16
|
import os
|
17
|
-
import tempfile
|
18
17
|
from typing import TYPE_CHECKING, Any, ClassVar, Dict, Tuple, Type
|
19
18
|
|
20
19
|
import xgboost as xgb
|
@@ -46,17 +45,14 @@ class XgboostDMatrixMaterializer(BaseMaterializer):
|
|
46
45
|
"""
|
47
46
|
filepath = os.path.join(self.uri, DEFAULT_FILENAME)
|
48
47
|
|
49
|
-
|
50
|
-
|
51
|
-
temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
|
48
|
+
with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
|
49
|
+
temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
|
52
50
|
|
53
|
-
|
54
|
-
|
55
|
-
|
51
|
+
# Copy from artifact store to temporary file
|
52
|
+
fileio.copy(filepath, temp_file)
|
53
|
+
matrix = xgb.DMatrix(temp_file)
|
56
54
|
|
57
|
-
|
58
|
-
fileio.rmtree(temp_dir)
|
59
|
-
return matrix
|
55
|
+
return matrix
|
60
56
|
|
61
57
|
def save(self, matrix: xgb.DMatrix) -> None:
|
62
58
|
"""Creates a binary serialization for a xgboost.DMatrix object.
|
@@ -66,15 +62,10 @@ class XgboostDMatrixMaterializer(BaseMaterializer):
|
|
66
62
|
"""
|
67
63
|
filepath = os.path.join(self.uri, DEFAULT_FILENAME)
|
68
64
|
|
69
|
-
|
70
|
-
|
71
|
-
matrix.save_binary(
|
72
|
-
|
73
|
-
fileio.copy(f.name, filepath)
|
74
|
-
|
75
|
-
# Close and remove the temporary file
|
76
|
-
f.close()
|
77
|
-
fileio.remove(f.name)
|
65
|
+
with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
|
66
|
+
temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
|
67
|
+
matrix.save_binary(temp_file)
|
68
|
+
fileio.copy(temp_file, filepath)
|
78
69
|
|
79
70
|
def extract_metadata(
|
80
71
|
self, dataset: xgb.DMatrix
|
@@ -13,8 +13,11 @@
|
|
13
13
|
# permissions and limitations under the License.
|
14
14
|
"""Metaclass implementation for registering ZenML BaseMaterializer subclasses."""
|
15
15
|
|
16
|
+
import contextlib
|
16
17
|
import inspect
|
17
|
-
|
18
|
+
import shutil
|
19
|
+
import tempfile
|
20
|
+
from typing import Any, ClassVar, Dict, Iterator, Optional, Tuple, Type, cast
|
18
21
|
|
19
22
|
from zenml.artifact_stores.base_artifact_store import BaseArtifactStore
|
20
23
|
from zenml.enums import ArtifactType, VisualizationType
|
@@ -326,3 +329,67 @@ class BaseMaterializer(metaclass=BaseMaterializerMeta):
|
|
326
329
|
if isinstance(storage_size, int):
|
327
330
|
return {"storage_size": StorageSize(storage_size)}
|
328
331
|
return {}
|
332
|
+
|
333
|
+
@contextlib.contextmanager
|
334
|
+
def get_temporary_directory(
|
335
|
+
self,
|
336
|
+
delete_at_exit: bool,
|
337
|
+
delete_after_step_execution: bool = True,
|
338
|
+
) -> Iterator[str]:
|
339
|
+
"""Context manager to get a temporary directory.
|
340
|
+
|
341
|
+
Args:
|
342
|
+
delete_at_exit: If set to True, the temporary directory will be
|
343
|
+
deleted after the context manager exits.
|
344
|
+
delete_after_step_execution: If `delete_at_exit` is set to False and
|
345
|
+
this is set to True, the temporary directory will be deleted
|
346
|
+
after the step finished executing. If a materializer is being
|
347
|
+
used outside of the context of a step execution, the temporary
|
348
|
+
directory will not be deleted and the user is responsible for
|
349
|
+
deleting it themselves.
|
350
|
+
|
351
|
+
Yields:
|
352
|
+
Path to the temporary directory.
|
353
|
+
"""
|
354
|
+
temp_dir = tempfile.mkdtemp(prefix="zenml-")
|
355
|
+
|
356
|
+
if delete_after_step_execution and not delete_at_exit:
|
357
|
+
# We should not delete the directory when the context manager
|
358
|
+
# exits, but cleanup once the step has finished executing.
|
359
|
+
self._register_directory_for_deletion_after_step_execution(
|
360
|
+
temp_dir
|
361
|
+
)
|
362
|
+
|
363
|
+
try:
|
364
|
+
yield temp_dir
|
365
|
+
finally:
|
366
|
+
if delete_at_exit:
|
367
|
+
shutil.rmtree(temp_dir)
|
368
|
+
|
369
|
+
def _register_directory_for_deletion_after_step_execution(
|
370
|
+
self, directory: str
|
371
|
+
) -> None:
|
372
|
+
"""Register directory to be deleted after the current step finishes.
|
373
|
+
|
374
|
+
If no step is currently being executed, this method does nothing.
|
375
|
+
|
376
|
+
Args:
|
377
|
+
directory: The directory to register for deletion.
|
378
|
+
"""
|
379
|
+
from zenml import get_step_context
|
380
|
+
|
381
|
+
try:
|
382
|
+
step_context = get_step_context()
|
383
|
+
except RuntimeError:
|
384
|
+
logger.debug(
|
385
|
+
"Materializer called outside of step execution, not cleaning "
|
386
|
+
"up directory %s",
|
387
|
+
directory,
|
388
|
+
)
|
389
|
+
return
|
390
|
+
|
391
|
+
def _callback() -> None:
|
392
|
+
shutil.rmtree(directory)
|
393
|
+
logger.debug("Cleaned up materializer directory %s", directory)
|
394
|
+
|
395
|
+
step_context._cleanup_registry.register_callback(_callback)
|