zenml-nightly 0.68.1.dev20241105__py3-none-any.whl → 0.68.1.dev20241107__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (37)
  1. zenml/VERSION +1 -1
  2. zenml/artifacts/{load_directory_materializer.py → preexisting_data_materializer.py} +8 -9
  3. zenml/artifacts/utils.py +121 -59
  4. zenml/constants.py +1 -0
  5. zenml/integrations/bentoml/materializers/bentoml_bento_materializer.py +19 -31
  6. zenml/integrations/evidently/__init__.py +1 -1
  7. zenml/integrations/huggingface/materializers/huggingface_datasets_materializer.py +8 -12
  8. zenml/integrations/huggingface/materializers/huggingface_pt_model_materializer.py +17 -18
  9. zenml/integrations/huggingface/materializers/huggingface_t5_materializer.py +2 -5
  10. zenml/integrations/huggingface/materializers/huggingface_tf_model_materializer.py +17 -18
  11. zenml/integrations/huggingface/materializers/huggingface_tokenizer_materializer.py +2 -3
  12. zenml/integrations/lightgbm/materializers/lightgbm_booster_materializer.py +8 -15
  13. zenml/integrations/lightgbm/materializers/lightgbm_dataset_materializer.py +11 -16
  14. zenml/integrations/pillow/materializers/pillow_image_materializer.py +17 -20
  15. zenml/integrations/polars/materializers/dataframe_materializer.py +26 -39
  16. zenml/integrations/pycaret/materializers/model_materializer.py +7 -22
  17. zenml/integrations/tensorflow/materializers/keras_materializer.py +11 -22
  18. zenml/integrations/tensorflow/materializers/tf_dataset_materializer.py +8 -15
  19. zenml/integrations/vllm/services/vllm_deployment.py +16 -7
  20. zenml/integrations/whylogs/materializers/whylogs_materializer.py +11 -18
  21. zenml/integrations/xgboost/materializers/xgboost_booster_materializer.py +11 -22
  22. zenml/integrations/xgboost/materializers/xgboost_dmatrix_materializer.py +10 -19
  23. zenml/materializers/base_materializer.py +68 -1
  24. zenml/orchestrators/step_runner.py +17 -11
  25. zenml/stack/flavor.py +9 -5
  26. zenml/steps/step_context.py +2 -0
  27. zenml/utils/callback_registry.py +71 -0
  28. zenml/zen_server/rbac/endpoint_utils.py +43 -1
  29. zenml/zen_server/routers/artifact_version_endpoints.py +27 -1
  30. zenml/zen_stores/rest_zen_store.py +52 -0
  31. zenml/zen_stores/sql_zen_store.py +16 -0
  32. zenml/zen_stores/zen_store_interface.py +13 -0
  33. {zenml_nightly-0.68.1.dev20241105.dist-info → zenml_nightly-0.68.1.dev20241107.dist-info}/METADATA +1 -1
  34. {zenml_nightly-0.68.1.dev20241105.dist-info → zenml_nightly-0.68.1.dev20241107.dist-info}/RECORD +37 -36
  35. {zenml_nightly-0.68.1.dev20241105.dist-info → zenml_nightly-0.68.1.dev20241107.dist-info}/LICENSE +0 -0
  36. {zenml_nightly-0.68.1.dev20241105.dist-info → zenml_nightly-0.68.1.dev20241107.dist-info}/WHEEL +0 -0
  37. {zenml_nightly-0.68.1.dev20241105.dist-info → zenml_nightly-0.68.1.dev20241107.dist-info}/entry_points.txt +0 -0

zenml/integrations/lightgbm/materializers/lightgbm_dataset_materializer.py
@@ -14,7 +14,6 @@
 """Implementation of the LightGBM materializer."""
 
 import os
-import tempfile
 from typing import TYPE_CHECKING, Any, ClassVar, Dict, Tuple, Type
 
 import lightgbm as lgb
@@ -46,16 +45,14 @@ class LightGBMDatasetMaterializer(BaseMaterializer):
         """
         filepath = os.path.join(self.uri, DEFAULT_FILENAME)
 
-        # Create a temporary folder
-        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
-        temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
+        with self.get_temporary_directory(delete_at_exit=False) as temp_dir:
+            temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
 
-        # Copy from artifact store to temporary file
-        fileio.copy(filepath, temp_file)
-        matrix = lgb.Dataset(temp_file, free_raw_data=False)
+            # Copy from artifact store to temporary file
+            fileio.copy(filepath, temp_file)
+            matrix = lgb.Dataset(temp_file, free_raw_data=False)
 
-        # No clean up this time because matrix is lazy loaded
-        return matrix
+            return matrix
 
     def save(self, matrix: lgb.Dataset) -> None:
         """Creates a binary serialization for a lightgbm.Dataset object.
@@ -65,14 +62,12 @@ class LightGBMDatasetMaterializer(BaseMaterializer):
         """
         filepath = os.path.join(self.uri, DEFAULT_FILENAME)
 
-        # Make a temporary phantom artifact
-        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
-        temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
-        matrix.save_binary(temp_file)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
+            matrix.save_binary(temp_file)
 
-        # Copy it into artifact store
-        fileio.copy(temp_file, filepath)
-        fileio.rmtree(temp_dir)
+            # Copy it into artifact store
+            fileio.copy(temp_file, filepath)
 
     def extract_metadata(
         self, matrix: lgb.Dataset
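
The `delete_at_exit=False` variant above is used because `lgb.Dataset(..., free_raw_data=False)` only references the file on disk and reads it lazily, so the temporary directory has to outlive the context manager; cleanup is instead deferred until the step finishes (see the `get_temporary_directory` addition to `BaseMaterializer` further down). A minimal sketch of the same pattern in a custom materializer follows; the class name and `data.binary` filename are illustrative only, not part of this release:

import os
from typing import Any, Type

import lightgbm as lgb

from zenml.enums import ArtifactType
from zenml.io import fileio
from zenml.materializers.base_materializer import BaseMaterializer


class LazyDatasetMaterializer(BaseMaterializer):
    """Hypothetical materializer for a lazily loaded, file-backed object."""

    ASSOCIATED_TYPES = (lgb.Dataset,)
    ASSOCIATED_ARTIFACT_TYPE = ArtifactType.DATA

    def load(self, data_type: Type[Any]) -> lgb.Dataset:
        filepath = os.path.join(self.uri, "data.binary")
        # delete_at_exit=False: the Dataset keeps reading from temp_dir after
        # this method returns, so cleanup is deferred to the end of the step.
        with self.get_temporary_directory(delete_at_exit=False) as temp_dir:
            temp_file = os.path.join(temp_dir, "data.binary")
            fileio.copy(filepath, temp_file)
            return lgb.Dataset(temp_file, free_raw_data=False)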

zenml/integrations/pillow/materializers/pillow_image_materializer.py
@@ -14,7 +14,6 @@
 """Materializer for Pillow Image objects."""
 
 import os
-import tempfile
 from typing import TYPE_CHECKING, Any, ClassVar, Dict, Tuple, Type
 
 from PIL import Image
@@ -57,16 +56,15 @@ class PillowImageMaterializer(BaseMaterializer):
         files = io_utils.find_files(self.uri, f"{DEFAULT_IMAGE_FILENAME}.*")
         filepath = [file for file in files if not fileio.isdir(file)][0]
 
-        # create a temporary folder
-        temp_dir = tempfile.TemporaryDirectory(prefix="zenml-temp-")
-        temp_file = os.path.join(
-            temp_dir.name,
-            f"{DEFAULT_IMAGE_FILENAME}{os.path.splitext(filepath)[1]}",
-        )
+        with self.get_temporary_directory(delete_at_exit=False) as temp_dir:
+            temp_file = os.path.join(
+                temp_dir,
+                f"{DEFAULT_IMAGE_FILENAME}{os.path.splitext(filepath)[1]}",
+            )
 
-        # copy from artifact store to temporary file
-        fileio.copy(filepath, temp_file)
-        return Image.open(temp_file)
+            # copy from artifact store to temporary file
+            fileio.copy(filepath, temp_file)
+            return Image.open(temp_file)
 
     def save(self, image: Image.Image) -> None:
         """Write to artifact store.
@@ -74,18 +72,17 @@ class PillowImageMaterializer(BaseMaterializer):
         Args:
             image: An Image.Image object.
         """
-        temp_dir = tempfile.TemporaryDirectory(prefix="zenml-temp-")
-        file_extension = image.format or DEFAULT_IMAGE_EXTENSION
-        full_filename = f"{DEFAULT_IMAGE_FILENAME}.{file_extension}"
-        temp_image_path = os.path.join(temp_dir.name, full_filename)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            file_extension = image.format or DEFAULT_IMAGE_EXTENSION
+            full_filename = f"{DEFAULT_IMAGE_FILENAME}.{file_extension}"
+            temp_image_path = os.path.join(temp_dir, full_filename)
 
-        # save the image in a temporary directory
-        image.save(temp_image_path)
+            # save the image in a temporary directory
+            image.save(temp_image_path)
 
-        # copy the saved image to the artifact store
-        artifact_store_path = os.path.join(self.uri, full_filename)
-        io_utils.copy(temp_image_path, artifact_store_path, overwrite=True)  # type: ignore[attr-defined]
-        temp_dir.cleanup()
+            # copy the saved image to the artifact store
+            artifact_store_path = os.path.join(self.uri, full_filename)
+            io_utils.copy(temp_image_path, artifact_store_path, overwrite=True)  # type: ignore[attr-defined]
 
     def save_visualizations(
         self, image: Image.Image

zenml/integrations/polars/materializers/dataframe_materializer.py
@@ -14,7 +14,6 @@
 """Polars materializer."""
 
 import os
-import tempfile
 from typing import Any, ClassVar, Tuple, Type, Union
 
 import polars as pl
@@ -22,7 +21,6 @@ import pyarrow as pa  # type: ignore
 import pyarrow.parquet as pq  # type: ignore
 
 from zenml.enums import ArtifactType
-from zenml.io import fileio
 from zenml.materializers.base_materializer import BaseMaterializer
 from zenml.utils import io_utils
 
@@ -45,35 +43,29 @@ class PolarsMaterializer(BaseMaterializer):
         Returns:
             A Polars data frame or series.
         """
-        # Create a temporary directory to store the model
-        temp_dir = tempfile.TemporaryDirectory()
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            io_utils.copy_dir(self.uri, temp_dir)
 
-        # Copy from artifact store to temporary directory
-        io_utils.copy_dir(self.uri, temp_dir.name)
-
-        # Load the data from the temporary directory
-        table = pq.read_table(
-            os.path.join(temp_dir.name, "dataframe.parquet").replace("\\", "/")
-        )
-
-        # If the data is of type pl.Series, convert it back to a pyarrow array
-        # instead of a table.
-        if (
-            table.schema.metadata
-            and b"zenml_is_pl_series" in table.schema.metadata
-        ):
-            isinstance_bytes = table.schema.metadata[b"zenml_is_pl_series"]
-            isinstance_series = bool.from_bytes(isinstance_bytes, "big")
-            if isinstance_series:
-                table = table.column(0)
+            # Load the data from the temporary directory
+            table = pq.read_table(
+                os.path.join(temp_dir, "dataframe.parquet").replace("\\", "/")
+            )
 
-        # Convert the table to a Polars data frame or series
-        data = pl.from_arrow(table)
+            # If the data is of type pl.Series, convert it back to a pyarrow array
+            # instead of a table.
+            if (
+                table.schema.metadata
+                and b"zenml_is_pl_series" in table.schema.metadata
+            ):
+                isinstance_bytes = table.schema.metadata[b"zenml_is_pl_series"]
+                isinstance_series = bool.from_bytes(isinstance_bytes, "big")
+                if isinstance_series:
+                    table = table.column(0)
 
-        # Cleanup and return
-        fileio.rmtree(temp_dir.name)
+            # Convert the table to a Polars data frame or series
+            data = pl.from_arrow(table)
 
-        return data
+            return data
 
     def save(self, data: Union[pl.DataFrame, pl.Series]) -> None:
         """Writes Polars data to the artifact store.
@@ -107,15 +99,10 @@ class PolarsMaterializer(BaseMaterializer):
             {b"zenml_is_pl_series": isinstance_bytes}
         )
 
-        # Create a temporary directory to store the model
-        temp_dir = tempfile.TemporaryDirectory()
-
-        # Write the table to a Parquet file
-        path = os.path.join(temp_dir.name, "dataframe.parquet").replace(
-            "\\", "/"
-        )
-        pq.write_table(table, path)  # Uses lz4 compression by default
-        io_utils.copy_dir(temp_dir.name, self.uri)
-
-        # Remove the temporary directory
-        fileio.rmtree(temp_dir.name)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            # Write the table to a Parquet file
+            path = os.path.join(temp_dir, "dataframe.parquet").replace(
+                "\\", "/"
+            )
+            pq.write_table(table, path)  # Uses lz4 compression by default
+            io_utils.copy_dir(temp_dir, self.uri)

zenml/integrations/pycaret/materializers/model_materializer.py
@@ -13,7 +13,6 @@
 # permissions and limitations under the License.
 """PyCaret materializer."""
 
-import tempfile
 from typing import (
     Any,
     Type,
@@ -65,7 +64,6 @@ from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
 from xgboost import XGBClassifier, XGBRegressor
 
 from zenml.enums import ArtifactType
-from zenml.io import fileio
 from zenml.materializers.base_materializer import BaseMaterializer
 from zenml.utils import io_utils
 
@@ -133,19 +131,10 @@ class PyCaretMaterializer(BaseMaterializer):
         Returns:
            A PyCaret model.
        """
-        # Create a temporary directory to store the model
-        temp_dir = tempfile.TemporaryDirectory()
-
-        # Copy from artifact store to temporary directory
-        io_utils.copy_dir(self.uri, temp_dir.name)
-
-        # Load the model from the temporary directory
-        model = load_model(temp_dir.name)
-
-        # Cleanup and return
-        fileio.rmtree(temp_dir.name)
-
-        return model
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            io_utils.copy_dir(self.uri, temp_dir)
+            model = load_model(temp_dir)
+            return model
 
     def save(self, model: Any) -> None:
         """Writes a PyCaret model to the artifact store.
@@ -153,10 +142,6 @@ class PyCaretMaterializer(BaseMaterializer):
         Args:
             model: Any of the supported models.
         """
-        # Create a temporary directory to store the model
-        temp_dir = tempfile.TemporaryDirectory()
-        save_model(model, temp_dir.name)
-        io_utils.copy_dir(temp_dir.name, self.uri)
-
-        # Remove the temporary directory
-        fileio.rmtree(temp_dir.name)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            save_model(model, temp_dir)
+            io_utils.copy_dir(temp_dir, self.uri)

zenml/integrations/tensorflow/materializers/keras_materializer.py
@@ -14,7 +14,6 @@
 """Implementation of the TensorFlow Keras materializer."""
 
 import os
-import tempfile
 from typing import TYPE_CHECKING, Any, ClassVar, Dict, Tuple, Type
 
 import tensorflow as tf
@@ -22,7 +21,6 @@ from tensorflow.python import keras as tf_keras
 from tensorflow.python.keras.utils.layer_utils import count_params
 
 from zenml.enums import ArtifactType
-from zenml.io import fileio
 from zenml.materializers.base_materializer import BaseMaterializer
 from zenml.utils import io_utils
 
@@ -49,20 +47,15 @@ class KerasMaterializer(BaseMaterializer):
         Returns:
             A keras.Model model.
         """
-        # Create a temporary directory to store the model
-        temp_dir = tempfile.TemporaryDirectory()
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            # Copy from artifact store to temporary directory
+            temp_model_file = os.path.join(temp_dir, self.MODEL_FILE_NAME)
+            io_utils.copy_dir(self.uri, temp_dir)
 
-        # Copy from artifact store to temporary directory
-        temp_model_file = os.path.join(temp_dir.name, self.MODEL_FILE_NAME)
-        io_utils.copy_dir(self.uri, temp_dir.name)
+            # Load the model from the temporary directory
+            model = tf.keras.models.load_model(temp_model_file)
 
-        # Load the model from the temporary directory
-        model = tf.keras.models.load_model(temp_model_file)
-
-        # Cleanup and return
-        fileio.rmtree(temp_dir.name)
-
-        return model
+            return model
 
     def save(self, model: tf_keras.Model) -> None:
         """Writes a keras model to the artifact store.
@@ -70,14 +63,10 @@ class KerasMaterializer(BaseMaterializer):
         Args:
             model: A keras.Model model.
         """
-        # Create a temporary directory to store the model
-        temp_dir = tempfile.TemporaryDirectory()
-        temp_model_file = os.path.join(temp_dir.name, self.MODEL_FILE_NAME)
-        model.save(temp_model_file)
-        io_utils.copy_dir(temp_dir.name, self.uri)
-
-        # Remove the temporary directory
-        fileio.rmtree(temp_dir.name)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            temp_model_file = os.path.join(temp_dir, self.MODEL_FILE_NAME)
+            model.save(temp_model_file)
+            io_utils.copy_dir(temp_dir, self.uri)
 
     def extract_metadata(
         self, model: tf_keras.Model

zenml/integrations/tensorflow/materializers/tf_dataset_materializer.py
@@ -14,13 +14,11 @@
 """Implementation of the TensorFlow dataset materializer."""
 
 import os
-import tempfile
 from typing import TYPE_CHECKING, Any, ClassVar, Dict, Tuple, Type
 
 import tensorflow as tf
 
 from zenml.enums import ArtifactType
-from zenml.io import fileio
 from zenml.materializers.base_materializer import BaseMaterializer
 from zenml.utils import io_utils
 
@@ -45,13 +43,11 @@ class TensorflowDatasetMaterializer(BaseMaterializer):
         Returns:
             A tf.data.Dataset object.
         """
-        temp_dir = tempfile.mkdtemp()
-        io_utils.copy_dir(self.uri, temp_dir)
-        path = os.path.join(temp_dir, DEFAULT_FILENAME)
-        dataset = tf.data.Dataset.load(path)
-        # Don't delete the temporary directory here as the dataset is lazily
-        # loaded and needs to read it when the object gets used
-        return dataset
+        with self.get_temporary_directory(delete_at_exit=False) as temp_dir:
+            io_utils.copy_dir(self.uri, temp_dir)
+            path = os.path.join(temp_dir, DEFAULT_FILENAME)
+            dataset = tf.data.Dataset.load(path)
+            return dataset
 
     def save(self, dataset: tf.data.Dataset) -> None:
         """Persists a tf.data.Dataset object.
@@ -59,15 +55,12 @@ class TensorflowDatasetMaterializer(BaseMaterializer):
         Args:
             dataset: The dataset to persist.
         """
-        temp_dir = tempfile.TemporaryDirectory()
-        path = os.path.join(temp_dir.name, DEFAULT_FILENAME)
-        try:
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            path = os.path.join(temp_dir, DEFAULT_FILENAME)
             tf.data.Dataset.save(
                 dataset, path, compression=None, shard_func=None
             )
-            io_utils.copy_dir(temp_dir.name, self.uri)
-        finally:
-            fileio.rmtree(temp_dir.name)
+            io_utils.copy_dir(temp_dir, self.uri)
 
     def extract_metadata(
         self, dataset: tf.data.Dataset

zenml/integrations/vllm/services/vllm_deployment.py
@@ -13,6 +13,7 @@
 # permissions and limitations under the License.
 """Implementation of the vLLM Inference Server Service."""
 
+import argparse
 import os
 from typing import Any, List, Optional, Union
 
@@ -137,15 +138,23 @@ class VLLMDeploymentService(LocalDaemonService, BaseDeploymentService):
         self.endpoint.prepare_for_start()
 
         import uvloop
-        from vllm.entrypoints.openai.api_server import run_server
-        from vllm.entrypoints.openai.cli_args import make_arg_parser
-        from vllm.utils import FlexibleArgumentParser
+        from vllm.entrypoints.openai.api_server import (
+            run_server,
+        )
+        from vllm.entrypoints.openai.cli_args import (
+            make_arg_parser,
+        )
+        from vllm.utils import (
+            FlexibleArgumentParser,
+        )
 
         try:
-            parser = make_arg_parser(FlexibleArgumentParser())
-            args = parser.parse_args()
+            parser: argparse.ArgumentParser = make_arg_parser(
+                FlexibleArgumentParser()
+            )
+            args: argparse.Namespace = parser.parse_args()
             # Override port with the available port
-            self.config.port = self.endpoint.status.port
+            self.config.port = self.endpoint.status.port or self.config.port
             # Update the arguments in place
             args.__dict__.update(self.config.model_dump())
             uvloop.run(run_server(args=args))
@@ -161,7 +170,7 @@ class VLLMDeploymentService(LocalDaemonService, BaseDeploymentService):
         """
         if not self.is_running:
             return None
-        return self.endpoint.prediction_url_path
+        return self.endpoint.prediction_url
 
     def predict(self, data: "Any") -> "Any":
         """Make a prediction using the service.

zenml/integrations/whylogs/materializers/whylogs_materializer.py
@@ -14,7 +14,6 @@
 """Implementation of the whylogs materializer."""
 
 import os
-import tempfile
 from typing import Any, ClassVar, Dict, Tuple, Type, cast
 
 from whylogs.core import DatasetProfileView  # type: ignore
@@ -51,18 +50,14 @@ class WhylogsMaterializer(BaseMaterializer):
         """
         filepath = os.path.join(self.uri, PROFILE_FILENAME)
 
-        # Create a temporary folder
-        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
-        temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)
 
-        # Copy from artifact store to temporary file
-        fileio.copy(filepath, temp_file)
-        profile_view = DatasetProfileView.read(temp_file)
+            # Copy from artifact store to temporary file
+            fileio.copy(filepath, temp_file)
+            profile_view = DatasetProfileView.read(temp_file)
 
-        # Cleanup and return
-        fileio.rmtree(temp_dir)
-
-        return profile_view
+            return profile_view
 
     def save(self, profile_view: DatasetProfileView) -> None:
         """Writes a whylogs dataset profile view.
@@ -72,15 +67,13 @@ class WhylogsMaterializer(BaseMaterializer):
         """
         filepath = os.path.join(self.uri, PROFILE_FILENAME)
 
-        # Create a temporary folder
-        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
-        temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)
 
-        profile_view.write(temp_file)
+            profile_view.write(temp_file)
 
-        # Copy it into artifact store
-        fileio.copy(temp_file, filepath)
-        fileio.rmtree(temp_dir)
+            # Copy it into artifact store
+            fileio.copy(temp_file, filepath)
 
         try:
             self._upload_to_whylabs(profile_view)

zenml/integrations/xgboost/materializers/xgboost_booster_materializer.py
@@ -14,7 +14,6 @@
 """Implementation of an XGBoost booster materializer."""
 
 import os
-import tempfile
 from typing import Any, ClassVar, Tuple, Type
 
 import xgboost as xgb
@@ -43,18 +42,15 @@ class XgboostBoosterMaterializer(BaseMaterializer):
         """
         filepath = os.path.join(self.uri, DEFAULT_FILENAME)
 
-        # Create a temporary folder
-        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
-        temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
 
-        # Copy from artifact store to temporary file
-        fileio.copy(filepath, temp_file)
-        booster = xgb.Booster()
-        booster.load_model(temp_file)
+            # Copy from artifact store to temporary file
+            fileio.copy(filepath, temp_file)
+            booster = xgb.Booster()
+            booster.load_model(temp_file)
 
-        # Cleanup and return
-        fileio.rmtree(temp_dir)
-        return booster
+            return booster
 
     def save(self, booster: xgb.Booster) -> None:
         """Creates a JSON serialization for a xgboost Booster model.
@@ -64,14 +60,7 @@ class XgboostBoosterMaterializer(BaseMaterializer):
         """
         filepath = os.path.join(self.uri, DEFAULT_FILENAME)
 
-        # Make a temporary phantom artifact
-        with tempfile.NamedTemporaryFile(
-            mode="w", suffix=".json", delete=False
-        ) as f:
-            booster.save_model(f.name)
-            # Copy it into artifact store
-            fileio.copy(f.name, filepath)
-
-        # Close and remove the temporary file
-        f.close()
-        fileio.remove(f.name)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
+            booster.save_model(temp_file)
+            fileio.copy(temp_file, filepath)

zenml/integrations/xgboost/materializers/xgboost_dmatrix_materializer.py
@@ -14,7 +14,6 @@
 """Implementation of the XGBoost dmatrix materializer."""
 
 import os
-import tempfile
 from typing import TYPE_CHECKING, Any, ClassVar, Dict, Tuple, Type
 
 import xgboost as xgb
@@ -46,17 +45,14 @@ class XgboostDMatrixMaterializer(BaseMaterializer):
         """
         filepath = os.path.join(self.uri, DEFAULT_FILENAME)
 
-        # Create a temporary folder
-        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
-        temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
 
-        # Copy from artifact store to temporary file
-        fileio.copy(filepath, temp_file)
-        matrix = xgb.DMatrix(temp_file)
+            # Copy from artifact store to temporary file
+            fileio.copy(filepath, temp_file)
+            matrix = xgb.DMatrix(temp_file)
 
-        # Cleanup and return
-        fileio.rmtree(temp_dir)
-        return matrix
+            return matrix
 
     def save(self, matrix: xgb.DMatrix) -> None:
         """Creates a binary serialization for a xgboost.DMatrix object.
@@ -66,15 +62,10 @@ class XgboostDMatrixMaterializer(BaseMaterializer):
         """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)
 
-        # Make a temporary phantom artifact
-        with tempfile.NamedTemporaryFile(mode="wb", delete=False) as f:
-            matrix.save_binary(f.name)
-            # Copy it into artifact store
-            fileio.copy(f.name, filepath)
-
-        # Close and remove the temporary file
-        f.close()
-        fileio.remove(f.name)
+        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
+            temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
+            matrix.save_binary(temp_file)
+            fileio.copy(temp_file, filepath)
 
     def extract_metadata(
         self, dataset: xgb.DMatrix

zenml/materializers/base_materializer.py
@@ -13,8 +13,11 @@
 # permissions and limitations under the License.
 """Metaclass implementation for registering ZenML BaseMaterializer subclasses."""
 
+import contextlib
 import inspect
-from typing import Any, ClassVar, Dict, Optional, Tuple, Type, cast
+import shutil
+import tempfile
+from typing import Any, ClassVar, Dict, Iterator, Optional, Tuple, Type, cast
 
 from zenml.artifact_stores.base_artifact_store import BaseArtifactStore
 from zenml.enums import ArtifactType, VisualizationType
@@ -326,3 +329,67 @@ class BaseMaterializer(metaclass=BaseMaterializerMeta):
         if isinstance(storage_size, int):
             return {"storage_size": StorageSize(storage_size)}
         return {}
+
+    @contextlib.contextmanager
+    def get_temporary_directory(
+        self,
+        delete_at_exit: bool,
+        delete_after_step_execution: bool = True,
+    ) -> Iterator[str]:
+        """Context manager to get a temporary directory.
+
+        Args:
+            delete_at_exit: If set to True, the temporary directory will be
+                deleted after the context manager exits.
+            delete_after_step_execution: If `delete_at_exit` is set to False and
+                this is set to True, the temporary directory will be deleted
+                after the step finished executing. If a materializer is being
+                used outside of the context of a step execution, the temporary
+                directory will not be deleted and the user is responsible for
+                deleting it themselves.
+
+        Yields:
+            Path to the temporary directory.
+        """
+        temp_dir = tempfile.mkdtemp(prefix="zenml-")
+
+        if delete_after_step_execution and not delete_at_exit:
+            # We should not delete the directory when the context manager
+            # exits, but cleanup once the step has finished executing.
+            self._register_directory_for_deletion_after_step_execution(
+                temp_dir
+            )
+
+        try:
+            yield temp_dir
+        finally:
+            if delete_at_exit:
+                shutil.rmtree(temp_dir)
+
+    def _register_directory_for_deletion_after_step_execution(
+        self, directory: str
+    ) -> None:
+        """Register directory to be deleted after the current step finishes.
+
+        If no step is currently being executed, this method does nothing.
+
+        Args:
+            directory: The directory to register for deletion.
+        """
+        from zenml import get_step_context
+
+        try:
+            step_context = get_step_context()
+        except RuntimeError:
+            logger.debug(
+                "Materializer called outside of step execution, not cleaning "
+                "up directory %s",
+                directory,
+            )
+            return
+
+        def _callback() -> None:
+            shutil.rmtree(directory)
+            logger.debug("Cleaned up materializer directory %s", directory)
+
+        step_context._cleanup_registry.register_callback(_callback)
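
Taken together with the refactored integrations above, this helper gives custom materializers a single way to stage files: `delete_at_exit=True` removes the directory as soon as the `with` block exits, while `delete_at_exit=False` defers removal until after the step finishes, via the step context's cleanup registry (see `zenml/utils/callback_registry.py` and `zenml/orchestrators/step_runner.py` in the file list). A minimal save-side sketch follows; the class name, `model.bin` filename, and the `data.write_to` call are placeholders chosen for illustration only:

import os
from typing import Any

from zenml.io import fileio
from zenml.materializers.base_materializer import BaseMaterializer


class StagedFileMaterializer(BaseMaterializer):
    """Hypothetical materializer that stages a local file before upload."""

    def save(self, data: Any) -> None:
        filepath = os.path.join(self.uri, "model.bin")
        # delete_at_exit=True: nothing reads the local copy afterwards, so the
        # directory is removed as soon as the with-block exits.
        with self.get_temporary_directory(delete_at_exit=True) as temp_dir:
            temp_file = os.path.join(temp_dir, "model.bin")
            data.write_to(temp_file)  # stand-in for a library-specific dump
            fileio.copy(temp_file, filepath)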