zea 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zea/__init__.py +54 -19
- zea/agent/__init__.py +12 -12
- zea/agent/masks.py +2 -1
- zea/backend/tensorflow/dataloader.py +2 -5
- zea/beamform/beamformer.py +100 -50
- zea/beamform/lens_correction.py +9 -2
- zea/beamform/pfield.py +9 -2
- zea/beamform/pixelgrid.py +1 -1
- zea/config.py +34 -25
- zea/data/__init__.py +22 -25
- zea/data/augmentations.py +221 -28
- zea/data/convert/__init__.py +1 -6
- zea/data/convert/__main__.py +123 -0
- zea/data/convert/camus.py +101 -40
- zea/data/convert/echonet.py +187 -86
- zea/data/convert/echonetlvh/README.md +2 -3
- zea/data/convert/echonetlvh/{convert_raw_to_usbmd.py → __init__.py} +174 -103
- zea/data/convert/echonetlvh/manual_rejections.txt +73 -0
- zea/data/convert/echonetlvh/precompute_crop.py +43 -64
- zea/data/convert/picmus.py +37 -40
- zea/data/convert/utils.py +86 -0
- zea/data/convert/{matlab.py → verasonics.py} +44 -65
- zea/data/data_format.py +155 -34
- zea/data/dataloader.py +12 -7
- zea/data/datasets.py +112 -71
- zea/data/file.py +184 -73
- zea/data/file_operations.py +496 -0
- zea/data/layers.py +3 -3
- zea/data/preset_utils.py +1 -1
- zea/datapaths.py +16 -4
- zea/display.py +14 -13
- zea/interface.py +14 -16
- zea/internal/_generate_keras_ops.py +6 -7
- zea/internal/cache.py +2 -49
- zea/internal/checks.py +6 -12
- zea/internal/config/validation.py +1 -2
- zea/internal/core.py +69 -6
- zea/internal/device.py +6 -2
- zea/internal/dummy_scan.py +330 -0
- zea/internal/operators.py +118 -2
- zea/internal/parameters.py +101 -70
- zea/internal/setup_zea.py +5 -6
- zea/internal/utils.py +282 -0
- zea/io_lib.py +322 -146
- zea/keras_ops.py +74 -4
- zea/log.py +9 -7
- zea/metrics.py +15 -7
- zea/models/__init__.py +31 -21
- zea/models/base.py +30 -14
- zea/models/carotid_segmenter.py +19 -4
- zea/models/diffusion.py +235 -23
- zea/models/echonet.py +22 -8
- zea/models/echonetlvh.py +31 -7
- zea/models/lpips.py +19 -2
- zea/models/lv_segmentation.py +30 -11
- zea/models/preset_utils.py +5 -5
- zea/models/regional_quality.py +30 -10
- zea/models/taesd.py +21 -5
- zea/models/unet.py +15 -1
- zea/ops.py +770 -336
- zea/probes.py +6 -6
- zea/scan.py +121 -51
- zea/simulator.py +24 -21
- zea/tensor_ops.py +477 -353
- zea/tools/fit_scan_cone.py +90 -160
- zea/tools/hf.py +1 -1
- zea/tools/selection_tool.py +47 -86
- zea/tracking/__init__.py +16 -0
- zea/tracking/base.py +94 -0
- zea/tracking/lucas_kanade.py +474 -0
- zea/tracking/segmentation.py +110 -0
- zea/utils.py +101 -480
- zea/visualize.py +177 -39
- {zea-0.0.6.dist-info → zea-0.0.8.dist-info}/METADATA +6 -2
- zea-0.0.8.dist-info/RECORD +122 -0
- zea-0.0.6.dist-info/RECORD +0 -112
- {zea-0.0.6.dist-info → zea-0.0.8.dist-info}/WHEEL +0 -0
- {zea-0.0.6.dist-info → zea-0.0.8.dist-info}/entry_points.txt +0 -0
- {zea-0.0.6.dist-info → zea-0.0.8.dist-info}/licenses/LICENSE +0 -0
zea/data/data_format.py
CHANGED

@@ -6,12 +6,14 @@ import inspect
 from dataclasses import dataclass
 from pathlib import Path

+import h5py
 import numpy as np
+from keras.utils import pad_sequences

 from zea import log
 from zea.data.file import File, validate_file
 from zea.internal.checks import _DATA_TYPES
-from zea.utils import first_not_none_item
+from zea.internal.utils import first_not_none_item


 @dataclass
@@ -19,15 +21,15 @@ class DatasetElement:
     """Class to store a dataset element with a name, data, description and unit. Used to
     supply additional dataset elements to the generate_zea_dataset function."""

-    # The group name to store the dataset under. This can be a nested group, e.g.
-    # "scan/waveforms"
-    group_name: str
     # The name of the dataset. This will be the key in the group.
     dataset_name: str
     # The data to store in the dataset.
     data: np.ndarray
     description: str
     unit: str
+    # The group name to store the dataset under. This can be a nested group, e.g.
+    # "lens/profiles"
+    group_name: str = ""


 def generate_example_dataset(
@@ -110,9 +112,43 @@ def generate_example_dataset(
         focus_distances=focus_distances,
         polar_angles=polar_angles,
         azimuth_angles=azimuth_angles,
+        additional_elements=_generate_example_dataset_elements(),
+        description="This is an example dataset generated by zea",
     )


+def _generate_example_dataset_elements() -> list[DatasetElement]:
+    """Generates a list of example DatasetElement objects to be used as additional
+    elements in the generate_zea_dataset function.
+
+    Returns:
+        list: A list of DatasetElement objects.
+    """
+    example_elements = [
+        DatasetElement(
+            dataset_name="temperature",
+            data=np.array(42),
+            description="The temperature during the measurement",
+            unit="unitless",
+        ),
+        DatasetElement(
+            dataset_name="lens_profile",
+            data=np.random.rand(100),
+            description="An example lens profile",
+            unit="mm",
+            group_name="lens",
+        ),
+        DatasetElement(
+            dataset_name="lens_material",
+            data=np.array(["material1", "material2", "material3"], dtype=h5py.string_dtype()),
+            description="An example lens material list",
+            unit="unitless",
+            group_name="lens",
+        ),
+    ]
+    return example_elements
+
+
 def validate_input_data(raw_data, aligned_data, envelope_data, beamformed_data, image, image_sc):
     """
     Validates input data for generate_zea_dataset
@@ -470,38 +506,45 @@ def _write_datasets(
         )

     if waveforms_one_way is not None:
-
-
-
-
-
-
-
-
-
-
-
-        )
+        _add_dataset(
+            group_name=scan_group_name,
+            name="waveforms_one_way",
+            data=pad_sequences(waveforms_one_way, dtype=np.float32, padding="post"),
+            description=(
+                "One-way waveform as simulated by the Verasonics system, "
+                "sampled at 250MHz. This is the waveform after being filtered "
+                "by the transducer bandwidth once."
+            ),
+            unit="V",
+        )

     if waveforms_two_way is not None:
-
-
-
-
-
-
-
-
-
-
-
-        )
+        _add_dataset(
+            group_name=scan_group_name,
+            name="waveforms_two_way",
+            data=pad_sequences(waveforms_two_way, dtype=np.float32, padding="post"),
+            description=(
+                "Two-way waveform as simulated by the Verasonics system, "
+                "sampled at 250MHz. This is the waveform after being filtered "
+                "by the transducer bandwidth twice."
+            ),
+            unit="V",
+        )

     # Add additional elements
     if additional_elements is not None:
+        # Write scan group
+        non_standard_elements_group_name = "non_standard_elements"
+        non_standard_elements_group = dataset.create_group(non_standard_elements_group_name)
+        non_standard_elements_group.attrs["description"] = (
+            "This group contains non-standard elements that can be added by the user."
+        )
         for element in additional_elements:
+            group_name = non_standard_elements_group_name
+            if element.group_name != "":
+                group_name += f"/{element.group_name}"
             _add_dataset(
-                group_name=
+                group_name=group_name,
                 name=element.dataset_name,
                 data=element.data,
                 description=element.description,
@@ -539,6 +582,7 @@ def generate_zea_dataset(
     additional_elements=None,
     event_structure=False,
     cast_to_float=True,
+    overwrite=False,
 ):
     """Generates a dataset in the zea format.

@@ -585,10 +629,10 @@ def generate_zea_dataset(
             waveform was used for each transmit event.
         waveforms_one_way (list): List of one-way waveforms as simulated by the Verasonics
             system, sampled at 250MHz. This is the waveform after being filtered by the
-
+            transducer bandwidth once. Every element in the list is a 1D numpy array.
         waveforms_two_way (list): List of two-way waveforms as simulated by the Verasonics
             system, sampled at 250MHz. This is the waveform after being filtered by the
-
+            transducer bandwidth twice. Every element in the list is a 1D numpy array.
         additional_elements (List[DatasetElement]): A list of additional dataset
             elements to be added to the dataset. Each element should be a DatasetElement
             object. The additional elements are added under the scan group.
@@ -598,6 +642,7 @@ def generate_zea_dataset(
             Instead of just a single data and scan group.
         cast_to_float (bool): Whether to store data as float32. You may want to set this
             to False if storing images.
+        overwrite (bool): Whether to overwrite the file if it already exists. Defaults to False.

     """
     # check if all args are lists
@@ -637,10 +682,10 @@ def generate_zea_dataset(
     # make sure input arguments of func is same length as data_and_parameters
     # except `path` and `event_structure` arguments and ofcourse `data_and_parameters` itself
     assert (
-        len(data_and_parameters) == len(inspect.signature(generate_zea_dataset).parameters) -
+        len(data_and_parameters) == len(inspect.signature(generate_zea_dataset).parameters) - 4
     ), (
         "All arguments should be put in data_and_parameters except "
-        "`path`, `event_structure`, and `
+        "`path`, `event_structure`, `cast_to_float`, and `overwrite` arguments."
     )

     if event_structure:
@@ -682,7 +727,7 @@ def generate_zea_dataset(
     # Convert path to Path object
     path = Path(path)

-    if path.exists():
+    if path.exists() and not overwrite:
         raise FileExistsError(f"The file {path} already exists.")

     # Create the directory if it does not exist
@@ -720,3 +765,79 @@ def generate_zea_dataset(

     validate_file(path)
     log.info(f"zea dataset written to {log.yellow(path)}")
+
+
+def load_description(path):
+    """Loads the description of a zea dataset.
+
+    Args:
+        path (str): The path to the zea dataset.
+
+    Returns:
+        str: The description of the dataset, or an empty string if not found.
+    """
+    path = Path(path)
+
+    with File(path, "r") as file:
+        description = file.attrs.get("description", "")
+
+    return description
+
+
+def load_additional_elements(path):
+    """Loads additional dataset elements from a zea dataset.
+
+    Args:
+        path (str): The path to the zea dataset.
+
+    Returns:
+        list: A list of DatasetElement objects.
+    """
+    path = Path(path)
+
+    with File(path, "r") as file:
+        if "non_standard_elements" not in file:
+            return []
+
+        additional_elements = _load_additional_elements_from_group(file, "non_standard_elements")
+
+    return additional_elements
+
+
+def _load_additional_elements_from_group(file, path):
+    """Recursively loads additional dataset elements from a group."""
+    elements = []
+    for name, item in file[path].items():
+        if isinstance(item, h5py.Dataset):
+            elements.append(_load_dataset_element_from_group(file, f"{path}/{name}"))
+        elif isinstance(item, h5py.Group):
+            elements.extend(_load_additional_elements_from_group(file, f"{path}/{name}"))
+    return elements
+
+
+def _load_dataset_element_from_group(file, path):
+    """Loads a specific dataset element from a group.
+
+    Args:
+        file (h5py.File): The HDF5 file object.
+        path (str): The full path to the dataset element.
+            e.g., "non_standard_elements/lens/lens_profile"
+
+    Returns:
+        DatasetElement: The loaded dataset element.
+    """
+    dataset = file[path]
+    description = dataset.attrs.get("description", "")
+    unit = dataset.attrs.get("unit", "")
+    data = dataset[()]
+
+    path_parts = path.split("/")
+
+    return DatasetElement(
+        dataset_name=path_parts[-1],
+        data=data,
+        description=description,
+        unit=unit,
+        group_name="/".join(path_parts[1:-1]),
+    )
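Putting the data_format.py changes together: group_name is now an optional field on DatasetElement, additional elements land in a new non_standard_elements group, generate_zea_dataset gains an overwrite flag, and load_description/load_additional_elements read things back. A rough round-trip sketch using only names visible in this diff; the path, the raw_data array, and the assumption that generate_zea_dataset accepts path/raw_data/description keywords are placeholders, not confirmed by the package docs:

```python
# Hedged sketch (not from the package): round trip of the new additional-elements API.
import numpy as np

from zea.data.data_format import (
    DatasetElement,
    generate_zea_dataset,
    load_additional_elements,
    load_description,
)

elements = [
    # group_name defaults to "" now, so this lands directly under non_standard_elements
    DatasetElement(
        dataset_name="temperature",
        data=np.array(21.5),
        description="Temperature during the measurement",
        unit="unitless",
    ),
    # nested group: stored under non_standard_elements/lens
    DatasetElement(
        dataset_name="lens_profile",
        data=np.random.rand(100),
        description="An example lens profile",
        unit="mm",
        group_name="lens",
    ),
]

generate_zea_dataset(
    path="example.hdf5",  # placeholder path
    raw_data=np.zeros((1, 1, 64, 128, 1), dtype=np.float32),  # placeholder array
    description="Example dataset",
    additional_elements=elements,
    overwrite=True,  # new in 0.0.8: replace an existing file instead of raising
)

print(load_description("example.hdf5"))
for element in load_additional_elements("example.hdf5"):
    print(element.group_name, element.dataset_name, element.unit)
```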
zea/data/dataloader.py
CHANGED

@@ -5,7 +5,7 @@ H5 dataloader for loading images from zea datasets.
 import re
 from itertools import product
 from pathlib import Path
-from typing import List
+from typing import List, Tuple, Union

 import numpy as np

@@ -65,12 +65,12 @@ def generate_h5_indices(
         (
             "/folder/path_to_file.hdf5",
             "data/image",
-
+            (range(0, 1), slice(None, 256, None), slice(None, 256, None)),
         ),
         (
             "/folder/path_to_file.hdf5",
             "data/image",
-
+            (range(1, 2), slice(None, 256, None), slice(None, 256, None)),
         ),
         ...,
     ]
@@ -117,7 +117,7 @@ def generate_h5_indices(
         # Optionally limit frames to load from each file
         n_frames_in_file = min(n_frames_in_file, limit_n_frames)
         indices = [
-            range(i, i + block_size, frame_index_stride)
+            list(range(i, i + block_size, frame_index_stride))
             for i in range(0, n_frames_in_file - block_size + 1, block_step_size)
         ]
         yield [indices]
@@ -132,7 +132,7 @@ def generate_h5_indices(
             continue

         if additional_axes_iter:
-            axis_indices += [range(shape[axis]) for axis in additional_axes_iter]
+            axis_indices += [list(range(shape[axis])) for axis in additional_axes_iter]

         axis_indices = product(*axis_indices)

@@ -140,7 +140,7 @@ def generate_h5_indices(
         full_indices = [slice(size) for size in shape]
         for i, axis in enumerate([initial_frame_axis] + list(additional_axes_iter)):
             full_indices[axis] = axis_index[i]
-        indices.append((file, key, full_indices))
+        indices.append((file, key, tuple(full_indices)))

     if skipped_files > 0:
         log.warning(
@@ -321,7 +321,12 @@ class H5Generator(Dataset):
         initial_delay=INITIAL_RETRY_DELAY,
         retry_action=_h5_reopen_on_io_error,
     )
-    def load(
+    def load(
+        self,
+        file: File,
+        key: str,
+        indices: Tuple[Union[list, slice, int], ...] | List[int] | int | None = None,
+    ):
         """Extract data from hdf5 file.
         Args:
             file_name (str): name of the file to extract image from.
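The widened load signature and the regenerated docstring example both describe per-axis index tuples. A plain-NumPy illustration of how such a tuple selects data; this is not zea code, and the array below is just a stand-in for an HDF5 dataset:

```python
# Index tuples like (range(0, 1), slice(None, 256, None), slice(None, 256, None))
# combine one fancy index for the frame axis with slices for the spatial axes.
# h5py datasets accept the same kind of tuple, which is presumably what
# H5Generator.load forwards when reading a block.
import numpy as np

frames = np.zeros((10, 512, 512))  # dummy (n_frames, height, width) stack
indices = (list(range(0, 2)), slice(None, 256, None), slice(None, 256, None))

block = frames[indices]  # frames 0-1, cropped to the top-left 256x256 region
print(block.shape)  # (2, 256, 256)
```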
zea/data/datasets.py
CHANGED

@@ -31,9 +31,12 @@ Features

 """

+import functools
+import multiprocessing
+import os
 from collections import OrderedDict
 from pathlib import Path
-from typing import List
+from typing import List, Tuple

 import numpy as np
 import tqdm
@@ -48,14 +51,12 @@ from zea.data.preset_utils import (
     _hf_resolve_path,
 )
 from zea.datapaths import format_data_path
+from zea.internal.cache import cache_output
+from zea.internal.core import hash_elements
+from zea.internal.utils import calculate_file_hash, reduce_to_signature
 from zea.io_lib import search_file_tree
 from zea.tools.hf import HFPath
-from zea.utils import
-    calculate_file_hash,
-    date_string_to_readable,
-    get_date_string,
-    reduce_to_signature,
-)
+from zea.utils import date_string_to_readable, get_date_string

 _CHECK_MAX_DATASET_SIZE = 10000
 _VALIDATED_FLAG_FILE = "validated.flag"
@@ -104,16 +105,78 @@ class H5FileHandleCache:

         return self._file_handle_cache[file_path]

+    def close(self):
+        """Close all cached file handles."""
+        cache: OrderedDict = getattr(self, "_file_handle_cache", None)
+        if not cache:
+            return
+
+        # iterate over a static list to avoid mutation during iteration
+        for fh in list(cache.values()):
+            if fh is None:
+                continue
+            try:
+                # attempt to close unconditionally and swallow exceptions
+                fh.close()
+            except Exception:
+                # During interpreter shutdown or if the h5py internals are already
+                # torn down, close() can raise weird errors (e.g. TypeError).
+                # Swallow them here to avoid exceptions from __del__.
+                pass
+
+        cache.clear()  # clear the cache dict
+
     def __del__(self):
-
-
-
-
-
-
+        self.close()
+
+
+@cache_output("filepaths", "key", "_filepath_hash", verbose=True)
+def _find_h5_file_shapes(filepaths, key, _filepath_hash, verbose=True):
+    # NOTE: we cache the output of this function such that file loading over the network is
+    # faster for repeated calls with the same filepaths, key and _filepath_hash
+
+    assert _filepath_hash is not None

+    get_shape = functools.partial(File.get_shape, key=key)

-
+    if os.environ.get("ZEA_FIND_H5_SHAPES_PARALLEL", "1") in ("1", "true", "yes"):
+        # using multiprocessing to speed up reading hdf5 files
+        # make sure to call find_h5_file_shapes from within a function
+        # or use if __name__ == "__main__" to avoid freezing the main process
+
+        with multiprocessing.Pool() as pool:
+            file_shapes = list(
+                tqdm.tqdm(
+                    pool.imap(get_shape, filepaths),
+                    total=len(filepaths),
+                    desc="Getting file shapes in each h5 file",
+                    disable=not verbose,
+                )
+            )
+    else:
+        file_shapes = []
+        for file_path in tqdm.tqdm(
+            filepaths,
+            desc="Getting file shapes in each h5 file",
+            disable=not verbose,
+        ):
+            file_shapes.append(get_shape(file_path))
+
+    return file_shapes
+
+
+def _file_hash(filepaths):
+    # NOTE: this is really fast, even over network filesystemss
+    total_size = 0
+    modified_times = []
+    for fp in filepaths:
+        if os.path.isfile(fp):
+            total_size += os.path.getsize(fp)
+            modified_times.append(os.path.getmtime(fp))
+    return hash_elements([total_size, modified_times])
+
+
+def find_h5_files(paths: str | list, key: str = None) -> Tuple[List[str], List[tuple]]:
     """
     Find HDF5 files from a directory or list of directories and optionally retrieve their shapes.

@@ -121,17 +184,11 @@ def find_h5_files(paths: str | list, key: str = None, search_file_tree_kwargs: d
         paths (str or list): A single directory path, a list of directory paths,
             or a single HDF5 file path.
         key (str, optional): The key to get the file shapes for.
-        search_file_tree_kwargs (dict, optional): Additional keyword arguments for the
-            search_file_tree function. Defaults to None.

     Returns:
-        - file_paths (list): List of file paths to the HDF5 files.
-        - file_shapes (list): List of shapes of the HDF5 datasets.
+        - file_paths (list): List of file paths (str) to the HDF5 files.
+        - file_shapes (list): List of shapes (tuple) of the HDF5 datasets.
     """
-
-    if search_file_tree_kwargs is None:
-        search_file_tree_kwargs = {}
-
     # Make sure paths is a list
     if not isinstance(paths, (tuple, list)):
         paths = [paths]
@@ -152,14 +209,12 @@ def find_h5_files(paths: str | list, key: str = None, search_file_tree_kwargs: d
             file_paths.append(str(path))
             continue

-
-
-
-
-
-
-        file_shapes += dataset_info["file_shapes"]
-        file_paths += [str(Path(path) / file_path) for file_path in dataset_info["file_paths"]]
+        _filepaths = list(search_file_tree(path, filetypes=FILE_TYPES))
+        file_shapes += _find_h5_file_shapes(_filepaths, key, _file_hash(_filepaths))
+        file_paths += _filepaths
+
+    # Convert file paths to strings
+    file_paths = [str(fp) for fp in file_paths]

     return file_paths, file_shapes

@@ -172,8 +227,7 @@ class Folder:
     def __init__(
         self,
         folder_path: list[str] | list[Path],
-        key: str
-        search_file_tree_kwargs: dict | None = None,
+        key: str,
         validate: bool = True,
         hf_cache_dir: str = HF_DATASETS_DIR,
         **kwargs,
@@ -195,11 +249,8 @@ class Folder:

         self.folder_path = Path(folder_path)
         self.key = key
-        self.search_file_tree_kwargs = search_file_tree_kwargs
         self.validate = validate
-        self.file_paths, self.file_shapes = find_h5_files(
-            folder_path, self.key, self.search_file_tree_kwargs
-        )
+        self.file_paths, self.file_shapes = find_h5_files(folder_path, self.key)
         assert self.n_files > 0, f"No files in folder: {folder_path}"
         if self.validate:
             self.validate_folder()
@@ -241,7 +292,7 @@ class Folder:
             return

         num_frames_per_file = []
-
+        validated_successfully = True
         for file_path in tqdm.tqdm(
             self.file_paths,
             total=self.n_files,
@@ -253,9 +304,9 @@ class Folder:
                 validation_error_log.append(f"File {file_path} is not a valid zea dataset.\n{e}\n")
                 # convert into warning
                 log.warning(f"Error in file {file_path}.\n{e}")
-
+                validated_successfully = False

-        if not
+        if not validated_successfully:
             log.warning(
                 "Check warnings above for details. No validation file was created. "
                 f"See {validation_error_file_path} for details."
@@ -319,24 +370,27 @@ class Folder:
         data_types = self.get_data_types(self.file_paths[0])

         number_of_frames = sum(num_frames_per_file)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        try:
+            with open(validation_file_path, "w", encoding="utf-8") as f:
+                f.write(f"Dataset: {path}\n")
+                f.write(f"Validated on: {get_date_string()}\n")
+                f.write(f"Number of files: {self.n_files}\n")
+                f.write(f"Number of frames: {number_of_frames}\n")
+                f.write(f"Data types: {', '.join(data_types)}\n")
+                f.write(f"{'-' * 80}\n")
+                # write all file names (not entire path) with number of frames on a new line
+                for file_path, num_frames in zip(self.file_paths, num_frames_per_file):
+                    f.write(f"{file_path.name}: {num_frames}\n")
+                f.write(f"{'-' * 80}\n")
+
+            # Write the hash of the validation file
+            validation_file_hash = calculate_file_hash(validation_file_path)
+            with open(validation_file_path, "a", encoding="utf-8") as f:
+                # *** validation file hash *** (80 total line length)
+                f.write("*** validation file hash ***\n")
+                f.write(f"hash: {validation_file_hash}")
+        except Exception as e:
+            log.warning(f"Unable to write validation flag: {e}")

     def __repr__(self):
         return (
@@ -413,7 +467,6 @@ class Dataset(H5FileHandleCache):
         self,
         file_paths: List[str] | str,
         key: str,
-        search_file_tree_kwargs: dict | None = None,
         validate: bool = True,
         directory_splits: list | None = None,
         **kwargs,
@@ -424,9 +477,6 @@ class Dataset(H5FileHandleCache):
             file_paths (str or list): (list of) path(s) to the folder(s) containing the HDF5 file(s)
                 or list of HDF5 file paths. Can be a mixed list of folders and files.
             key (str): The key to access the HDF5 dataset.
-            search_file_tree_kwargs (dict, optional): Additional keyword arguments for the
-                search_file_tree function. These are only used when `file_paths` are directories.
-                Defaults to None.
             validate (bool, optional): Whether to validate the dataset. Defaults to True.
             directory_splits (list, optional): List of directory split by. Is a list of floats
                 between 0 and 1, with the same length as the number of file_paths given.
@@ -435,7 +485,6 @@ class Dataset(H5FileHandleCache):
         """
         super().__init__(**kwargs)
         self.key = key
-        self.search_file_tree_kwargs = search_file_tree_kwargs
         self.validate = validate

         self.file_paths, self.file_shapes = self.find_files_and_shapes(file_paths)
@@ -475,7 +524,7 @@ class Dataset(H5FileHandleCache):
             file_path = Path(file_path)

             if file_path.is_dir():
-                folder = Folder(file_path, self.key, self.
+                folder = Folder(file_path, self.key, self.validate)
                 file_paths += folder.file_paths
                 file_shapes += folder.file_shapes
                 del folder
@@ -539,14 +588,6 @@ class Dataset(H5FileHandleCache):
     def __str__(self):
         return f"Dataset with {self.n_files} files (key='{self.key}')"

-    def close(self):
-        """Close all cached file handles."""
-        for file in self._file_handle_cache.values():
-            if file is not None and file.id.valid:
-                file.close()
-        self._file_handle_cache.clear()
-        log.info("Closed all cached file handles.")
-
     def __enter__(self):
         return self

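The datasets.py changes drop search_file_tree_kwargs everywhere, move the per-file shape lookup into a cached helper keyed on a cheap size/mtime hash, and parallelise it with a multiprocessing pool that can be switched off through an environment variable. A minimal usage sketch under those assumptions; the dataset directory and key below are placeholders:

```python
# Hedged sketch of the reworked shape discovery, using only names visible in this
# diff (find_h5_files and the ZEA_FIND_H5_SHAPES_PARALLEL switch).
import os

# Shape lookup uses a multiprocessing.Pool by default; set this to "0" to force the
# sequential fallback (useful inside worker processes or when debugging).
os.environ["ZEA_FIND_H5_SHAPES_PARALLEL"] = "0"

from zea.data.datasets import find_h5_files

# search_file_tree_kwargs is gone in 0.0.8; the call is now just (paths, key).
file_paths, file_shapes = find_h5_files("/data/my_zea_dataset", key="data/image")
for file_path, shape in zip(file_paths, file_shapes):
    print(file_path, shape)
```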
|