zea-0.0.7-py3-none-any.whl → zea-0.0.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zea/__init__.py +1 -1
- zea/backend/tensorflow/dataloader.py +0 -4
- zea/beamform/pixelgrid.py +1 -1
- zea/data/__init__.py +0 -9
- zea/data/augmentations.py +221 -28
- zea/data/convert/__init__.py +1 -6
- zea/data/convert/__main__.py +123 -0
- zea/data/convert/camus.py +99 -39
- zea/data/convert/echonet.py +183 -82
- zea/data/convert/echonetlvh/README.md +2 -3
- zea/data/convert/echonetlvh/{convert_raw_to_usbmd.py → __init__.py} +173 -102
- zea/data/convert/echonetlvh/manual_rejections.txt +73 -0
- zea/data/convert/echonetlvh/precompute_crop.py +43 -64
- zea/data/convert/picmus.py +37 -40
- zea/data/convert/utils.py +86 -0
- zea/data/convert/{matlab.py → verasonics.py} +33 -61
- zea/data/data_format.py +124 -4
- zea/data/dataloader.py +12 -7
- zea/data/datasets.py +109 -70
- zea/data/file.py +91 -82
- zea/data/file_operations.py +496 -0
- zea/data/preset_utils.py +1 -1
- zea/display.py +7 -8
- zea/internal/checks.py +6 -12
- zea/internal/operators.py +4 -0
- zea/io_lib.py +108 -160
- zea/models/__init__.py +1 -1
- zea/models/diffusion.py +62 -11
- zea/models/lv_segmentation.py +2 -0
- zea/ops.py +398 -158
- zea/scan.py +18 -8
- zea/tensor_ops.py +82 -62
- zea/tools/fit_scan_cone.py +90 -160
- zea/tracking/__init__.py +16 -0
- zea/tracking/base.py +94 -0
- zea/tracking/lucas_kanade.py +474 -0
- zea/tracking/segmentation.py +110 -0
- zea/utils.py +11 -2
- {zea-0.0.7.dist-info → zea-0.0.8.dist-info}/METADATA +3 -1
- {zea-0.0.7.dist-info → zea-0.0.8.dist-info}/RECORD +43 -35
- {zea-0.0.7.dist-info → zea-0.0.8.dist-info}/WHEEL +0 -0
- {zea-0.0.7.dist-info → zea-0.0.8.dist-info}/entry_points.txt +0 -0
- {zea-0.0.7.dist-info → zea-0.0.8.dist-info}/licenses/LICENSE +0 -0
zea/data/data_format.py
CHANGED
```diff
@@ -6,6 +6,7 @@ import inspect
 from dataclasses import dataclass
 from pathlib import Path
 
+import h5py
 import numpy as np
 from keras.utils import pad_sequences
 
@@ -20,15 +21,15 @@ class DatasetElement:
     """Class to store a dataset element with a name, data, description and unit. Used to
     supply additional dataset elements to the generate_zea_dataset function."""
 
-    # The group name to store the dataset under. This can be a nested group, e.g.
-    # "scan/waveforms"
-    group_name: str
     # The name of the dataset. This will be the key in the group.
    dataset_name: str
     # The data to store in the dataset.
     data: np.ndarray
     description: str
     unit: str
+    # The group name to store the dataset under. This can be a nested group, e.g.
+    # "lens/profiles"
+    group_name: str = ""
 
 
 def generate_example_dataset(
@@ -111,9 +112,43 @@ def generate_example_dataset(
         focus_distances=focus_distances,
         polar_angles=polar_angles,
         azimuth_angles=azimuth_angles,
+        additional_elements=_generate_example_dataset_elements(),
+        description="This is an example dataset generated by zea",
     )
 
 
+def _generate_example_dataset_elements() -> list[DatasetElement]:
+    """Generates a list of example DatasetElement objects to be used as additional
+    elements in the generate_zea_dataset function.
+
+    Returns:
+        list: A list of DatasetElement objects.
+    """
+    example_elements = [
+        DatasetElement(
+            dataset_name="temperature",
+            data=np.array(42),
+            description="The temperature during the measurement",
+            unit="unitless",
+        ),
+        DatasetElement(
+            dataset_name="lens_profile",
+            data=np.random.rand(100),
+            description="An example lens profile",
+            unit="mm",
+            group_name="lens",
+        ),
+        DatasetElement(
+            dataset_name="lens_material",
+            data=np.array(["material1", "material2", "material3"], dtype=h5py.string_dtype()),
+            description="An example lens material list",
+            unit="unitless",
+            group_name="lens",
+        ),
+    ]
+    return example_elements
+
+
 def validate_input_data(raw_data, aligned_data, envelope_data, beamformed_data, image, image_sc):
     """
     Validates input data for generate_zea_dataset
@@ -498,9 +533,18 @@ def _write_datasets(
 
     # Add additional elements
     if additional_elements is not None:
+        # Write scan group
+        non_standard_elements_group_name = "non_standard_elements"
+        non_standard_elements_group = dataset.create_group(non_standard_elements_group_name)
+        non_standard_elements_group.attrs["description"] = (
+            "This group contains non-standard elements that can be added by the user."
+        )
         for element in additional_elements:
+            group_name = non_standard_elements_group_name
+            if element.group_name != "":
+                group_name += f"/{element.group_name}"
            _add_dataset(
-                group_name=element.group_name,
+                group_name=group_name,
                 name=element.dataset_name,
                 data=element.data,
                 description=element.description,
```
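Taken together with the example elements above, this writer hunk nests every user-supplied element under a dedicated `non_standard_elements` group, appending the element's optional `group_name` as a sub-path. A minimal inspection sketch of the resulting layout, assuming a file written with the example elements (`example.hdf5` is a placeholder path):

```python
import h5py

# "example.hdf5" is a hypothetical file written with the example elements above.
with h5py.File("example.hdf5", "r") as f:
    # visititems walks the group recursively, yielding paths relative to it.
    f["non_standard_elements"].visititems(
        lambda name, obj: print(name, "->", type(obj).__name__)
    )
    # Expected layout per the writer logic in this hunk (order may vary):
    #   lens               -> Group
    #   lens/lens_material -> Dataset
    #   lens/lens_profile  -> Dataset
    #   temperature        -> Dataset
```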
```diff
@@ -721,3 +765,79 @@ def generate_zea_dataset(
 
     validate_file(path)
     log.info(f"zea dataset written to {log.yellow(path)}")
+
+
+def load_description(path):
+    """Loads the description of a zea dataset.
+
+    Args:
+        path (str): The path to the zea dataset.
+
+    Returns:
+        str: The description of the dataset, or an empty string if not found.
+    """
+    path = Path(path)
+
+    with File(path, "r") as file:
+        description = file.attrs.get("description", "")
+
+    return description
+
+
+def load_additional_elements(path):
+    """Loads additional dataset elements from a zea dataset.
+
+    Args:
+        path (str): The path to the zea dataset.
+
+    Returns:
+        list: A list of DatasetElement objects.
+    """
+    path = Path(path)
+
+    with File(path, "r") as file:
+        if "non_standard_elements" not in file:
+            return []
+
+        additional_elements = _load_additional_elements_from_group(file, "non_standard_elements")
+
+    return additional_elements
+
+
+def _load_additional_elements_from_group(file, path):
+    """Recursively loads additional dataset elements from a group."""
+    elements = []
+    for name, item in file[path].items():
+        if isinstance(item, h5py.Dataset):
+            elements.append(_load_dataset_element_from_group(file, f"{path}/{name}"))
+        elif isinstance(item, h5py.Group):
+            elements.extend(_load_additional_elements_from_group(file, f"{path}/{name}"))
+    return elements
+
+
+def _load_dataset_element_from_group(file, path):
+    """Loads a specific dataset element from a group.
+
+    Args:
+        file (h5py.File): The HDF5 file object.
+        path (str): The full path to the dataset element,
+            e.g. "non_standard_elements/lens/lens_profile".
+
+    Returns:
+        DatasetElement: The loaded dataset element.
+    """
+
+    dataset = file[path]
+    description = dataset.attrs.get("description", "")
+    unit = dataset.attrs.get("unit", "")
+    data = dataset[()]
+
+    path_parts = path.split("/")
+
+    return DatasetElement(
+        dataset_name=path_parts[-1],
+        data=data,
+        description=description,
+        unit=unit,
+        group_name="/".join(path_parts[1:-1]),
+    )
```
zea/data/dataloader.py
CHANGED
```diff
@@ -5,7 +5,7 @@ H5 dataloader for loading images from zea datasets.
 import re
 from itertools import product
 from pathlib import Path
-from typing import List
+from typing import List, Tuple, Union
 
 import numpy as np
 
@@ -65,12 +65,12 @@ def generate_h5_indices(
         (
             "/folder/path_to_file.hdf5",
             "data/image",
-
+            (range(0, 1), slice(None, 256, None), slice(None, 256, None)),
         ),
         (
             "/folder/path_to_file.hdf5",
             "data/image",
-
+            (range(1, 2), slice(None, 256, None), slice(None, 256, None)),
         ),
         ...,
     ]
@@ -117,7 +117,7 @@ def generate_h5_indices(
         # Optionally limit frames to load from each file
         n_frames_in_file = min(n_frames_in_file, limit_n_frames)
         indices = [
-            range(i, i + block_size, frame_index_stride)
+            list(range(i, i + block_size, frame_index_stride))
             for i in range(0, n_frames_in_file - block_size + 1, block_step_size)
         ]
         yield [indices]
@@ -132,7 +132,7 @@ def generate_h5_indices(
             continue
 
         if additional_axes_iter:
-            axis_indices += [range(shape[axis]) for axis in additional_axes_iter]
+            axis_indices += [list(range(shape[axis])) for axis in additional_axes_iter]
 
         axis_indices = product(*axis_indices)
 
@@ -140,7 +140,7 @@ def generate_h5_indices(
             full_indices = [slice(size) for size in shape]
             for i, axis in enumerate([initial_frame_axis] + list(additional_axes_iter)):
                 full_indices[axis] = axis_index[i]
-            indices.append((file, key, full_indices))
+            indices.append((file, key, tuple(full_indices)))
 
         if skipped_files > 0:
             log.warning(
@@ -321,7 +321,12 @@ class H5Generator(Dataset):
         initial_delay=INITIAL_RETRY_DELAY,
         retry_action=_h5_reopen_on_io_error,
     )
-    def load(
+    def load(
+        self,
+        file: File,
+        key: str,
+        indices: Tuple[Union[list, slice, int], ...] | List[int] | int | None = None,
+    ):
         """Extract data from hdf5 file.
         Args:
             file_name (str): name of the file to extract image from.
```
zea/data/datasets.py
CHANGED
```diff
@@ -31,9 +31,12 @@ Features
 
 """
 
+import functools
+import multiprocessing
+import os
 from collections import OrderedDict
 from pathlib import Path
-from typing import List
+from typing import List, Tuple
 
 import numpy as np
 import tqdm
@@ -48,16 +51,12 @@ from zea.data.preset_utils import (
     _hf_resolve_path,
 )
 from zea.datapaths import format_data_path
-from zea.internal.
-
-
-)
+from zea.internal.cache import cache_output
+from zea.internal.core import hash_elements
+from zea.internal.utils import calculate_file_hash, reduce_to_signature
 from zea.io_lib import search_file_tree
 from zea.tools.hf import HFPath
-from zea.utils import (
-    date_string_to_readable,
-    get_date_string,
-)
+from zea.utils import date_string_to_readable, get_date_string
 
 _CHECK_MAX_DATASET_SIZE = 10000
 _VALIDATED_FLAG_FILE = "validated.flag"
@@ -106,16 +105,78 @@ class H5FileHandleCache:
 
         return self._file_handle_cache[file_path]
 
+    def close(self):
+        """Close all cached file handles."""
+        cache: OrderedDict = getattr(self, "_file_handle_cache", None)
+        if not cache:
+            return
+
+        # iterate over a static list to avoid mutation during iteration
+        for fh in list(cache.values()):
+            if fh is None:
+                continue
+            try:
+                # attempt to close unconditionally and swallow exceptions
+                fh.close()
+            except Exception:
+                # During interpreter shutdown or if the h5py internals are already
+                # torn down, close() can raise weird errors (e.g. TypeError).
+                # Swallow them here to avoid exceptions from __del__.
+                pass
+
+        cache.clear()  # clear the cache dict
+
     def __del__(self):
-
-
-
-
-
-
+        self.close()
+
+
+@cache_output("filepaths", "key", "_filepath_hash", verbose=True)
+def _find_h5_file_shapes(filepaths, key, _filepath_hash, verbose=True):
+    # NOTE: we cache the output of this function such that file loading over the network is
+    # faster for repeated calls with the same filepaths, key and _filepath_hash
+
+    assert _filepath_hash is not None
+
+    get_shape = functools.partial(File.get_shape, key=key)
+
+    if os.environ.get("ZEA_FIND_H5_SHAPES_PARALLEL", "1") in ("1", "true", "yes"):
+        # using multiprocessing to speed up reading hdf5 files
+        # make sure to call find_h5_file_shapes from within a function
+        # or use if __name__ == "__main__" to avoid freezing the main process
+
+        with multiprocessing.Pool() as pool:
+            file_shapes = list(
+                tqdm.tqdm(
+                    pool.imap(get_shape, filepaths),
+                    total=len(filepaths),
+                    desc="Getting file shapes in each h5 file",
+                    disable=not verbose,
+                )
+            )
+    else:
+        file_shapes = []
+        for file_path in tqdm.tqdm(
+            filepaths,
+            desc="Getting file shapes in each h5 file",
+            disable=not verbose,
+        ):
+            file_shapes.append(get_shape(file_path))
 
+    return file_shapes
 
-
+
+def _file_hash(filepaths):
+    # NOTE: this is really fast, even over network filesystems
+    total_size = 0
+    modified_times = []
+    for fp in filepaths:
+        if os.path.isfile(fp):
+            total_size += os.path.getsize(fp)
+            modified_times.append(os.path.getmtime(fp))
+    return hash_elements([total_size, modified_times])
+
+
+def find_h5_files(paths: str | list, key: str = None) -> Tuple[List[str], List[tuple]]:
     """
     Find HDF5 files from a directory or list of directories and optionally retrieve their shapes.
 
@@ -123,17 +184,11 @@ def find_h5_files(paths: str | list, key: str = None, search_file_tree_kwargs: d
         paths (str or list): A single directory path, a list of directory paths,
             or a single HDF5 file path.
         key (str, optional): The key to get the file shapes for.
-        search_file_tree_kwargs (dict, optional): Additional keyword arguments for the
-            search_file_tree function. Defaults to None.
 
     Returns:
-        - file_paths (list): List of file paths to the HDF5 files.
-        - file_shapes (list): List of shapes of the HDF5 datasets.
+        - file_paths (list): List of file paths (str) to the HDF5 files.
+        - file_shapes (list): List of shapes (tuple) of the HDF5 datasets.
     """
-
-    if search_file_tree_kwargs is None:
-        search_file_tree_kwargs = {}
-
     # Make sure paths is a list
     if not isinstance(paths, (tuple, list)):
         paths = [paths]
@@ -154,14 +209,12 @@ def find_h5_files(paths: str | list, key: str = None, search_file_tree_kwargs: d
             file_paths.append(str(path))
             continue
 
-
-
-
-
-
-
-        file_shapes += dataset_info["file_shapes"]
-        file_paths += [str(Path(path) / file_path) for file_path in dataset_info["file_paths"]]
+        _filepaths = list(search_file_tree(path, filetypes=FILE_TYPES))
+        file_shapes += _find_h5_file_shapes(_filepaths, key, _file_hash(_filepaths))
+        file_paths += _filepaths
+
+    # Convert file paths to strings
+    file_paths = [str(fp) for fp in file_paths]
 
     return file_paths, file_shapes
 
```
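The directory branch no longer forwards `search_file_tree_kwargs`; shape discovery is now a separate step whose results are cached via `cache_output`, keyed on a cheap hash of total file size plus modification times, with an opt-out environment variable for the multiprocessing fast path. A minimal usage sketch (the directory and key are placeholders):

```python
import os

# Optional: disable the multiprocessing fast path, e.g. where fork-based
# pools are problematic. It defaults to enabled ("1").
os.environ["ZEA_FIND_H5_SHAPES_PARALLEL"] = "0"

from zea.data.datasets import find_h5_files

if __name__ == "__main__":  # the guard matters when the pool is enabled
    # The first call reads one shape per file; unchanged files hit the cache
    # on repeated calls, since _file_hash only stats sizes and mtimes.
    file_paths, file_shapes = find_h5_files("/data/ultrasound", key="data/image")
```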
```diff
@@ -174,8 +227,7 @@ class Folder:
     def __init__(
         self,
         folder_path: list[str] | list[Path],
-        key: str
-        search_file_tree_kwargs: dict | None = None,
+        key: str,
         validate: bool = True,
         hf_cache_dir: str = HF_DATASETS_DIR,
         **kwargs,
@@ -197,11 +249,8 @@ class Folder:
 
         self.folder_path = Path(folder_path)
         self.key = key
-        self.search_file_tree_kwargs = search_file_tree_kwargs
         self.validate = validate
-        self.file_paths, self.file_shapes = find_h5_files(
-            folder_path, self.key, self.search_file_tree_kwargs
-        )
+        self.file_paths, self.file_shapes = find_h5_files(folder_path, self.key)
         assert self.n_files > 0, f"No files in folder: {folder_path}"
         if self.validate:
             self.validate_folder()
@@ -321,24 +370,27 @@ class Folder:
         data_types = self.get_data_types(self.file_paths[0])
 
         number_of_frames = sum(num_frames_per_file)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        try:
+            with open(validation_file_path, "w", encoding="utf-8") as f:
+                f.write(f"Dataset: {path}\n")
+                f.write(f"Validated on: {get_date_string()}\n")
+                f.write(f"Number of files: {self.n_files}\n")
+                f.write(f"Number of frames: {number_of_frames}\n")
+                f.write(f"Data types: {', '.join(data_types)}\n")
+                f.write(f"{'-' * 80}\n")
+                # write all file names (not entire path) with number of frames on a new line
+                for file_path, num_frames in zip(self.file_paths, num_frames_per_file):
+                    f.write(f"{file_path.name}: {num_frames}\n")
+                f.write(f"{'-' * 80}\n")
+
+            # Write the hash of the validation file
+            validation_file_hash = calculate_file_hash(validation_file_path)
+            with open(validation_file_path, "a", encoding="utf-8") as f:
+                # *** validation file hash *** (80 total line length)
+                f.write("*** validation file hash ***\n")
+                f.write(f"hash: {validation_file_hash}")
+        except Exception as e:
+            log.warning(f"Unable to write validation flag: {e}")
 
     def __repr__(self):
         return (
@@ -415,7 +467,6 @@ class Dataset(H5FileHandleCache):
         self,
         file_paths: List[str] | str,
         key: str,
-        search_file_tree_kwargs: dict | None = None,
         validate: bool = True,
         directory_splits: list | None = None,
         **kwargs,
@@ -426,9 +477,6 @@ class Dataset(H5FileHandleCache):
             file_paths (str or list): (list of) path(s) to the folder(s) containing the HDF5 file(s)
                 or list of HDF5 file paths. Can be a mixed list of folders and files.
             key (str): The key to access the HDF5 dataset.
-            search_file_tree_kwargs (dict, optional): Additional keyword arguments for the
-                search_file_tree function. These are only used when `file_paths` are directories.
-                Defaults to None.
             validate (bool, optional): Whether to validate the dataset. Defaults to True.
             directory_splits (list, optional): List of directory split by. Is a list of floats
                 between 0 and 1, with the same length as the number of file_paths given.
@@ -437,7 +485,6 @@ class Dataset(H5FileHandleCache):
         """
         super().__init__(**kwargs)
         self.key = key
-        self.search_file_tree_kwargs = search_file_tree_kwargs
         self.validate = validate
 
         self.file_paths, self.file_shapes = self.find_files_and_shapes(file_paths)
@@ -477,7 +524,7 @@ class Dataset(H5FileHandleCache):
             file_path = Path(file_path)
 
             if file_path.is_dir():
-                folder = Folder(file_path, self.key, self.
+                folder = Folder(file_path, self.key, self.validate)
                 file_paths += folder.file_paths
                 file_shapes += folder.file_shapes
                 del folder
@@ -541,14 +588,6 @@ class Dataset(H5FileHandleCache):
     def __str__(self):
         return f"Dataset with {self.n_files} files (key='{self.key}')"
 
-    def close(self):
-        """Close all cached file handles."""
-        for file in self._file_handle_cache.values():
-            if file is not None and file.id.valid:
-                file.close()
-        self._file_handle_cache.clear()
-        log.info("Closed all cached file handles.")
-
     def __enter__(self):
         return self
 
```
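`Dataset.close` disappears here because the equivalent, more defensive logic moved up to `H5FileHandleCache.close` in the first hunk of this file, with `__del__` delegating to it. A minimal lifecycle sketch, assuming `Dataset` is constructed with a directory and key as in its docstring (paths are placeholders):

```python
from zea.data.datasets import Dataset

# Placeholder directory and key.
dataset = Dataset("/data/ultrasound", key="data/image")
try:
    ...  # file handles are opened lazily and cached per file while reading
finally:
    # Inherited from H5FileHandleCache: closes every cached handle and
    # swallows errors that can occur during interpreter teardown.
    dataset.close()
```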