zarrify 0.0.3__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zarrify/__about__.py +1 -3
- zarrify/__init__.py +3 -7
- zarrify/formats/__init__.py +0 -0
- zarrify/formats/mrc.py +30 -18
- zarrify/formats/n5.py +181 -9
- zarrify/formats/tiff.py +15 -5
- zarrify/formats/tiff_stack.py +16 -6
- zarrify/to_zarr.py +136 -88
- zarrify/utils/__init__.py +0 -0
- zarrify/utils/dask_utils.py +9 -6
- zarrify/utils/volume.py +15 -1
- zarrify-0.0.8.dist-info/METADATA +65 -0
- zarrify-0.0.8.dist-info/RECORD +17 -0
- {zarrify-0.0.3.dist-info → zarrify-0.0.8.dist-info}/WHEEL +2 -1
- zarrify-0.0.8.dist-info/entry_points.txt +2 -0
- zarrify-0.0.8.dist-info/top_level.txt +1 -0
- zarrify-0.0.3.dist-info/METADATA +0 -24
- zarrify-0.0.3.dist-info/RECORD +0 -14
- zarrify-0.0.3.dist-info/entry_points.txt +0 -3
- {zarrify-0.0.3.dist-info → zarrify-0.0.8.dist-info/licenses}/LICENSE.txt +0 -0
zarrify/__about__.py
CHANGED
zarrify/__init__.py
CHANGED
@@ -1,8 +1,4 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
# SPDX-License-Identifier: MIT
|
1
|
+
from zarrify.to_zarr import to_zarr
|
2
|
+
from zarrify.__about__ import __version__
|
4
3
|
|
5
|
-
|
6
|
-
from zarrify import to_zarr
|
7
|
-
|
8
|
-
__all__ = ["to_zarr"]
|
4
|
+
__all__ = ["to_zarr", "__version__"]
|
File without changes
|
zarrify/formats/mrc.py
CHANGED
@@ -7,7 +7,10 @@ from dask.distributed import Client, wait
|
|
7
7
|
from toolz import partition_all
|
8
8
|
import time
|
9
9
|
from zarrify.utils.volume import Volume
|
10
|
-
|
10
|
+
from abc import ABCMeta
|
11
|
+
from numcodecs import Zstd
|
12
|
+
import logging
|
13
|
+
import copy
|
11
14
|
|
12
15
|
class Mrc3D(Volume):
|
13
16
|
|
@@ -28,25 +31,16 @@ class Mrc3D(Volume):
|
|
28
31
|
|
29
32
|
self.memmap = mrcfile.mmap(self.src_path, mode="r")
|
30
33
|
self.ndim = self.memmap.data.ndim
|
31
|
-
self.shape = self.memmap.shape
|
34
|
+
self.shape = self.memmap.data.shape
|
32
35
|
self.dtype = self.memmap.data.dtype
|
33
36
|
|
34
|
-
def save_chunk(self, z_arr: zarr.Array, chunk_slice: Tuple[slice, ...]):
|
35
|
-
"""Copies data from a particular part of the input mrc array into a specific chunk of the output zarr array.
|
36
|
-
|
37
|
-
Args:
|
38
|
-
z_arr (zarr.core.Array): output zarr array object
|
39
|
-
chunk_slice (Tuple[slice, ...]): slice of the mrc array to copy.
|
40
|
-
"""
|
41
|
-
mrc_file = mrcfile.mmap(self.src_path, mode="r")
|
42
|
-
|
43
|
-
if not (mrc_file.data[chunk_slice] == 0).all():
|
44
|
-
z_arr[chunk_slice] = mrc_file.data[chunk_slice]
|
45
37
|
|
46
38
|
def write_to_zarr(
|
47
39
|
self,
|
48
|
-
|
40
|
+
dest: str,
|
49
41
|
client: Client,
|
42
|
+
zarr_chunks : list[int],
|
43
|
+
comp : ABCMeta = Zstd(level=6),
|
50
44
|
):
|
51
45
|
"""Use mrcfile memmap to access small parts of the mrc file and write them into zarr chunks.
|
52
46
|
|
@@ -54,7 +48,13 @@ class Mrc3D(Volume):
|
|
54
48
|
dest_path (str): path to the zarr group where the output dataset is stored.
|
55
49
|
client (Client): instance of a dask client
|
56
50
|
"""
|
51
|
+
|
52
|
+
logging.basicConfig(level=logging.INFO,
|
53
|
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
57
54
|
|
55
|
+
src_path = copy.copy(self.src_path)
|
56
|
+
z_arr = self.get_output_array(dest, zarr_chunks, comp)
|
57
|
+
|
58
58
|
out_slices = slices_from_chunks(
|
59
59
|
normalize_chunks(z_arr.chunks, shape=z_arr.shape)
|
60
60
|
)
|
@@ -62,12 +62,24 @@ class Mrc3D(Volume):
|
|
62
62
|
|
63
63
|
for idx, part in enumerate(out_slices_partitioned):
|
64
64
|
|
65
|
-
|
65
|
+
logging.info(f"{idx + 1} / {len(out_slices_partitioned)}")
|
66
66
|
start = time.time()
|
67
|
-
fut = client.map(lambda v:
|
68
|
-
|
67
|
+
fut = client.map(lambda v: save_chunk(src_path, z_arr, v), part)
|
68
|
+
logging.info(
|
69
69
|
f"Submitted {len(part)} tasks to the scheduler in {time.time()- start}s"
|
70
70
|
)
|
71
71
|
# wait for all the futures to complete
|
72
72
|
result = wait(fut)
|
73
|
-
|
73
|
+
logging.info(f"Completed {len(part)} tasks in {time.time() - start}s")
|
74
|
+
|
75
|
+
def save_chunk(src_path, z_arr: zarr.Array, chunk_slice: Tuple[slice, ...]):
    """Copy one region of the source MRC volume into the output zarr array.

    The MRC file is re-opened as a memory map on every call, so the function
    only needs the file path and not an already-open handle.

    Args:
        src_path: path to the source MRC file.
        z_arr (zarr.Array): output zarr array object.
        chunk_slice (Tuple[slice, ...]): region to copy; the same slices index
            both the MRC data and ``z_arr``.
    """
    # The context manager closes the memory map once the region is copied;
    # without it every call left an open map behind.
    with mrcfile.mmap(src_path, mode="r") as mrc_file:
        # Slice once and reuse the result for both the test and the write.
        chunk = mrc_file.data[chunk_slice]

        # Regions that contain only zeros are not written at all.
        if not (chunk == 0).all():
            z_arr[chunk_slice] = chunk
|
zarrify/formats/n5.py
CHANGED
@@ -1,8 +1,25 @@
|
|
1
1
|
import zarr
|
2
2
|
import os
|
3
3
|
from zarrify.utils.volume import Volume
|
4
|
+
import json
|
5
|
+
from itertools import chain
|
6
|
+
import natsort
|
7
|
+
from operator import itemgetter
|
8
|
+
import pydantic_zarr as pz
|
9
|
+
import dask.array as da
|
10
|
+
from dask.array.core import slices_from_chunks, normalize_chunks
|
11
|
+
from toolz import partition_all
|
12
|
+
from dask.distributed import Client, wait
|
13
|
+
from typing import Tuple
|
14
|
+
import time
|
15
|
+
import numpy as np
|
16
|
+
import logging
|
17
|
+
import pint
|
18
|
+
from abc import ABCMeta
|
19
|
+
from numcodecs import Zstd
|
4
20
|
|
5
|
-
class
|
21
|
+
class N5Group(Volume):
|
22
|
+
|
6
23
|
def __init__(
|
7
24
|
self,
|
8
25
|
src_path: str,
|
@@ -17,15 +34,14 @@ class N53D(Volume):
|
|
17
34
|
input_filepath (str): path to source tiff file.
|
18
35
|
"""
|
19
36
|
super().__init__(src_path, axes, scale, translation, units)
|
20
|
-
self.store_path, self.
|
37
|
+
self.store_path, self.path = self.separate_store_path(src_path, '')
|
21
38
|
self.n5_store = zarr.N5Store(self.store_path)
|
22
|
-
self.n5_arr = zarr.open(store = self.n5_store, path=self.arr_path, mode='r')
|
23
|
-
|
24
|
-
self.shape = self.n5_arr.shape
|
25
|
-
self.dtype = self.n5_arr.dtype
|
26
|
-
self.chunks = self.n5_arr.chunks
|
27
39
|
|
28
|
-
|
40
|
+
self.n5_obj = zarr.open(store = self.n5_store, path=self.path, mode='r')
|
41
|
+
|
42
|
+
def separate_store_path(self,
|
43
|
+
store : str,
|
44
|
+
path : str):
|
29
45
|
"""
|
30
46
|
sometimes you can pass a total os path to node, leading to
|
31
47
|
an empty('') node.path attribute.
|
@@ -42,4 +58,160 @@ class N53D(Volume):
|
|
42
58
|
new_store, path_prefix = os.path.split(store)
|
43
59
|
if ".n5" in path_prefix:
|
44
60
|
return store, path
|
45
|
-
return separate_store_path(new_store, os.path.join(path_prefix, path))
|
61
|
+
return self.separate_store_path(new_store, os.path.join(path_prefix, path))
|
62
|
+
|
63
|
+
#creates attributes.json recursively within n5 group, if missing
|
64
|
+
def reconstruct_json(self,
                     n5src : str):
    """Recursively add a minimal ``attributes.json`` to directories that lack one.

    Existing ``attributes.json`` files are left untouched.

    Args:
        n5src (str): directory to start from; it and every subdirectory
            below it are visited.
    """
    dir_list = os.listdir(n5src)
    if "attributes.json" not in dir_list:
        # Named n5_attrs (not ``dict``) so the builtin is not shadowed.
        n5_attrs = {"n5": "2.0.0"}
        with open(os.path.join(n5src, "attributes.json"), "w", encoding="utf-8") as jfile:
            json.dump(n5_attrs, jfile, indent=4)
    for obj in dir_list:
        # Build the child path once and reuse it for the test and the recursion.
        child = os.path.join(n5src, obj)
        if os.path.isdir(child):
            self.reconstruct_json(child)
|
74
|
+
|
75
|
+
def apply_ome_template(self, zgroup : zarr.Group):
    """Build the OME-NGFF (version 0.4) ``multiscales`` skeleton for a group.

    Reads the 'axes' and 'units' attributes of ``zgroup``. The returned
    template has no 'datasets' entry.

    Args:
        zgroup (zarr.Group): group whose attributes describe the axes.

    Returns:
        dict: ``{"multiscales": [entry]}`` where ``entry`` holds 'axes',
        'version', 'name' and 'coordinateTransformations'.
    """
    # normalize input units, i.e. 'meter' or 'm' -> 'meter'
    ureg = pint.UnitRegistry()
    # str() turns each pint Unit into its name; a raw Unit object cannot be
    # serialized to JSON when the template is stored as an attribute.
    units_list = [str(ureg.Unit(unit)) for unit in zgroup.attrs['units']]

    axes = [
        {"name": axis, "type": "space", "unit": unit}
        for (axis, unit) in zip(zgroup.attrs['axes'], units_list)
    ]

    # NOTE(review): scale and translation are hard-coded to three 1.0 values;
    # a translation of 1.0 looks unusual for an identity template - confirm.
    transforms = [
        {"type": "scale", "scale": [1.0, 1.0, 1.0]},
        {"type": "translation", "translation": [1.0, 1.0, 1.0]},
    ]

    # populate .zattrs
    z_attrs = {"multiscales": [{}]}
    z_attrs['multiscales'][0]['axes'] = axes
    z_attrs['multiscales'][0]['version'] = '0.4'
    z_attrs['multiscales'][0]['name'] = zgroup.name
    z_attrs['multiscales'][0]['coordinateTransformations'] = transforms

    return z_attrs
|
94
|
+
|
95
|
+
def normalize_to_omengff(self,
                         zgroup : zarr.Group):
    """Recursively attach ``multiscales`` metadata to groups that have a 'scales' attribute.

    NOTE(review): the nesting below is reconstructed from a listing that had
    lost its indentation - confirm against the packaged file.

    Args:
        zgroup (zarr.Group): group whose members are inspected; matching
            groups are modified in place through their ``attrs``.
    """
    group_keys = zgroup.keys()

    # '/' is chained after the member keys so the group reachable through the
    # key '/' is examined as well (presumably the root - confirm).
    for key in chain(group_keys, '/'):
        if isinstance(zgroup[key], zarr.hierarchy.Group):
            if key!='/':
                # Descend into the child group before handling it here.
                self.normalize_to_omengff(zgroup[key])
            if 'scales' in zgroup[key].attrs.asdict():
                zattrs = self.apply_ome_template(zgroup[key])
                zarrays = zgroup[key].arrays(recurse=True)

                unsorted_datasets = []
                for arr in zarrays:
                    # Element 1 of each yielded item is used as the array
                    # object (presumably (name, array) pairs - confirm).
                    unsorted_datasets.append(self.ome_dataset_metadata(arr[1], zgroup[key]))

                #1.apply natural sort to organize datasets metadata array for different resolution degrees (s0 -> s10)
                #2.add datasets metadata to the omengff template
                zattrs['multiscales'][0]['datasets'] = natsort.natsorted(unsorted_datasets, key=itemgetter(*['path']))
                zgroup[key].attrs['multiscales'] = zattrs['multiscales']
|
115
|
+
|
116
|
+
def ome_dataset_metadata(self,
                         n5arr : zarr.Array,
                         group : zarr.Group):
    """Build the OME-NGFF 'datasets' entry for one array of a multiscale group.

    ``self`` is declared explicitly because the method is invoked as
    ``self.ome_dataset_metadata(array, group)``; without it that call raises
    a TypeError for the extra positional argument.

    Args:
        n5arr (zarr.Array): array whose ``attrs['transform']`` provides the
            'scale' and 'translate' values.
        group (zarr.Group): group the array path is made relative to.

    Returns:
        dict: ``{"path": ..., "coordinateTransformations": [scale, translation]}``.
    """
    arr_attrs_n5 = n5arr.attrs['transform']
    dataset_meta = {
        # Path of the array relative to its multiscale group.
        "path": os.path.relpath(n5arr.path, group.path),
        "coordinateTransformations": [
            {'type': 'scale', 'scale': arr_attrs_n5['scale']},
            {'type': 'translation', 'translation': arr_attrs_n5['translate']},
        ],
    }

    return dataset_meta
|
130
|
+
|
131
|
+
# d=groupspec.to_dict(),
|
132
|
+
def normalize_groupspec(self, d, comp):
    """Rewrite compressor and dimension-separator entries of a nested dict in place.

    Args:
        d (dict): nested dictionary to update; nested dict values are
            visited recursively.
        comp: object providing ``get_config()``; its result replaces every
            value stored under a "compressor" key.
    """
    for k,v in d.items():
        if k == "compressor":
            d[k] = comp.get_config()

        elif k == 'dimension_separator':
            # Every dimension separator is set to '/'.
            d[k] = '/'
        elif isinstance(v, dict):
            self.normalize_groupspec(v, comp)
|
141
|
+
|
142
|
+
def copy_n5_tree(self, n5_root, z_store, comp):
    """Recreate the hierarchy described by ``n5_root`` inside ``z_store``.

    Args:
        n5_root: zarr group the specification is read from.
        z_store: store the normalized specification is written to.
        comp: compressor handed to ``normalize_groupspec``.

    Returns:
        The result of ``GroupSpec.to_zarr`` for the store root (path '').
    """
    spec_n5 = pz.GroupSpec.from_zarr(n5_root)
    spec_n5_dict = spec_n5.dict()
    # Swap compressor / dimension separator settings before materializing.
    self.normalize_groupspec(spec_n5_dict, comp)
    spec_n5 = pz.GroupSpec(**spec_n5_dict)
    return spec_n5.to_zarr(z_store, path= '')
|
148
|
+
|
149
|
+
|
150
|
+
#creates attributes.json, if missing
|
151
|
+
def reconstruct_json(self, n5src):
    """Recursively add a minimal ``attributes.json`` to directories that lack one.

    Existing ``attributes.json`` files are left untouched.

    NOTE(review): the class defines ``reconstruct_json`` twice with the same
    logic; this later definition is the one Python keeps - consider removing
    the earlier copy.

    Args:
        n5src: directory to start from; it and every subdirectory below it
            are visited.
    """
    dir_list = os.listdir(n5src)
    if "attributes.json" not in dir_list:
        # Named n5_attrs (not ``dict``) so the builtin is not shadowed.
        n5_attrs = {"n5": "2.0.0"}
        with open(os.path.join(n5src, "attributes.json"), "w", encoding="utf-8") as jfile:
            json.dump(n5_attrs, jfile, indent=4)
    for obj in dir_list:
        # Build the child path once and reuse it for the test and the recursion.
        child = os.path.join(n5src, obj)
        if os.path.isdir(child):
            self.reconstruct_json(child)
|
160
|
+
|
161
|
+
|
162
|
+
|
163
|
+
def save_chunk(
        self,
        source: zarr.Array,
        dest: zarr.Array,
        out_slices: Tuple[slice, ...],
        invert: bool):
    """Copy one region from ``source`` to ``dest``, skipping all-zero data.

    Args:
        source (zarr.Array): array to read from.
        dest (zarr.Array): array to write to; the same region is addressed
            in both arrays.
        out_slices (Tuple[slice, ...]): region to copy.
        invert (bool): when true, ``np.invert`` is applied to the data before
            it is written.

    Returns:
        int: 1. NOTE(review): the return statement is placed at function
        level here; the listing this was taken from had lost its indentation,
        so confirm against the packaged file.
    """
    # Index the source with a tuple even if another sequence is passed.
    in_slices = tuple(out_slices)
    source_data = source[in_slices]
    # only store source_data if it is not all 0s
    if not (source_data == 0).all():
        dest[out_slices] = np.invert(source_data) if invert else source_data
    return 1
|
179
|
+
|
180
|
+
def write_to_zarr(
        self,
        dest: str,
        client: Client,
        zarr_chunks : list[int],
        comp : ABCMeta = Zstd(level=6),
        ):
    """Copy every array of the source N5 store into a zarr hierarchy at ``dest``.

    The hierarchy is created first, OME metadata is added where applicable,
    and the array data is then copied in batches of dask tasks.

    Args:
        dest (str): path of the output zarr container.
        client (Client): dask client the copy tasks are submitted to.
        zarr_chunks (list[int]): not referenced in this method body.
            NOTE(review): output chunking appears to come from the copied
            N5 specification - confirm.
        comp (ABCMeta, optional): compressor applied to the output arrays.
            Defaults to Zstd(level=6).
    """

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    # Make sure every directory under the source path has an attributes.json.
    self.reconstruct_json(os.path.join(self.store_path, self.path))

    # input n5 arrays list to convert
    # NOTE(review): the group is opened at the store root; self.path is not
    # passed here - confirm that is intended.
    n5_root = zarr.open_group(self.n5_store, mode = 'r')
    zarr_arrays = n5_root.arrays(recurse=True)
    # copy input n5 tree structure to output zarr and add ome-metadata, when N5 metadata is present
    z_store = zarr.NestedDirectoryStore(dest)
    zg = self.copy_n5_tree(n5_root, z_store, comp)

    self.normalize_to_omengff(zg)

    for item in zarr_arrays:
        # Element 1 of each item is used as the array object.
        n5arr = item[1]
        dest_arr = zarr.open_array(os.path.join(dest, n5arr.path), mode='a')

        # One slice tuple per chunk of the destination array.
        out_slices = slices_from_chunks(normalize_chunks(dest_arr.chunks, shape=dest_arr.shape))
        # break the slices up into batches, to make things easier for the dask scheduler
        out_slices_partitioned = tuple(partition_all(100000, out_slices))

        for idx, part in enumerate(out_slices_partitioned):
            logging.info(f'{idx + 1} / {len(out_slices_partitioned)}')
            start = time.time()
            fut = client.map(lambda v: self.save_chunk(n5arr, dest_arr, v, invert=False), part)
            logging.info(f'Submitted {len(part)} tasks to the scheduler in {time.time()- start}s')
            # wait for all the futures to complete
            result = wait(fut)
            logging.info(f'Completed {len(part)} tasks in {time.time() - start}s')
|
zarrify/formats/tiff.py
CHANGED
@@ -7,6 +7,9 @@ import time
|
|
7
7
|
import dask.array as da
|
8
8
|
import copy
|
9
9
|
from zarrify.utils.volume import Volume
|
10
|
+
from abc import ABCMeta
|
11
|
+
from numcodecs import Zstd
|
12
|
+
import logging
|
10
13
|
|
11
14
|
|
12
15
|
class Tiff3D(Volume):
|
@@ -32,22 +35,29 @@ class Tiff3D(Volume):
|
|
32
35
|
self.shape = self.zarr_arr.shape
|
33
36
|
self.dtype = self.zarr_arr.dtype
|
34
37
|
|
35
|
-
def write_to_zarr(self,
|
36
|
-
|
38
|
+
def write_to_zarr(self,
|
39
|
+
dest: str,
|
40
|
+
client: Client,
|
41
|
+
zarr_chunks : list[int],
|
42
|
+
comp : ABCMeta = Zstd(level=6),
|
43
|
+
):
|
44
|
+
|
45
|
+
z_arr = self.get_output_array(dest, zarr_chunks, comp)
|
46
|
+
chunks_list = np.arange(0, z_arr.shape[0], z_arr.chunks[0])
|
37
47
|
|
38
48
|
src_path = copy.copy(self.src_path)
|
39
49
|
|
40
50
|
start = time.time()
|
41
51
|
fut = client.map(
|
42
|
-
lambda v: write_volume_slab_to_zarr(v,
|
52
|
+
lambda v: write_volume_slab_to_zarr(v, z_arr, src_path), chunks_list
|
43
53
|
)
|
44
|
-
|
54
|
+
logging.info(
|
45
55
|
f"Submitted {len(chunks_list)} tasks to the scheduler in {time.time()- start}s"
|
46
56
|
)
|
47
57
|
|
48
58
|
# wait for all the futures to complete
|
49
59
|
result = wait(fut)
|
50
|
-
|
60
|
+
logging.info(f"Completed {len(chunks_list)} tasks in {time.time() - start}s")
|
51
61
|
|
52
62
|
return 0
|
53
63
|
|
zarrify/formats/tiff_stack.py
CHANGED
@@ -8,6 +8,9 @@ import dask.array as da
|
|
8
8
|
from natsort import natsorted
|
9
9
|
from glob import glob
|
10
10
|
from zarrify.utils.volume import Volume
|
11
|
+
from abc import ABCMeta
|
12
|
+
from numcodecs import Zstd
|
13
|
+
import logging
|
11
14
|
|
12
15
|
|
13
16
|
class TiffStack(Volume):
|
@@ -54,7 +57,7 @@ class TiffStack(Volume):
|
|
54
57
|
try:
|
55
58
|
image_tile = imread(src_volume[slab_index])
|
56
59
|
except:
|
57
|
-
|
60
|
+
logging.info(
|
58
61
|
f"Tiff tile with index {slab_index} is not present in tiff stack."
|
59
62
|
)
|
60
63
|
np_slab[slab_index - chunk_num, :, :] = image_tile
|
@@ -63,20 +66,27 @@ class TiffStack(Volume):
|
|
63
66
|
zarray[chunk_num : chunk_num + zarray.chunks[0], :, :] = np_slab
|
64
67
|
|
65
68
|
# parallel writing of tiff stack into zarr array
|
66
|
-
def write_to_zarr(self,
|
67
|
-
|
69
|
+
def write_to_zarr(self,
|
70
|
+
dest: str,
|
71
|
+
client: Client,
|
72
|
+
zarr_chunks : list[int],
|
73
|
+
comp : ABCMeta = Zstd(level=6),
|
74
|
+
):
|
75
|
+
|
76
|
+
z_arr = self.get_output_array(dest, zarr_chunks, comp)
|
77
|
+
chunks_list = np.arange(0, z_arr.shape[0], z_arr.chunks[0])
|
68
78
|
|
69
79
|
start = time.time()
|
70
80
|
fut = client.map(
|
71
|
-
lambda v: self.write_tile_slab_to_zarr(v,
|
81
|
+
lambda v: self.write_tile_slab_to_zarr(v, z_arr, self.stack_list),
|
72
82
|
chunks_list,
|
73
83
|
)
|
74
|
-
|
84
|
+
logging.info(
|
75
85
|
f"Submitted {len(chunks_list)} tasks to the scheduler in {time.time()- start}s"
|
76
86
|
)
|
77
87
|
|
78
88
|
# wait for all the futures to complete
|
79
89
|
result = wait(fut)
|
80
|
-
|
90
|
+
logging.info(f"Completed {len(chunks_list)} tasks in {time.time() - start}s")
|
81
91
|
|
82
92
|
return 0
|
zarrify/to_zarr.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
import zarr
|
2
2
|
from numcodecs import Zstd
|
3
|
-
import
|
3
|
+
from pathlib import Path
|
4
4
|
import click
|
5
5
|
import sys
|
6
6
|
from dask.distributed import Client
|
@@ -8,108 +8,156 @@ import time
|
|
8
8
|
from zarrify.formats.tiff_stack import TiffStack
|
9
9
|
from zarrify.formats.tiff import Tiff3D
|
10
10
|
from zarrify.formats.mrc import Mrc3D
|
11
|
-
from zarrify.formats.n5 import
|
11
|
+
from zarrify.formats.n5 import N5Group
|
12
12
|
from zarrify.utils.dask_utils import initialize_dask_client
|
13
|
+
from typing import Union
|
13
14
|
|
14
15
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
# )
|
22
|
-
# @click.option("--dest", "-s", type=click.STRING, help="Output .zarr file path.")
|
23
|
-
# @click.option(
|
24
|
-
# "--num_workers", "-w", default=100, type=click.INT, help="Number of dask workers"
|
25
|
-
# )
|
26
|
-
# @click.option(
|
27
|
-
# "--cluster",
|
28
|
-
# "-c",
|
29
|
-
# default="",
|
30
|
-
# type=click.STRING,
|
31
|
-
# help="Which instance of dask client to use. Local client - 'local', cluster 'lsf'",
|
32
|
-
# )
|
33
|
-
# @click.option(
|
34
|
-
# "--zarr_chunks",
|
35
|
-
# "-zc",
|
36
|
-
# nargs=3,
|
37
|
-
# default=(64, 128, 128),
|
38
|
-
# type=click.INT,
|
39
|
-
# help="Chunk size for (z, y, x) axis order. z-axis is normal to the tiff stack plane. Default (64, 128, 128)",
|
40
|
-
# )
|
41
|
-
# @click.option(
|
42
|
-
# "--axes",
|
43
|
-
# "-a",
|
44
|
-
# nargs=3,
|
45
|
-
# default=("z", "y", "x"),
|
46
|
-
# type=str,
|
47
|
-
# help="Metadata axis names. Order matters. \n Example: -a z y x",
|
48
|
-
# )
|
49
|
-
# @click.option(
|
50
|
-
# "--translation",
|
51
|
-
# "-t",
|
52
|
-
# nargs=3,
|
53
|
-
# default=(0.0, 0.0, 0.0),
|
54
|
-
# type=float,
|
55
|
-
# help="Metadata translation(offset) value. Order matters. \n Example: -t 1.0 2.0 3.0",
|
56
|
-
# )
|
57
|
-
# @click.option(
|
58
|
-
# "--scale",
|
59
|
-
# "-s",
|
60
|
-
# nargs=3,
|
61
|
-
# default=(1.0, 1.0, 1.0),
|
62
|
-
# type=float,
|
63
|
-
# help="Metadata scale value. Order matters. \n Example: -s 1.0 2.0 3.0",
|
64
|
-
# )
|
65
|
-
# @click.option(
|
66
|
-
# "--units",
|
67
|
-
# "-u",
|
68
|
-
# nargs=3,
|
69
|
-
# default=("nanometer", "nanometer", "nanometer"),
|
70
|
-
# type=str,
|
71
|
-
# help="Metadata unit names. Order matters. \n Example: -t nanometer nanometer nanometer",
|
72
|
-
# )
|
73
|
-
# def cli(src, dest, num_workers, cluster, zarr_chunks, axes, translation, scale, units):
|
16
|
+
def init_dataset(src :str,
                 axes : list[str],
                 scale : list[float],
                 translation : list[float],
                 units : list[str]) -> Union[TiffStack, Tiff3D, N5Group, Mrc3D]:
    """Return the dataset wrapper (N5Group, TiffStack, Mrc3D or Tiff3D) matching the input location.

    Rules are tried in this order: a '.n5' suffix selects N5Group, any other
    directory selects TiffStack, '.mrc' selects Mrc3D, and '.tif' / '.tiff'
    selects Tiff3D. The suffix comparison is case-insensitive.

    Args:
        src (str): source file/container location
        axes (list[str]): axis order (for ome-zarr metadata)
        scale (list[float]): voxel size (for ome-zarr metadata)
        translation (list[float]): offset (for ome-zarr metadata)
        units (list[str]): physical units (for ome-zarr metadata)

    Raises:
        ValueError: if none of the rules above matches ``src``.

    Returns:
        Union[TiffStack, Tiff3D, N5Group, Mrc3D]: the object for the detected
        input format. The caller uses ``write_to_zarr`` and
        ``add_ome_metadata`` on whichever object is returned.
    """

    src_path = Path(src)
    # All wrappers are constructed with the same positional arguments.
    params = (src, axes, scale, translation, units)


    ext = src_path.suffix.lower()

    # NOTE(review): only the last path component is tested for '.n5'; a
    # directory nested inside an N5 container would take the TiffStack
    # branch - confirm that is intended.
    if ext=='.n5':
        return N5Group(*params)
    elif src_path.is_dir():
        return TiffStack(*params)
    elif ext == ".mrc":
        return Mrc3D(*params)
    elif ext in (".tif", ".tiff"):
        return Tiff3D(*params)

    raise ValueError(f"Unsupported source type: {src}")
|
77
54
|
|
78
55
|
def to_zarr(src : str,
|
79
56
|
dest: str,
|
80
57
|
client : Client,
|
81
58
|
num_workers : int = 20,
|
82
59
|
zarr_chunks : list[int] = [128]*3,
|
83
|
-
axes : list[str] =
|
84
|
-
scale : list[float] = [1.0]*3,
|
85
|
-
translation : list[float] = [0.0]*3,
|
86
|
-
units: list[str] = ['nanometer']*3):
|
87
|
-
|
88
|
-
dataset = N53D(src, axes, scale, translation, units)
|
89
|
-
if src.endswith(".mrc"):
|
90
|
-
dataset = Mrc3D(src, axes, scale, translation, units)
|
91
|
-
elif src.endswith(".tif") or src.endswith(".tiff"):
|
92
|
-
dataset = Tiff3D(src, axes, scale, translation, units)
|
93
|
-
if os.path.isdir(src):
|
94
|
-
dataset = TiffStack(src, axes, scale, translation, units)
|
60
|
+
axes : list[str] = ['z', 'y', 'x'],
|
61
|
+
scale : list[float] = [1.0,]*3,
|
62
|
+
translation : list[float] = [0.0,]*3,
|
63
|
+
units: list[str] = ['nanometer',]*3):
|
64
|
+
"""Convert Tiff stack, 3D Tiff, N5, or MRC file to OME-Zarr.
|
95
65
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
66
|
+
Args:
|
67
|
+
src (str): input data location.
|
68
|
+
dest (str): output zarr group location.
|
69
|
+
client (Client): dask client instance.
|
70
|
+
num_workers (int, optional): Number of dask workers. Defaults to 20.
|
71
|
+
zarr_chunks (list[int], optional): _description_. Defaults to [128,]*3.
|
72
|
+
axes (list[str], optional): axis order. Defaults to ['z', 'y', 'x'].
|
73
|
+
scale (list[float], optional): voxel size (in physical units). Defaults to [1.0,]*3.
|
74
|
+
translation (list[float], optional): offset (in physical units). Defaults to [0.0,]*3.
|
75
|
+
units (list[str], optional): physical units. Defaults to ['nanometer']*3.
|
76
|
+
"""
|
77
|
+
|
78
|
+
dataset = init_dataset(src, axes, scale, translation, units)
|
105
79
|
|
106
80
|
# write in parallel to zarr using dask
|
107
81
|
client.cluster.scale(num_workers)
|
108
|
-
dataset.write_to_zarr(
|
82
|
+
dataset.write_to_zarr(dest, client, zarr_chunks)
|
109
83
|
client.cluster.scale(0)
|
110
84
|
# populate zarr metadata
|
111
|
-
dataset.add_ome_metadata(
|
85
|
+
dataset.add_ome_metadata(dest)
|
86
|
+
|
112
87
|
|
88
|
+
@click.command("zarrify")
@click.option(
    "--src",
    "-s",
    type=click.Path(exists=True),
    # Required: without it None reaches the conversion code.
    required=True,
    help="Input file/directory location",
)
@click.option(
    "--dest",
    "-d",
    type=click.STRING,
    # Required for the same reason as --src.
    required=True,
    help="Output .zarr file path.",
)
@click.option(
    "--num_workers", "-w", default=100, type=click.INT, help="Number of dask workers"
)
@click.option(
    "--cluster",
    "-c",
    default=None,
    type=click.STRING,
    help="Which instance of dask client to use. Local client - 'local', cluster 'lsf'",
)
@click.option(
    "--zarr_chunks",
    "-zc",
    nargs=3,
    default=(64, 128, 128),
    type=click.INT,
    help="Chunk size for (z, y, x) axis order. z-axis is normal to the tiff stack plane. Default (64, 128, 128)",
)
@click.option(
    "--axes",
    "-a",
    nargs=3,
    default=("z", "y", "x"),
    type=str,
    help="Metadata axis names. Order matters. \n Example: -a z y x",
)
@click.option(
    "--translation",
    "-t",
    nargs=3,
    default=(0.0, 0.0, 0.0),
    type=float,
    help="Metadata translation(offset) value. Order matters. \n Example: -t 1.0 2.0 3.0",
)
@click.option(
    "--scale",
    "-sc",
    nargs=3,
    default=(1.0, 1.0, 1.0),
    type=float,
    help="Metadata scale value. Order matters. \n Example: --scale 1.0 2.0 3.0",
)
@click.option(
    "--units",
    "-u",
    nargs=3,
    default=("nanometer", "nanometer", "nanometer"),
    type=str,
    # The example uses -u; -t is the short flag of --translation.
    help="Metadata unit names. Order matters. \n Example: -u nanometer nanometer nanometer",
)
def cli(src, dest, num_workers, cluster, zarr_chunks, axes, translation, scale, units):
    """Convert a TIFF stack, 3D TIFF, MRC file or N5 container to OME-Zarr."""

    # create a dask client to submit tasks
    client = initialize_dask_client(cluster)

    # convert src dataset(n5, tiff, mrc) to zarr ome dataset
    to_zarr(src,
            dest,
            client,
            num_workers,
            zarr_chunks,
            axes,
            scale,
            translation,
            units)
|
113
161
|
|
114
|
-
|
115
|
-
|
162
|
+
if __name__ == "__main__":
|
163
|
+
cli()
|
File without changes
|
zarrify/utils/dask_utils.py
CHANGED
@@ -2,9 +2,10 @@ from dask_jobqueue import LSFCluster
|
|
2
2
|
from dask.distributed import Client, LocalCluster
|
3
3
|
import os
|
4
4
|
import sys
|
5
|
+
import logging
|
5
6
|
|
6
7
|
|
7
|
-
def initialize_dask_client(cluster_type: str) -> Client:
|
8
|
+
def initialize_dask_client(cluster_type: str | None = None) -> Client:
|
8
9
|
"""Initialize dask client.
|
9
10
|
|
10
11
|
Args:
|
@@ -13,9 +14,8 @@ def initialize_dask_client(cluster_type: str) -> Client:
|
|
13
14
|
Returns:
|
14
15
|
(Client): instance of a dask client
|
15
16
|
"""
|
16
|
-
if cluster_type ==
|
17
|
-
|
18
|
-
sys.exit(0)
|
17
|
+
if cluster_type == None:
|
18
|
+
raise ValueError("Cluster type must be specified")
|
19
19
|
elif cluster_type == "lsf":
|
20
20
|
num_cores = 1
|
21
21
|
cluster = LSFCluster(
|
@@ -29,11 +29,14 @@ def initialize_dask_client(cluster_type: str) -> Client:
|
|
29
29
|
)
|
30
30
|
elif cluster_type == "local":
|
31
31
|
cluster = LocalCluster()
|
32
|
+
else:
|
33
|
+
raise ValueError(f"Unsupported cluster type: {cluster_type}")
|
32
34
|
|
33
35
|
client = Client(cluster)
|
36
|
+
print(str(client.dashboard_link))
|
34
37
|
with open(
|
35
38
|
os.path.join(os.getcwd(), "dask_dashboard_link" + ".txt"), "w"
|
36
39
|
) as text_file:
|
37
40
|
text_file.write(str(client.dashboard_link))
|
38
|
-
|
39
|
-
return client
|
41
|
+
logging.info(client.dashboard_link)
|
42
|
+
return client
|
zarrify/utils/volume.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
import zarr
|
2
|
+
from abc import ABCMeta
|
2
3
|
|
3
4
|
|
4
5
|
class Volume:
|
@@ -18,13 +19,26 @@ class Volume:
|
|
18
19
|
"scale": scale,
|
19
20
|
"units": units,
|
20
21
|
}
|
22
|
+
|
23
|
+
def get_output_array(self, dest: str, chunks: list[int], comp: ABCMeta) -> zarr.Array:
    """Return the ``s0`` array of the zarr container at ``dest``.

    The container is opened in append mode, and the array is requested with
    this volume's ``shape`` and ``dtype``.

    Args:
        dest (str): path of the output zarr container.
        chunks (list[int]): chunk shape for the array.
        comp (ABCMeta): compressor for the array.

    Returns:
        zarr.Array: the array named "s0" under the container root.
    """
    z_store = zarr.NestedDirectoryStore(dest)
    z_root = zarr.open(store=z_store, mode="a")

    return z_root.require_dataset(
        name="s0",
        shape=self.shape,
        dtype=self.dtype,
        chunks=chunks,
        compressor=comp,
    )
|
21
34
|
|
22
|
-
def add_ome_metadata(self,
|
35
|
+
def add_ome_metadata(self, dest: str):
|
23
36
|
"""Add selected tiff metadata to zarr attributes file (.zattrs).
|
24
37
|
|
25
38
|
Args:
|
26
39
|
root (zarr.Group): root group of the output zarr array
|
27
40
|
"""
|
41
|
+
root = zarr.open(dest, mode = 'a')
|
28
42
|
# json template for a multiscale structure
|
29
43
|
z_attrs: dict = {"multiscales": [{}]}
|
30
44
|
z_attrs["multiscales"][0]["axes"] = [
|
@@ -0,0 +1,65 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: zarrify
|
3
|
+
Version: 0.0.8
|
4
|
+
Summary: Convert scientific image formats (TIFF, MRC, N5) to OME-Zarr
|
5
|
+
Author-email: Yurii Zubov <zubov452@gmail.com>
|
6
|
+
License: MIT
|
7
|
+
Keywords: zarr,ome-zarr,tiff,mrc,n5,scientific-imaging
|
8
|
+
Classifier: Intended Audience :: Science/Research
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Image Processing
|
14
|
+
Requires-Python: >=3.10
|
15
|
+
Description-Content-Type: text/markdown
|
16
|
+
License-File: LICENSE.txt
|
17
|
+
Requires-Dist: click<9.0.0,>=8.1.8
|
18
|
+
Requires-Dist: colorama<0.5.0,>=0.4.6
|
19
|
+
Requires-Dist: dask<2025.0.0,>=2024.12.1
|
20
|
+
Requires-Dist: dask-jobqueue==0.8.2
|
21
|
+
Requires-Dist: imagecodecs<2025.0.0,>=2024.12.30
|
22
|
+
Requires-Dist: mrcfile<2.0.0,>=1.5.3
|
23
|
+
Requires-Dist: natsort<9.0.0,>=8.4.0
|
24
|
+
Requires-Dist: numcodecs<0.16,>=0.13.0
|
25
|
+
Requires-Dist: pydantic-zarr<0.5.0,>=0.4.0
|
26
|
+
Requires-Dist: pint<1.0.0,>=0.20.0
|
27
|
+
Requires-Dist: tifffile<2026.0.0,>=2025.1.10
|
28
|
+
Requires-Dist: zarr==2.18.3
|
29
|
+
Requires-Dist: bokeh>=3.1.0
|
30
|
+
Provides-Extra: test
|
31
|
+
Requires-Dist: pytest; extra == "test"
|
32
|
+
Dynamic: license-file
|
33
|
+
|
34
|
+
# zarrify
|
35
|
+
|
36
|
+
Convert TIFF, MRC, and N5 files to OME-Zarr format.
|
37
|
+
|
38
|
+
## Install
|
39
|
+
|
40
|
+
```bash
|
41
|
+
pip install zarrify
|
42
|
+
```
|
43
|
+
|
44
|
+
## Usage
|
45
|
+
|
46
|
+
```bash
|
47
|
+
zarrify --src input.tiff --dest output.zarr --cluster local
|
48
|
+
```
|
49
|
+
|
50
|
+
## Python API
|
51
|
+
|
52
|
+
```python
|
53
|
+
import zarrify
|
54
|
+
from zarrify.utils.dask_utils import initialize_dask_client
|
55
|
+
|
56
|
+
client = initialize_dask_client("local")
|
57
|
+
zarrify.to_zarr("input.tiff", "output.zarr", client)
|
58
|
+
```
|
59
|
+
|
60
|
+
## Supported formats
|
61
|
+
|
62
|
+
- TIFF stacks
|
63
|
+
- 3D TIFF files
|
64
|
+
- MRC files
|
65
|
+
- N5 containers
|
@@ -0,0 +1,17 @@
|
|
1
|
+
zarrify/__about__.py,sha256=wOJN3HxAgnSon5vWYU3Txm2UZ_7tBHDKXUKZIH-mXX8,22
|
2
|
+
zarrify/__init__.py,sha256=uccp13weBXDK1VItmabtQB8VEt0UEjx3RaziwM2dix0,115
|
3
|
+
zarrify/to_zarr.py,sha256=on7WEOorwBm7gWQj8vmRTVwpnG-oK6IPHgAXuKEGwMw,5168
|
4
|
+
zarrify/formats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
zarrify/formats/mrc.py,sha256=WpNS0m6R9DnS57fUnVgJZlmdP6c5CgAKrthseo2ecS4,2893
|
6
|
+
zarrify/formats/n5.py,sha256=p20KK5zEqXl6GR6iK-k28jAiL3SL4fc6LMGoGCSXs1o,8653
|
7
|
+
zarrify/formats/tiff.py,sha256=kQDCe8jeGgWgNP80YxiHuTypD01wX9tgSS0xYgB9bPA,2383
|
8
|
+
zarrify/formats/tiff_stack.py,sha256=W8DFH4LI95qtVsOU7wmLChWnFb7QGcTXZlcR0Vj5fvw,3007
|
9
|
+
zarrify/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
+
zarrify/utils/dask_utils.py,sha256=39PG0wz7uglcOBYv9zB_7uDFzGQMKYGGg3KhPspiptw,1227
|
11
|
+
zarrify/utils/volume.py,sha256=SGnvsiSlW7nwd_bxio_LNwoxd-g4IiW361EkvO2NBWA,2176
|
12
|
+
zarrify-0.0.8.dist-info/licenses/LICENSE.txt,sha256=CGTDUwAd3nAjOp8LJaPGfTzWEzTnqfLYCulsMyw8B_s,1517
|
13
|
+
zarrify-0.0.8.dist-info/METADATA,sha256=KETKL9flFkzeNOAHingnX-Xkebkod7ql0g40z3RMl44,1689
|
14
|
+
zarrify-0.0.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
15
|
+
zarrify-0.0.8.dist-info/entry_points.txt,sha256=NVzqP12vRMcm9ljd8uF5fHBiyY-fdP1UDRBymNwgWRs,48
|
16
|
+
zarrify-0.0.8.dist-info/top_level.txt,sha256=Lpz93pp40fUO2wnHnyrC4BKZRIx8QZ1ckaLtBTGobuo,8
|
17
|
+
zarrify-0.0.8.dist-info/RECORD,,
|
@@ -0,0 +1 @@
|
|
1
|
+
zarrify
|
zarrify-0.0.3.dist-info/METADATA
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.3
|
2
|
-
Name: zarrify
|
3
|
-
Version: 0.0.3
|
4
|
-
Summary:
|
5
|
-
Author: Yurii Zubov
|
6
|
-
Author-email: zubov452@gmail.com
|
7
|
-
Requires-Python: >=3.10
|
8
|
-
Classifier: Programming Language :: Python :: 3
|
9
|
-
Classifier: Programming Language :: Python :: 3.10
|
10
|
-
Classifier: Programming Language :: Python :: 3.11
|
11
|
-
Classifier: Programming Language :: Python :: 3.12
|
12
|
-
Classifier: Programming Language :: Python :: 3.13
|
13
|
-
Requires-Dist: click (>=8.1.8,<9.0.0)
|
14
|
-
Requires-Dist: colorama (>=0.4.6,<0.5.0)
|
15
|
-
Requires-Dist: dask (>=2024.12.1,<2025.0.0)
|
16
|
-
Requires-Dist: dask-jobqueue (==0.8.2)
|
17
|
-
Requires-Dist: imagecodecs (>=2024.12.30,<2025.0.0)
|
18
|
-
Requires-Dist: mrcfile (>=1.5.3,<2.0.0)
|
19
|
-
Requires-Dist: natsort (>=8.4.0,<9.0.0)
|
20
|
-
Requires-Dist: tifffile (>=2025.1.10,<2026.0.0)
|
21
|
-
Requires-Dist: zarr (==2.16.1)
|
22
|
-
Description-Content-Type: text/markdown
|
23
|
-
|
24
|
-
|
zarrify-0.0.3.dist-info/RECORD
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
zarrify/__about__.py,sha256=HPktDE8PNXBOjJb_lSg6ji4H2nkvEFYUlj4x6RAUgvE,95
|
2
|
-
zarrify/__init__.py,sha256=qQat7kir84KIAzcPbg_YDcmVt8AX2M1jhi6EYM7MTG0,157
|
3
|
-
zarrify/formats/mrc.py,sha256=-_jpEd9cz86ydZJPY8RPrN0RYcJl4GjeGKd006tYJBo,2495
|
4
|
-
zarrify/formats/n5.py,sha256=WaI6hAxgXO3TjqaLwaCZXDWMa7ROFh8m0nltZkZ3szw,1561
|
5
|
-
zarrify/formats/tiff.py,sha256=QuNcdtcOsg1iGdEMMHl1uNkY8lcx-Xv25QKolZmxkOU,2144
|
6
|
-
zarrify/formats/tiff_stack.py,sha256=NtHn-2XIzNyRBH6MrxtXCdfi1C3wV_7JKdn_2cOb6eI,2761
|
7
|
-
zarrify/to_zarr.py,sha256=1fZyU6d1hPMS_g4V3XhN08b9daR7uFn5G9iIRwR9wgs,3385
|
8
|
-
zarrify/utils/dask_utils.py,sha256=BC3M5Fd0tOEoSaanHL0WCGpwxNkboxQDUTr9ROBiq_o,1108
|
9
|
-
zarrify/utils/volume.py,sha256=dTSkodL2utp_zVDBxcS4HfidE80_ISl32x7Vp4M0s3g,1717
|
10
|
-
zarrify-0.0.3.dist-info/LICENSE.txt,sha256=CGTDUwAd3nAjOp8LJaPGfTzWEzTnqfLYCulsMyw8B_s,1517
|
11
|
-
zarrify-0.0.3.dist-info/METADATA,sha256=fTAGCWrlOFTbpA2lcBOhbP1OYWGYOw-Hl7jYml8WjTA,805
|
12
|
-
zarrify-0.0.3.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
|
13
|
-
zarrify-0.0.3.dist-info/entry_points.txt,sha256=bLnFtFGlpKYvDMeVDTU3LSM3SEEPhWyuU2sx8-k5vKg,47
|
14
|
-
zarrify-0.0.3.dist-info/RECORD,,
|
File without changes
|