zarrify 0.0.2.tar.gz → 0.0.8.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zarrify-0.0.8/PKG-INFO ADDED
@@ -0,0 +1,65 @@
+ Metadata-Version: 2.4
+ Name: zarrify
+ Version: 0.0.8
+ Summary: Convert scientific image formats (TIFF, MRC, N5) to OME-Zarr
+ Author-email: Yurii Zubov <zubov452@gmail.com>
+ License: MIT
+ Keywords: zarr,ome-zarr,tiff,mrc,n5,scientific-imaging
+ Classifier: Intended Audience :: Science/Research
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Topic :: Scientific/Engineering :: Image Processing
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ License-File: LICENSE.txt
+ Requires-Dist: click<9.0.0,>=8.1.8
+ Requires-Dist: colorama<0.5.0,>=0.4.6
+ Requires-Dist: dask<2025.0.0,>=2024.12.1
+ Requires-Dist: dask-jobqueue==0.8.2
+ Requires-Dist: imagecodecs<2025.0.0,>=2024.12.30
+ Requires-Dist: mrcfile<2.0.0,>=1.5.3
+ Requires-Dist: natsort<9.0.0,>=8.4.0
+ Requires-Dist: numcodecs<0.16,>=0.13.0
+ Requires-Dist: pydantic-zarr<0.5.0,>=0.4.0
+ Requires-Dist: pint<1.0.0,>=0.20.0
+ Requires-Dist: tifffile<2026.0.0,>=2025.1.10
+ Requires-Dist: zarr==2.18.3
+ Requires-Dist: bokeh>=3.1.0
+ Provides-Extra: test
+ Requires-Dist: pytest; extra == "test"
+ Dynamic: license-file
+ 
+ # zarrify
+ 
+ Convert TIFF, MRC, and N5 files to OME-Zarr format.
+ 
+ ## Install
+ 
+ ```bash
+ pip install zarrify
+ ```
+ 
+ ## Usage
+ 
+ ```bash
+ zarrify --src input.tiff --dest output.zarr --cluster local
+ ```
+ 
+ ## Python API
+ 
+ ```python
+ import zarrify
+ from zarrify.utils.dask_utils import initialize_dask_client
+ 
+ client = initialize_dask_client("local")
+ zarrify.to_zarr("input.tiff", "output.zarr", client)
+ ```
+ 
+ ## Supported formats
+ 
+ - TIFF stacks
+ - 3D TIFF files
+ - MRC files
+ - N5 containers
zarrify-0.0.8/README.md ADDED
@@ -0,0 +1,32 @@
+ # zarrify
+ 
+ Convert TIFF, MRC, and N5 files to OME-Zarr format.
+ 
+ ## Install
+ 
+ ```bash
+ pip install zarrify
+ ```
+ 
+ ## Usage
+ 
+ ```bash
+ zarrify --src input.tiff --dest output.zarr --cluster local
+ ```
+ 
+ ## Python API
+ 
+ ```python
+ import zarrify
+ from zarrify.utils.dask_utils import initialize_dask_client
+ 
+ client = initialize_dask_client("local")
+ zarrify.to_zarr("input.tiff", "output.zarr", client)
+ ```
+ 
+ ## Supported formats
+ 
+ - TIFF stacks
+ - 3D TIFF files
+ - MRC files
+ - N5 containers
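Editor's note: the `to_zarr` call shown in the README also accepts the metadata keyword arguments introduced in this release (see `src/zarrify/to_zarr.py` below). A minimal sketch, assuming a local cluster and an existing input file; the paths are hypothetical:

```python
import zarrify
from zarrify.utils.dask_utils import initialize_dask_client

# start a local dask cluster; keyword arguments mirror the
# to_zarr() signature added in src/zarrify/to_zarr.py
client = initialize_dask_client("local")
zarrify.to_zarr(
    "input.mrc",                  # hypothetical input path
    "output.zarr",
    client,
    num_workers=4,
    zarr_chunks=[128, 128, 128],  # output zarr chunk shape
    axes=["z", "y", "x"],
    scale=[4.0, 4.0, 4.0],        # voxel size in physical units
    translation=[0.0, 0.0, 0.0],  # offset in physical units
    units=["nanometer"] * 3,
)
```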
zarrify-0.0.8/pyproject.toml ADDED
@@ -0,0 +1,48 @@
+ [project]
+ name = "zarrify"
+ version = "0.0.8"
+ description = "Convert scientific image formats (TIFF, MRC, N5) to OME-Zarr"
+ authors = [
+     { name = "Yurii Zubov", email = "zubov452@gmail.com" }
+ ]
+ readme = "README.md"
+ requires-python = ">=3.10"
+ license = { text = "MIT" }
+ keywords = ["zarr", "ome-zarr", "tiff", "mrc", "n5", "scientific-imaging"]
+ classifiers = [
+     "Intended Audience :: Science/Research",
+     "License :: OSI Approved :: MIT License",
+     "Programming Language :: Python :: 3",
+     "Programming Language :: Python :: 3.10",
+     "Programming Language :: Python :: 3.11",
+     "Topic :: Scientific/Engineering :: Image Processing",
+ ]
+ 
+ dependencies = [
+     "click>=8.1.8,<9.0.0",
+     "colorama>=0.4.6,<0.5.0",
+     "dask>=2024.12.1,<2025.0.0",
+     "dask-jobqueue==0.8.2",
+     "imagecodecs>=2024.12.30,<2025.0.0",
+     "mrcfile>=1.5.3,<2.0.0",
+     "natsort>=8.4.0,<9.0.0",
+     "numcodecs>=0.13.0,<0.16",
+     "pydantic-zarr>=0.4.0,<0.5.0",
+     "pint>=0.20.0,<1.0.0",
+     "tifffile>=2025.1.10,<2026.0.0",
+     "zarr==2.18.3",
+     "bokeh>=3.1.0"
+ ]
+ 
+ [project.scripts]
+ zarrify = "zarrify.to_zarr:cli"
+ 
+ [project.optional-dependencies]
+ test = [
+     "pytest"
+ ]
+ 
+ 
+ [build-system]
+ requires = ["setuptools>=61.0"]
+ build-backend = "setuptools.build_meta"
zarrify-0.0.8/setup.cfg ADDED
@@ -0,0 +1,4 @@
+ [egg_info]
+ tag_build = 
+ tag_date = 0
+ 
zarrify-0.0.8/src/zarrify/__about__.py ADDED
@@ -0,0 +1 @@
+ __version__ = "0.0.8"
zarrify-0.0.8/src/zarrify/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from zarrify.to_zarr import to_zarr
+ from zarrify.__about__ import __version__
+ 
+ __all__ = ["to_zarr", "__version__"]
zarrify-0.0.8/src/zarrify/formats/mrc.py CHANGED
@@ -7,7 +7,10 @@ from dask.distributed import Client, wait
  from toolz import partition_all
  import time
  from zarrify.utils.volume import Volume
- 
+ from abc import ABCMeta
+ from numcodecs import Zstd
+ import logging
+ import copy
  
  class Mrc3D(Volume):
  
@@ -28,25 +31,16 @@ class Mrc3D(Volume):
  
          self.memmap = mrcfile.mmap(self.src_path, mode="r")
          self.ndim = self.memmap.data.ndim
-         self.shape = self.memmap.shape
+         self.shape = self.memmap.data.shape
          self.dtype = self.memmap.data.dtype
  
-     def save_chunk(self, z_arr: zarr.Array, chunk_slice: Tuple[slice, ...]):
-         """Copies data from a particular part of the input mrc array into a specific chunk of the output zarr array.
- 
-         Args:
-             z_arr (zarr.core.Array): output zarr array object
-             chunk_slice (Tuple[slice, ...]): slice of the mrc array to copy.
-         """
-         mrc_file = mrcfile.mmap(self.src_path, mode="r")
- 
-         if not (mrc_file.data[chunk_slice] == 0).all():
-             z_arr[chunk_slice] = mrc_file.data[chunk_slice]
  
      def write_to_zarr(
          self,
-         z_arr: zarr.Array,
+         dest: str,
          client: Client,
+         zarr_chunks: list[int],
+         comp: ABCMeta = Zstd(level=6),
      ):
          """Use mrcfile memmap to access small parts of the mrc file and write them into zarr chunks.
  
@@ -54,7 +48,13 @@ class Mrc3D(Volume):
              dest_path (str): path to the zarr group where the output dataset is stored.
              client (Client): instance of a dask client
          """
+ 
+         logging.basicConfig(level=logging.INFO,
+                             format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  
+         src_path = copy.copy(self.src_path)
+         z_arr = self.get_output_array(dest, zarr_chunks, comp)
+ 
          out_slices = slices_from_chunks(
              normalize_chunks(z_arr.chunks, shape=z_arr.shape)
          )
@@ -62,12 +62,24 @@ class Mrc3D(Volume):
  
          for idx, part in enumerate(out_slices_partitioned):
  
-             print(f"{idx + 1} / {len(out_slices_partitioned)}")
+             logging.info(f"{idx + 1} / {len(out_slices_partitioned)}")
              start = time.time()
-             fut = client.map(lambda v: self.save_chunk(z_arr, v), part)
-             print(
+             fut = client.map(lambda v: save_chunk(src_path, z_arr, v), part)
+             logging.info(
                  f"Submitted {len(part)} tasks to the scheduler in {time.time()- start}s"
              )
              # wait for all the futures to complete
              result = wait(fut)
-             print(f"Completed {len(part)} tasks in {time.time() - start}s")
+             logging.info(f"Completed {len(part)} tasks in {time.time() - start}s")
+ 
+ def save_chunk(src_path, z_arr: zarr.Array, chunk_slice: Tuple[slice, ...]):
+     """Copies data from a particular part of the input mrc array into a specific chunk of the output zarr array.
+ 
+     Args:
+         z_arr (zarr.core.Array): output zarr array object
+         chunk_slice (Tuple[slice, ...]): slice of the mrc array to copy.
+     """
+     mrc_file = mrcfile.mmap(src_path, mode="r")
+ 
+     if not (mrc_file.data[chunk_slice] == 0).all():
+         z_arr[chunk_slice] = mrc_file.data[chunk_slice]
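Editor's note: `Mrc3D.write_to_zarr` above (and the N5 path below) rely on the same pattern: dask computes one slice tuple per output chunk, and `toolz.partition_all` batches the slices so the scheduler is not flooded. A standalone sketch of that pattern; the shape and chunk values are hypothetical:

```python
from dask.array.core import normalize_chunks, slices_from_chunks
from toolz import partition_all

shape = (512, 512, 512)   # hypothetical output array shape
chunks = (128, 128, 128)  # hypothetical zarr chunk shape

# one slice tuple per output chunk
out_slices = slices_from_chunks(normalize_chunks(chunks, shape=shape))
# batch the slices to keep the dask scheduler responsive
batches = tuple(partition_all(100_000, out_slices))

print(len(out_slices))  # 64 chunk slices (4 per axis)
print(len(batches))     # 1 batch here; large volumes yield several
```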
zarrify-0.0.8/src/zarrify/formats/n5.py ADDED
@@ -0,0 +1,217 @@
+ import zarr
+ import os
+ from zarrify.utils.volume import Volume
+ import json
+ from itertools import chain
+ import natsort
+ from operator import itemgetter
+ import pydantic_zarr as pz
+ import dask.array as da
+ from dask.array.core import slices_from_chunks, normalize_chunks
+ from toolz import partition_all
+ from dask.distributed import Client, wait
+ from typing import Tuple
+ import time
+ import numpy as np
+ import logging
+ import pint
+ from abc import ABCMeta
+ from numcodecs import Zstd
+ 
+ class N5Group(Volume):
+ 
+     def __init__(
+         self,
+         src_path: str,
+         axes: list[str],
+         scale: list[float],
+         translation: list[float],
+         units: list[str],
+     ):
+         """Construct all the necessary attributes for the proper conversion of N5 to OME-NGFF Zarr.
+ 
+         Args:
+             src_path (str): path to the source N5 container.
+         """
+         super().__init__(src_path, axes, scale, translation, units)
+         self.store_path, self.path = self.separate_store_path(src_path, '')
+         self.n5_store = zarr.N5Store(self.store_path)
+ 
+         self.n5_obj = zarr.open(store=self.n5_store, path=self.path, mode='r')
+ 
+     def separate_store_path(self,
+                             store: str,
+                             path: str):
+         """
+         Sometimes a full OS path is passed to a node, leading to an
+         empty ('') node.path attribute. The correct way is to separate
+         the path to the container (.n5, .zarr) from the path to the
+         array within the container.
+ 
+         Args:
+             store (string): path to store
+             path (string): path to the array/group within the container (.n5 or .zarr)
+ 
+         Returns:
+             (string, string): regularized store and group/array paths
+         """
+         new_store, path_prefix = os.path.split(store)
+         if ".n5" in path_prefix:
+             return store, path
+         return self.separate_store_path(new_store, os.path.join(path_prefix, path))
+ 
+     # creates attributes.json recursively within n5 group, if missing
+     def reconstruct_json(self,
+                          n5src: str):
+         dir_list = os.listdir(n5src)
+         if "attributes.json" not in dir_list:
+             with open(os.path.join(n5src, "attributes.json"), "w") as jfile:
+                 attrs = {"n5": "2.0.0"}
+                 jfile.write(json.dumps(attrs, indent=4))
+         for obj in dir_list:
+             if os.path.isdir(os.path.join(n5src, obj)):
+                 self.reconstruct_json(os.path.join(n5src, obj))
+ 
+     def apply_ome_template(self, zgroup: zarr.Group):
+ 
+         z_attrs = {"multiscales": [{}]}
+ 
+         # normalize input units, i.e. 'meter' or 'm' -> 'meter'
+         ureg = pint.UnitRegistry()
+         units_list = [ureg.Unit(unit) for unit in zgroup.attrs['units']]
+ 
+         # populate .zattrs
+         z_attrs['multiscales'][0]['axes'] = [{"name": axis,
+                                               "type": "space",
+                                               "unit": unit} for (axis, unit) in zip(zgroup.attrs['axes'],
+                                                                                     units_list)]
+         z_attrs['multiscales'][0]['version'] = '0.4'
+         z_attrs['multiscales'][0]['name'] = zgroup.name
+         z_attrs['multiscales'][0]['coordinateTransformations'] = [{"type": "scale",
+             "scale": [1.0, 1.0, 1.0]}, {"type": "translation", "translation": [1.0, 1.0, 1.0]}]
+ 
+         return z_attrs
+ 
+     def normalize_to_omengff(self,
+                              zgroup: zarr.Group):
+         group_keys = zgroup.keys()
+ 
+         for key in chain(group_keys, '/'):
+             if isinstance(zgroup[key], zarr.hierarchy.Group):
+                 if key != '/':
+                     self.normalize_to_omengff(zgroup[key])
+                 if 'scales' in zgroup[key].attrs.asdict():
+                     zattrs = self.apply_ome_template(zgroup[key])
+                     zarrays = zgroup[key].arrays(recurse=True)
+ 
+                     unsorted_datasets = []
+                     for arr in zarrays:
+                         unsorted_datasets.append(self.ome_dataset_metadata(arr[1], zgroup[key]))
+ 
+                     # 1. apply natural sort to organize datasets metadata for different resolution levels (s0 -> s10)
+                     # 2. add datasets metadata to the ome-ngff template
+                     zattrs['multiscales'][0]['datasets'] = natsort.natsorted(unsorted_datasets, key=itemgetter('path'))
+                     zgroup[key].attrs['multiscales'] = zattrs['multiscales']
+ 
+     def ome_dataset_metadata(self, n5arr: zarr.Array,
+                              group: zarr.Group):
+ 
+         arr_attrs_n5 = n5arr.attrs['transform']
+         dataset_meta = {
+             "path": os.path.relpath(n5arr.path, group.path),
+             "coordinateTransformations": [{
+                 'type': 'scale',
+                 'scale': arr_attrs_n5['scale']}, {
+                 'type': 'translation',
+                 'translation': arr_attrs_n5['translate']
+             }]}
+ 
+         return dataset_meta
+ 
+     # d=groupspec.to_dict(),
+     def normalize_groupspec(self, d, comp):
+         for k, v in d.items():
+             if k == "compressor":
+                 d[k] = comp.get_config()
+ 
+             elif k == 'dimension_separator':
+                 d[k] = '/'
+             elif isinstance(v, dict):
+                 self.normalize_groupspec(v, comp)
+ 
+     def copy_n5_tree(self, n5_root, z_store, comp):
+         spec_n5 = pz.GroupSpec.from_zarr(n5_root)
+         spec_n5_dict = spec_n5.dict()
+         self.normalize_groupspec(spec_n5_dict, comp)
+         spec_n5 = pz.GroupSpec(**spec_n5_dict)
+         return spec_n5.to_zarr(z_store, path='')
+ 
+ 
+     # creates attributes.json, if missing
+     def reconstruct_json(self, n5src):
+         dir_list = os.listdir(n5src)
+         if "attributes.json" not in dir_list:
+             with open(os.path.join(n5src, "attributes.json"), "w") as jfile:
+                 attrs = {"n5": "2.0.0"}
+                 jfile.write(json.dumps(attrs, indent=4))
+         for obj in dir_list:
+             if os.path.isdir(os.path.join(n5src, obj)):
+                 self.reconstruct_json(os.path.join(n5src, obj))
+ 
+ 
+ 
+     def save_chunk(
+             self,
+             source: zarr.Array,
+             dest: zarr.Array,
+             out_slices: Tuple[slice, ...],
+             invert: bool):
+ 
+         in_slices = tuple(out_slice for out_slice in out_slices)
+         source_data = source[in_slices]
+         # only store source_data if it is not all 0s
+         if not (source_data == 0).all():
+             if invert:
+                 dest[out_slices] = np.invert(source_data)
+             else:
+                 dest[out_slices] = source_data
+         return 1
+ 
+     def write_to_zarr(
+             self,
+             dest: str,
+             client: Client,
+             zarr_chunks: list[int],
+             comp: ABCMeta = Zstd(level=6),
+     ):
+ 
+         logging.basicConfig(level=logging.INFO,
+                             format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ 
+         self.reconstruct_json(os.path.join(self.store_path, self.path))
+ 
+         # input n5 arrays list to convert
+         n5_root = zarr.open_group(self.n5_store, mode='r')
+         zarr_arrays = n5_root.arrays(recurse=True)
+         # copy input n5 tree structure to output zarr and add ome metadata, when N5 metadata is present
+         z_store = zarr.NestedDirectoryStore(dest)
+         zg = self.copy_n5_tree(n5_root, z_store, comp)
+ 
+         self.normalize_to_omengff(zg)
+ 
+         for item in zarr_arrays:
+             n5arr = item[1]
+             dest_arr = zarr.open_array(os.path.join(dest, n5arr.path), mode='a')
+ 
+             out_slices = slices_from_chunks(normalize_chunks(dest_arr.chunks, shape=dest_arr.shape))
+             # break the slices up into batches, to make things easier for the dask scheduler
+             out_slices_partitioned = tuple(partition_all(100000, out_slices))
+ 
+             for idx, part in enumerate(out_slices_partitioned):
+                 logging.info(f'{idx + 1} / {len(out_slices_partitioned)}')
+                 start = time.time()
+                 fut = client.map(lambda v: self.save_chunk(n5arr, dest_arr, v, invert=False), part)
+                 logging.info(f'Submitted {len(part)} tasks to the scheduler in {time.time()- start}s')
+                 # wait for all the futures to complete
+                 result = wait(fut)
+                 logging.info(f'Completed {len(part)} tasks in {time.time() - start}s')
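Editor's note: `apply_ome_template` above normalizes unit aliases through pint before writing OME-NGFF axes metadata. A minimal sketch of that normalization:

```python
import pint

# pint maps unit aliases onto canonical spelled-out names,
# which is the form expected in OME-NGFF 0.4 axis metadata
ureg = pint.UnitRegistry()
for alias in ["m", "meter", "nm", "um"]:
    print(alias, "->", ureg.Unit(alias))
# m -> meter, meter -> meter, nm -> nanometer, um -> micrometer
```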
zarrify-0.0.8/src/zarrify/formats/tiff.py CHANGED
@@ -7,6 +7,9 @@ import time
  import dask.array as da
  import copy
  from zarrify.utils.volume import Volume
+ from abc import ABCMeta
+ from numcodecs import Zstd
+ import logging
  
  
  class Tiff3D(Volume):
@@ -32,22 +35,29 @@ class Tiff3D(Volume):
          self.shape = self.zarr_arr.shape
          self.dtype = self.zarr_arr.dtype
  
-     def write_to_zarr(self, zarray: zarr.Group, client: Client):
-         chunks_list = np.arange(0, zarray.shape[0], zarray.chunks[0])
+     def write_to_zarr(self,
+                       dest: str,
+                       client: Client,
+                       zarr_chunks: list[int],
+                       comp: ABCMeta = Zstd(level=6),
+                       ):
+ 
+         z_arr = self.get_output_array(dest, zarr_chunks, comp)
+         chunks_list = np.arange(0, z_arr.shape[0], z_arr.chunks[0])
  
          src_path = copy.copy(self.src_path)
  
          start = time.time()
          fut = client.map(
-             lambda v: write_volume_slab_to_zarr(v, zarray, src_path), chunks_list
+             lambda v: write_volume_slab_to_zarr(v, z_arr, src_path), chunks_list
          )
-         print(
+         logging.info(
              f"Submitted {len(chunks_list)} tasks to the scheduler in {time.time()- start}s"
          )
  
          # wait for all the futures to complete
          result = wait(fut)
-         print(f"Completed {len(chunks_list)} tasks in {time.time() - start}s")
+         logging.info(f"Completed {len(chunks_list)} tasks in {time.time() - start}s")
  
          return 0
  
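Editor's note: unlike the per-chunk MRC path, `Tiff3D.write_to_zarr` parallelizes over z-slabs whose thickness equals the zarr chunk size along axis 0. A standalone sketch of the slab-start computation; the depth and chunk values are hypothetical:

```python
import numpy as np

depth, chunk_depth = 700, 64  # hypothetical z extent and z chunk size

# one dask task per slab; each task writes chunk_depth planes
chunks_list = np.arange(0, depth, chunk_depth)
print(chunks_list)
# [  0  64 128 192 256 320 384 448 512 576 640]
```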
zarrify-0.0.8/src/zarrify/formats/tiff_stack.py CHANGED
@@ -8,6 +8,9 @@ import dask.array as da
  from natsort import natsorted
  from glob import glob
  from zarrify.utils.volume import Volume
+ from abc import ABCMeta
+ from numcodecs import Zstd
+ import logging
  
  
  class TiffStack(Volume):
@@ -54,7 +57,7 @@ class TiffStack(Volume):
          try:
              image_tile = imread(src_volume[slab_index])
          except:
-             print(
+             logging.info(
                  f"Tiff tile with index {slab_index} is not present in tiff stack."
              )
          np_slab[slab_index - chunk_num, :, :] = image_tile
@@ -63,20 +66,27 @@ class TiffStack(Volume):
          zarray[chunk_num : chunk_num + zarray.chunks[0], :, :] = np_slab
  
      # parallel writing of tiff stack into zarr array
-     def write_to_zarr(self, zarray: zarr.Array, client: Client):
-         chunks_list = np.arange(0, zarray.shape[0], zarray.chunks[0])
+     def write_to_zarr(self,
+                       dest: str,
+                       client: Client,
+                       zarr_chunks: list[int],
+                       comp: ABCMeta = Zstd(level=6),
+                       ):
+ 
+         z_arr = self.get_output_array(dest, zarr_chunks, comp)
+         chunks_list = np.arange(0, z_arr.shape[0], z_arr.chunks[0])
  
          start = time.time()
          fut = client.map(
-             lambda v: self.write_tile_slab_to_zarr(v, zarray, self.stack_list),
+             lambda v: self.write_tile_slab_to_zarr(v, z_arr, self.stack_list),
              chunks_list,
          )
-         print(
+         logging.info(
              f"Submitted {len(chunks_list)} tasks to the scheduler in {time.time()- start}s"
          )
  
          # wait for all the futures to complete
          result = wait(fut)
-         print(f"Completed {len(chunks_list)} tasks in {time.time() - start}s")
+         logging.info(f"Completed {len(chunks_list)} tasks in {time.time() - start}s")
  
          return 0
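Editor's note: `TiffStack` assembles z-slabs from individual tile files, so a stable numeric ordering of the stack matters; this is why the module imports `natsorted` and `glob`. A sketch of the typical enumeration (the directory and file pattern are hypothetical, since the constructor body is not part of this hunk):

```python
from glob import glob
from natsort import natsorted

# natural sort keeps img_2.tif before img_10.tif, which plain
# lexicographic sorting would not
stack_list = natsorted(glob("stack_dir/*.tif"))
print(stack_list[:3])
```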
zarrify-0.0.8/src/zarrify/to_zarr.py ADDED
@@ -0,0 +1,163 @@
+ import zarr
+ from numcodecs import Zstd
+ from pathlib import Path
+ import click
+ import sys
+ from dask.distributed import Client
+ import time
+ from zarrify.formats.tiff_stack import TiffStack
+ from zarrify.formats.tiff import Tiff3D
+ from zarrify.formats.mrc import Mrc3D
+ from zarrify.formats.n5 import N5Group
+ from zarrify.utils.dask_utils import initialize_dask_client
+ from typing import Union
+ 
+ 
+ def init_dataset(src: str,
+                  axes: list[str],
+                  scale: list[float],
+                  translation: list[float],
+                  units: list[str]) -> Union[TiffStack, Tiff3D, N5Group, Mrc3D]:
+     """Returns an instance of a dataset class (TiffStack, N5Group, Mrc3D, or Tiff3D), depending on the input file format.
+ 
+     Args:
+         src (str): source file/container location
+         axes (list[str]): axis order (for ome-zarr metadata)
+         scale (list[float]): voxel size (for ome-zarr metadata)
+         translation (list[float]): offset (for ome-zarr metadata)
+         units (list[str]): physical units (for ome-zarr metadata)
+ 
+     Raises:
+         ValueError: raised if the input file format is not supported.
+ 
+     Returns:
+         Union[TiffStack, Tiff3D, N5Group, Mrc3D]: a file format object matching the input file format.
+         All file format objects expose identical instance methods (write_to_zarr, add_ome_metadata) to emulate API abstraction.
+     """
+ 
+     src_path = Path(src)
+     params = (src, axes, scale, translation, units)
+ 
+ 
+     ext = src_path.suffix.lower()
+ 
+     if ext == '.n5':
+         return N5Group(*params)
+     elif src_path.is_dir():
+         return TiffStack(*params)
+     elif ext == ".mrc":
+         return Mrc3D(*params)
+     elif ext in (".tif", ".tiff"):
+         return Tiff3D(*params)
+ 
+     raise ValueError(f"Unsupported source type: {src}")
+ 
+ def to_zarr(src: str,
+             dest: str,
+             client: Client,
+             num_workers: int = 20,
+             zarr_chunks: list[int] = [128] * 3,
+             axes: list[str] = ['z', 'y', 'x'],
+             scale: list[float] = [1.0] * 3,
+             translation: list[float] = [0.0] * 3,
+             units: list[str] = ['nanometer'] * 3):
+     """Convert a Tiff stack, 3D Tiff, N5, or MRC file to OME-Zarr.
+ 
+     Args:
+         src (str): input data location.
+         dest (str): output zarr group location.
+         client (Client): dask client instance.
+         num_workers (int, optional): number of dask workers. Defaults to 20.
+         zarr_chunks (list[int], optional): output zarr chunk shape. Defaults to [128]*3.
+         axes (list[str], optional): axis order. Defaults to ['z', 'y', 'x'].
+         scale (list[float], optional): voxel size (in physical units). Defaults to [1.0]*3.
+         translation (list[float], optional): offset (in physical units). Defaults to [0.0]*3.
+         units (list[str], optional): physical units. Defaults to ['nanometer']*3.
+     """
+ 
+     dataset = init_dataset(src, axes, scale, translation, units)
+ 
+     # write in parallel to zarr using dask
+     client.cluster.scale(num_workers)
+     dataset.write_to_zarr(dest, client, zarr_chunks)
+     client.cluster.scale(0)
+     # populate zarr metadata
+     dataset.add_ome_metadata(dest)
+ 
+ 
+ @click.command("zarrify")
+ @click.option(
+     "--src",
+     "-s",
+     type=click.Path(exists=True),
+     help="Input file/directory location",
+ )
+ @click.option("--dest", "-d", type=click.STRING, help="Output .zarr file path.")
+ @click.option(
+     "--num_workers", "-w", default=100, type=click.INT, help="Number of dask workers"
+ )
+ @click.option(
+     "--cluster",
+     "-c",
+     default=None,
+     type=click.STRING,
+     help="Which instance of dask client to use. Local client - 'local', cluster - 'lsf'",
+ )
+ @click.option(
+     "--zarr_chunks",
+     "-zc",
+     nargs=3,
+     default=(64, 128, 128),
+     type=click.INT,
+     help="Chunk size for (z, y, x) axis order. z-axis is normal to the tiff stack plane. Default (64, 128, 128)",
+ )
+ @click.option(
+     "--axes",
+     "-a",
+     nargs=3,
+     default=("z", "y", "x"),
+     type=str,
+     help="Metadata axis names. Order matters. \n Example: -a z y x",
+ )
+ @click.option(
+     "--translation",
+     "-t",
+     nargs=3,
+     default=(0.0, 0.0, 0.0),
+     type=float,
+     help="Metadata translation (offset) value. Order matters. \n Example: -t 1.0 2.0 3.0",
+ )
+ @click.option(
+     "--scale",
+     "-sc",
+     nargs=3,
+     default=(1.0, 1.0, 1.0),
+     type=float,
+     help="Metadata scale value. Order matters. \n Example: --scale 1.0 2.0 3.0",
+ )
+ @click.option(
+     "--units",
+     "-u",
+     nargs=3,
+     default=("nanometer", "nanometer", "nanometer"),
+     type=str,
+     help="Metadata unit names. Order matters. \n Example: -u nanometer nanometer nanometer",
+ )
+ def cli(src, dest, num_workers, cluster, zarr_chunks, axes, translation, scale, units):
+ 
+     # create a dask client to submit tasks
+     client = initialize_dask_client(cluster)
+ 
+     # convert src dataset (n5, tiff, mrc) to ome-zarr dataset
+     to_zarr(src,
+             dest,
+             client,
+             num_workers,
+             zarr_chunks,
+             axes,
+             scale,
+             translation,
+             units)
+ 
+ if __name__ == "__main__":
+     cli()
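Editor's note: the `init_dataset` dispatch above keys on the source suffix (`.n5`, `.mrc`, `.tif`/`.tiff`) and falls back to `TiffStack` for directories. A usage sketch, assuming a file `volume.mrc` exists on disk (the path is hypothetical):

```python
from zarrify.to_zarr import init_dataset

# returns a format object exposing write_to_zarr() and
# add_ome_metadata(); opening it memory-maps the source file
dataset = init_dataset(
    "volume.mrc",
    axes=["z", "y", "x"],
    scale=[1.0, 1.0, 1.0],
    translation=[0.0, 0.0, 0.0],
    units=["nanometer"] * 3,
)
print(type(dataset).__name__)  # Mrc3D
```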
File without changes
zarrify-0.0.8/src/zarrify/utils/dask_utils.py CHANGED
@@ -2,9 +2,10 @@ from dask_jobqueue import LSFCluster
  from dask.distributed import Client, LocalCluster
  import os
  import sys
+ import logging
  
  
- def initialize_dask_client(cluster_type: str) -> Client:
+ def initialize_dask_client(cluster_type: str | None = None) -> Client:
      """Initialize dask client.
  
      Args:
@@ -13,9 +14,8 @@ def initialize_dask_client(cluster_type: str) -> Client:
      Returns:
          (Client): instance of a dask client
      """
-     if cluster_type == "":
-         print("Did not specify which instance of the dask client to use!")
-         sys.exit(0)
+     if cluster_type is None:
+         raise ValueError("Cluster type must be specified")
      elif cluster_type == "lsf":
          num_cores = 1
          cluster = LSFCluster(
@@ -29,11 +29,14 @@ def initialize_dask_client(cluster_type: str) -> Client:
          )
      elif cluster_type == "local":
          cluster = LocalCluster()
+     else:
+         raise ValueError(f"Unsupported cluster type: {cluster_type}")
  
      client = Client(cluster)
+     print(str(client.dashboard_link))
      with open(
          os.path.join(os.getcwd(), "dask_dashboard_link" + ".txt"), "w"
      ) as text_file:
          text_file.write(str(client.dashboard_link))
-     print(client.dashboard_link)
-     return client
+     logging.info(client.dashboard_link)
+     return client
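Editor's note: a minimal round trip through `initialize_dask_client` as defined above; omitting the cluster type now raises `ValueError` instead of silently exiting:

```python
from zarrify.utils.dask_utils import initialize_dask_client

client = initialize_dask_client("local")  # starts a LocalCluster
print(client.dashboard_link)              # also written to dask_dashboard_link.txt
client.close()

try:
    initialize_dask_client()              # no cluster type given
except ValueError as err:
    print(err)                            # "Cluster type must be specified"
```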
zarrify-0.0.8/src/zarrify/utils/volume.py CHANGED
@@ -1,4 +1,5 @@
  import zarr
+ from abc import ABCMeta
  
  
  class Volume:
@@ -18,13 +19,26 @@ class Volume:
              "scale": scale,
              "units": units,
          }
+ 
+     def get_output_array(self, dest: str, chunks: list[int], comp: ABCMeta) -> zarr.Array:
+         z_store = zarr.NestedDirectoryStore(dest)
+         z_root = zarr.open(store=z_store, mode="a")
+ 
+         return z_root.require_dataset(
+             name="s0",
+             shape=self.shape,
+             dtype=self.dtype,
+             chunks=chunks,
+             compressor=comp,
+         )
  
-     def add_ome_metadata(self, root: zarr.Group):
+     def add_ome_metadata(self, dest: str):
          """Add selected tiff metadata to zarr attributes file (.zattrs).
  
          Args:
              root (zarr.Group): root group of the output zarr array
          """
+         root = zarr.open(dest, mode='a')
          # json template for a multiscale structure
          z_attrs: dict = {"multiscales": [{}]}
          z_attrs["multiscales"][0]["axes"] = [
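Editor's note: `Volume.get_output_array` centralizes the output-array creation that each format class previously performed inline. A standalone sketch of the same `require_dataset` pattern under zarr==2.18.3 (the store path, shape, and dtype are hypothetical):

```python
import zarr
from numcodecs import Zstd

store = zarr.NestedDirectoryStore("output.zarr")
root = zarr.open(store=store, mode="a")

# require_dataset is idempotent: it creates s0 on the first run
# and reuses the existing array on subsequent runs
s0 = root.require_dataset(
    name="s0",
    shape=(256, 256, 256),
    dtype="uint8",
    chunks=(64, 128, 128),
    compressor=Zstd(level=6),
)
print(s0.chunks)  # (64, 128, 128)
```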
zarrify-0.0.8/src/zarrify.egg-info/PKG-INFO ADDED
@@ -0,0 +1,65 @@
+ Metadata-Version: 2.4
+ Name: zarrify
+ Version: 0.0.8
+ Summary: Convert scientific image formats (TIFF, MRC, N5) to OME-Zarr
+ Author-email: Yurii Zubov <zubov452@gmail.com>
+ License: MIT
+ Keywords: zarr,ome-zarr,tiff,mrc,n5,scientific-imaging
+ Classifier: Intended Audience :: Science/Research
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Topic :: Scientific/Engineering :: Image Processing
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ License-File: LICENSE.txt
+ Requires-Dist: click<9.0.0,>=8.1.8
+ Requires-Dist: colorama<0.5.0,>=0.4.6
+ Requires-Dist: dask<2025.0.0,>=2024.12.1
+ Requires-Dist: dask-jobqueue==0.8.2
+ Requires-Dist: imagecodecs<2025.0.0,>=2024.12.30
+ Requires-Dist: mrcfile<2.0.0,>=1.5.3
+ Requires-Dist: natsort<9.0.0,>=8.4.0
+ Requires-Dist: numcodecs<0.16,>=0.13.0
+ Requires-Dist: pydantic-zarr<0.5.0,>=0.4.0
+ Requires-Dist: pint<1.0.0,>=0.20.0
+ Requires-Dist: tifffile<2026.0.0,>=2025.1.10
+ Requires-Dist: zarr==2.18.3
+ Requires-Dist: bokeh>=3.1.0
+ Provides-Extra: test
+ Requires-Dist: pytest; extra == "test"
+ Dynamic: license-file
+ 
+ # zarrify
+ 
+ Convert TIFF, MRC, and N5 files to OME-Zarr format.
+ 
+ ## Install
+ 
+ ```bash
+ pip install zarrify
+ ```
+ 
+ ## Usage
+ 
+ ```bash
+ zarrify --src input.tiff --dest output.zarr --cluster local
+ ```
+ 
+ ## Python API
+ 
+ ```python
+ import zarrify
+ from zarrify.utils.dask_utils import initialize_dask_client
+ 
+ client = initialize_dask_client("local")
+ zarrify.to_zarr("input.tiff", "output.zarr", client)
+ ```
+ 
+ ## Supported formats
+ 
+ - TIFF stacks
+ - 3D TIFF files
+ - MRC files
+ - N5 containers
zarrify-0.0.8/src/zarrify.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,20 @@
+ LICENSE.txt
+ README.md
+ pyproject.toml
+ src/zarrify/__about__.py
+ src/zarrify/__init__.py
+ src/zarrify/to_zarr.py
+ src/zarrify.egg-info/PKG-INFO
+ src/zarrify.egg-info/SOURCES.txt
+ src/zarrify.egg-info/dependency_links.txt
+ src/zarrify.egg-info/entry_points.txt
+ src/zarrify.egg-info/requires.txt
+ src/zarrify.egg-info/top_level.txt
+ src/zarrify/formats/__init__.py
+ src/zarrify/formats/mrc.py
+ src/zarrify/formats/n5.py
+ src/zarrify/formats/tiff.py
+ src/zarrify/formats/tiff_stack.py
+ src/zarrify/utils/__init__.py
+ src/zarrify/utils/dask_utils.py
+ src/zarrify/utils/volume.py
zarrify-0.0.8/src/zarrify.egg-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+ [console_scripts]
+ zarrify = zarrify.to_zarr:cli
zarrify-0.0.8/src/zarrify.egg-info/requires.txt ADDED
@@ -0,0 +1,16 @@
+ click<9.0.0,>=8.1.8
+ colorama<0.5.0,>=0.4.6
+ dask<2025.0.0,>=2024.12.1
+ dask-jobqueue==0.8.2
+ imagecodecs<2025.0.0,>=2024.12.30
+ mrcfile<2.0.0,>=1.5.3
+ natsort<9.0.0,>=8.4.0
+ numcodecs<0.16,>=0.13.0
+ pydantic-zarr<0.5.0,>=0.4.0
+ pint<1.0.0,>=0.20.0
+ tifffile<2026.0.0,>=2025.1.10
+ zarr==2.18.3
+ bokeh>=3.1.0
+ 
+ [test]
+ pytest
zarrify-0.0.8/src/zarrify.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ zarrify
zarrify-0.0.2/PKG-INFO DELETED
@@ -1,24 +0,0 @@
- Metadata-Version: 2.3
- Name: zarrify
- Version: 0.0.2
- Summary: 
- Author: Yurii Zubov
- Author-email: zubov452@gmail.com
- Requires-Python: >=3.10
- Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.10
- Classifier: Programming Language :: Python :: 3.11
- Classifier: Programming Language :: Python :: 3.12
- Classifier: Programming Language :: Python :: 3.13
- Requires-Dist: click (>=8.1.8,<9.0.0)
- Requires-Dist: colorama (>=0.4.6,<0.5.0)
- Requires-Dist: dask (>=2024.12.1,<2025.0.0)
- Requires-Dist: dask-jobqueue (==0.8.2)
- Requires-Dist: imagecodecs (>=2024.12.30,<2025.0.0)
- Requires-Dist: mrcfile (>=1.5.3,<2.0.0)
- Requires-Dist: natsort (>=8.4.0,<9.0.0)
- Requires-Dist: tifffile (>=2025.1.10,<2026.0.0)
- Requires-Dist: zarr (==2.16.1)
- Description-Content-Type: text/markdown
- 
- 
zarrify-0.0.2/pyproject.toml DELETED
@@ -1,26 +0,0 @@
- [project]
- name = "zarrify"
- version = "0.0.2"
- description = ""
- authors = [
-     {name = "Yurii Zubov",email = "zubov452@gmail.com"}
- ]
- readme = "README.md"
- requires-python = ">=3.10"
- dependencies = [
-     "zarr (==2.16.1)",
-     "mrcfile (>=1.5.3,<2.0.0)",
-     "dask-jobqueue (==0.8.2)",
-     "colorama (>=0.4.6,<0.5.0)",
-     "imagecodecs (>=2024.12.30,<2025.0.0)",
-     "dask (>=2024.12.1,<2025.0.0)",
-     "tifffile (>=2025.1.10,<2026.0.0)",
-     "natsort (>=8.4.0,<9.0.0)",
-     "click (>=8.1.8,<9.0.0)"
- ]
- [project.scripts]
- zarrify = "zarrify.to_zarr:cli"
- 
- [build-system]
- requires = ["poetry-core>=2.0.0,<3.0.0"]
- build-backend = "poetry.core.masonry.api"
@@ -1,3 +0,0 @@
- # SPDX-FileCopyrightText: 2025-present Yurii Zubov <zubov452@gmail.com>
- 
- __version__ = "0.0.1"
@@ -1,8 +0,0 @@
- # SPDX-FileCopyrightText: 2025-present Yurii Zubov <zubov452@gmail.com>
- #
- # SPDX-License-Identifier: MIT
- 
- 
- from zarrify import to_zarr
- 
- __all__ = ["to_zarr"]
@@ -1,44 +0,0 @@
- import zarr
- import os
- 
- class N53D(Volume):
-     def __init__(
-         self,
-         src_path: str,
-         axes: list[str],
-         scale: list[float],
-         translation: list[float],
-         units: list[str],
-     ):
-         """Construct all the necessary attributes for the proper conversion of tiff to OME-NGFF Zarr.
- 
-         Args:
-             input_filepath (str): path to source tiff file.
-         """
-         super().__init__(src_path, axes, scale, translation, units)
-         self.store_path, self.arr_path = self.separate_store_path(src_path, '')
-         self.n5_store = zarr.N5Store(self.store_path)
-         self.n5_arr = zarr.open(store = self.n5_store, path=self.arr_path, mode='r')
- 
-         self.shape = self.n5_arr.shape
-         self.dtype = self.n5_arr.dtype
-         self.chunks = self.n5_arr.chunks
- 
-     def separate_store_path(store, path):
-         """
-         sometimes you can pass a total os path to node, leading to
-         an empty('') node.path attribute.
-         the correct way is to separate path to container(.n5, .zarr)
-         from path to array within a container.
- 
-         Args:
-             store (string): path to store
-             path (string): path array/group (.n5 or .zarr)
- 
-         Returns:
-             (string, string): returns regularized store and group/array path
-         """
-         new_store, path_prefix = os.path.split(store)
-         if ".n5" in path_prefix:
-             return store, path
-         return separate_store_path(new_store, os.path.join(path_prefix, path))
@@ -1,115 +0,0 @@
- import zarr
- from numcodecs import Zstd
- import os
- import click
- import sys
- from dask.distributed import Client
- import time
- from zarrify.formats.tiff_stack import TiffStack
- from zarrify.formats.tiff import Tiff3D
- from zarrify.formats.mrc import Mrc3D
- from zarrify.formats.n5 import N53D
- from zarrify.utils.dask_utils import initialize_dask_client
- 
- 
- # @click.command("zarrify")
- # @click.option(
- #     "--src",
- #     "-s",
- #     type=click.Path(exists=True),
- #     help="Input file/directory location",
- # )
- # @click.option("--dest", "-s", type=click.STRING, help="Output .zarr file path.")
- # @click.option(
- #     "--num_workers", "-w", default=100, type=click.INT, help="Number of dask workers"
- # )
- # @click.option(
- #     "--cluster",
- #     "-c",
- #     default="",
- #     type=click.STRING,
- #     help="Which instance of dask client to use. Local client - 'local', cluster 'lsf'",
- # )
- # @click.option(
- #     "--zarr_chunks",
- #     "-zc",
- #     nargs=3,
- #     default=(64, 128, 128),
- #     type=click.INT,
- #     help="Chunk size for (z, y, x) axis order. z-axis is normal to the tiff stack plane. Default (64, 128, 128)",
- # )
- # @click.option(
- #     "--axes",
- #     "-a",
- #     nargs=3,
- #     default=("z", "y", "x"),
- #     type=str,
- #     help="Metadata axis names. Order matters. \n Example: -a z y x",
- # )
- # @click.option(
- #     "--translation",
- #     "-t",
- #     nargs=3,
- #     default=(0.0, 0.0, 0.0),
- #     type=float,
- #     help="Metadata translation(offset) value. Order matters. \n Example: -t 1.0 2.0 3.0",
- # )
- # @click.option(
- #     "--scale",
- #     "-s",
- #     nargs=3,
- #     default=(1.0, 1.0, 1.0),
- #     type=float,
- #     help="Metadata scale value. Order matters. \n Example: -s 1.0 2.0 3.0",
- # )
- # @click.option(
- #     "--units",
- #     "-u",
- #     nargs=3,
- #     default=("nanometer", "nanometer", "nanometer"),
- #     type=str,
- #     help="Metadata unit names. Order matters. \n Example: -t nanometer nanometer nanometer",
- # )
- # def cli(src, dest, num_workers, cluster, zarr_chunks, axes, translation, scale, units):
- 
- # create a dask client to submit tasks
- #client = initialize_dask_client(cluster)
- 
- def to_zarr(src : str,
-             dest: str,
-             client : Client,
-             num_workers : int = 20,
-             zarr_chunks : list[int] = [128]*3,
-             axes : list[str] = ('z', 'y', 'x'),
-             scale : list[float] = [1.0]*3,
-             translation : list[float] = [0.0]*3,
-             units: list[str] = ['nanometer']*3):
-     if '.n5' in src:
-         dataset = N53D(src, axes, scale, translation, units)
-     if src.endswith(".mrc"):
-         dataset = Mrc3D(src, axes, scale, translation, units)
-     elif src.endswith(".tif") or src.endswith(".tiff"):
-         dataset = Tiff3D(src, axes, scale, translation, units)
-     if os.path.isdir(src):
-         dataset = TiffStack(src, axes, scale, translation, units)
- 
-     z_store = zarr.NestedDirectoryStore(dest)
-     z_root = zarr.open(store=z_store, mode="a")
-     z_arr = z_root.require_dataset(
-         name="s0",
-         shape=dataset.shape,
-         dtype=dataset.dtype,
-         chunks=zarr_chunks,
-         compressor=Zstd(level=6),
-     )
- 
-     # write in parallel to zarr using dask
-     client.cluster.scale(num_workers)
-     dataset.write_to_zarr(z_arr, client)
-     client.cluster.scale(0)
-     # populate zarr metadata
-     dataset.add_ome_metadata(z_root)
- 
- 
- # if __name__ == "__main__":
- #     cli()
File without changes