zarrify 0.0.2.tar.gz → 0.0.8.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zarrify-0.0.8/PKG-INFO ADDED
@@ -0,0 +1,65 @@
+ Metadata-Version: 2.4
+ Name: zarrify
+ Version: 0.0.8
+ Summary: Convert scientific image formats (TIFF, MRC, N5) to OME-Zarr
+ Author-email: Yurii Zubov <zubov452@gmail.com>
+ License: MIT
+ Keywords: zarr,ome-zarr,tiff,mrc,n5,scientific-imaging
+ Classifier: Intended Audience :: Science/Research
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Topic :: Scientific/Engineering :: Image Processing
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ License-File: LICENSE.txt
+ Requires-Dist: click<9.0.0,>=8.1.8
+ Requires-Dist: colorama<0.5.0,>=0.4.6
+ Requires-Dist: dask<2025.0.0,>=2024.12.1
+ Requires-Dist: dask-jobqueue==0.8.2
+ Requires-Dist: imagecodecs<2025.0.0,>=2024.12.30
+ Requires-Dist: mrcfile<2.0.0,>=1.5.3
+ Requires-Dist: natsort<9.0.0,>=8.4.0
+ Requires-Dist: numcodecs<0.16,>=0.13.0
+ Requires-Dist: pydantic-zarr<0.5.0,>=0.4.0
+ Requires-Dist: pint<1.0.0,>=0.20.0
+ Requires-Dist: tifffile<2026.0.0,>=2025.1.10
+ Requires-Dist: zarr==2.18.3
+ Requires-Dist: bokeh>=3.1.0
+ Provides-Extra: test
+ Requires-Dist: pytest; extra == "test"
+ Dynamic: license-file
+ 
+ # zarrify
+ 
+ Convert TIFF, MRC, and N5 files to OME-Zarr format.
+ 
+ ## Install
+ 
+ ```bash
+ pip install zarrify
+ ```
+ 
+ ## Usage
+ 
+ ```bash
+ zarrify --src input.tiff --dest output.zarr --cluster local
+ ```
+ 
+ ## Python API
+ 
+ ```python
+ import zarrify
+ from zarrify.utils.dask_utils import initialize_dask_client
+ 
+ client = initialize_dask_client("local")
+ zarrify.to_zarr("input.tiff", "output.zarr", client)
+ ```
+ 
+ ## Supported formats
+ 
+ - TIFF stacks
+ - 3D TIFF files
+ - MRC files
+ - N5 containers
zarrify-0.0.8/README.md ADDED
@@ -0,0 +1,32 @@
+ # zarrify
+ 
+ Convert TIFF, MRC, and N5 files to OME-Zarr format.
+ 
+ ## Install
+ 
+ ```bash
+ pip install zarrify
+ ```
+ 
+ ## Usage
+ 
+ ```bash
+ zarrify --src input.tiff --dest output.zarr --cluster local
+ ```
+ 
+ ## Python API
+ 
+ ```python
+ import zarrify
+ from zarrify.utils.dask_utils import initialize_dask_client
+ 
+ client = initialize_dask_client("local")
+ zarrify.to_zarr("input.tiff", "output.zarr", client)
+ ```
+ 
+ ## Supported formats
+ 
+ - TIFF stacks
+ - 3D TIFF files
+ - MRC files
+ - N5 containers
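Editor's note: the `to_zarr` call shown in the README also accepts the metadata keyword arguments introduced in this release (see `src/zarrify/to_zarr.py` below). A minimal sketch, assuming a local cluster and an existing input file; the paths are hypothetical:

```python
import zarrify
from zarrify.utils.dask_utils import initialize_dask_client

# start a local dask cluster; keyword arguments mirror the
# to_zarr() signature added in src/zarrify/to_zarr.py
client = initialize_dask_client("local")
zarrify.to_zarr(
    "input.mrc",                  # hypothetical input path
    "output.zarr",
    client,
    num_workers=4,
    zarr_chunks=[128, 128, 128],  # output zarr chunk shape
    axes=["z", "y", "x"],
    scale=[4.0, 4.0, 4.0],        # voxel size in physical units
    translation=[0.0, 0.0, 0.0],  # offset in physical units
    units=["nanometer"] * 3,
)
```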
zarrify-0.0.8/pyproject.toml ADDED
@@ -0,0 +1,48 @@
+ [project]
+ name = "zarrify"
+ version = "0.0.8"
+ description = "Convert scientific image formats (TIFF, MRC, N5) to OME-Zarr"
+ authors = [
+     { name = "Yurii Zubov", email = "zubov452@gmail.com" }
+ ]
+ readme = "README.md"
+ requires-python = ">=3.10"
+ license = { text = "MIT" }
+ keywords = ["zarr", "ome-zarr", "tiff", "mrc", "n5", "scientific-imaging"]
+ classifiers = [
+     "Intended Audience :: Science/Research",
+     "License :: OSI Approved :: MIT License",
+     "Programming Language :: Python :: 3",
+     "Programming Language :: Python :: 3.10",
+     "Programming Language :: Python :: 3.11",
+     "Topic :: Scientific/Engineering :: Image Processing",
+ ]
+ 
+ dependencies = [
+     "click>=8.1.8,<9.0.0",
+     "colorama>=0.4.6,<0.5.0",
+     "dask>=2024.12.1,<2025.0.0",
+     "dask-jobqueue==0.8.2",
+     "imagecodecs>=2024.12.30,<2025.0.0",
+     "mrcfile>=1.5.3,<2.0.0",
+     "natsort>=8.4.0,<9.0.0",
+     "numcodecs>=0.13.0,<0.16",
+     "pydantic-zarr>=0.4.0,<0.5.0",
+     "pint>=0.20.0,<1.0.0",
+     "tifffile>=2025.1.10,<2026.0.0",
+     "zarr==2.18.3",
+     "bokeh>=3.1.0"
+ ]
+ 
+ [project.scripts]
+ zarrify = "zarrify.to_zarr:cli"
+ 
+ [project.optional-dependencies]
+ test = [
+     "pytest"
+ ]
+ 
+ 
+ [build-system]
+ requires = ["setuptools>=61.0"]
+ build-backend = "setuptools.build_meta"
zarrify-0.0.8/setup.cfg ADDED
@@ -0,0 +1,4 @@
+ [egg_info]
+ tag_build = 
+ tag_date = 0
+ 
zarrify-0.0.8/src/zarrify/__about__.py ADDED
@@ -0,0 +1 @@
+ __version__ = "0.0.8"
zarrify-0.0.8/src/zarrify/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from zarrify.to_zarr import to_zarr
+ from zarrify.__about__ import __version__
+ 
+ __all__ = ["to_zarr", "__version__"]
zarrify-0.0.8/src/zarrify/formats/mrc.py CHANGED
@@ -7,7 +7,10 @@ from dask.distributed import Client, wait
  from toolz import partition_all
  import time
  from zarrify.utils.volume import Volume
- 
+ from abc import ABCMeta
+ from numcodecs import Zstd
+ import logging
+ import copy
  
  class Mrc3D(Volume):
  
@@ -28,25 +31,16 @@ class Mrc3D(Volume):
  
          self.memmap = mrcfile.mmap(self.src_path, mode="r")
          self.ndim = self.memmap.data.ndim
-         self.shape = self.memmap.shape
+         self.shape = self.memmap.data.shape
          self.dtype = self.memmap.data.dtype
  
-     def save_chunk(self, z_arr: zarr.Array, chunk_slice: Tuple[slice, ...]):
-         """Copies data from a particular part of the input mrc array into a specific chunk of the output zarr array.
- 
-         Args:
-             z_arr (zarr.core.Array): output zarr array object
-             chunk_slice (Tuple[slice, ...]): slice of the mrc array to copy.
-         """
-         mrc_file = mrcfile.mmap(self.src_path, mode="r")
- 
-         if not (mrc_file.data[chunk_slice] == 0).all():
-             z_arr[chunk_slice] = mrc_file.data[chunk_slice]
  
      def write_to_zarr(
          self,
-         z_arr: zarr.Array,
+         dest: str,
          client: Client,
+         zarr_chunks: list[int],
+         comp: ABCMeta = Zstd(level=6),
      ):
          """Use mrcfile memmap to access small parts of the mrc file and write them into zarr chunks.
  
@@ -54,7 +48,13 @@ class Mrc3D(Volume):
              dest_path (str): path to the zarr group where the output dataset is stored.
              client (Client): instance of a dask client
          """
+ 
+         logging.basicConfig(level=logging.INFO,
+                             format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  
+         src_path = copy.copy(self.src_path)
+         z_arr = self.get_output_array(dest, zarr_chunks, comp)
+ 
          out_slices = slices_from_chunks(
              normalize_chunks(z_arr.chunks, shape=z_arr.shape)
          )
@@ -62,12 +62,24 @@ class Mrc3D(Volume):
  
          for idx, part in enumerate(out_slices_partitioned):
  
-             print(f"{idx + 1} / {len(out_slices_partitioned)}")
+             logging.info(f"{idx + 1} / {len(out_slices_partitioned)}")
              start = time.time()
-             fut = client.map(lambda v: self.save_chunk(z_arr, v), part)
-             print(
+             fut = client.map(lambda v: save_chunk(src_path, z_arr, v), part)
+             logging.info(
                  f"Submitted {len(part)} tasks to the scheduler in {time.time()- start}s"
              )
              # wait for all the futures to complete
              result = wait(fut)
-             print(f"Completed {len(part)} tasks in {time.time() - start}s")
+             logging.info(f"Completed {len(part)} tasks in {time.time() - start}s")
+ 
+ def save_chunk(src_path, z_arr: zarr.Array, chunk_slice: Tuple[slice, ...]):
+     """Copies data from a particular part of the input mrc array into a specific chunk of the output zarr array.
+ 
+     Args:
+         z_arr (zarr.core.Array): output zarr array object
+         chunk_slice (Tuple[slice, ...]): slice of the mrc array to copy.
+     """
+     mrc_file = mrcfile.mmap(src_path, mode="r")
+ 
+     if not (mrc_file.data[chunk_slice] == 0).all():
+         z_arr[chunk_slice] = mrc_file.data[chunk_slice]
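Editor's note: `Mrc3D.write_to_zarr` above (and the N5 path below) rely on the same pattern: dask computes one slice tuple per output chunk, and `toolz.partition_all` batches the slices so the scheduler is not flooded. A standalone sketch of that pattern; the shape and chunk values are hypothetical:

```python
from dask.array.core import normalize_chunks, slices_from_chunks
from toolz import partition_all

shape = (512, 512, 512)   # hypothetical output array shape
chunks = (128, 128, 128)  # hypothetical zarr chunk shape

# one slice tuple per output chunk
out_slices = slices_from_chunks(normalize_chunks(chunks, shape=shape))
# batch the slices to keep the dask scheduler responsive
batches = tuple(partition_all(100_000, out_slices))

print(len(out_slices))  # 64 chunk slices (4 per axis)
print(len(batches))     # 1 batch here; large volumes yield several
```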
zarrify-0.0.8/src/zarrify/formats/n5.py ADDED
@@ -0,0 +1,217 @@
+ import zarr
+ import os
+ from zarrify.utils.volume import Volume
+ import json
+ from itertools import chain
+ import natsort
+ from operator import itemgetter
+ import pydantic_zarr as pz
+ import dask.array as da
+ from dask.array.core import slices_from_chunks, normalize_chunks
+ from toolz import partition_all
+ from dask.distributed import Client, wait
+ from typing import Tuple
+ import time
+ import numpy as np
+ import logging
+ import pint
+ from abc import ABCMeta
+ from numcodecs import Zstd
+ 
+ class N5Group(Volume):
+ 
+     def __init__(
+         self,
+         src_path: str,
+         axes: list[str],
+         scale: list[float],
+         translation: list[float],
+         units: list[str],
+     ):
+         """Construct all the necessary attributes for the proper conversion of N5 to OME-NGFF Zarr.
+ 
+         Args:
+             src_path (str): path to the source N5 container.
+         """
+         super().__init__(src_path, axes, scale, translation, units)
+         self.store_path, self.path = self.separate_store_path(src_path, '')
+         self.n5_store = zarr.N5Store(self.store_path)
+ 
+         self.n5_obj = zarr.open(store=self.n5_store, path=self.path, mode='r')
+ 
+     def separate_store_path(self,
+                             store: str,
+                             path: str):
+         """
+         Sometimes a full OS path is passed to a node, leading to an
+         empty ('') node.path attribute. The correct way is to separate
+         the path to the container (.n5, .zarr) from the path to the
+         array within the container.
+ 
+         Args:
+             store (string): path to store
+             path (string): path to the array/group within the container (.n5 or .zarr)
+ 
+         Returns:
+             (string, string): regularized store and group/array paths
+         """
+         new_store, path_prefix = os.path.split(store)
+         if ".n5" in path_prefix:
+             return store, path
+         return self.separate_store_path(new_store, os.path.join(path_prefix, path))
+ 
+     # creates attributes.json recursively within n5 group, if missing
+     def reconstruct_json(self,
+                          n5src: str):
+         dir_list = os.listdir(n5src)
+         if "attributes.json" not in dir_list:
+             with open(os.path.join(n5src, "attributes.json"), "w") as jfile:
+                 attrs = {"n5": "2.0.0"}
+                 jfile.write(json.dumps(attrs, indent=4))
+         for obj in dir_list:
+             if os.path.isdir(os.path.join(n5src, obj)):
+                 self.reconstruct_json(os.path.join(n5src, obj))
+ 
+     def apply_ome_template(self, zgroup: zarr.Group):
+ 
+         z_attrs = {"multiscales": [{}]}
+ 
+         # normalize input units, i.e. 'meter' or 'm' -> 'meter'
+         ureg = pint.UnitRegistry()
+         units_list = [ureg.Unit(unit) for unit in zgroup.attrs['units']]
+ 
+         # populate .zattrs
+         z_attrs['multiscales'][0]['axes'] = [{"name": axis,
+                                               "type": "space",
+                                               "unit": unit} for (axis, unit) in zip(zgroup.attrs['axes'],
+                                                                                     units_list)]
+         z_attrs['multiscales'][0]['version'] = '0.4'
+         z_attrs['multiscales'][0]['name'] = zgroup.name
+         z_attrs['multiscales'][0]['coordinateTransformations'] = [{"type": "scale",
+             "scale": [1.0, 1.0, 1.0]}, {"type": "translation", "translation": [1.0, 1.0, 1.0]}]
+ 
+         return z_attrs
+ 
+     def normalize_to_omengff(self,
+                              zgroup: zarr.Group):
+         group_keys = zgroup.keys()
+ 
+         for key in chain(group_keys, '/'):
+             if isinstance(zgroup[key], zarr.hierarchy.Group):
+                 if key != '/':
+                     self.normalize_to_omengff(zgroup[key])
+                 if 'scales' in zgroup[key].attrs.asdict():
+                     zattrs = self.apply_ome_template(zgroup[key])
+                     zarrays = zgroup[key].arrays(recurse=True)
+ 
+                     unsorted_datasets = []
+                     for arr in zarrays:
+                         unsorted_datasets.append(self.ome_dataset_metadata(arr[1], zgroup[key]))
+ 
+                     # 1. apply natural sort to organize datasets metadata for different resolution levels (s0 -> s10)
+                     # 2. add datasets metadata to the ome-ngff template
+                     zattrs['multiscales'][0]['datasets'] = natsort.natsorted(unsorted_datasets, key=itemgetter('path'))
+                     zgroup[key].attrs['multiscales'] = zattrs['multiscales']
+ 
+     def ome_dataset_metadata(self, n5arr: zarr.Array,
+                              group: zarr.Group):
+ 
+         arr_attrs_n5 = n5arr.attrs['transform']
+         dataset_meta = {
+             "path": os.path.relpath(n5arr.path, group.path),
+             "coordinateTransformations": [{
+                 'type': 'scale',
+                 'scale': arr_attrs_n5['scale']}, {
+                 'type': 'translation',
+                 'translation': arr_attrs_n5['translate']
+             }]}
+ 
+         return dataset_meta
+ 
+     # d=groupspec.to_dict(),
+     def normalize_groupspec(self, d, comp):
+         for k, v in d.items():
+             if k == "compressor":
+                 d[k] = comp.get_config()
+ 
+             elif k == 'dimension_separator':
+                 d[k] = '/'
+             elif isinstance(v, dict):
+                 self.normalize_groupspec(v, comp)
+ 
+     def copy_n5_tree(self, n5_root, z_store, comp):
+         spec_n5 = pz.GroupSpec.from_zarr(n5_root)
+         spec_n5_dict = spec_n5.dict()
+         self.normalize_groupspec(spec_n5_dict, comp)
+         spec_n5 = pz.GroupSpec(**spec_n5_dict)
+         return spec_n5.to_zarr(z_store, path='')
+ 
+ 
+     # creates attributes.json, if missing
+     def reconstruct_json(self, n5src):
+         dir_list = os.listdir(n5src)
+         if "attributes.json" not in dir_list:
+             with open(os.path.join(n5src, "attributes.json"), "w") as jfile:
+                 attrs = {"n5": "2.0.0"}
+                 jfile.write(json.dumps(attrs, indent=4))
+         for obj in dir_list:
+             if os.path.isdir(os.path.join(n5src, obj)):
+                 self.reconstruct_json(os.path.join(n5src, obj))
+ 
+ 
+ 
+     def save_chunk(
+             self,
+             source: zarr.Array,
+             dest: zarr.Array,
+             out_slices: Tuple[slice, ...],
+             invert: bool):
+ 
+         in_slices = tuple(out_slice for out_slice in out_slices)
+         source_data = source[in_slices]
+         # only store source_data if it is not all 0s
+         if not (source_data == 0).all():
+             if invert:
+                 dest[out_slices] = np.invert(source_data)
+             else:
+                 dest[out_slices] = source_data
+         return 1
+ 
+     def write_to_zarr(
+             self,
+             dest: str,
+             client: Client,
+             zarr_chunks: list[int],
+             comp: ABCMeta = Zstd(level=6),
+     ):
+ 
+         logging.basicConfig(level=logging.INFO,
+                             format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ 
+         self.reconstruct_json(os.path.join(self.store_path, self.path))
+ 
+         # input n5 arrays list to convert
+         n5_root = zarr.open_group(self.n5_store, mode='r')
+         zarr_arrays = n5_root.arrays(recurse=True)
+         # copy input n5 tree structure to output zarr and add ome metadata, when N5 metadata is present
+         z_store = zarr.NestedDirectoryStore(dest)
+         zg = self.copy_n5_tree(n5_root, z_store, comp)
+ 
+         self.normalize_to_omengff(zg)
+ 
+         for item in zarr_arrays:
+             n5arr = item[1]
+             dest_arr = zarr.open_array(os.path.join(dest, n5arr.path), mode='a')
+ 
+             out_slices = slices_from_chunks(normalize_chunks(dest_arr.chunks, shape=dest_arr.shape))
+             # break the slices up into batches, to make things easier for the dask scheduler
+             out_slices_partitioned = tuple(partition_all(100000, out_slices))
+ 
+             for idx, part in enumerate(out_slices_partitioned):
+                 logging.info(f'{idx + 1} / {len(out_slices_partitioned)}')
+                 start = time.time()
+                 fut = client.map(lambda v: self.save_chunk(n5arr, dest_arr, v, invert=False), part)
+                 logging.info(f'Submitted {len(part)} tasks to the scheduler in {time.time()- start}s')
+                 # wait for all the futures to complete
+                 result = wait(fut)
+                 logging.info(f'Completed {len(part)} tasks in {time.time() - start}s')
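Editor's note: `apply_ome_template` above normalizes unit aliases through pint before writing OME-NGFF axes metadata. A minimal sketch of that normalization:

```python
import pint

# pint maps unit aliases onto canonical spelled-out names,
# which is the form expected in OME-NGFF 0.4 axis metadata
ureg = pint.UnitRegistry()
for alias in ["m", "meter", "nm", "um"]:
    print(alias, "->", ureg.Unit(alias))
# m -> meter, meter -> meter, nm -> nanometer, um -> micrometer
```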
zarrify-0.0.8/src/zarrify/formats/tiff.py CHANGED
@@ -7,6 +7,9 @@ import time
  import dask.array as da
  import copy
  from zarrify.utils.volume import Volume
+ from abc import ABCMeta
+ from numcodecs import Zstd
+ import logging
  
  
  class Tiff3D(Volume):
@@ -32,22 +35,29 @@ class Tiff3D(Volume):
          self.shape = self.zarr_arr.shape
          self.dtype = self.zarr_arr.dtype
  
-     def write_to_zarr(self, zarray: zarr.Group, client: Client):
-         chunks_list = np.arange(0, zarray.shape[0], zarray.chunks[0])
+     def write_to_zarr(self,
+                       dest: str,
+                       client: Client,
+                       zarr_chunks: list[int],
+                       comp: ABCMeta = Zstd(level=6),
+                       ):
+ 
+         z_arr = self.get_output_array(dest, zarr_chunks, comp)
+         chunks_list = np.arange(0, z_arr.shape[0], z_arr.chunks[0])
  
          src_path = copy.copy(self.src_path)
  
          start = time.time()
          fut = client.map(
-             lambda v: write_volume_slab_to_zarr(v, zarray, src_path), chunks_list
+             lambda v: write_volume_slab_to_zarr(v, z_arr, src_path), chunks_list
          )
-         print(
+         logging.info(
              f"Submitted {len(chunks_list)} tasks to the scheduler in {time.time()- start}s"
          )
  
          # wait for all the futures to complete
          result = wait(fut)
-         print(f"Completed {len(chunks_list)} tasks in {time.time() - start}s")
+         logging.info(f"Completed {len(chunks_list)} tasks in {time.time() - start}s")
  
          return 0
  
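Editor's note: unlike the per-chunk MRC path, `Tiff3D.write_to_zarr` parallelizes over z-slabs whose thickness equals the zarr chunk size along axis 0. A standalone sketch of the slab-start computation; the depth and chunk values are hypothetical:

```python
import numpy as np

depth, chunk_depth = 700, 64  # hypothetical z extent and z chunk size

# one dask task per slab; each task writes chunk_depth planes
chunks_list = np.arange(0, depth, chunk_depth)
print(chunks_list)
# [  0  64 128 192 256 320 384 448 512 576 640]
```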
zarrify-0.0.8/src/zarrify/formats/tiff_stack.py CHANGED
@@ -8,6 +8,9 @@ import dask.array as da
  from natsort import natsorted
  from glob import glob
  from zarrify.utils.volume import Volume
+ from abc import ABCMeta
+ from numcodecs import Zstd
+ import logging
  
  
  class TiffStack(Volume):
@@ -54,7 +57,7 @@ class TiffStack(Volume):
          try:
              image_tile = imread(src_volume[slab_index])
          except:
-             print(
+             logging.info(
                  f"Tiff tile with index {slab_index} is not present in tiff stack."
              )
          np_slab[slab_index - chunk_num, :, :] = image_tile
@@ -63,20 +66,27 @@ class TiffStack(Volume):
          zarray[chunk_num : chunk_num + zarray.chunks[0], :, :] = np_slab
  
      # parallel writing of tiff stack into zarr array
-     def write_to_zarr(self, zarray: zarr.Array, client: Client):
-         chunks_list = np.arange(0, zarray.shape[0], zarray.chunks[0])
+     def write_to_zarr(self,
+                       dest: str,
+                       client: Client,
+                       zarr_chunks: list[int],
+                       comp: ABCMeta = Zstd(level=6),
+                       ):
+ 
+         z_arr = self.get_output_array(dest, zarr_chunks, comp)
+         chunks_list = np.arange(0, z_arr.shape[0], z_arr.chunks[0])
  
          start = time.time()
          fut = client.map(
-             lambda v: self.write_tile_slab_to_zarr(v, zarray, self.stack_list),
+             lambda v: self.write_tile_slab_to_zarr(v, z_arr, self.stack_list),
              chunks_list,
          )
-         print(
+         logging.info(
              f"Submitted {len(chunks_list)} tasks to the scheduler in {time.time()- start}s"
          )
  
          # wait for all the futures to complete
          result = wait(fut)
-         print(f"Completed {len(chunks_list)} tasks in {time.time() - start}s")
+         logging.info(f"Completed {len(chunks_list)} tasks in {time.time() - start}s")
  
          return 0
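Editor's note: `TiffStack` assembles z-slabs from individual tile files, so a stable numeric ordering of the stack matters; this is why the module imports `natsorted` and `glob`. A sketch of the typical enumeration (the directory and file pattern are hypothetical, since the constructor body is not part of this hunk):

```python
from glob import glob
from natsort import natsorted

# natural sort keeps img_2.tif before img_10.tif, which plain
# lexicographic sorting would not
stack_list = natsorted(glob("stack_dir/*.tif"))
print(stack_list[:3])
```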
zarrify-0.0.8/src/zarrify/to_zarr.py ADDED
@@ -0,0 +1,163 @@
+ import zarr
+ from numcodecs import Zstd
+ from pathlib import Path
+ import click
+ import sys
+ from dask.distributed import Client
+ import time
+ from zarrify.formats.tiff_stack import TiffStack
+ from zarrify.formats.tiff import Tiff3D
+ from zarrify.formats.mrc import Mrc3D
+ from zarrify.formats.n5 import N5Group
+ from zarrify.utils.dask_utils import initialize_dask_client
+ from typing import Union
+ 
+ 
+ def init_dataset(src: str,
+                  axes: list[str],
+                  scale: list[float],
+                  translation: list[float],
+                  units: list[str]) -> Union[TiffStack, Tiff3D, N5Group, Mrc3D]:
+     """Returns an instance of a dataset class (TiffStack, N5Group, Mrc3D, or Tiff3D), depending on the input file format.
+ 
+     Args:
+         src (str): source file/container location
+         axes (list[str]): axis order (for ome-zarr metadata)
+         scale (list[float]): voxel size (for ome-zarr metadata)
+         translation (list[float]): offset (for ome-zarr metadata)
+         units (list[str]): physical units (for ome-zarr metadata)
+ 
+     Raises:
+         ValueError: raised if the input file format is not supported.
+ 
+     Returns:
+         Union[TiffStack, Tiff3D, N5Group, Mrc3D]: a file format object matching the input file format.
+         All file format objects expose identical instance methods (write_to_zarr, add_ome_metadata) to emulate API abstraction.
+     """
+ 
+     src_path = Path(src)
+     params = (src, axes, scale, translation, units)
+ 
+ 
+     ext = src_path.suffix.lower()
+ 
+     if ext == '.n5':
+         return N5Group(*params)
+     elif src_path.is_dir():
+         return TiffStack(*params)
+     elif ext == ".mrc":
+         return Mrc3D(*params)
+     elif ext in (".tif", ".tiff"):
+         return Tiff3D(*params)
+ 
+     raise ValueError(f"Unsupported source type: {src}")
+ 
+ def to_zarr(src: str,
+             dest: str,
+             client: Client,
+             num_workers: int = 20,
+             zarr_chunks: list[int] = [128] * 3,
+             axes: list[str] = ['z', 'y', 'x'],
+             scale: list[float] = [1.0] * 3,
+             translation: list[float] = [0.0] * 3,
+             units: list[str] = ['nanometer'] * 3):
+     """Convert a Tiff stack, 3D Tiff, N5, or MRC file to OME-Zarr.
+ 
+     Args:
+         src (str): input data location.
+         dest (str): output zarr group location.
+         client (Client): dask client instance.
+         num_workers (int, optional): number of dask workers. Defaults to 20.
+         zarr_chunks (list[int], optional): output zarr chunk shape. Defaults to [128]*3.
+         axes (list[str], optional): axis order. Defaults to ['z', 'y', 'x'].
+         scale (list[float], optional): voxel size (in physical units). Defaults to [1.0]*3.
+         translation (list[float], optional): offset (in physical units). Defaults to [0.0]*3.
+         units (list[str], optional): physical units. Defaults to ['nanometer']*3.
+     """
+ 
+     dataset = init_dataset(src, axes, scale, translation, units)
+ 
+     # write in parallel to zarr using dask
+     client.cluster.scale(num_workers)
+     dataset.write_to_zarr(dest, client, zarr_chunks)
+     client.cluster.scale(0)
+     # populate zarr metadata
+     dataset.add_ome_metadata(dest)
+ 
+ 
+ @click.command("zarrify")
+ @click.option(
+     "--src",
+     "-s",
+     type=click.Path(exists=True),
+     help="Input file/directory location",
+ )
+ @click.option("--dest", "-d", type=click.STRING, help="Output .zarr file path.")
+ @click.option(
+     "--num_workers", "-w", default=100, type=click.INT, help="Number of dask workers"
+ )
+ @click.option(
+     "--cluster",
+     "-c",
+     default=None,
+     type=click.STRING,
+     help="Which instance of dask client to use. Local client - 'local', cluster - 'lsf'",
+ )
+ @click.option(
+     "--zarr_chunks",
+     "-zc",
+     nargs=3,
+     default=(64, 128, 128),
+     type=click.INT,
+     help="Chunk size for (z, y, x) axis order. z-axis is normal to the tiff stack plane. Default (64, 128, 128)",
+ )
+ @click.option(
+     "--axes",
+     "-a",
+     nargs=3,
+     default=("z", "y", "x"),
+     type=str,
+     help="Metadata axis names. Order matters. \n Example: -a z y x",
+ )
+ @click.option(
+     "--translation",
+     "-t",
+     nargs=3,
+     default=(0.0, 0.0, 0.0),
+     type=float,
+     help="Metadata translation (offset) value. Order matters. \n Example: -t 1.0 2.0 3.0",
+ )
+ @click.option(
+     "--scale",
+     "-sc",
+     nargs=3,
+     default=(1.0, 1.0, 1.0),
+     type=float,
+     help="Metadata scale value. Order matters. \n Example: --scale 1.0 2.0 3.0",
+ )
+ @click.option(
+     "--units",
+     "-u",
+     nargs=3,
+     default=("nanometer", "nanometer", "nanometer"),
+     type=str,
+     help="Metadata unit names. Order matters. \n Example: -u nanometer nanometer nanometer",
+ )
+ def cli(src, dest, num_workers, cluster, zarr_chunks, axes, translation, scale, units):
+ 
+     # create a dask client to submit tasks
+     client = initialize_dask_client(cluster)
+ 
+     # convert src dataset (n5, tiff, mrc) to ome-zarr dataset
+     to_zarr(src,
+             dest,
+             client,
+             num_workers,
+             zarr_chunks,
+             axes,
+             scale,
+             translation,
+             units)
+ 
+ if __name__ == "__main__":
+     cli()
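Editor's note: the `init_dataset` dispatch above keys on the source suffix (`.n5`, `.mrc`, `.tif`/`.tiff`) and falls back to `TiffStack` for directories. A usage sketch, assuming a file `volume.mrc` exists on disk (the path is hypothetical):

```python
from zarrify.to_zarr import init_dataset

# returns a format object exposing write_to_zarr() and
# add_ome_metadata(); opening it memory-maps the source file
dataset = init_dataset(
    "volume.mrc",
    axes=["z", "y", "x"],
    scale=[1.0, 1.0, 1.0],
    translation=[0.0, 0.0, 0.0],
    units=["nanometer"] * 3,
)
print(type(dataset).__name__)  # Mrc3D
```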
File without changes
zarrify-0.0.8/src/zarrify/utils/dask_utils.py CHANGED
@@ -2,9 +2,10 @@ from dask_jobqueue import LSFCluster
  from dask.distributed import Client, LocalCluster
  import os
  import sys
+ import logging
  
  
- def initialize_dask_client(cluster_type: str) -> Client:
+ def initialize_dask_client(cluster_type: str | None = None) -> Client:
      """Initialize dask client.
  
      Args:
@@ -13,9 +14,8 @@ def initialize_dask_client(cluster_type: str) -> Client:
      Returns:
          (Client): instance of a dask client
      """
-     if cluster_type == "":
-         print("Did not specify which instance of the dask client to use!")
-         sys.exit(0)
+     if cluster_type is None:
+         raise ValueError("Cluster type must be specified")
      elif cluster_type == "lsf":
          num_cores = 1
          cluster = LSFCluster(
@@ -29,11 +29,14 @@ def initialize_dask_client(cluster_type: str) -> Client:
          )
      elif cluster_type == "local":
          cluster = LocalCluster()
+     else:
+         raise ValueError(f"Unsupported cluster type: {cluster_type}")
  
      client = Client(cluster)
+     print(str(client.dashboard_link))
      with open(
          os.path.join(os.getcwd(), "dask_dashboard_link" + ".txt"), "w"
      ) as text_file:
          text_file.write(str(client.dashboard_link))
-     print(client.dashboard_link)
-     return client
+     logging.info(client.dashboard_link)
+     return client
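Editor's note: a minimal round trip through `initialize_dask_client` as defined above; omitting the cluster type now raises `ValueError` instead of silently exiting:

```python
from zarrify.utils.dask_utils import initialize_dask_client

client = initialize_dask_client("local")  # starts a LocalCluster
print(client.dashboard_link)              # also written to dask_dashboard_link.txt
client.close()

try:
    initialize_dask_client()              # no cluster type given
except ValueError as err:
    print(err)                            # "Cluster type must be specified"
```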
zarrify-0.0.8/src/zarrify/utils/volume.py CHANGED
@@ -1,4 +1,5 @@
  import zarr
+ from abc import ABCMeta
  
  
  class Volume:
@@ -18,13 +19,26 @@ class Volume:
              "scale": scale,
              "units": units,
          }
+ 
+     def get_output_array(self, dest: str, chunks: list[int], comp: ABCMeta) -> zarr.Array:
+         z_store = zarr.NestedDirectoryStore(dest)
+         z_root = zarr.open(store=z_store, mode="a")
+ 
+         return z_root.require_dataset(
+             name="s0",
+             shape=self.shape,
+             dtype=self.dtype,
+             chunks=chunks,
+             compressor=comp,
+         )
  
-     def add_ome_metadata(self, root: zarr.Group):
+     def add_ome_metadata(self, dest: str):
          """Add selected tiff metadata to zarr attributes file (.zattrs).
  
          Args:
              root (zarr.Group): root group of the output zarr array
          """
+         root = zarr.open(dest, mode='a')
          # json template for a multiscale structure
          z_attrs: dict = {"multiscales": [{}]}
          z_attrs["multiscales"][0]["axes"] = [
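Editor's note: `Volume.get_output_array` centralizes the output-array creation that each format class previously performed inline. A standalone sketch of the same `require_dataset` pattern under zarr==2.18.3 (the store path, shape, and dtype are hypothetical):

```python
import zarr
from numcodecs import Zstd

store = zarr.NestedDirectoryStore("output.zarr")
root = zarr.open(store=store, mode="a")

# require_dataset is idempotent: it creates s0 on the first run
# and reuses the existing array on subsequent runs
s0 = root.require_dataset(
    name="s0",
    shape=(256, 256, 256),
    dtype="uint8",
    chunks=(64, 128, 128),
    compressor=Zstd(level=6),
)
print(s0.chunks)  # (64, 128, 128)
```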
zarrify-0.0.8/src/zarrify.egg-info/PKG-INFO ADDED
@@ -0,0 +1,65 @@
+ Metadata-Version: 2.4
+ Name: zarrify
+ Version: 0.0.8
+ Summary: Convert scientific image formats (TIFF, MRC, N5) to OME-Zarr
+ Author-email: Yurii Zubov <zubov452@gmail.com>
+ License: MIT
+ Keywords: zarr,ome-zarr,tiff,mrc,n5,scientific-imaging
+ Classifier: Intended Audience :: Science/Research
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Topic :: Scientific/Engineering :: Image Processing
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ License-File: LICENSE.txt
+ Requires-Dist: click<9.0.0,>=8.1.8
+ Requires-Dist: colorama<0.5.0,>=0.4.6
+ Requires-Dist: dask<2025.0.0,>=2024.12.1
+ Requires-Dist: dask-jobqueue==0.8.2
+ Requires-Dist: imagecodecs<2025.0.0,>=2024.12.30
+ Requires-Dist: mrcfile<2.0.0,>=1.5.3
+ Requires-Dist: natsort<9.0.0,>=8.4.0
+ Requires-Dist: numcodecs<0.16,>=0.13.0
+ Requires-Dist: pydantic-zarr<0.5.0,>=0.4.0
+ Requires-Dist: pint<1.0.0,>=0.20.0
+ Requires-Dist: tifffile<2026.0.0,>=2025.1.10
+ Requires-Dist: zarr==2.18.3
+ Requires-Dist: bokeh>=3.1.0
+ Provides-Extra: test
+ Requires-Dist: pytest; extra == "test"
+ Dynamic: license-file
+ 
+ # zarrify
+ 
+ Convert TIFF, MRC, and N5 files to OME-Zarr format.
+ 
+ ## Install
+ 
+ ```bash
+ pip install zarrify
+ ```
+ 
+ ## Usage
+ 
+ ```bash
+ zarrify --src input.tiff --dest output.zarr --cluster local
+ ```
+ 
+ ## Python API
+ 
+ ```python
+ import zarrify
+ from zarrify.utils.dask_utils import initialize_dask_client
+ 
+ client = initialize_dask_client("local")
+ zarrify.to_zarr("input.tiff", "output.zarr", client)
+ ```
+ 
+ ## Supported formats
+ 
+ - TIFF stacks
+ - 3D TIFF files
+ - MRC files
+ - N5 containers
zarrify-0.0.8/src/zarrify.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,20 @@
+ LICENSE.txt
+ README.md
+ pyproject.toml
+ src/zarrify/__about__.py
+ src/zarrify/__init__.py
+ src/zarrify/to_zarr.py
+ src/zarrify.egg-info/PKG-INFO
+ src/zarrify.egg-info/SOURCES.txt
+ src/zarrify.egg-info/dependency_links.txt
+ src/zarrify.egg-info/entry_points.txt
+ src/zarrify.egg-info/requires.txt
+ src/zarrify.egg-info/top_level.txt
+ src/zarrify/formats/__init__.py
+ src/zarrify/formats/mrc.py
+ src/zarrify/formats/n5.py
+ src/zarrify/formats/tiff.py
+ src/zarrify/formats/tiff_stack.py
+ src/zarrify/utils/__init__.py
+ src/zarrify/utils/dask_utils.py
+ src/zarrify/utils/volume.py
zarrify-0.0.8/src/zarrify.egg-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+ [console_scripts]
+ zarrify = zarrify.to_zarr:cli
zarrify-0.0.8/src/zarrify.egg-info/requires.txt ADDED
@@ -0,0 +1,16 @@
+ click<9.0.0,>=8.1.8
+ colorama<0.5.0,>=0.4.6
+ dask<2025.0.0,>=2024.12.1
+ dask-jobqueue==0.8.2
+ imagecodecs<2025.0.0,>=2024.12.30
+ mrcfile<2.0.0,>=1.5.3
+ natsort<9.0.0,>=8.4.0
+ numcodecs<0.16,>=0.13.0
+ pydantic-zarr<0.5.0,>=0.4.0
+ pint<1.0.0,>=0.20.0
+ tifffile<2026.0.0,>=2025.1.10
+ zarr==2.18.3
+ bokeh>=3.1.0
+ 
+ [test]
+ pytest
zarrify-0.0.8/src/zarrify.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ zarrify
zarrify-0.0.2/PKG-INFO DELETED
@@ -1,24 +0,0 @@
- Metadata-Version: 2.3
- Name: zarrify
- Version: 0.0.2
- Summary: 
- Author: Yurii Zubov
- Author-email: zubov452@gmail.com
- Requires-Python: >=3.10
- Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.10
- Classifier: Programming Language :: Python :: 3.11
- Classifier: Programming Language :: Python :: 3.12
- Classifier: Programming Language :: Python :: 3.13
- Requires-Dist: click (>=8.1.8,<9.0.0)
- Requires-Dist: colorama (>=0.4.6,<0.5.0)
- Requires-Dist: dask (>=2024.12.1,<2025.0.0)
- Requires-Dist: dask-jobqueue (==0.8.2)
- Requires-Dist: imagecodecs (>=2024.12.30,<2025.0.0)
- Requires-Dist: mrcfile (>=1.5.3,<2.0.0)
- Requires-Dist: natsort (>=8.4.0,<9.0.0)
- Requires-Dist: tifffile (>=2025.1.10,<2026.0.0)
- Requires-Dist: zarr (==2.16.1)
- Description-Content-Type: text/markdown
- 
- 
zarrify-0.0.2/pyproject.toml DELETED
@@ -1,26 +0,0 @@
- [project]
- name = "zarrify"
- version = "0.0.2"
- description = ""
- authors = [
-     {name = "Yurii Zubov",email = "zubov452@gmail.com"}
- ]
- readme = "README.md"
- requires-python = ">=3.10"
- dependencies = [
-     "zarr (==2.16.1)",
-     "mrcfile (>=1.5.3,<2.0.0)",
-     "dask-jobqueue (==0.8.2)",
-     "colorama (>=0.4.6,<0.5.0)",
-     "imagecodecs (>=2024.12.30,<2025.0.0)",
-     "dask (>=2024.12.1,<2025.0.0)",
-     "tifffile (>=2025.1.10,<2026.0.0)",
-     "natsort (>=8.4.0,<9.0.0)",
-     "click (>=8.1.8,<9.0.0)"
- ]
- [project.scripts]
- zarrify = "zarrify.to_zarr:cli"
- 
- [build-system]
- requires = ["poetry-core>=2.0.0,<3.0.0"]
- build-backend = "poetry.core.masonry.api"
@@ -1,3 +0,0 @@
- # SPDX-FileCopyrightText: 2025-present Yurii Zubov <zubov452@gmail.com>
- 
- __version__ = "0.0.1"
@@ -1,8 +0,0 @@
- # SPDX-FileCopyrightText: 2025-present Yurii Zubov <zubov452@gmail.com>
- #
- # SPDX-License-Identifier: MIT
- 
- 
- from zarrify import to_zarr
- 
- __all__ = ["to_zarr"]
@@ -1,44 +0,0 @@
- import zarr
- import os
- 
- class N53D(Volume):
-     def __init__(
-         self,
-         src_path: str,
-         axes: list[str],
-         scale: list[float],
-         translation: list[float],
-         units: list[str],
-     ):
-         """Construct all the necessary attributes for the proper conversion of tiff to OME-NGFF Zarr.
- 
-         Args:
-             input_filepath (str): path to source tiff file.
-         """
-         super().__init__(src_path, axes, scale, translation, units)
-         self.store_path, self.arr_path = self.separate_store_path(src_path, '')
-         self.n5_store = zarr.N5Store(self.store_path)
-         self.n5_arr = zarr.open(store = self.n5_store, path=self.arr_path, mode='r')
- 
-         self.shape = self.n5_arr.shape
-         self.dtype = self.n5_arr.dtype
-         self.chunks = self.n5_arr.chunks
- 
-     def separate_store_path(store, path):
-         """
-         sometimes you can pass a total os path to node, leading to
-         an empty('') node.path attribute.
-         the correct way is to separate path to container(.n5, .zarr)
-         from path to array within a container.
- 
-         Args:
-             store (string): path to store
-             path (string): path array/group (.n5 or .zarr)
- 
-         Returns:
-             (string, string): returns regularized store and group/array path
-         """
-         new_store, path_prefix = os.path.split(store)
-         if ".n5" in path_prefix:
-             return store, path
-         return separate_store_path(new_store, os.path.join(path_prefix, path))
@@ -1,115 +0,0 @@
- import zarr
- from numcodecs import Zstd
- import os
- import click
- import sys
- from dask.distributed import Client
- import time
- from zarrify.formats.tiff_stack import TiffStack
- from zarrify.formats.tiff import Tiff3D
- from zarrify.formats.mrc import Mrc3D
- from zarrify.formats.n5 import N53D
- from zarrify.utils.dask_utils import initialize_dask_client
- 
- 
- # @click.command("zarrify")
- # @click.option(
- #     "--src",
- #     "-s",
- #     type=click.Path(exists=True),
- #     help="Input file/directory location",
- # )
- # @click.option("--dest", "-s", type=click.STRING, help="Output .zarr file path.")
- # @click.option(
- #     "--num_workers", "-w", default=100, type=click.INT, help="Number of dask workers"
- # )
- # @click.option(
- #     "--cluster",
- #     "-c",
- #     default="",
- #     type=click.STRING,
- #     help="Which instance of dask client to use. Local client - 'local', cluster 'lsf'",
- # )
- # @click.option(
- #     "--zarr_chunks",
- #     "-zc",
- #     nargs=3,
- #     default=(64, 128, 128),
- #     type=click.INT,
- #     help="Chunk size for (z, y, x) axis order. z-axis is normal to the tiff stack plane. Default (64, 128, 128)",
- # )
- # @click.option(
- #     "--axes",
- #     "-a",
- #     nargs=3,
- #     default=("z", "y", "x"),
- #     type=str,
- #     help="Metadata axis names. Order matters. \n Example: -a z y x",
- # )
- # @click.option(
- #     "--translation",
- #     "-t",
- #     nargs=3,
- #     default=(0.0, 0.0, 0.0),
- #     type=float,
- #     help="Metadata translation(offset) value. Order matters. \n Example: -t 1.0 2.0 3.0",
- # )
- # @click.option(
- #     "--scale",
- #     "-s",
- #     nargs=3,
- #     default=(1.0, 1.0, 1.0),
- #     type=float,
- #     help="Metadata scale value. Order matters. \n Example: -s 1.0 2.0 3.0",
- # )
- # @click.option(
- #     "--units",
- #     "-u",
- #     nargs=3,
- #     default=("nanometer", "nanometer", "nanometer"),
- #     type=str,
- #     help="Metadata unit names. Order matters. \n Example: -t nanometer nanometer nanometer",
- # )
- # def cli(src, dest, num_workers, cluster, zarr_chunks, axes, translation, scale, units):
- 
- # create a dask client to submit tasks
- #client = initialize_dask_client(cluster)
- 
- def to_zarr(src : str,
-             dest: str,
-             client : Client,
-             num_workers : int = 20,
-             zarr_chunks : list[int] = [128]*3,
-             axes : list[str] = ('z', 'y', 'x'),
-             scale : list[float] = [1.0]*3,
-             translation : list[float] = [0.0]*3,
-             units: list[str] = ['nanometer']*3):
-     if '.n5' in src:
-         dataset = N53D(src, axes, scale, translation, units)
-     if src.endswith(".mrc"):
-         dataset = Mrc3D(src, axes, scale, translation, units)
-     elif src.endswith(".tif") or src.endswith(".tiff"):
-         dataset = Tiff3D(src, axes, scale, translation, units)
-     if os.path.isdir(src):
-         dataset = TiffStack(src, axes, scale, translation, units)
- 
-     z_store = zarr.NestedDirectoryStore(dest)
-     z_root = zarr.open(store=z_store, mode="a")
-     z_arr = z_root.require_dataset(
-         name="s0",
-         shape=dataset.shape,
-         dtype=dataset.dtype,
-         chunks=zarr_chunks,
-         compressor=Zstd(level=6),
-     )
- 
-     # write in parallel to zarr using dask
-     client.cluster.scale(num_workers)
-     dataset.write_to_zarr(z_arr, client)
-     client.cluster.scale(0)
-     # populate zarr metadata
-     dataset.add_ome_metadata(z_root)
- 
- 
- # if __name__ == "__main__":
- #     cli()
File without changes