zarr-n5 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zarr_n5-0.1.0/PKG-INFO +60 -0
- zarr_n5-0.1.0/README.md +50 -0
- zarr_n5-0.1.0/pyproject.toml +37 -0
- zarr_n5-0.1.0/src/zarr_n5/__init__.py +13 -0
- zarr_n5-0.1.0/src/zarr_n5/cli/__init__.py +1 -0
- zarr_n5-0.1.0/src/zarr_n5/cli/convert.py +33 -0
- zarr_n5-0.1.0/src/zarr_n5/codec/__init__.py +5 -0
- zarr_n5-0.1.0/src/zarr_n5/codec/default.py +262 -0
- zarr_n5-0.1.0/src/zarr_n5/constants.py +7 -0
- zarr_n5-0.1.0/src/zarr_n5/convert.py +103 -0
- zarr_n5-0.1.0/src/zarr_n5/metadata.py +194 -0
- zarr_n5-0.1.0/src/zarr_n5/py.typed +0 -0
- zarr_n5-0.1.0/src/zarr_n5/storage/__init__.py +8 -0
- zarr_n5-0.1.0/src/zarr_n5/storage/implicit.py +78 -0
- zarr_n5-0.1.0/src/zarr_n5/storage/n5.py +159 -0
- zarr_n5-0.1.0/src/zarr_n5/util.py +115 -0
zarr_n5-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: zarr-n5
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Utilities for accessing N5 data through zarr v3.
|
|
5
|
+
Author: Chris Barnes
|
|
6
|
+
Author-email: Chris Barnes <chris.barnes@gerbi-gmb.de>
|
|
7
|
+
Requires-Dist: zarr>=3.1.5
|
|
8
|
+
Requires-Python: >=3.12, <4.0
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
|
|
11
|
+
# zarr-python-n5
|
|
12
|
+
|
|
13
|
+
[N5](https://github.com/saalfeldlab/n5) utilities for [zarr-python](https://github.com/zarr-developers/zarr-python).
|
|
14
|
+
|
|
15
|
+
- Documentation: <https://zarr-python-n5.readthedocs.io>
|
|
16
|
+
|
|
17
|
+
## Codecs
|
|
18
|
+
|
|
19
|
+
### N5 Default Codec
|
|
20
|
+
|
|
21
|
+
[As described here](https://github.com/zarr-developers/zarr-extensions/tree/main/codecs/n5_default).
|
|
22
|
+
|
|
23
|
+
Only whole-chunk reading is supported.
|
|
24
|
+
|
|
25
|
+
#### N5 Compressor support
|
|
26
|
+
|
|
27
|
+
| N5 compressor | Supported | Zarr bytes-to-bytes codec | Notes |
|
|
28
|
+
| ------------- | --------- | ------------------------- | ----- |
|
|
29
|
+
| `raw` | yes | n/a | Equivalent to omitted bytes-to-bytes codec |
|
|
30
|
+
| `blosc` | yes | `blosc` | |
|
|
31
|
+
| `gzip` | yes | `gzip` | |
|
|
32
|
+
| `zstd` | yes | `zstd` | |
|
|
33
|
+
| `lz4` | no | | [Incompatible codecs](https://github.com/zarr-developers/numcodecs/issues/175) |
|
|
34
|
+
| `xz` | no | | No equivalent Zarr codec |
|
|
35
|
+
| `jpeg` | no | | Needs [N5 documentation](https://github.com/saalfeldlab/n5-jpeg/issues/1), [Zarr codec](https://github.com/zarr-developers/zarr-extensions/issues/15) |
|
|
36
|
+
| `bzip2` | no | | No equivalent Zarr codec |
|
|
37
|
+
|
|
38
|
+
## Stores
|
|
39
|
+
|
|
40
|
+
`N5WrapperStore` allows reading N5 data with DEFAULT-mode blocks through any Zarr store by converting metadata on the fly.
|
|
41
|
+
By default, this does not replicate the N5 behaviour of inferring an empty group where a metadata document does not exist.
|
|
42
|
+
To achieve this, wrap it in the provided `ImplicitGroupWrapperStore`.
|
|
43
|
+
|
|
44
|
+
## Tools
|
|
45
|
+
|
|
46
|
+
This package provides `n5tozarr`, a command-line interface for converting N5 data to Zarr in-place.
|
|
47
|
+
The N5 metadata are left untouched, and no chunk data is altered, moved, or copied.
|
|
48
|
+
A `zarr.json` file is simply added to each Zarr node.
|
|
49
|
+
|
|
50
|
+
N5 attributes are extracted and added to the `zarr.json` attributes.
|
|
51
|
+
|
|
52
|
+
The full N5 metadata document is accessible inside the `zarr.json` in an attribute called `_n5`.
|
|
53
|
+
If a directory/prefix was empty and the existence of an N5 group was inferred,
|
|
54
|
+
the `zarr.json` attribute `_implicit` will be `true`.
|
|
55
|
+
|
|
56
|
+
## Contributing
|
|
57
|
+
|
|
58
|
+
Use [`uv`](https://docs.astral.sh/uv/) for project management.
|
|
59
|
+
|
|
60
|
+
Use [`just`](https://github.com/casey/just) for common development tasks.
|
zarr_n5-0.1.0/README.md
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# zarr-python-n5
|
|
2
|
+
|
|
3
|
+
[N5](https://github.com/saalfeldlab/n5) utilities for [zarr-python](https://github.com/zarr-developers/zarr-python).
|
|
4
|
+
|
|
5
|
+
- Documentation: <https://zarr-python-n5.readthedocs.io>
|
|
6
|
+
|
|
7
|
+
## Codecs
|
|
8
|
+
|
|
9
|
+
### N5 Default Codec
|
|
10
|
+
|
|
11
|
+
[As described here](https://github.com/zarr-developers/zarr-extensions/tree/main/codecs/n5_default).
|
|
12
|
+
|
|
13
|
+
Only whole-chunk reading is supported.
|
|
14
|
+
|
|
15
|
+
#### N5 Compressor support
|
|
16
|
+
|
|
17
|
+
| N5 compressor | Supported | Zarr bytes-to-bytes codec | Notes |
|
|
18
|
+
| ------------- | --------- | ------------------------- | ----- |
|
|
19
|
+
| `raw` | yes | n/a | Equivalent to omitted bytes-to-bytes codec |
|
|
20
|
+
| `blosc` | yes | `blosc` | |
|
|
21
|
+
| `gzip` | yes | `gzip` | |
|
|
22
|
+
| `zstd` | yes | `zstd` | |
|
|
23
|
+
| `lz4` | no | | [Incompatible codecs](https://github.com/zarr-developers/numcodecs/issues/175) |
|
|
24
|
+
| `xz` | no | | No equivalent Zarr codec |
|
|
25
|
+
| `jpeg` | no | | Needs [N5 documentation](https://github.com/saalfeldlab/n5-jpeg/issues/1), [Zarr codec](https://github.com/zarr-developers/zarr-extensions/issues/15) |
|
|
26
|
+
| `bzip2` | no | | No equivalent Zarr codec |
|
|
27
|
+
|
|
28
|
+
## Stores
|
|
29
|
+
|
|
30
|
+
`N5WrapperStore` allows reading N5 data with DEFAULT-mode blocks through any Zarr store by converting metadata on the fly.
|
|
31
|
+
By default, this does not replicate the N5 behaviour of inferring an empty group where a metadata document does not exist.
|
|
32
|
+
To achieve this, wrap it in the provided `ImplicitGroupWrapperStore`.
|
|
33
|
+
|
|
34
|
+
## Tools
|
|
35
|
+
|
|
36
|
+
This package provides `n5tozarr`, a command-line interface for converting N5 data to Zarr in-place.
|
|
37
|
+
The N5 metadata are left untouched, and no chunk data is altered, moved, or copied.
|
|
38
|
+
A `zarr.json` file is simply added to each Zarr node.
|
|
39
|
+
|
|
40
|
+
N5 attributes are extracted and added to the `zarr.json` attributes.
|
|
41
|
+
|
|
42
|
+
The full N5 metadata document is accessible inside the `zarr.json` in an attribute called `_n5`.
|
|
43
|
+
If a directory/prefix was empty and the existence of an N5 group was inferred,
|
|
44
|
+
the `zarr.json` attribute `_implicit` will be `true`.
|
|
45
|
+
|
|
46
|
+
## Contributing
|
|
47
|
+
|
|
48
|
+
Use [`uv`](https://docs.astral.sh/uv/) for project management.
|
|
49
|
+
|
|
50
|
+
Use [`just`](https://github.com/casey/just) for common development tasks.
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "zarr-n5"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Utilities for accessing N5 data through zarr v3."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [{ name = "Chris Barnes", email = "chris.barnes@gerbi-gmb.de" }]
|
|
7
|
+
requires-python = ">=3.12,<4.0"
|
|
8
|
+
dependencies = ["zarr>=3.1.5"]
|
|
9
|
+
|
|
10
|
+
[project.entry-points."zarr.codecs"]
|
|
11
|
+
"n5_default" = "zarr_n5:N5DefaultCodec"
|
|
12
|
+
|
|
13
|
+
[project.scripts]
|
|
14
|
+
n5tozarr = "zarr_n5.cli.convert:main"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
[build-system]
|
|
18
|
+
requires = ["uv_build>=0.9.8,<0.10.0"]
|
|
19
|
+
build-backend = "uv_build"
|
|
20
|
+
|
|
21
|
+
[dependency-groups]
|
|
22
|
+
dev = [
|
|
23
|
+
{ include-group = "lint" },
|
|
24
|
+
{ include-group = "test" },
|
|
25
|
+
{ include-group = "doc" },
|
|
26
|
+
]
|
|
27
|
+
doc = [
|
|
28
|
+
"pdoc>=16.0.0",
|
|
29
|
+
]
|
|
30
|
+
lint = [
|
|
31
|
+
"mypy>=1.19.1",
|
|
32
|
+
"ruff>=0.15.6",
|
|
33
|
+
]
|
|
34
|
+
test = [
|
|
35
|
+
"pytest>=9.0.2",
|
|
36
|
+
"tensorstore>=0.1.82",
|
|
37
|
+
]
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utilities for working with [N5](https://github.com/saalfeldlab/n5) data through [zarr-python](https://github.com/zarr-developers/zarr-python).
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from zarr.registry import register_codec
|
|
6
|
+
|
|
7
|
+
from .codec.default import N5DefaultCodec
|
|
8
|
+
from .storage.n5 import N5WrapperStore
|
|
9
|
+
from .storage.implicit import ImplicitGroupWrapperStore
|
|
10
|
+
|
|
11
|
+
__all__ = ["N5WrapperStore", "ImplicitGroupWrapperStore", "N5DefaultCodec"]
|
|
12
|
+
|
|
13
|
+
register_codec("n5_default", N5DefaultCodec, qualname="zarr_n5.N5DefaultCodec")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Modules for command line interfaces."""
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""n5tozarr command line interface."""
|
|
2
|
+
|
|
3
|
+
from argparse import ArgumentParser
|
|
4
|
+
import asyncio
|
|
5
|
+
|
|
6
|
+
from ..convert import convert_hierarchy, DEFAULT_TASKS
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def main(raw_args=None):
    """Run the ``n5tozarr`` command line interface.

    Parses ``raw_args`` (or the process arguments when it is ``None``) and
    runs `convert_hierarchy` to completion on a fresh event loop.
    """
    cli = ArgumentParser("n5tozarr")

    # Positional arguments: the store, then an optional path inside it.
    cli.add_argument("url", help="URL to Zarr store, using fsspec format")
    cli.add_argument(
        "path", nargs="?", help="paths within the Zarr store to process"
    )

    # Optional tuning flags.
    cli.add_argument(
        "-t",
        "--tasks",
        type=int,
        default=DEFAULT_TASKS,
        help="asynchronous task count",
    )
    cli.add_argument(
        "-d",
        "--max-depth",
        type=int,
        help="how far to recurse; default no maximum",
    )
    cli.add_argument(
        "-I",
        "--no-infer-groups",
        action="store_true",
        help="do not infer N5 groups from empty directories/ prefixes",
    )

    opts = cli.parse_args(raw_args)

    # An omitted path means the root of the store.
    asyncio.run(
        convert_hierarchy(
            store=opts.url,
            path=opts.path or "",
            infer_groups=not opts.no_infer_groups,
            max_depth=opts.max_depth,
            n_tasks=opts.tasks,
        )
    )
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
"""
|
|
2
|
+
N5 Default codec module.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from collections.abc import Iterable
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Self
|
|
8
|
+
|
|
9
|
+
from zarr.abc.codec import ArrayBytesCodec, Codec, BytesBytesCodec, CodecPipeline
|
|
10
|
+
from zarr.core.array_spec import ArraySpec
|
|
11
|
+
from zarr.core.buffer.core import Buffer, NDBuffer
|
|
12
|
+
from zarr.core.chunk_grids import ChunkGrid
|
|
13
|
+
from zarr.core.dtype.wrapper import TBaseDType, ZDType, TBaseScalar
|
|
14
|
+
from zarr.core.common import JSON, parse_named_configuration
|
|
15
|
+
from zarr.core.metadata.v3 import parse_codecs
|
|
16
|
+
from zarr.codecs import BytesCodec, Endian, TransposeCodec
|
|
17
|
+
from zarr.registry import get_pipeline_class
|
|
18
|
+
|
|
19
|
+
from ..metadata import COMPATIBLE_DATA_TYPES
|
|
20
|
+
|
|
21
|
+
from ..util import N5BlockHeader
|
|
22
|
+
|
|
23
|
+
__all__ = ["N5DefaultCodec"]
|
|
24
|
+
|
|
25
|
+
N5_DEFAULT_NAME = "n5_default"
|
|
26
|
+
N5_ENDIAN = Endian.big
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def check_valid_transpose(codec: Codec):
    """Raise `ValueError` unless ``codec`` is a transpose that reverses all axes.

    The order is accepted only when it equals itself sorted in descending
    order.
    """
    if isinstance(codec, TransposeCodec):
        fully_reversed = tuple(sorted(codec.order, reverse=True))
        if codec.order == fully_reversed:
            return
        raise ValueError("not a full transpose")
    raise ValueError("not transpose codec")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def check_valid_bytes(codec: Codec):
    """Raise `ValueError` unless ``codec`` is a `BytesCodec` usable for N5.

    An unset endianness (``None``) is accepted; any explicit endianness must
    equal `N5_ENDIAN`.
    """
    if not isinstance(codec, BytesCodec):
        raise ValueError("not bytes codec")
    if codec.endian is None or codec.endian == N5_ENDIAN:
        return
    raise ValueError("bytes codec must be big-endian")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def check_valid_compressor(codec: Codec):
    """Raise `ValueError` unless ``codec`` is a bytes-to-bytes codec."""
    if isinstance(codec, BytesBytesCodec):
        return
    raise ValueError("codec is not bytes-to-bytes")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
CodecTuple = (
|
|
49
|
+
tuple[TransposeCodec, BytesCodec]
|
|
50
|
+
| tuple[TransposeCodec, BytesCodec, BytesBytesCodec]
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass(frozen=True)
|
|
55
|
+
class N5DefaultCodec(ArrayBytesCodec):
|
|
56
|
+
"""Zarr codec for default-mode N5 data.
|
|
57
|
+
|
|
58
|
+
Only full-chunk reads are supported.
|
|
59
|
+
Use the `N5DefaultCodec.from_compressor` constructor if initialising manually.
|
|
60
|
+
|
|
61
|
+
- Reads and validates the N5 block header
|
|
62
|
+
- Applies the wrapped codecs to the N5 block body
|
|
63
|
+
- Truncates or pads the resulting array to match the requested chunk
|
|
64
|
+
|
|
65
|
+
Should be the only codec present.
|
|
66
|
+
"""
|
|
67
|
+
|
|
68
|
+
codecs: CodecTuple
|
|
69
|
+
"""Codecs to be applied to the N5 block body."""
|
|
70
|
+
|
|
71
|
+
def __init__(self, *, codecs: Iterable[Codec | dict[str, JSON]]) -> None:
|
|
72
|
+
cs = parse_codecs(codecs)
|
|
73
|
+
if not 2 <= len(cs) <= 3:
|
|
74
|
+
raise ValueError(f"expected 2-3 codecs, got {len(cs)}")
|
|
75
|
+
check_valid_transpose(cs[0])
|
|
76
|
+
check_valid_bytes(cs[1])
|
|
77
|
+
if len(cs) > 2:
|
|
78
|
+
check_valid_compressor(cs[2])
|
|
79
|
+
|
|
80
|
+
object.__setattr__(self, "codecs", cs)
|
|
81
|
+
|
|
82
|
+
@property
|
|
83
|
+
def codec_pipeline(self) -> CodecPipeline:
|
|
84
|
+
"""Get the `CodecPipeline` comprising the wrapped codecs."""
|
|
85
|
+
return get_pipeline_class().from_codecs(self.codecs)
|
|
86
|
+
|
|
87
|
+
@classmethod
|
|
88
|
+
def from_compressor(cls, ndim: int, compressor: BytesBytesCodec | None = None):
|
|
89
|
+
"""Construct the codec from minimal information."""
|
|
90
|
+
transpose = cls.make_transpose(ndim)
|
|
91
|
+
endian = cls.make_bytes()
|
|
92
|
+
codecs: CodecTuple
|
|
93
|
+
if compressor is None:
|
|
94
|
+
codecs = (transpose, endian)
|
|
95
|
+
else:
|
|
96
|
+
codecs = (transpose, endian, compressor)
|
|
97
|
+
return cls(codecs=codecs)
|
|
98
|
+
|
|
99
|
+
@classmethod
|
|
100
|
+
def make_transpose(cls, ndim: int) -> TransposeCodec:
|
|
101
|
+
"""Generate the `TransposeCodec` needed for this data.
|
|
102
|
+
|
|
103
|
+
N5 data is always fully transposed.
|
|
104
|
+
"""
|
|
105
|
+
order = list(range(ndim))
|
|
106
|
+
return TransposeCodec(order=tuple(reversed(order)))
|
|
107
|
+
|
|
108
|
+
@classmethod
|
|
109
|
+
def make_bytes(cls) -> BytesCodec:
|
|
110
|
+
"""
|
|
111
|
+
Generate the `BytesCodec` needed for this data.
|
|
112
|
+
|
|
113
|
+
N5 data is always big-endian.
|
|
114
|
+
"""
|
|
115
|
+
return BytesCodec(endian=N5_ENDIAN)
|
|
116
|
+
|
|
117
|
+
def compute_encoded_size(
|
|
118
|
+
self, input_byte_length: int, chunk_spec: ArraySpec
|
|
119
|
+
) -> int:
|
|
120
|
+
header_length = N5BlockHeader.calc_size(chunk_spec.ndim, False)
|
|
121
|
+
|
|
122
|
+
for c in self.codecs:
|
|
123
|
+
input_byte_length = c.compute_encoded_size(input_byte_length, chunk_spec)
|
|
124
|
+
chunk_spec = c.resolve_metadata(chunk_spec)
|
|
125
|
+
|
|
126
|
+
return input_byte_length + header_length
|
|
127
|
+
|
|
128
|
+
def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
|
|
129
|
+
for c in self.codecs:
|
|
130
|
+
chunk_spec = c.resolve_metadata(chunk_spec)
|
|
131
|
+
return chunk_spec
|
|
132
|
+
|
|
133
|
+
def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
|
|
134
|
+
transpose = self.make_transpose(array_spec.ndim).evolve_from_array_spec(
|
|
135
|
+
array_spec
|
|
136
|
+
)
|
|
137
|
+
endian = self.make_bytes().evolve_from_array_spec(array_spec)
|
|
138
|
+
|
|
139
|
+
codecs: CodecTuple
|
|
140
|
+
match len(self.codecs):
|
|
141
|
+
case 2:
|
|
142
|
+
codecs = (transpose, endian)
|
|
143
|
+
case 3:
|
|
144
|
+
compressor: BytesBytesCodec = self.codecs[2].evolve_from_array_spec( # type:ignore
|
|
145
|
+
array_spec
|
|
146
|
+
)
|
|
147
|
+
codecs = (transpose, endian, compressor)
|
|
148
|
+
case _:
|
|
149
|
+
raise ValueError("unsupported number of codecs")
|
|
150
|
+
|
|
151
|
+
return type(self)(codecs=codecs)
|
|
152
|
+
|
|
153
|
+
def validate(
|
|
154
|
+
self,
|
|
155
|
+
*,
|
|
156
|
+
shape: tuple[int, ...],
|
|
157
|
+
dtype: ZDType[TBaseDType, TBaseScalar],
|
|
158
|
+
chunk_grid: ChunkGrid,
|
|
159
|
+
) -> None:
|
|
160
|
+
expected_ndim = len(self.codecs[0].order)
|
|
161
|
+
if len(shape) != expected_ndim:
|
|
162
|
+
raise ValueError(f"array is {len(shape)}D, codec is {expected_ndim}D")
|
|
163
|
+
if dtype._zarr_v3_name not in COMPATIBLE_DATA_TYPES:
|
|
164
|
+
raise ValueError(f"N5 does not support data type {dtype._zarr_v3_name}")
|
|
165
|
+
|
|
166
|
+
return super().validate(shape=shape, dtype=dtype, chunk_grid=chunk_grid)
|
|
167
|
+
|
|
168
|
+
async def _decode_single(
|
|
169
|
+
self, chunk_data: Buffer, chunk_spec: ArraySpec
|
|
170
|
+
) -> NDBuffer:
|
|
171
|
+
b = chunk_data.as_buffer_like()
|
|
172
|
+
header = N5BlockHeader.from_bytes(b)
|
|
173
|
+
offset = header.size()
|
|
174
|
+
|
|
175
|
+
body_buf = chunk_data[offset:]
|
|
176
|
+
body_nd = chunk_spec.prototype.nd_buffer.empty(
|
|
177
|
+
header.shape, chunk_spec.dtype.to_native_dtype(), chunk_spec.order
|
|
178
|
+
)
|
|
179
|
+
if header.shape == chunk_spec.shape:
|
|
180
|
+
body_spec = chunk_spec
|
|
181
|
+
all_eq = True
|
|
182
|
+
else:
|
|
183
|
+
body_spec = ArraySpec(
|
|
184
|
+
header.shape,
|
|
185
|
+
chunk_spec.dtype,
|
|
186
|
+
chunk_spec.fill_value,
|
|
187
|
+
chunk_spec.config,
|
|
188
|
+
chunk_spec.prototype,
|
|
189
|
+
)
|
|
190
|
+
all_eq = False
|
|
191
|
+
maybe_body_nd, *_ = await self.codec_pipeline.decode([(body_buf, body_spec)])
|
|
192
|
+
# TODO: use codec_pipeline.read() instead; this should avoid the copy for truncated-block cases
|
|
193
|
+
if maybe_body_nd is None:
|
|
194
|
+
raise RuntimeError("unexpected nullish buffer")
|
|
195
|
+
else:
|
|
196
|
+
body_nd = maybe_body_nd
|
|
197
|
+
|
|
198
|
+
if all_eq:
|
|
199
|
+
# don't need to truncate or pad
|
|
200
|
+
return body_nd
|
|
201
|
+
|
|
202
|
+
# whether we can get the chunk we want by trimming down the N5 block body
|
|
203
|
+
can_trim = True
|
|
204
|
+
|
|
205
|
+
min_shape = []
|
|
206
|
+
slice_lst = []
|
|
207
|
+
for hs, cs in zip(header.shape, chunk_spec.shape):
|
|
208
|
+
if cs > hs:
|
|
209
|
+
# requested chunk is larger than the N5 block in some dimension
|
|
210
|
+
can_trim = False
|
|
211
|
+
min_len = min(hs, cs)
|
|
212
|
+
min_shape.append(min_len)
|
|
213
|
+
slice_lst.append(slice(0, min_len))
|
|
214
|
+
|
|
215
|
+
slicing = tuple(slice_lst)
|
|
216
|
+
|
|
217
|
+
if can_trim:
|
|
218
|
+
return body_nd[slicing]
|
|
219
|
+
|
|
220
|
+
out = chunk_spec.prototype.nd_buffer.create(
|
|
221
|
+
shape=chunk_spec.shape,
|
|
222
|
+
dtype=chunk_spec.dtype.to_native_dtype(),
|
|
223
|
+
order=chunk_spec.order,
|
|
224
|
+
fill_value=chunk_spec.fill_value,
|
|
225
|
+
)
|
|
226
|
+
out[slicing] = body_nd[slicing]
|
|
227
|
+
return out
|
|
228
|
+
|
|
229
|
+
# async def _encode_single(
|
|
230
|
+
# self, chunk_data: NDBuffer, chunk_spec: ArraySpec
|
|
231
|
+
# ) -> Buffer | None:
|
|
232
|
+
# header = N5BlockHeader(N5Mode.DEFAULT, chunk_spec.shape)
|
|
233
|
+
# for c in self.codecs:
|
|
234
|
+
# chunk_data = c._encode_single(chunk_data, chunk_spec) # type:ignore
|
|
235
|
+
# chunk_spec = c.resolve_metadata(chunk_spec)
|
|
236
|
+
|
|
237
|
+
# buf: Buffer = chunk_data # type: ignore
|
|
238
|
+
|
|
239
|
+
# bio = BytesIO()
|
|
240
|
+
# bio.write(header.to_bytes())
|
|
241
|
+
# # TODO: avoid this copy?
|
|
242
|
+
# bio.write(buf.as_buffer_like())
|
|
243
|
+
# return Buffer.from_bytes(bio.getbuffer())
|
|
244
|
+
|
|
245
|
+
@classmethod
|
|
246
|
+
def from_dict(
|
|
247
|
+
cls,
|
|
248
|
+
data: dict[str, JSON],
|
|
249
|
+
) -> Self:
|
|
250
|
+
_, configuration_parsed = parse_named_configuration(
|
|
251
|
+
data, N5_DEFAULT_NAME, require_configuration=True
|
|
252
|
+
)
|
|
253
|
+
|
|
254
|
+
return cls(**configuration_parsed) # type: ignore[arg-type]
|
|
255
|
+
|
|
256
|
+
def to_dict(
|
|
257
|
+
self,
|
|
258
|
+
) -> dict[str, JSON]:
|
|
259
|
+
return {
|
|
260
|
+
"name": N5_DEFAULT_NAME,
|
|
261
|
+
"configuration": {"codecs": [c.to_dict() for c in self.codecs]},
|
|
262
|
+
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
import zarr.api.asynchronous as zarr_async
|
|
5
|
+
from zarr.abc.store import Store
|
|
6
|
+
from zarr.core.metadata.io import save_metadata
|
|
7
|
+
from zarr.storage import StoreLike, StorePath
|
|
8
|
+
from zarr.storage._common import make_store
|
|
9
|
+
from zarr.core.group import AsyncGroup
|
|
10
|
+
from zarr.core.array import AsyncArray
|
|
11
|
+
from .storage import ImplicitGroupWrapperStore, N5WrapperStore
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
__all__ = ["N5ToZarr", "convert_hierarchy"]
|
|
16
|
+
|
|
17
|
+
DEFAULT_TASKS = 10
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Finished:
    """Sentinel queue item telling workers that no more nodes will be enqueued.

    `N5ToZarr.convert_hierarchy` puts one instance on the queue after the
    last child; each worker that receives it puts it back and returns.
    """

    pass
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class N5ToZarr:
    """Write Zarr metadata documents for the nodes of an N5 hierarchy.

    Nodes are read through an `N5WrapperStore` over ``store`` (additionally
    wrapped in an `ImplicitGroupWrapperStore` when ``infer_groups`` is true);
    the resulting metadata is saved back to the unwrapped ``store``.
    """

    def __init__(self, store: Store, infer_groups: bool = True) -> None:
        self.inner_store = store

        self.n5_store: Store
        if infer_groups:
            self.n5_store = ImplicitGroupWrapperStore(N5WrapperStore(store))
        else:
            self.n5_store = N5WrapperStore(store)

        self.queue: asyncio.Queue[AsyncArray | AsyncGroup | Finished] = asyncio.Queue()

    async def convert_hierarchy(
        self, path: str = "", max_depth: int | None = -1, n_tasks=10
    ):
        """Convert the node at ``path`` and, for groups, its descendants.

        Args:
            path: Path of the starting node within the store.
            max_depth: How many levels below ``path`` to descend. ``0``
                converts only the node at ``path``; ``None`` or a negative
                value means no limit.
            n_tasks: Number of concurrent worker tasks.

        Returns:
            The number of nodes converted, including the node at ``path``.
            If that node is skipped (a Zarr node already exists there),
            nothing below it is visited and 0 is returned.
        """
        # NOTE(review): negative is treated as "no limit" so the default (-1)
        # matches the module-level function's default of None — confirm intent.
        if max_depth is not None and max_depth < 0:
            max_depth = None

        member = await zarr_async.open(store=self.n5_store, path=path)
        root_count = await self.convert_member(member)
        if root_count == 0:
            return root_count
        if isinstance(member, AsyncArray):
            # Arrays have no children.
            return root_count
        if max_depth == 0:
            return root_count

        # `members` counts depth from the children, one level below this node.
        child_depth = None if max_depth is None else max_depth - 1
        tasks = [self._spawn_worker() for _ in range(n_tasks)]
        enqueued = 0
        try:
            async for _, child in member.members(child_depth):
                await self.queue.put(child)
                enqueued += 1
        finally:
            # Always release the workers, even if listing the children failed.
            await self.queue.put(Finished())
        logger.info("Enqueued %s nodes", enqueued)

        count = root_count + sum(await asyncio.gather(*tasks))

        # The sentinel is re-queued by every worker that sees it; remove it
        # so a later call on this instance does not stop its workers at once.
        while not self.queue.empty():
            self.queue.get_nowait()

        logger.info("Converted %s nodes", count)
        return count

    def _spawn_worker(self, name: str | None = None):
        """Schedule a task for execution wrapping a worker function."""
        return asyncio.create_task(self._worker(), name=name)

    async def _worker(self):
        """Create a worker which reads Zarr nodes from the queue and processes them.

        Returns the number of nodes this worker converted.
        """
        total = 0
        while True:
            value = await self.queue.get()
            if isinstance(value, Finished):
                # Pass the sentinel on so the remaining workers also stop.
                await self.queue.put(value)
                return total
            total += await self.convert_member(value)
            self.queue.task_done()

    async def convert_member(self, member: AsyncArray | AsyncGroup) -> int:
        """Returns 0 or 1 for whether the node was skipped or converted.

        A node is skipped when it can already be opened as Zarr from the
        unwrapped store; otherwise its metadata is saved there.
        """
        try:
            _ = await zarr_async.open(
                store=self.inner_store, mode="r", path=member.path
            )
        except Exception as err:
            # Any failure to open is taken to mean no Zarr node exists yet.
            logger.debug(
                "No readable zarr node at %s (%s), converting", member.path, err
            )
        else:
            logger.info("Found existing zarr node at %s, ignoring", member.path)
            return 0

        await save_metadata(
            StorePath(store=self.inner_store, path=member.path), member.metadata, False
        )
        logger.info("Converted N5 entry %s to Zarr", member.path)
        return 1
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
async def convert_hierarchy(
    store: StoreLike,
    path: str = "",
    infer_groups: bool = True,
    max_depth=None,
    n_tasks=DEFAULT_TASKS,
) -> int:
    """Open ``store`` in ``"r+"`` mode and convert the N5 hierarchy at ``path``.

    Builds an `N5ToZarr` over the opened store and delegates to its
    `convert_hierarchy` method, returning that method's result.
    """
    opened = await make_store(store, mode="r+")
    worker = N5ToZarr(store=opened, infer_groups=infer_groups)
    result = await worker.convert_hierarchy(
        path=path, max_depth=max_depth, n_tasks=n_tasks
    )
    return result
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utilities for parsing, representing, and converting N5 metadata.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
from copy import deepcopy
|
|
7
|
+
import itertools
|
|
8
|
+
from typing import Any, TYPE_CHECKING, Self
|
|
9
|
+
from zarr.core.group import GroupMetadata
|
|
10
|
+
from zarr.core.metadata.v3 import ArrayV3Metadata
|
|
11
|
+
from zarr.core.dtype import ZDType
|
|
12
|
+
from zarr.core import dtype as zdt
|
|
13
|
+
from zarr.core.chunk_grids import RegularChunkGrid
|
|
14
|
+
from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding
|
|
15
|
+
from zarr.abc.codec import BytesBytesCodec
|
|
16
|
+
from zarr.codecs import blosc
|
|
17
|
+
from zarr.codecs import GzipCodec, ZstdCodec
|
|
18
|
+
|
|
19
|
+
from .util import N5Mode
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from typing import Self
|
|
23
|
+
from zarr.core.common import JSON
|
|
24
|
+
|
|
25
|
+
__all__ = ["N5GroupMetadata", "N5ArrayMetadata", "COMPATIBLE_DATA_TYPES"]
|
|
26
|
+
|
|
27
|
+
COMPATIBLE_DATA_TYPES: dict[str, tuple[ZDType, int]] = {
|
|
28
|
+
"uint8": (zdt.UInt8(), 1),
|
|
29
|
+
"uint16": (zdt.UInt16(), 2),
|
|
30
|
+
"uint32": (zdt.UInt32(), 4),
|
|
31
|
+
"uint64": (zdt.UInt64(), 8),
|
|
32
|
+
"int8": (zdt.Int8(), 1),
|
|
33
|
+
"int16": (zdt.Int16(), 2),
|
|
34
|
+
"int32": (zdt.Int32(), 4),
|
|
35
|
+
"int64": (zdt.Int64(), 8),
|
|
36
|
+
"float32": (zdt.Float32(), 4),
|
|
37
|
+
"float64": (zdt.Float64(), 8),
|
|
38
|
+
}
|
|
39
|
+
"""Data types which exist in both Zarr and N5.
|
|
40
|
+
|
|
41
|
+
Maps to the Zarr data type and item size."""
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class N5GroupMetadata:
|
|
45
|
+
def __init__(
|
|
46
|
+
self, n5: str | None = None, attrs: dict[str, JSON] | None = None
|
|
47
|
+
) -> None:
|
|
48
|
+
self.n5: str | None = n5
|
|
49
|
+
self.attributes: dict[str, Any] = attrs or dict()
|
|
50
|
+
|
|
51
|
+
def to_jso(self) -> dict[str, JSON]:
|
|
52
|
+
out = deepcopy(self.attributes)
|
|
53
|
+
if self.n5 is not None:
|
|
54
|
+
out["n5"] = self.n5
|
|
55
|
+
return out
|
|
56
|
+
|
|
57
|
+
def is_root(self):
|
|
58
|
+
return self.n5 is not None
|
|
59
|
+
|
|
60
|
+
@classmethod
|
|
61
|
+
def from_jso(cls, jso: dict[str, JSON]) -> Self:
|
|
62
|
+
n5 = jso.pop("n5", None)
|
|
63
|
+
if n5 is not None and not isinstance(n5, str):
|
|
64
|
+
raise ValueError("n5 attribute is not a string")
|
|
65
|
+
return cls(n5, jso)
|
|
66
|
+
|
|
67
|
+
def to_zarr(self):
|
|
68
|
+
attrs = deepcopy(self.attributes)
|
|
69
|
+
attrs["_n5"] = self.to_jso()
|
|
70
|
+
return GroupMetadata(attrs)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class N5ArrayMetadata(N5GroupMetadata):
|
|
74
|
+
def __init__(
|
|
75
|
+
self,
|
|
76
|
+
dimensions: list[int],
|
|
77
|
+
block_size: list[int],
|
|
78
|
+
data_type: str,
|
|
79
|
+
compression: dict[str, Any],
|
|
80
|
+
n5: str | None = None,
|
|
81
|
+
attrs: dict[str, JSON] | None = None,
|
|
82
|
+
):
|
|
83
|
+
super().__init__(n5, attrs)
|
|
84
|
+
if len(dimensions) != len(block_size):
|
|
85
|
+
raise ValueError(
|
|
86
|
+
f"dimensions {dimensions} and block size {block_size} must have same dimensionality"
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
if any(not is_nonzero_int(s) for s in itertools.chain(dimensions, block_size)):
|
|
90
|
+
raise ValueError("dimensions and block size must be positive integers")
|
|
91
|
+
|
|
92
|
+
self.dimensions = dimensions
|
|
93
|
+
self.block_size = block_size
|
|
94
|
+
self.data_type = data_type
|
|
95
|
+
ctype = compression.get("type")
|
|
96
|
+
if not isinstance(ctype, str):
|
|
97
|
+
raise ValueError(f"compression must have a string type, got {ctype}")
|
|
98
|
+
self.compression = compression
|
|
99
|
+
|
|
100
|
+
def to_jso(self) -> dict[str, JSON]:
|
|
101
|
+
jso = super().to_jso()
|
|
102
|
+
jso["dimensions"] = self.dimensions
|
|
103
|
+
jso["blockSize"] = self.block_size
|
|
104
|
+
jso["dataType"] = self.data_type
|
|
105
|
+
jso["compression"] = self.compression
|
|
106
|
+
return jso
|
|
107
|
+
|
|
108
|
+
@classmethod
|
|
109
|
+
def from_group(cls, grp: N5GroupMetadata) -> Self:
|
|
110
|
+
attrs = deepcopy(grp.attributes)
|
|
111
|
+
dimensions = attrs.pop("dimensions")
|
|
112
|
+
block_size = attrs.pop("blockSize")
|
|
113
|
+
data_type = attrs.pop("dataType")
|
|
114
|
+
compression = attrs.pop("compression")
|
|
115
|
+
return cls(dimensions, block_size, data_type, compression, grp.n5, attrs)
|
|
116
|
+
|
|
117
|
+
@classmethod
|
|
118
|
+
def from_jso(cls, jso: dict[str, JSON]) -> Self:
|
|
119
|
+
grp = super().from_jso(jso)
|
|
120
|
+
return cls.from_group(grp)
|
|
121
|
+
|
|
122
|
+
def to_zarr(self, mode: N5Mode = N5Mode.DEFAULT):
|
|
123
|
+
from .codec.default import N5DefaultCodec
|
|
124
|
+
|
|
125
|
+
if mode != N5Mode.DEFAULT:
|
|
126
|
+
raise NotImplementedError("Only default-mode N5 is supported")
|
|
127
|
+
compressor = self._to_zarr_codec()
|
|
128
|
+
attrs = deepcopy(self.attributes)
|
|
129
|
+
attrs["_n5"] = self.to_jso()
|
|
130
|
+
return ArrayV3Metadata(
|
|
131
|
+
shape=self.dimensions,
|
|
132
|
+
data_type=COMPATIBLE_DATA_TYPES[self.data_type][0],
|
|
133
|
+
chunk_grid=RegularChunkGrid(chunk_shape=self.block_size),
|
|
134
|
+
chunk_key_encoding=V2ChunkKeyEncoding("/"),
|
|
135
|
+
fill_value=0,
|
|
136
|
+
dimension_names=None,
|
|
137
|
+
codecs=[
|
|
138
|
+
N5DefaultCodec.from_compressor(
|
|
139
|
+
len(self.dimensions),
|
|
140
|
+
compressor,
|
|
141
|
+
),
|
|
142
|
+
],
|
|
143
|
+
attributes=attrs,
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
def _to_zarr_codec(self) -> BytesBytesCodec | None:
|
|
147
|
+
tp = self.compression.get("type")
|
|
148
|
+
match tp:
|
|
149
|
+
case "raw":
|
|
150
|
+
return None
|
|
151
|
+
case "blosc":
|
|
152
|
+
item_size = COMPATIBLE_DATA_TYPES[self.data_type][1]
|
|
153
|
+
return parse_blosc(self.compression, item_size)
|
|
154
|
+
case "gzip":
|
|
155
|
+
return parse_gzip(self.compression)
|
|
156
|
+
case "zstd":
|
|
157
|
+
return parse_zstd(self.compression)
|
|
158
|
+
case _:
|
|
159
|
+
raise ValueError(f"unsupported codec with type {tp}")
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def is_nonzero_int(n) -> bool:
    """Return True only if ``n`` is an integer strictly greater than zero.

    Booleans are rejected even though `bool` is a subclass of `int`, so a
    JSON ``true`` is not accepted as a size.
    """
    if isinstance(n, bool) or not isinstance(n, int):
        return False
    return n > 0
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def parse_blosc(d: dict[str, JSON], typesize: int | None) -> blosc.BloscCodec:
    """Build a `BloscCodec` from an N5 blosc compression block.

    Missing keys fall back to ``cname="blosclz"``, ``clevel=6``,
    ``blocksize=0`` and ``shuffle=0``.
    """
    shuffle = blosc.BloscShuffle.from_int(d.get("shuffle", 0))  # type: ignore
    return blosc.BloscCodec(
        typesize=typesize,
        cname=d.get("cname", "blosclz"),  # type: ignore
        clevel=d.get("clevel", 6),  # type:ignore
        blocksize=d.get("blocksize", 0),  # type:ignore
        shuffle=shuffle,
    )
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def parse_gzip(d: dict[str, JSON]) -> GzipCodec:
    """Build a `GzipCodec` from an N5 gzip compression block.

    A missing level, or a level of ``-1``, yields the codec's own default.
    """
    requested = d.get("level", -1)
    if requested != -1:
        return GzipCodec(level=int(requested))  # type: ignore
    return GzipCodec()
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def parse_zstd(d: dict[str, JSON]) -> ZstdCodec:
    """Build a `ZstdCodec` from an N5 zstd compression block (level defaults to 3)."""
    return ZstdCodec(level=d.get("level", 3))  # type: ignore
|
|
File without changes
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module containing `ImplicitGroupWrapperStore`,
|
|
3
|
+
for inferring groups with missing metadata.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Final
|
|
7
|
+
from collections.abc import Iterable
|
|
8
|
+
import json
|
|
9
|
+
|
|
10
|
+
from zarr.abc.store import (
|
|
11
|
+
Store,
|
|
12
|
+
ByteRequest,
|
|
13
|
+
)
|
|
14
|
+
from zarr.storage import WrapperStore
|
|
15
|
+
from zarr.core.group import GroupMetadata
|
|
16
|
+
from zarr.core.buffer import BufferPrototype, Buffer
|
|
17
|
+
|
|
18
|
+
from ..util import slice_buf, is_zarr3_metadata
|
|
19
|
+
|
|
20
|
+
__all__ = ["ImplicitGroupWrapperStore"]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def make_implicit_group_bytes() -> bytes:
    """Serialize the metadata document served for groups that have none.

    The document is a default ``GroupMetadata`` whose attributes carry
    ``"_implicit": True``, dumped to JSON and encoded to bytes.
    """
    implicit_group = GroupMetadata()
    # Tag the group so consumers can tell it was inferred rather than stored.
    implicit_group.attributes["_implicit"] = True
    document = json.dumps(implicit_group.to_dict())
    return document.encode()
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Encoded implicit-group metadata document, built once when the module is imported.
IMPLICIT_GROUP_BYTES: Final[bytes] = make_implicit_group_bytes()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ImplicitGroupWrapperStore[T: Store](WrapperStore):
|
|
34
|
+
"""A store which supplies empty group metadata documents if they do not exist.
|
|
35
|
+
|
|
36
|
+
Used to replicate N5's behaviour where any directory (or prefix) is a valid group,
|
|
37
|
+
even when no metadata document exists.
|
|
38
|
+
Wrap over an `N5WrapperStore`.
|
|
39
|
+
|
|
40
|
+
Inferred group metadata's attributes will contain the key/value `"_implicit": true`.
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
_store: T
|
|
44
|
+
|
|
45
|
+
async def get(
|
|
46
|
+
self,
|
|
47
|
+
key: str,
|
|
48
|
+
prototype: BufferPrototype,
|
|
49
|
+
byte_range: ByteRequest | None = None,
|
|
50
|
+
) -> Buffer | None:
|
|
51
|
+
res = await self._store.get(key, prototype, byte_range)
|
|
52
|
+
if res is not None or not is_zarr3_metadata(key):
|
|
53
|
+
return res
|
|
54
|
+
|
|
55
|
+
b = slice_buf(IMPLICIT_GROUP_BYTES, byte_range)
|
|
56
|
+
return prototype.buffer.from_bytes(b)
|
|
57
|
+
|
|
58
|
+
async def get_partial_values(
|
|
59
|
+
self,
|
|
60
|
+
prototype: BufferPrototype,
|
|
61
|
+
key_ranges: Iterable[tuple[str, ByteRequest | None]],
|
|
62
|
+
) -> list[Buffer | None]:
|
|
63
|
+
key_ranges = list(key_ranges)
|
|
64
|
+
reses = await super().get_partial_values(prototype, key_ranges)
|
|
65
|
+
out = []
|
|
66
|
+
for (key, byte_range), res in zip(key_ranges, reses):
|
|
67
|
+
if res is None and is_zarr3_metadata(key):
|
|
68
|
+
res = prototype.buffer.from_bytes(
|
|
69
|
+
slice_buf(IMPLICIT_GROUP_BYTES, byte_range)
|
|
70
|
+
)
|
|
71
|
+
out.append(res)
|
|
72
|
+
|
|
73
|
+
return out
|
|
74
|
+
|
|
75
|
+
async def exists(self, key: str) -> bool:
|
|
76
|
+
if is_zarr3_metadata(key):
|
|
77
|
+
return True
|
|
78
|
+
return await super().exists(key)
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module containing `N5WrapperStore`,
|
|
3
|
+
for silently converting N5 nodes to Zarr nodes.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
from collections.abc import AsyncIterator, Iterable
|
|
8
|
+
from zarr.storage import WrapperStore
|
|
9
|
+
from zarr.abc.store import (
|
|
10
|
+
Store,
|
|
11
|
+
ByteRequest,
|
|
12
|
+
)
|
|
13
|
+
from zarr.core.buffer import Buffer, BufferPrototype
|
|
14
|
+
import json
|
|
15
|
+
import asyncio
|
|
16
|
+
|
|
17
|
+
from ..constants import N5_METADATA_KEY, ZARR_V3_METADATA_KEY
|
|
18
|
+
from ..metadata import N5GroupMetadata, N5ArrayMetadata
|
|
19
|
+
from ..util import slice_buf, is_zarr3_metadata, N5Mode
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class N5WrapperStore[T: Store](WrapperStore):
|
|
23
|
+
"""A read-only store for opening N5 hierarchies.
|
|
24
|
+
|
|
25
|
+
Requests for Zarr metadata documents are redirected to N5 attributes,
|
|
26
|
+
and Zarr metadata calculated on the fly.
|
|
27
|
+
|
|
28
|
+
Note that N5 attributes can be omitted in groups.
|
|
29
|
+
You may want to wrap this in an `ImplicitGroupWrapperStore` to replicate that behaviour.
|
|
30
|
+
|
|
31
|
+
Only compatible with DEFAULT-mode N5 arrays.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
_store: T
|
|
35
|
+
|
|
36
|
+
def intercept_metadata(self, key: str) -> None | str:
|
|
37
|
+
"""If the given key is for Zarr v3 metadata, return the key for N5 metadata in the equivalent node.
|
|
38
|
+
|
|
39
|
+
Otherwise, return None.
|
|
40
|
+
"""
|
|
41
|
+
if "/" in key:
|
|
42
|
+
pref, fname = key.rsplit("/", 1)
|
|
43
|
+
else:
|
|
44
|
+
pref = None
|
|
45
|
+
fname = key
|
|
46
|
+
|
|
47
|
+
if fname != ZARR_V3_METADATA_KEY:
|
|
48
|
+
return None
|
|
49
|
+
|
|
50
|
+
if pref is None:
|
|
51
|
+
k2 = N5_METADATA_KEY
|
|
52
|
+
else:
|
|
53
|
+
k2 = f"{pref}/{N5_METADATA_KEY}"
|
|
54
|
+
|
|
55
|
+
return k2
|
|
56
|
+
|
|
57
|
+
async def get(
|
|
58
|
+
self,
|
|
59
|
+
key: str,
|
|
60
|
+
prototype: BufferPrototype,
|
|
61
|
+
byte_range: ByteRequest | None = None,
|
|
62
|
+
) -> Buffer | None:
|
|
63
|
+
k2 = self.intercept_metadata(key)
|
|
64
|
+
if k2 is None:
|
|
65
|
+
return await self._store.get(key, prototype, byte_range)
|
|
66
|
+
|
|
67
|
+
b = await self._store.get(k2, prototype)
|
|
68
|
+
|
|
69
|
+
if b is None:
|
|
70
|
+
return None
|
|
71
|
+
|
|
72
|
+
d = json.loads(b.to_bytes())
|
|
73
|
+
n5_meta = N5GroupMetadata.from_jso(d)
|
|
74
|
+
try:
|
|
75
|
+
n5_meta = N5ArrayMetadata.from_group(n5_meta)
|
|
76
|
+
out_d = n5_meta.to_zarr(N5Mode.DEFAULT)
|
|
77
|
+
except KeyError:
|
|
78
|
+
out_d = n5_meta.to_zarr()
|
|
79
|
+
|
|
80
|
+
b2 = json.dumps(out_d.to_dict()).encode()
|
|
81
|
+
b2 = slice_buf(b2, byte_range)
|
|
82
|
+
|
|
83
|
+
return prototype.buffer.from_bytes(b2)
|
|
84
|
+
|
|
85
|
+
async def get_partial_values(
|
|
86
|
+
self,
|
|
87
|
+
prototype: BufferPrototype,
|
|
88
|
+
key_ranges: Iterable[tuple[str, ByteRequest | None]],
|
|
89
|
+
) -> list[Buffer | None]:
|
|
90
|
+
|
|
91
|
+
# Split the key ranges into metadata requests and other (chunk) requests.
|
|
92
|
+
# We always need to read the whole N5 metadata file
|
|
93
|
+
# to convert it into Zarr v3 metadata before slicing it,
|
|
94
|
+
# so this prevents reading it multiple times.
|
|
95
|
+
meta_reqs: defaultdict[str, list[tuple[int, ByteRequest | None]]] = defaultdict(
|
|
96
|
+
list
|
|
97
|
+
)
|
|
98
|
+
other_reqs: list[tuple[int, tuple[str, ByteRequest | None]]] = []
|
|
99
|
+
count = 0
|
|
100
|
+
for idx, (key, byte_range) in enumerate(key_ranges):
|
|
101
|
+
if is_zarr3_metadata(key):
|
|
102
|
+
meta_reqs[key].append((idx, byte_range))
|
|
103
|
+
else:
|
|
104
|
+
other_reqs.append((idx, (key, byte_range)))
|
|
105
|
+
count += 1
|
|
106
|
+
|
|
107
|
+
other_reqs_fut = self._store.get_partial_values(
|
|
108
|
+
prototype, (tup[1] for tup in other_reqs)
|
|
109
|
+
)
|
|
110
|
+
meta_req_list = list(meta_reqs.items())
|
|
111
|
+
meta_reqs_fut = asyncio.gather(
|
|
112
|
+
*(self.get(k, prototype) for k, _ in meta_req_list)
|
|
113
|
+
)
|
|
114
|
+
# Gather all requests to run concurrently
|
|
115
|
+
other_res, meta_res = await asyncio.gather(other_reqs_fut, meta_reqs_fut)
|
|
116
|
+
out: list[None | Buffer] = [None for _ in range(count)]
|
|
117
|
+
|
|
118
|
+
# Slice and insert the metadata responses into the pre-allocated output list
|
|
119
|
+
for res, (_, meta_req) in zip(meta_res, meta_req_list):
|
|
120
|
+
if res is None:
|
|
121
|
+
continue
|
|
122
|
+
blike = res.as_buffer_like()
|
|
123
|
+
for idx, byte_range in meta_req:
|
|
124
|
+
out[idx] = Buffer.from_bytes(slice_buf(blike, byte_range))
|
|
125
|
+
|
|
126
|
+
# Insert the non-metadata responses into the output;
|
|
127
|
+
# these are already sliced by the underlying store.
|
|
128
|
+
for res, (idx, _) in zip(other_res, other_reqs):
|
|
129
|
+
out[idx] = res
|
|
130
|
+
|
|
131
|
+
return out
|
|
132
|
+
|
|
133
|
+
async def exists(self, key: str) -> bool:
|
|
134
|
+
k2 = self.intercept_metadata(key)
|
|
135
|
+
return await self._store.exists(k2 or key)
|
|
136
|
+
|
|
137
|
+
@property
|
|
138
|
+
def supports_writes(self) -> bool:
|
|
139
|
+
return False
|
|
140
|
+
|
|
141
|
+
@property
|
|
142
|
+
def supports_deletes(self) -> bool:
|
|
143
|
+
return False
|
|
144
|
+
|
|
145
|
+
async def delete(self, key: str) -> None:
|
|
146
|
+
raise NotImplementedError
|
|
147
|
+
|
|
148
|
+
@property
|
|
149
|
+
def supports_listing(self) -> bool:
|
|
150
|
+
return self._store.supports_listing
|
|
151
|
+
|
|
152
|
+
def list(self) -> AsyncIterator[str]:
|
|
153
|
+
return self._store.list()
|
|
154
|
+
|
|
155
|
+
def list_prefix(self, prefix: str) -> AsyncIterator[str]:
|
|
156
|
+
return self._store.list_prefix(prefix)
|
|
157
|
+
|
|
158
|
+
def list_dir(self, prefix: str) -> AsyncIterator[str]:
|
|
159
|
+
return self._store.list_dir(prefix)
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""
|
|
2
|
+
General utilities.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from zarr.abc.store import (
|
|
6
|
+
ByteRequest,
|
|
7
|
+
RangeByteRequest,
|
|
8
|
+
OffsetByteRequest,
|
|
9
|
+
SuffixByteRequest,
|
|
10
|
+
)
|
|
11
|
+
from .constants import ZARR_V3_METADATA_KEY
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from enum import IntEnum
|
|
14
|
+
from typing import Self, Any
|
|
15
|
+
import struct
|
|
16
|
+
|
|
17
|
+
__all__ = ["N5Mode", "N5BlockHeader"]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class N5Mode(IntEnum):
    """Block mode recorded in an N5 block header."""

    # Integer values as stored in the header's mode field.
    DEFAULT = 0
    VARLENGTH = 1
    OBJECT = 2
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class N5BlockHeader:
    """In-memory form of the header at the start of an N5 block.

    All fields are big-endian on the wire: mode, number of dimensions,
    one size per dimension, then an element count in VARLENGTH mode only.
    """

    mode: N5Mode
    """Stored as >u16"""

    shape: tuple[int, ...]
    """Length stored as >u16, elements stored as >u32"""

    num_elem: int | None = None
    """Stored as >u32 if mode == VARLENGTH"""

    def __post_init__(self):
        # An element count is only meaningful for VARLENGTH blocks.
        if self.mode != N5Mode.VARLENGTH and self.num_elem is not None:
            raise ValueError("num_elem must be None if mode is not VARLENGTH")

    @classmethod
    def calc_size(cls, ndim: int, is_varlength: bool = False) -> int:
        """Return the byte length of a header with ``ndim`` dimensions."""
        # mode (u16) + ndim (u16) + one u32 per dimension
        total = 2 + 2 + 4 * ndim
        # VARLENGTH headers carry a trailing u32 element count.
        return total + 4 if is_varlength else total

    def size(self) -> int:
        """Return the byte length of this header once serialized."""
        return self.calc_size(self.ndim, self.mode == N5Mode.VARLENGTH)

    @classmethod
    def from_bytes(cls, b: bytes) -> Self:
        """Parse a header from the start of ``b``; trailing bytes are ignored."""
        parser = StructParser(b, ">")
        raw_mode, ndim = parser.unpack("HH")
        mode = N5Mode(raw_mode)
        shape = parser.unpack("I" * ndim)
        num_elem = parser.unpack("I")[0] if mode == N5Mode.VARLENGTH else None
        return cls(mode=mode, shape=shape, num_elem=num_elem)

    @property
    def ndim(self):
        """Number of dimensions, i.e. the length of ``shape``."""
        return len(self.shape)

    def to_bytes(self) -> bytes:
        """Serialize the header to its big-endian byte representation."""
        values = [self.mode, self.ndim, *self.shape]
        layout = ">HH" + "I" * self.ndim
        if self.num_elem is not None:
            values.append(self.num_elem)
            layout += "I"
        return struct.pack(layout, *values)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class StructParser:
    """Sequential ``struct`` reader that tracks its position in a buffer.

    ``endian`` is a ``struct`` byte-order prefix (e.g. ``">"``) prepended to
    every format passed to `unpack`.
    """

    def __init__(self, buf: bytes, endian: str = "") -> None:
        self.buf = buf
        self.endian = endian
        self.offset = 0

    def unpack(self, fmt: str) -> tuple[Any, ...]:
        """Unpack ``fmt`` at the current offset and advance past it."""
        full_fmt = self.endian + fmt
        end = self.offset + struct.calcsize(full_fmt)
        values = struct.unpack(full_fmt, self.buf[self.offset : end])
        # Only advance once the read has succeeded.
        self.offset = end
        return values
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def slice_buf(b: bytes, byte_range: ByteRequest | None = None) -> bytes:
    """Optionally slice a byte buffer.

    Parameters
    ----------
    b
        The buffer to slice.
    byte_range
        ``None`` returns ``b`` unchanged. A ``RangeByteRequest`` selects
        ``[start, end)``, an ``OffsetByteRequest`` selects everything from
        ``offset`` onwards, and a ``SuffixByteRequest`` selects the last
        ``suffix`` bytes.

    Raises
    ------
    TypeError
        If ``byte_range`` is not ``None`` or one of the request types above.
    """
    if byte_range is None:
        return b
    if isinstance(byte_range, RangeByteRequest):
        return b[byte_range.start : byte_range.end]
    if isinstance(byte_range, OffsetByteRequest):
        return b[byte_range.offset :]
    if isinstance(byte_range, SuffixByteRequest):
        # Computed from the front because `b[-0:]` would be the whole buffer
        # rather than the empty suffix; clamped so an oversized suffix
        # still yields all of `b`.
        return b[max(len(b) - byte_range.suffix, 0) :]

    raise TypeError(f"byte_range argument has unknown type {type(byte_range)}")
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def is_zarr3_metadata(key: str):
    """Return whether the final path component of ``key`` is the Zarr v3 metadata key."""
    _, _, last_component = key.rpartition("/")
    return last_component == ZARR_V3_METADATA_KEY
|