zarrs 0.1.1__cp311-abi3-macosx_10_12_x86_64.whl → 0.2.2__cp311-abi3-macosx_10_12_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zarrs/__init__.py +0 -4
- zarrs/_internal.abi3.so +0 -0
- zarrs/_internal.pyi +24 -37
- zarrs/pipeline.py +154 -59
- zarrs/utils.py +45 -22
- {zarrs-0.1.1.dist-info → zarrs-0.2.2.dist-info}/METADATA +27 -52
- zarrs-0.2.2.dist-info/RECORD +11 -0
- {zarrs-0.1.1.dist-info → zarrs-0.2.2.dist-info}/WHEEL +1 -1
- zarrs-0.2.2.dist-info/entry_points.txt +2 -0
- zarrs-0.1.1.dist-info/RECORD +0 -10
- {zarrs-0.1.1.dist-info → zarrs-0.2.2.dist-info}/licenses/LICENSE +0 -0
zarrs/__init__.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from zarr.registry import register_pipeline
|
|
2
|
-
|
|
3
1
|
from ._internal import __version__
|
|
4
2
|
from .pipeline import ZarrsCodecPipeline as _ZarrsCodecPipeline
|
|
5
3
|
from .utils import CollapsedDimensionError, DiscontiguousArrayError
|
|
@@ -10,8 +8,6 @@ class ZarrsCodecPipeline(_ZarrsCodecPipeline):
|
|
|
10
8
|
pass
|
|
11
9
|
|
|
12
10
|
|
|
13
|
-
register_pipeline(ZarrsCodecPipeline)
|
|
14
|
-
|
|
15
11
|
__all__ = [
|
|
16
12
|
"ZarrsCodecPipeline",
|
|
17
13
|
"DiscontiguousArrayError",
|
zarrs/_internal.abi3.so
CHANGED
|
Binary file
|
zarrs/_internal.pyi
CHANGED
|
@@ -1,57 +1,44 @@
|
|
|
1
1
|
# This file is automatically generated by pyo3_stub_gen
|
|
2
2
|
# ruff: noqa: E501, F401
|
|
3
3
|
|
|
4
|
+
import builtins
|
|
4
5
|
import typing
|
|
5
|
-
from enum import Enum, auto
|
|
6
6
|
|
|
7
|
-
import numpy
|
|
8
7
|
import numpy.typing
|
|
8
|
+
import zarr.abc.store
|
|
9
9
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
10
|
+
@typing.final
|
|
11
|
+
class ChunkItem:
|
|
12
|
+
def __new__(
|
|
13
|
+
cls,
|
|
14
|
+
key: builtins.str,
|
|
15
|
+
chunk_subset: typing.Sequence[slice],
|
|
16
|
+
chunk_shape: typing.Sequence[builtins.int],
|
|
17
|
+
subset: typing.Sequence[slice],
|
|
18
|
+
shape: typing.Sequence[builtins.int],
|
|
19
|
+
) -> ChunkItem: ...
|
|
13
20
|
|
|
21
|
+
@typing.final
|
|
14
22
|
class CodecPipelineImpl:
|
|
15
23
|
def __new__(
|
|
16
24
|
cls,
|
|
17
|
-
|
|
25
|
+
array_metadata: builtins.str,
|
|
26
|
+
store_config: zarr.abc.store.Store,
|
|
18
27
|
*,
|
|
19
|
-
validate_checksums
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
): ...
|
|
28
|
+
validate_checksums: builtins.bool = False,
|
|
29
|
+
chunk_concurrent_minimum: builtins.int | None = None,
|
|
30
|
+
chunk_concurrent_maximum: builtins.int | None = None,
|
|
31
|
+
num_threads: builtins.int | None = None,
|
|
32
|
+
direct_io: builtins.bool = False,
|
|
33
|
+
) -> CodecPipelineImpl: ...
|
|
25
34
|
def retrieve_chunks_and_apply_index(
|
|
26
35
|
self,
|
|
27
|
-
chunk_descriptions: typing.Sequence[
|
|
36
|
+
chunk_descriptions: typing.Sequence[ChunkItem],
|
|
28
37
|
value: numpy.typing.NDArray[typing.Any],
|
|
29
38
|
) -> None: ...
|
|
30
|
-
def retrieve_chunks(
|
|
31
|
-
self, chunk_descriptions: typing.Sequence[Basic]
|
|
32
|
-
) -> list[numpy.typing.NDArray[numpy.uint8]]: ...
|
|
33
39
|
def store_chunks_with_indices(
|
|
34
40
|
self,
|
|
35
|
-
chunk_descriptions: typing.Sequence[
|
|
41
|
+
chunk_descriptions: typing.Sequence[ChunkItem],
|
|
36
42
|
value: numpy.typing.NDArray[typing.Any],
|
|
43
|
+
write_empty_chunks: builtins.bool,
|
|
37
44
|
) -> None: ...
|
|
38
|
-
|
|
39
|
-
class FilesystemStoreConfig:
|
|
40
|
-
root: str
|
|
41
|
-
|
|
42
|
-
class HttpStoreConfig:
|
|
43
|
-
endpoint: str
|
|
44
|
-
|
|
45
|
-
class WithSubset:
|
|
46
|
-
def __new__(
|
|
47
|
-
cls,
|
|
48
|
-
item: Basic,
|
|
49
|
-
chunk_subset: typing.Sequence[slice],
|
|
50
|
-
subset: typing.Sequence[slice],
|
|
51
|
-
shape: typing.Sequence[int],
|
|
52
|
-
): ...
|
|
53
|
-
...
|
|
54
|
-
|
|
55
|
-
class StoreConfig(Enum):
|
|
56
|
-
Filesystem = auto()
|
|
57
|
-
Http = auto()
|
zarrs/pipeline.py
CHANGED
|
@@ -4,44 +4,84 @@ import asyncio
|
|
|
4
4
|
import json
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from typing import TYPE_CHECKING, TypedDict
|
|
7
|
+
from warnings import warn
|
|
7
8
|
|
|
8
9
|
import numpy as np
|
|
9
10
|
from zarr.abc.codec import Codec, CodecPipeline
|
|
11
|
+
from zarr.codecs._v2 import V2Codec
|
|
12
|
+
from zarr.core import BatchedCodecPipeline
|
|
10
13
|
from zarr.core.config import config
|
|
14
|
+
from zarr.core.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata
|
|
11
15
|
|
|
12
16
|
if TYPE_CHECKING:
|
|
13
17
|
from collections.abc import Iterable, Iterator
|
|
14
|
-
from typing import
|
|
18
|
+
from typing import Self
|
|
15
19
|
|
|
16
|
-
from zarr.abc.store import ByteGetter, ByteSetter
|
|
20
|
+
from zarr.abc.store import ByteGetter, ByteSetter, Store
|
|
17
21
|
from zarr.core.array_spec import ArraySpec
|
|
18
22
|
from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer
|
|
19
23
|
from zarr.core.chunk_grids import ChunkGrid
|
|
20
|
-
from zarr.core.common import ChunkCoords
|
|
21
24
|
from zarr.core.indexing import SelectorTuple
|
|
25
|
+
from zarr.dtype import ZDType
|
|
22
26
|
|
|
23
27
|
from ._internal import CodecPipelineImpl
|
|
24
28
|
from .utils import (
|
|
25
29
|
CollapsedDimensionError,
|
|
26
30
|
DiscontiguousArrayError,
|
|
27
|
-
|
|
31
|
+
FillValueNoneError,
|
|
28
32
|
make_chunk_info_for_rust_with_indices,
|
|
29
33
|
)
|
|
30
34
|
|
|
31
35
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
36
|
+
class UnsupportedDataTypeError(Exception):
|
|
37
|
+
pass
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class UnsupportedMetadataError(Exception):
|
|
41
|
+
pass
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def get_codec_pipeline_impl(
|
|
45
|
+
metadata: ArrayMetadata, store: Store, *, strict: bool
|
|
46
|
+
) -> CodecPipelineImpl | None:
|
|
47
|
+
try:
|
|
48
|
+
array_metadata_json = json.dumps(metadata.to_dict())
|
|
49
|
+
# Maintain old behavior: https://github.com/zarrs/zarrs-python/tree/b36ba797cafec77f5f41a25316be02c718a2b4f8?tab=readme-ov-file#configuration
|
|
50
|
+
validate_checksums = config.get("codec_pipeline.validate_checksums", True)
|
|
51
|
+
if validate_checksums is None:
|
|
52
|
+
validate_checksums = True
|
|
53
|
+
return CodecPipelineImpl(
|
|
54
|
+
array_metadata_json,
|
|
55
|
+
store_config=store,
|
|
56
|
+
validate_checksums=validate_checksums,
|
|
57
|
+
chunk_concurrent_minimum=config.get(
|
|
58
|
+
"codec_pipeline.chunk_concurrent_minimum", None
|
|
59
|
+
),
|
|
60
|
+
chunk_concurrent_maximum=config.get(
|
|
61
|
+
"codec_pipeline.chunk_concurrent_maximum", None
|
|
62
|
+
),
|
|
63
|
+
num_threads=config.get("threading.max_workers", None),
|
|
64
|
+
direct_io=config.get("codec_pipeline.direct_io", False),
|
|
65
|
+
)
|
|
66
|
+
except TypeError as e:
|
|
67
|
+
if strict:
|
|
68
|
+
raise UnsupportedMetadataError() from e
|
|
69
|
+
|
|
70
|
+
warn(
|
|
71
|
+
f"Array is unsupported by ZarrsCodecPipeline: {e}",
|
|
72
|
+
category=UserWarning,
|
|
73
|
+
)
|
|
74
|
+
return None
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def get_codec_pipeline_fallback(
|
|
78
|
+
metadata: ArrayMetadata, *, strict: bool
|
|
79
|
+
) -> BatchedCodecPipeline | None:
|
|
80
|
+
if strict:
|
|
81
|
+
return None
|
|
82
|
+
else:
|
|
83
|
+
codecs = array_metadata_to_codecs(metadata)
|
|
84
|
+
return BatchedCodecPipeline.from_codecs(codecs)
|
|
45
85
|
|
|
46
86
|
|
|
47
87
|
class ZarrsCodecPipelineState(TypedDict):
|
|
@@ -49,35 +89,48 @@ class ZarrsCodecPipelineState(TypedDict):
|
|
|
49
89
|
codecs: tuple[Codec, ...]
|
|
50
90
|
|
|
51
91
|
|
|
92
|
+
def array_metadata_to_codecs(metadata: ArrayMetadata) -> list[Codec]:
|
|
93
|
+
if isinstance(metadata, ArrayV3Metadata):
|
|
94
|
+
return metadata.codecs
|
|
95
|
+
elif isinstance(metadata, ArrayV2Metadata):
|
|
96
|
+
v2_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor)
|
|
97
|
+
return [v2_codec]
|
|
98
|
+
|
|
99
|
+
|
|
52
100
|
@dataclass
|
|
53
101
|
class ZarrsCodecPipeline(CodecPipeline):
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
102
|
+
metadata: ArrayMetadata
|
|
103
|
+
store: Store
|
|
104
|
+
impl: CodecPipelineImpl | None
|
|
105
|
+
python_impl: BatchedCodecPipeline | None
|
|
57
106
|
|
|
58
107
|
def __getstate__(self) -> ZarrsCodecPipelineState:
|
|
59
|
-
return {"
|
|
108
|
+
return {"metadata": self.metadata, "store": self.store}
|
|
60
109
|
|
|
61
110
|
def __setstate__(self, state: ZarrsCodecPipelineState):
|
|
62
|
-
self.
|
|
63
|
-
self.
|
|
64
|
-
|
|
111
|
+
self.metadata = state["metadata"]
|
|
112
|
+
self.store = state["store"]
|
|
113
|
+
strict = config.get("codec_pipeline.strict", False)
|
|
114
|
+
self.impl = get_codec_pipeline_impl(self.metadata, self.store, strict=strict)
|
|
115
|
+
self.python_impl = get_codec_pipeline_fallback(self.metadata, strict=strict)
|
|
65
116
|
|
|
66
117
|
def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
|
|
67
|
-
|
|
118
|
+
return self
|
|
68
119
|
|
|
69
120
|
@classmethod
|
|
70
121
|
def from_codecs(cls, codecs: Iterable[Codec]) -> Self:
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
122
|
+
return BatchedCodecPipeline.from_codecs(codecs)
|
|
123
|
+
|
|
124
|
+
@classmethod
|
|
125
|
+
def from_array_metadata_and_store(
|
|
126
|
+
cls, array_metadata: ArrayMetadata, store: Store
|
|
127
|
+
) -> Self:
|
|
128
|
+
strict = config.get("codec_pipeline.strict", False)
|
|
77
129
|
return cls(
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
impl=get_codec_pipeline_impl(
|
|
130
|
+
metadata=array_metadata,
|
|
131
|
+
store=store,
|
|
132
|
+
impl=get_codec_pipeline_impl(array_metadata, store, strict=strict),
|
|
133
|
+
python_impl=get_codec_pipeline_fallback(array_metadata, strict=strict),
|
|
81
134
|
)
|
|
82
135
|
|
|
83
136
|
@property
|
|
@@ -92,7 +145,7 @@ class ZarrsCodecPipeline(CodecPipeline):
|
|
|
92
145
|
yield from self.codecs
|
|
93
146
|
|
|
94
147
|
def validate(
|
|
95
|
-
self, *, shape:
|
|
148
|
+
self, *, shape: tuple[int, ...], dtype: ZDType, chunk_grid: ChunkGrid
|
|
96
149
|
) -> None:
|
|
97
150
|
raise NotImplementedError("validate")
|
|
98
151
|
|
|
@@ -114,52 +167,94 @@ class ZarrsCodecPipeline(CodecPipeline):
|
|
|
114
167
|
async def read(
|
|
115
168
|
self,
|
|
116
169
|
batch_info: Iterable[
|
|
117
|
-
tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]
|
|
170
|
+
tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
|
|
118
171
|
],
|
|
119
172
|
out: NDBuffer, # type: ignore
|
|
120
173
|
drop_axes: tuple[int, ...] = (), # FIXME: unused
|
|
121
174
|
) -> None:
|
|
122
175
|
# FIXME: Error if array is not in host memory
|
|
123
|
-
out: NDArrayLike = out.as_ndarray_like()
|
|
124
176
|
if not out.dtype.isnative:
|
|
125
177
|
raise RuntimeError("Non-native byte order not supported")
|
|
126
178
|
try:
|
|
179
|
+
if self.impl is None:
|
|
180
|
+
raise UnsupportedMetadataError()
|
|
181
|
+
self._raise_error_on_unsupported_batch_dtype(batch_info)
|
|
127
182
|
chunks_desc = make_chunk_info_for_rust_with_indices(
|
|
128
183
|
batch_info, drop_axes, out.shape
|
|
129
184
|
)
|
|
130
|
-
except (
|
|
131
|
-
|
|
185
|
+
except (
|
|
186
|
+
UnsupportedMetadataError,
|
|
187
|
+
DiscontiguousArrayError,
|
|
188
|
+
CollapsedDimensionError,
|
|
189
|
+
UnsupportedDataTypeError,
|
|
190
|
+
FillValueNoneError,
|
|
191
|
+
):
|
|
192
|
+
if self.python_impl is None:
|
|
193
|
+
raise
|
|
194
|
+
await self.python_impl.read(batch_info, out, drop_axes)
|
|
195
|
+
return None
|
|
132
196
|
else:
|
|
197
|
+
out: NDArrayLike = out.as_ndarray_like()
|
|
133
198
|
await asyncio.to_thread(
|
|
134
199
|
self.impl.retrieve_chunks_and_apply_index,
|
|
135
|
-
chunks_desc,
|
|
200
|
+
chunks_desc.chunk_info_with_indices,
|
|
136
201
|
out,
|
|
137
202
|
)
|
|
138
203
|
return None
|
|
139
|
-
chunks = await asyncio.to_thread(self.impl.retrieve_chunks, chunks_desc)
|
|
140
|
-
for chunk, (_, spec, selection, out_selection) in zip(chunks, batch_info):
|
|
141
|
-
chunk_reshaped = chunk.view(spec.dtype).reshape(spec.shape)
|
|
142
|
-
chunk_selected = chunk_reshaped[selection]
|
|
143
|
-
if drop_axes:
|
|
144
|
-
chunk_selected = np.squeeze(chunk_selected, axis=drop_axes)
|
|
145
|
-
out[out_selection] = chunk_selected
|
|
146
204
|
|
|
147
205
|
async def write(
|
|
148
206
|
self,
|
|
149
207
|
batch_info: Iterable[
|
|
150
|
-
tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
|
|
208
|
+
tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
|
|
151
209
|
],
|
|
152
210
|
value: NDBuffer, # type: ignore
|
|
153
211
|
drop_axes: tuple[int, ...] = (),
|
|
154
212
|
) -> None:
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
213
|
+
try:
|
|
214
|
+
if self.impl is None:
|
|
215
|
+
raise UnsupportedMetadataError()
|
|
216
|
+
self._raise_error_on_unsupported_batch_dtype(batch_info)
|
|
217
|
+
chunks_desc = make_chunk_info_for_rust_with_indices(
|
|
218
|
+
batch_info, drop_axes, value.shape
|
|
219
|
+
)
|
|
220
|
+
except (
|
|
221
|
+
UnsupportedMetadataError,
|
|
222
|
+
DiscontiguousArrayError,
|
|
223
|
+
CollapsedDimensionError,
|
|
224
|
+
UnsupportedDataTypeError,
|
|
225
|
+
FillValueNoneError,
|
|
226
|
+
):
|
|
227
|
+
if self.python_impl is None:
|
|
228
|
+
raise
|
|
229
|
+
await self.python_impl.write(batch_info, value, drop_axes)
|
|
230
|
+
return None
|
|
231
|
+
else:
|
|
232
|
+
# FIXME: Error if array is not in host memory
|
|
233
|
+
value_np: NDArrayLike | np.ndarray = value.as_ndarray_like()
|
|
234
|
+
if not value_np.dtype.isnative:
|
|
235
|
+
value_np = np.ascontiguousarray(
|
|
236
|
+
value_np, dtype=value_np.dtype.newbyteorder("=")
|
|
237
|
+
)
|
|
238
|
+
elif not value_np.flags.c_contiguous:
|
|
239
|
+
value_np = np.ascontiguousarray(value_np)
|
|
240
|
+
await asyncio.to_thread(
|
|
241
|
+
self.impl.store_chunks_with_indices,
|
|
242
|
+
chunks_desc.chunk_info_with_indices,
|
|
243
|
+
value_np,
|
|
244
|
+
chunks_desc.write_empty_chunks,
|
|
245
|
+
)
|
|
246
|
+
return None
|
|
247
|
+
|
|
248
|
+
def _raise_error_on_unsupported_batch_dtype(
|
|
249
|
+
self,
|
|
250
|
+
batch_info: Iterable[
|
|
251
|
+
tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
|
|
252
|
+
],
|
|
253
|
+
):
|
|
254
|
+
# For the NumPy dtype kinds V, S, U, M and m, see https://github.com/LDeakin/zarrs/blob/0532fe983b7b42b59dbf84e50a2fe5e6f7bad4ce/zarrs_metadata/src/v2_to_v3.rs#L289-L293
|
|
255
|
+
# Further, our pipeline does not support variable-length objects due to limitations on decode_into, so object/np.dtypes.StringDType is also out
|
|
256
|
+
if any(
|
|
257
|
+
info.dtype.to_native_dtype().kind in {"V", "S", "U", "M", "m", "O", "T"}
|
|
258
|
+
for (_, info, _, _, _) in batch_info
|
|
259
|
+
):
|
|
260
|
+
raise UnsupportedDataTypeError()
|
zarrs/utils.py
CHANGED
|
@@ -2,20 +2,22 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import operator
|
|
4
4
|
import os
|
|
5
|
+
from dataclasses import dataclass
|
|
5
6
|
from functools import reduce
|
|
6
|
-
from typing import TYPE_CHECKING
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
7
8
|
|
|
8
9
|
import numpy as np
|
|
10
|
+
from zarr.core.array_spec import ArraySpec
|
|
9
11
|
from zarr.core.indexing import SelectorTuple, is_integer
|
|
10
12
|
|
|
11
|
-
from zarrs._internal import
|
|
13
|
+
from zarrs._internal import ChunkItem
|
|
12
14
|
|
|
13
15
|
if TYPE_CHECKING:
|
|
14
16
|
from collections.abc import Iterable
|
|
15
17
|
from types import EllipsisType
|
|
16
18
|
|
|
17
19
|
from zarr.abc.store import ByteGetter, ByteSetter
|
|
18
|
-
from zarr.
|
|
20
|
+
from zarr.dtype import ZDType
|
|
19
21
|
|
|
20
22
|
|
|
21
23
|
# adapted from https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
|
|
@@ -31,6 +33,10 @@ class CollapsedDimensionError(Exception):
|
|
|
31
33
|
pass
|
|
32
34
|
|
|
33
35
|
|
|
36
|
+
class FillValueNoneError(Exception):
|
|
37
|
+
pass
|
|
38
|
+
|
|
39
|
+
|
|
34
40
|
# This is a (mostly) copy of the function from zarr.core.indexing that fixes:
|
|
35
41
|
# DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated
|
|
36
42
|
# TODO: Upstream this fix
|
|
@@ -134,17 +140,44 @@ def get_shape_for_selector(
|
|
|
134
140
|
return resulting_shape_from_index(shape, selector_tuple, drop_axes, pad=pad)
|
|
135
141
|
|
|
136
142
|
|
|
143
|
+
def get_implicit_fill_value(dtype: ZDType, fill_value: Any) -> Any:
|
|
144
|
+
if fill_value is None:
|
|
145
|
+
fill_value = dtype.default_scalar()
|
|
146
|
+
return fill_value
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@dataclass(frozen=True)
|
|
150
|
+
class RustChunkInfo:
|
|
151
|
+
chunk_info_with_indices: list[ChunkItem]
|
|
152
|
+
write_empty_chunks: bool
|
|
153
|
+
|
|
154
|
+
|
|
137
155
|
def make_chunk_info_for_rust_with_indices(
|
|
138
156
|
batch_info: Iterable[
|
|
139
|
-
tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
|
|
157
|
+
tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
|
|
140
158
|
],
|
|
141
159
|
drop_axes: tuple[int, ...],
|
|
142
160
|
shape: tuple[int, ...],
|
|
143
|
-
) ->
|
|
161
|
+
) -> RustChunkInfo:
|
|
144
162
|
shape = shape if shape else (1,) # constant array
|
|
145
|
-
chunk_info_with_indices: list[
|
|
146
|
-
|
|
147
|
-
|
|
163
|
+
chunk_info_with_indices: list[ChunkItem] = []
|
|
164
|
+
write_empty_chunks: bool = True
|
|
165
|
+
for (
|
|
166
|
+
byte_getter,
|
|
167
|
+
chunk_spec,
|
|
168
|
+
chunk_selection,
|
|
169
|
+
out_selection,
|
|
170
|
+
_,
|
|
171
|
+
) in batch_info:
|
|
172
|
+
write_empty_chunks = chunk_spec.config.write_empty_chunks
|
|
173
|
+
if chunk_spec.fill_value is None:
|
|
174
|
+
chunk_spec = ArraySpec(
|
|
175
|
+
chunk_spec.shape,
|
|
176
|
+
chunk_spec.dtype,
|
|
177
|
+
get_implicit_fill_value(chunk_spec.dtype, chunk_spec.fill_value),
|
|
178
|
+
chunk_spec.config,
|
|
179
|
+
chunk_spec.prototype,
|
|
180
|
+
)
|
|
148
181
|
out_selection_as_slices = selector_tuple_to_slice_selection(out_selection)
|
|
149
182
|
chunk_selection_as_slices = selector_tuple_to_slice_selection(chunk_selection)
|
|
150
183
|
shape_chunk_selection_slices = get_shape_for_selector(
|
|
@@ -161,22 +194,12 @@ def make_chunk_info_for_rust_with_indices(
|
|
|
161
194
|
f"{shape_chunk_selection} != {shape_chunk_selection_slices}"
|
|
162
195
|
)
|
|
163
196
|
chunk_info_with_indices.append(
|
|
164
|
-
|
|
165
|
-
|
|
197
|
+
ChunkItem(
|
|
198
|
+
key=byte_getter.path,
|
|
166
199
|
chunk_subset=chunk_selection_as_slices,
|
|
200
|
+
chunk_shape=chunk_spec.shape,
|
|
167
201
|
subset=out_selection_as_slices,
|
|
168
202
|
shape=shape,
|
|
169
203
|
)
|
|
170
204
|
)
|
|
171
|
-
return chunk_info_with_indices
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
def make_chunk_info_for_rust(
|
|
175
|
-
batch_info: Iterable[
|
|
176
|
-
tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
|
|
177
|
-
],
|
|
178
|
-
) -> list[Basic]:
|
|
179
|
-
return [
|
|
180
|
-
Basic(byte_interface, chunk_spec)
|
|
181
|
-
for (byte_interface, chunk_spec, _, _) in batch_info
|
|
182
|
-
]
|
|
205
|
+
return RustChunkInfo(chunk_info_with_indices, write_empty_chunks)
|
|
@@ -1,47 +1,16 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: zarrs
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Classifier: Programming Language :: Rust
|
|
5
5
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
6
6
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
7
7
|
Classifier: Typing :: Typed
|
|
8
|
-
Requires-Dist:
|
|
9
|
-
Requires-Dist:
|
|
10
|
-
Requires-Dist: fasteners
|
|
11
|
-
Requires-Dist: numcodecs[msgpack] >=0.10.0
|
|
12
|
-
Requires-Dist: fsspec >2024
|
|
13
|
-
Requires-Dist: crc32c
|
|
14
|
-
Requires-Dist: zstandard
|
|
15
|
-
Requires-Dist: typing-extensions
|
|
16
|
-
Requires-Dist: donfig
|
|
17
|
-
Requires-Dist: pytest
|
|
18
|
-
Requires-Dist: universal-pathlib >=0.2.0
|
|
19
|
-
Requires-Dist: zarr >=3.0.0
|
|
20
|
-
Requires-Dist: coverage ; extra == 'test'
|
|
21
|
-
Requires-Dist: pytest ; extra == 'test'
|
|
22
|
-
Requires-Dist: pytest-cov ; extra == 'test'
|
|
23
|
-
Requires-Dist: msgpack ; extra == 'test'
|
|
24
|
-
Requires-Dist: lmdb ; extra == 'test'
|
|
25
|
-
Requires-Dist: s3fs ; extra == 'test'
|
|
26
|
-
Requires-Dist: pytest-asyncio ; extra == 'test'
|
|
27
|
-
Requires-Dist: moto[s3] ; extra == 'test'
|
|
28
|
-
Requires-Dist: flask-cors ; extra == 'test'
|
|
29
|
-
Requires-Dist: flask ; extra == 'test'
|
|
30
|
-
Requires-Dist: requests ; extra == 'test'
|
|
31
|
-
Requires-Dist: mypy ; extra == 'test'
|
|
32
|
-
Requires-Dist: hypothesis ; extra == 'test'
|
|
33
|
-
Requires-Dist: pytest-xdist ; extra == 'test'
|
|
34
|
-
Requires-Dist: maturin ; extra == 'dev'
|
|
35
|
-
Requires-Dist: pip ; extra == 'dev'
|
|
36
|
-
Requires-Dist: pre-commit ; extra == 'dev'
|
|
37
|
-
Requires-Dist: sphinx >=7.4.6 ; extra == 'doc'
|
|
38
|
-
Requires-Dist: myst-parser ; extra == 'doc'
|
|
39
|
-
Provides-Extra: test
|
|
40
|
-
Provides-Extra: dev
|
|
41
|
-
Provides-Extra: doc
|
|
8
|
+
Requires-Dist: numpy>=1.24
|
|
9
|
+
Requires-Dist: zarr>=3.1
|
|
42
10
|
License-File: LICENSE
|
|
11
|
+
Summary: A CodecPipeline for zarr-python backed by the zarrs Rust crate
|
|
43
12
|
Author: Ilan Gold, Lachlan Deakin, Philipp Angerer
|
|
44
|
-
License: MIT
|
|
13
|
+
License-Expression: MIT
|
|
45
14
|
Requires-Python: >=3.11
|
|
46
15
|
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
47
16
|
|
|
@@ -50,9 +19,9 @@ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
|
50
19
|
[PyPI package](https://pypi.org/project/zarrs)
|
|
51
20
|
[Download statistics](https://pepy.tech/project/zarrs)
|
|
52
21
|
[Download statistics](https://pepy.tech/project/zarrs)
|
|
53
|
-
[](https://github.com/zarrs/zarrs-python/stargazers)
|
|
23
|
+

|
|
24
|
+

|
|
56
25
|
|
|
57
26
|
This project serves as a bridge between [`zarrs`](https://docs.rs/zarrs/latest/zarrs/) (Rust) and [`zarr`](https://zarr.readthedocs.io/en/latest/index.html) (`zarr-python`) via [`PyO3`](https://pyo3.rs/v0.22.3/). The main goal of the project is to speed up i/o (see [`zarr_benchmarks`](https://github.com/LDeakin/zarr_benchmarks)).
|
|
58
27
|
|
|
@@ -60,7 +29,6 @@ To use the project, simply install our package (which depends on `zarr-python>=3
|
|
|
60
29
|
|
|
61
30
|
```python
|
|
62
31
|
import zarr
|
|
63
|
-
import zarrs
|
|
64
32
|
zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"})
|
|
65
33
|
```
|
|
66
34
|
|
|
@@ -68,16 +36,15 @@ You can then use your `zarr` as normal (with some caveats)!
|
|
|
68
36
|
|
|
69
37
|
## API
|
|
70
38
|
|
|
71
|
-
We export a `ZarrsCodecPipeline` class so that `zarr-python` can use the class but it is not meant to be instantiated and we do not guarantee the stability of its API beyond what is required so that `zarr-python` can use it. Therefore, it is not documented here.
|
|
39
|
+
We export a `ZarrsCodecPipeline` class so that `zarr-python` can use the class but it is not meant to be instantiated and we do not guarantee the stability of its API beyond what is required so that `zarr-python` can use it. Therefore, it is not documented here.
|
|
72
40
|
|
|
73
41
|
At the moment, we only support a subset of the `zarr-python` stores:
|
|
74
42
|
|
|
75
|
-
- [
|
|
76
|
-
- [
|
|
77
|
-
|
|
43
|
+
- [`LocalStore`](https://zarr.readthedocs.io/en/latest/api/zarr/storage/#zarr.storage.LocalStore) (local filesystem)
|
|
44
|
+
- [`ObjectStore`](https://zarr.readthedocs.io/en/latest/user-guide/storage/#object-store) (cloud storage)
|
|
45
|
+
- [`HTTPFileSystem`](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.implementations.http.HTTPFileSystem) via [`FsspecStore`](https://zarr.readthedocs.io/en/latest/api/zarr/storage/#zarr.storage.FsspecStore)
|
|
78
46
|
|
|
79
47
|
A `NotImplementedError` will be raised if a store is not supported.
|
|
80
|
-
We intend to support more stores in the future: https://github.com/ilan-gold/zarrs-python/issues/44.
|
|
81
48
|
|
|
82
49
|
### Configuration
|
|
83
50
|
|
|
@@ -95,7 +62,11 @@ The `ZarrsCodecPipeline` specific options are:
|
|
|
95
62
|
- `codec_pipeline.chunk_concurrent_minimum`: the minimum number of chunks retrieved/stored concurrently when balancing chunk/codec concurrency.
|
|
96
63
|
- Defaults to 4 if `None`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#chunk-concurrent-minimum) for more info.
|
|
97
64
|
- `codec_pipeline.validate_checksums`: enable checksum validation (e.g. with the CRC32C codec).
|
|
98
|
-
- Defaults to
|
|
65
|
+
- Defaults to `True`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#validate-checksums) for more info.
|
|
66
|
+
- `codec_pipeline.direct_io`: enable `O_DIRECT` reads/writes; this requires support from both the operating system (currently only Linux) and the file system.
|
|
67
|
+
- Defaults to `False`.
|
|
68
|
+
- `codec_pipeline.strict`: raise exceptions for unsupported operations instead of falling back to the default codec pipeline of `zarr-python`.
|
|
69
|
+
- Defaults to `False`.
|
|
99
70
|
|
|
100
71
|
For example:
|
|
101
72
|
```python
|
|
@@ -105,14 +76,15 @@ zarr.config.set({
|
|
|
105
76
|
"codec_pipeline": {
|
|
106
77
|
"path": "zarrs.ZarrsCodecPipeline",
|
|
107
78
|
"validate_checksums": True,
|
|
108
|
-
"store_empty_chunks": False,
|
|
109
79
|
"chunk_concurrent_maximum": None,
|
|
110
80
|
"chunk_concurrent_minimum": 4,
|
|
81
|
+
"direct_io": False,
|
|
82
|
+
"strict": False
|
|
111
83
|
}
|
|
112
84
|
})
|
|
113
85
|
```
|
|
114
86
|
|
|
115
|
-
If the `ZarrsCodecPipeline` is pickled, and then un-pickled, and during that time one of `
|
|
87
|
+
If the `ZarrsCodecPipeline` is pickled, and then un-pickled, and during that time one of `chunk_concurrent_minimum`, `chunk_concurrent_maximum`, or `num_threads` has changed, the newly un-pickled version will pick up the new value. However, once a `ZarrsCodecPipeline` object has been instantiated, these values are then fixed. This may change in the future as guidance from the `zarr` community becomes clear.
|
|
116
88
|
|
|
117
89
|
## Concurrency
|
|
118
90
|
|
|
@@ -133,7 +105,7 @@ Chunk concurrency is typically favored because:
|
|
|
133
105
|
|
|
134
106
|
## Supported Indexing Methods
|
|
135
107
|
|
|
136
|
-
|
|
108
|
+
The following indexing methods will fall back to the default `zarr-python` pipeline:
|
|
137
109
|
|
|
138
110
|
1. Any `oindex` or `vindex` integer `np.ndarray` indexing with dimensionality >=3 i.e.,
|
|
139
111
|
|
|
@@ -163,7 +135,10 @@ We **do not** officially support the following indexing methods. Some of these
|
|
|
163
135
|
arr[0:10, ..., 0:5]
|
|
164
136
|
```
|
|
165
137
|
|
|
166
|
-
Otherwise, we believe that we support your indexing case: slices, ints, and all integer `np.ndarray` indices in 2D for reading, contiguous integer `np.ndarray` indices along one axis for writing etc. Please file an issue if you believe we have more holes in our coverage than we are aware of or you wish to contribute! For example, we have an [issue in zarrs for integer-array indexing](https://github.com/LDeakin/zarrs/issues/52) that would unblock a lot of these issues!
|
|
167
138
|
|
|
168
|
-
|
|
139
|
+
Furthermore, indexing numeric data with anything other than contiguous selections (i.e., slices or `np.ndarray`s of consecutive integers) will fall back to the default `zarr-python` implementation.
|
|
140
|
+
|
|
141
|
+
Please file an issue if you believe we have more holes in our coverage than we are aware of or you wish to contribute! For example, we have an [issue in zarrs for integer-array indexing](https://github.com/LDeakin/zarrs/issues/52) that would unblock use of the Rust pipeline for that use case (perhaps very useful for mini-batch training!).
|
|
142
|
+
|
|
143
|
+
Furthermore, arrays that use codecs not supported by `zarrs` will also automatically fall back to the Python implementation.
|
|
169
144
|
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
zarrs-0.2.2.dist-info/METADATA,sha256=8K1AOS_SVQgRLzQ2rWtNbJxCAb12XmTLRrT6QklQgOI,8054
|
|
2
|
+
zarrs-0.2.2.dist-info/WHEEL,sha256=N8W3-0eDM6igWj-H12r7VkxoMaJIqJLxUyWCFstEaGg,105
|
|
3
|
+
zarrs-0.2.2.dist-info/entry_points.txt,sha256=EzI6yCIUPDHBHzjDdexuGGYbOLXf8x2ICokOJXnuX3k,68
|
|
4
|
+
zarrs-0.2.2.dist-info/licenses/LICENSE,sha256=vwIsJjEfVFehyyqcb7B3dAXAniaFMmk8u7IoiJAfBJ4,1099
|
|
5
|
+
zarrs/__init__.py,sha256=lRVtAPzCzJkGs4vQrW4UgANq-pC-khS0ZF7HTj4__Hg,489
|
|
6
|
+
zarrs/_internal.abi3.so,sha256=tnP5IiuDmhfwB15cX4yTyu1mVZgShtJy1lnS87TzK1o,14928348
|
|
7
|
+
zarrs/_internal.pyi,sha256=a_D4yx99r4xeQX1ntY_A_Q4wVmLeLwJZHWAQV_mVu9A,1308
|
|
8
|
+
zarrs/pipeline.py,sha256=YfB13GWNfxELerXVtJ_ipFwSL7bN-YuPys6jCB9lnms,9008
|
|
9
|
+
zarrs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
zarrs/utils.py,sha256=W2XCjJDVrdHYJgtVaRKN533Ljw1MF7o0YwXuz5ZAk2g,7020
|
|
11
|
+
zarrs-0.2.2.dist-info/RECORD,,
|
zarrs-0.1.1.dist-info/RECORD
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
zarrs-0.1.1.dist-info/METADATA,sha256=jwXKTiA6MIVeg8i_zrb3I4H7GTsqG5lA-H8GicZUDTE,9215
|
|
2
|
-
zarrs-0.1.1.dist-info/WHEEL,sha256=LZygbeT1PTQw7a9tONPp78bbG4FZc86U59Z0RFJcoR8,105
|
|
3
|
-
zarrs-0.1.1.dist-info/licenses/LICENSE,sha256=vwIsJjEfVFehyyqcb7B3dAXAniaFMmk8u7IoiJAfBJ4,1099
|
|
4
|
-
zarrs/__init__.py,sha256=4oWtWDZO8r7z4Uh7Fy_brmkxXDpULQdgjlA0iFw98eA,573
|
|
5
|
-
zarrs/_internal.pyi,sha256=fWWKwTSf8bHFlK6XusyHlDzahTlcjcfth6c1nJyrYdE,1375
|
|
6
|
-
zarrs/utils.py,sha256=PNZWAjU46s2_ZjWN4cxr2JsZHl88H2NAiaPzFf7SPUE,6403
|
|
7
|
-
zarrs/pipeline.py,sha256=M2r7QIauWIyQrNkd722osMZOeQ1LqYP1aX8SypplaAY,6021
|
|
8
|
-
zarrs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
zarrs/_internal.abi3.so,sha256=fUxAxUdypeC3THpaKKO1nppCP0traxV7bFsmGB63oLc,6804296
|
|
10
|
-
zarrs-0.1.1.dist-info/RECORD,,
|
|
File without changes
|