zarrs 0.1.2__cp311-abi3-macosx_11_0_arm64.whl → 0.2.2__cp311-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zarrs/__init__.py +0 -4
- zarrs/_internal.abi3.so +0 -0
- zarrs/_internal.pyi +24 -33
- zarrs/pipeline.py +83 -64
- zarrs/utils.py +27 -12
- {zarrs-0.1.2.dist-info → zarrs-0.2.2.dist-info}/METADATA +20 -48
- zarrs-0.2.2.dist-info/RECORD +11 -0
- {zarrs-0.1.2.dist-info → zarrs-0.2.2.dist-info}/WHEEL +1 -1
- zarrs-0.2.2.dist-info/entry_points.txt +2 -0
- zarrs-0.1.2.dist-info/RECORD +0 -10
- {zarrs-0.1.2.dist-info → zarrs-0.2.2.dist-info}/licenses/LICENSE +0 -0
zarrs/__init__.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from zarr.registry import register_pipeline
|
|
2
|
-
|
|
3
1
|
from ._internal import __version__
|
|
4
2
|
from .pipeline import ZarrsCodecPipeline as _ZarrsCodecPipeline
|
|
5
3
|
from .utils import CollapsedDimensionError, DiscontiguousArrayError
|
|
@@ -10,8 +8,6 @@ class ZarrsCodecPipeline(_ZarrsCodecPipeline):
|
|
|
10
8
|
pass
|
|
11
9
|
|
|
12
10
|
|
|
13
|
-
register_pipeline(ZarrsCodecPipeline)
|
|
14
|
-
|
|
15
11
|
__all__ = [
|
|
16
12
|
"ZarrsCodecPipeline",
|
|
17
13
|
"DiscontiguousArrayError",
|
zarrs/_internal.abi3.so
CHANGED
|
Binary file
|
zarrs/_internal.pyi
CHANGED
|
@@ -1,53 +1,44 @@
|
|
|
1
1
|
# This file is automatically generated by pyo3_stub_gen
|
|
2
2
|
# ruff: noqa: E501, F401
|
|
3
3
|
|
|
4
|
+
import builtins
|
|
4
5
|
import typing
|
|
5
|
-
from enum import Enum, auto
|
|
6
6
|
|
|
7
7
|
import numpy.typing
|
|
8
|
+
import zarr.abc.store
|
|
8
9
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
10
|
+
@typing.final
|
|
11
|
+
class ChunkItem:
|
|
12
|
+
def __new__(
|
|
13
|
+
cls,
|
|
14
|
+
key: builtins.str,
|
|
15
|
+
chunk_subset: typing.Sequence[slice],
|
|
16
|
+
chunk_shape: typing.Sequence[builtins.int],
|
|
17
|
+
subset: typing.Sequence[slice],
|
|
18
|
+
shape: typing.Sequence[builtins.int],
|
|
19
|
+
) -> ChunkItem: ...
|
|
12
20
|
|
|
21
|
+
@typing.final
|
|
13
22
|
class CodecPipelineImpl:
|
|
14
23
|
def __new__(
|
|
15
24
|
cls,
|
|
16
|
-
|
|
25
|
+
array_metadata: builtins.str,
|
|
26
|
+
store_config: zarr.abc.store.Store,
|
|
17
27
|
*,
|
|
18
|
-
validate_checksums
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
): ...
|
|
28
|
+
validate_checksums: builtins.bool = False,
|
|
29
|
+
chunk_concurrent_minimum: builtins.int | None = None,
|
|
30
|
+
chunk_concurrent_maximum: builtins.int | None = None,
|
|
31
|
+
num_threads: builtins.int | None = None,
|
|
32
|
+
direct_io: builtins.bool = False,
|
|
33
|
+
) -> CodecPipelineImpl: ...
|
|
24
34
|
def retrieve_chunks_and_apply_index(
|
|
25
35
|
self,
|
|
26
|
-
chunk_descriptions: typing.Sequence[
|
|
36
|
+
chunk_descriptions: typing.Sequence[ChunkItem],
|
|
27
37
|
value: numpy.typing.NDArray[typing.Any],
|
|
28
38
|
) -> None: ...
|
|
29
39
|
def store_chunks_with_indices(
|
|
30
40
|
self,
|
|
31
|
-
chunk_descriptions: typing.Sequence[
|
|
41
|
+
chunk_descriptions: typing.Sequence[ChunkItem],
|
|
32
42
|
value: numpy.typing.NDArray[typing.Any],
|
|
43
|
+
write_empty_chunks: builtins.bool,
|
|
33
44
|
) -> None: ...
|
|
34
|
-
|
|
35
|
-
class FilesystemStoreConfig:
|
|
36
|
-
root: str
|
|
37
|
-
|
|
38
|
-
class HttpStoreConfig:
|
|
39
|
-
endpoint: str
|
|
40
|
-
|
|
41
|
-
class WithSubset:
|
|
42
|
-
def __new__(
|
|
43
|
-
cls,
|
|
44
|
-
item: Basic,
|
|
45
|
-
chunk_subset: typing.Sequence[slice],
|
|
46
|
-
subset: typing.Sequence[slice],
|
|
47
|
-
shape: typing.Sequence[int],
|
|
48
|
-
): ...
|
|
49
|
-
...
|
|
50
|
-
|
|
51
|
-
class StoreConfig(Enum):
|
|
52
|
-
Filesystem = auto()
|
|
53
|
-
Http = auto()
|
zarrs/pipeline.py
CHANGED
|
@@ -2,27 +2,29 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import json
|
|
5
|
-
import re
|
|
6
5
|
from dataclasses import dataclass
|
|
7
6
|
from typing import TYPE_CHECKING, TypedDict
|
|
7
|
+
from warnings import warn
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
10
|
from zarr.abc.codec import Codec, CodecPipeline
|
|
11
|
+
from zarr.codecs._v2 import V2Codec
|
|
11
12
|
from zarr.core import BatchedCodecPipeline
|
|
12
13
|
from zarr.core.config import config
|
|
14
|
+
from zarr.core.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata
|
|
13
15
|
|
|
14
16
|
if TYPE_CHECKING:
|
|
15
|
-
from collections.abc import
|
|
16
|
-
from typing import
|
|
17
|
+
from collections.abc import Iterable, Iterator
|
|
18
|
+
from typing import Self
|
|
17
19
|
|
|
18
|
-
from zarr.abc.store import ByteGetter, ByteSetter
|
|
20
|
+
from zarr.abc.store import ByteGetter, ByteSetter, Store
|
|
19
21
|
from zarr.core.array_spec import ArraySpec
|
|
20
22
|
from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer
|
|
21
23
|
from zarr.core.chunk_grids import ChunkGrid
|
|
22
|
-
from zarr.core.common import ChunkCoords
|
|
23
24
|
from zarr.core.indexing import SelectorTuple
|
|
25
|
+
from zarr.dtype import ZDType
|
|
24
26
|
|
|
25
|
-
from ._internal import CodecPipelineImpl
|
|
27
|
+
from ._internal import CodecPipelineImpl
|
|
26
28
|
from .utils import (
|
|
27
29
|
CollapsedDimensionError,
|
|
28
30
|
DiscontiguousArrayError,
|
|
@@ -39,12 +41,19 @@ class UnsupportedMetadataError(Exception):
|
|
|
39
41
|
pass
|
|
40
42
|
|
|
41
43
|
|
|
42
|
-
def get_codec_pipeline_impl(
|
|
44
|
+
def get_codec_pipeline_impl(
|
|
45
|
+
metadata: ArrayMetadata, store: Store, *, strict: bool
|
|
46
|
+
) -> CodecPipelineImpl | None:
|
|
43
47
|
try:
|
|
48
|
+
array_metadata_json = json.dumps(metadata.to_dict())
|
|
49
|
+
# Maintain old behavior: https://github.com/zarrs/zarrs-python/tree/b36ba797cafec77f5f41a25316be02c718a2b4f8?tab=readme-ov-file#configuration
|
|
50
|
+
validate_checksums = config.get("codec_pipeline.validate_checksums", True)
|
|
51
|
+
if validate_checksums is None:
|
|
52
|
+
validate_checksums = True
|
|
44
53
|
return CodecPipelineImpl(
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
54
|
+
array_metadata_json,
|
|
55
|
+
store_config=store,
|
|
56
|
+
validate_checksums=validate_checksums,
|
|
48
57
|
chunk_concurrent_minimum=config.get(
|
|
49
58
|
"codec_pipeline.chunk_concurrent_minimum", None
|
|
50
59
|
),
|
|
@@ -52,34 +61,27 @@ def get_codec_pipeline_impl(codec_metadata_json: str) -> CodecPipelineImpl | Non
|
|
|
52
61
|
"codec_pipeline.chunk_concurrent_maximum", None
|
|
53
62
|
),
|
|
54
63
|
num_threads=config.get("threading.max_workers", None),
|
|
64
|
+
direct_io=config.get("codec_pipeline.direct_io", False),
|
|
55
65
|
)
|
|
56
66
|
except TypeError as e:
|
|
57
|
-
if
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
compressor = json.dumps(codec_dict.get("compressor").get_config())
|
|
76
|
-
else:
|
|
77
|
-
compressor = None
|
|
78
|
-
codecs_v3 = codec_metadata_v2_to_v3(filters, compressor)
|
|
79
|
-
for codec in codecs_v3:
|
|
80
|
-
yield json.loads(codec)
|
|
81
|
-
else:
|
|
82
|
-
yield codec.to_dict()
|
|
67
|
+
if strict:
|
|
68
|
+
raise UnsupportedMetadataError() from e
|
|
69
|
+
|
|
70
|
+
warn(
|
|
71
|
+
f"Array is unsupported by ZarrsCodecPipeline: {e}",
|
|
72
|
+
category=UserWarning,
|
|
73
|
+
)
|
|
74
|
+
return None
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def get_codec_pipeline_fallback(
|
|
78
|
+
metadata: ArrayMetadata, *, strict: bool
|
|
79
|
+
) -> BatchedCodecPipeline | None:
|
|
80
|
+
if strict:
|
|
81
|
+
return None
|
|
82
|
+
else:
|
|
83
|
+
codecs = array_metadata_to_codecs(metadata)
|
|
84
|
+
return BatchedCodecPipeline.from_codecs(codecs)
|
|
83
85
|
|
|
84
86
|
|
|
85
87
|
class ZarrsCodecPipelineState(TypedDict):
|
|
@@ -87,38 +89,48 @@ class ZarrsCodecPipelineState(TypedDict):
|
|
|
87
89
|
codecs: tuple[Codec, ...]
|
|
88
90
|
|
|
89
91
|
|
|
92
|
+
def array_metadata_to_codecs(metadata: ArrayMetadata) -> list[Codec]:
|
|
93
|
+
if isinstance(metadata, ArrayV3Metadata):
|
|
94
|
+
return metadata.codecs
|
|
95
|
+
elif isinstance(metadata, ArrayV2Metadata):
|
|
96
|
+
v2_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor)
|
|
97
|
+
return [v2_codec]
|
|
98
|
+
|
|
99
|
+
|
|
90
100
|
@dataclass
|
|
91
101
|
class ZarrsCodecPipeline(CodecPipeline):
|
|
92
|
-
|
|
102
|
+
metadata: ArrayMetadata
|
|
103
|
+
store: Store
|
|
93
104
|
impl: CodecPipelineImpl | None
|
|
94
|
-
|
|
95
|
-
python_impl: BatchedCodecPipeline
|
|
105
|
+
python_impl: BatchedCodecPipeline | None
|
|
96
106
|
|
|
97
107
|
def __getstate__(self) -> ZarrsCodecPipelineState:
|
|
98
|
-
return {"
|
|
108
|
+
return {"metadata": self.metadata, "store": self.store}
|
|
99
109
|
|
|
100
110
|
def __setstate__(self, state: ZarrsCodecPipelineState):
|
|
101
|
-
self.
|
|
102
|
-
self.
|
|
103
|
-
|
|
104
|
-
self.
|
|
111
|
+
self.metadata = state["metadata"]
|
|
112
|
+
self.store = state["store"]
|
|
113
|
+
strict = config.get("codec_pipeline.strict", False)
|
|
114
|
+
self.impl = get_codec_pipeline_impl(self.metadata, self.store, strict=strict)
|
|
115
|
+
self.python_impl = get_codec_pipeline_fallback(self.metadata, strict=strict)
|
|
105
116
|
|
|
106
117
|
def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
|
|
107
|
-
|
|
118
|
+
return self
|
|
108
119
|
|
|
109
120
|
@classmethod
|
|
110
121
|
def from_codecs(cls, codecs: Iterable[Codec]) -> Self:
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
122
|
+
return BatchedCodecPipeline.from_codecs(codecs)
|
|
123
|
+
|
|
124
|
+
@classmethod
|
|
125
|
+
def from_array_metadata_and_store(
|
|
126
|
+
cls, array_metadata: ArrayMetadata, store: Store
|
|
127
|
+
) -> Self:
|
|
128
|
+
strict = config.get("codec_pipeline.strict", False)
|
|
117
129
|
return cls(
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
impl=get_codec_pipeline_impl(
|
|
121
|
-
python_impl=
|
|
130
|
+
metadata=array_metadata,
|
|
131
|
+
store=store,
|
|
132
|
+
impl=get_codec_pipeline_impl(array_metadata, store, strict=strict),
|
|
133
|
+
python_impl=get_codec_pipeline_fallback(array_metadata, strict=strict),
|
|
122
134
|
)
|
|
123
135
|
|
|
124
136
|
@property
|
|
@@ -133,7 +145,7 @@ class ZarrsCodecPipeline(CodecPipeline):
|
|
|
133
145
|
yield from self.codecs
|
|
134
146
|
|
|
135
147
|
def validate(
|
|
136
|
-
self, *, shape:
|
|
148
|
+
self, *, shape: tuple[int, ...], dtype: ZDType, chunk_grid: ChunkGrid
|
|
137
149
|
) -> None:
|
|
138
150
|
raise NotImplementedError("validate")
|
|
139
151
|
|
|
@@ -155,7 +167,7 @@ class ZarrsCodecPipeline(CodecPipeline):
|
|
|
155
167
|
async def read(
|
|
156
168
|
self,
|
|
157
169
|
batch_info: Iterable[
|
|
158
|
-
tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]
|
|
170
|
+
tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
|
|
159
171
|
],
|
|
160
172
|
out: NDBuffer, # type: ignore
|
|
161
173
|
drop_axes: tuple[int, ...] = (), # FIXME: unused
|
|
@@ -177,13 +189,15 @@ class ZarrsCodecPipeline(CodecPipeline):
|
|
|
177
189
|
UnsupportedDataTypeError,
|
|
178
190
|
FillValueNoneError,
|
|
179
191
|
):
|
|
192
|
+
if self.python_impl is None:
|
|
193
|
+
raise
|
|
180
194
|
await self.python_impl.read(batch_info, out, drop_axes)
|
|
181
195
|
return None
|
|
182
196
|
else:
|
|
183
197
|
out: NDArrayLike = out.as_ndarray_like()
|
|
184
198
|
await asyncio.to_thread(
|
|
185
199
|
self.impl.retrieve_chunks_and_apply_index,
|
|
186
|
-
chunks_desc,
|
|
200
|
+
chunks_desc.chunk_info_with_indices,
|
|
187
201
|
out,
|
|
188
202
|
)
|
|
189
203
|
return None
|
|
@@ -191,7 +205,7 @@ class ZarrsCodecPipeline(CodecPipeline):
|
|
|
191
205
|
async def write(
|
|
192
206
|
self,
|
|
193
207
|
batch_info: Iterable[
|
|
194
|
-
tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
|
|
208
|
+
tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
|
|
195
209
|
],
|
|
196
210
|
value: NDBuffer, # type: ignore
|
|
197
211
|
drop_axes: tuple[int, ...] = (),
|
|
@@ -210,6 +224,8 @@ class ZarrsCodecPipeline(CodecPipeline):
|
|
|
210
224
|
UnsupportedDataTypeError,
|
|
211
225
|
FillValueNoneError,
|
|
212
226
|
):
|
|
227
|
+
if self.python_impl is None:
|
|
228
|
+
raise
|
|
213
229
|
await self.python_impl.write(batch_info, value, drop_axes)
|
|
214
230
|
return None
|
|
215
231
|
else:
|
|
@@ -222,20 +238,23 @@ class ZarrsCodecPipeline(CodecPipeline):
|
|
|
222
238
|
elif not value_np.flags.c_contiguous:
|
|
223
239
|
value_np = np.ascontiguousarray(value_np)
|
|
224
240
|
await asyncio.to_thread(
|
|
225
|
-
self.impl.store_chunks_with_indices,
|
|
241
|
+
self.impl.store_chunks_with_indices,
|
|
242
|
+
chunks_desc.chunk_info_with_indices,
|
|
243
|
+
value_np,
|
|
244
|
+
chunks_desc.write_empty_chunks,
|
|
226
245
|
)
|
|
227
246
|
return None
|
|
228
247
|
|
|
229
248
|
def _raise_error_on_unsupported_batch_dtype(
|
|
230
249
|
self,
|
|
231
250
|
batch_info: Iterable[
|
|
232
|
-
tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
|
|
251
|
+
tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
|
|
233
252
|
],
|
|
234
253
|
):
|
|
235
254
|
# https://github.com/LDeakin/zarrs/blob/0532fe983b7b42b59dbf84e50a2fe5e6f7bad4ce/zarrs_metadata/src/v2_to_v3.rs#L289-L293 for VSUMm
|
|
236
|
-
# Further, our pipeline does not support variable-length objects due to limitations on decode_into, so object is also out
|
|
255
|
+
# Further, our pipeline does not support variable-length objects due to limitations on decode_into, so object/np.dtypes.StringDType is also out
|
|
237
256
|
if any(
|
|
238
|
-
info.dtype.kind in {"V", "S", "U", "M", "m", "O"}
|
|
239
|
-
for (_, info, _, _) in batch_info
|
|
257
|
+
info.dtype.to_native_dtype().kind in {"V", "S", "U", "M", "m", "O", "T"}
|
|
258
|
+
for (_, info, _, _, _) in batch_info
|
|
240
259
|
):
|
|
241
260
|
raise UnsupportedDataTypeError()
|
zarrs/utils.py
CHANGED
|
@@ -2,21 +2,22 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import operator
|
|
4
4
|
import os
|
|
5
|
+
from dataclasses import dataclass
|
|
5
6
|
from functools import reduce
|
|
6
7
|
from typing import TYPE_CHECKING, Any
|
|
7
8
|
|
|
8
9
|
import numpy as np
|
|
9
10
|
from zarr.core.array_spec import ArraySpec
|
|
10
11
|
from zarr.core.indexing import SelectorTuple, is_integer
|
|
11
|
-
from zarr.core.metadata.v2 import _default_fill_value
|
|
12
12
|
|
|
13
|
-
from zarrs._internal import
|
|
13
|
+
from zarrs._internal import ChunkItem
|
|
14
14
|
|
|
15
15
|
if TYPE_CHECKING:
|
|
16
16
|
from collections.abc import Iterable
|
|
17
17
|
from types import EllipsisType
|
|
18
18
|
|
|
19
19
|
from zarr.abc.store import ByteGetter, ByteSetter
|
|
20
|
+
from zarr.dtype import ZDType
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
# adapted from https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
|
|
@@ -139,22 +140,36 @@ def get_shape_for_selector(
|
|
|
139
140
|
return resulting_shape_from_index(shape, selector_tuple, drop_axes, pad=pad)
|
|
140
141
|
|
|
141
142
|
|
|
142
|
-
def get_implicit_fill_value(dtype:
|
|
143
|
+
def get_implicit_fill_value(dtype: ZDType, fill_value: Any) -> Any:
|
|
143
144
|
if fill_value is None:
|
|
144
|
-
fill_value =
|
|
145
|
+
fill_value = dtype.default_scalar()
|
|
145
146
|
return fill_value
|
|
146
147
|
|
|
147
148
|
|
|
149
|
+
@dataclass(frozen=True)
|
|
150
|
+
class RustChunkInfo:
|
|
151
|
+
chunk_info_with_indices: list[ChunkItem]
|
|
152
|
+
write_empty_chunks: bool
|
|
153
|
+
|
|
154
|
+
|
|
148
155
|
def make_chunk_info_for_rust_with_indices(
|
|
149
156
|
batch_info: Iterable[
|
|
150
|
-
tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
|
|
157
|
+
tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
|
|
151
158
|
],
|
|
152
159
|
drop_axes: tuple[int, ...],
|
|
153
160
|
shape: tuple[int, ...],
|
|
154
|
-
) ->
|
|
161
|
+
) -> RustChunkInfo:
|
|
155
162
|
shape = shape if shape else (1,) # constant array
|
|
156
|
-
chunk_info_with_indices: list[
|
|
157
|
-
|
|
163
|
+
chunk_info_with_indices: list[ChunkItem] = []
|
|
164
|
+
write_empty_chunks: bool = True
|
|
165
|
+
for (
|
|
166
|
+
byte_getter,
|
|
167
|
+
chunk_spec,
|
|
168
|
+
chunk_selection,
|
|
169
|
+
out_selection,
|
|
170
|
+
_,
|
|
171
|
+
) in batch_info:
|
|
172
|
+
write_empty_chunks = chunk_spec.config.write_empty_chunks
|
|
158
173
|
if chunk_spec.fill_value is None:
|
|
159
174
|
chunk_spec = ArraySpec(
|
|
160
175
|
chunk_spec.shape,
|
|
@@ -163,7 +178,6 @@ def make_chunk_info_for_rust_with_indices(
|
|
|
163
178
|
chunk_spec.config,
|
|
164
179
|
chunk_spec.prototype,
|
|
165
180
|
)
|
|
166
|
-
chunk_info = Basic(byte_getter, chunk_spec)
|
|
167
181
|
out_selection_as_slices = selector_tuple_to_slice_selection(out_selection)
|
|
168
182
|
chunk_selection_as_slices = selector_tuple_to_slice_selection(chunk_selection)
|
|
169
183
|
shape_chunk_selection_slices = get_shape_for_selector(
|
|
@@ -180,11 +194,12 @@ def make_chunk_info_for_rust_with_indices(
|
|
|
180
194
|
f"{shape_chunk_selection} != {shape_chunk_selection_slices}"
|
|
181
195
|
)
|
|
182
196
|
chunk_info_with_indices.append(
|
|
183
|
-
|
|
184
|
-
|
|
197
|
+
ChunkItem(
|
|
198
|
+
key=byte_getter.path,
|
|
185
199
|
chunk_subset=chunk_selection_as_slices,
|
|
200
|
+
chunk_shape=chunk_spec.shape,
|
|
186
201
|
subset=out_selection_as_slices,
|
|
187
202
|
shape=shape,
|
|
188
203
|
)
|
|
189
204
|
)
|
|
190
|
-
return chunk_info_with_indices
|
|
205
|
+
return RustChunkInfo(chunk_info_with_indices, write_empty_chunks)
|
|
@@ -1,47 +1,16 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: zarrs
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Classifier: Programming Language :: Rust
|
|
5
5
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
6
6
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
7
7
|
Classifier: Typing :: Typed
|
|
8
|
-
Requires-Dist:
|
|
9
|
-
Requires-Dist:
|
|
10
|
-
Requires-Dist: fasteners
|
|
11
|
-
Requires-Dist: numcodecs[msgpack] >=0.10.0
|
|
12
|
-
Requires-Dist: fsspec >2024
|
|
13
|
-
Requires-Dist: crc32c
|
|
14
|
-
Requires-Dist: zstandard
|
|
15
|
-
Requires-Dist: typing-extensions
|
|
16
|
-
Requires-Dist: donfig
|
|
17
|
-
Requires-Dist: pytest
|
|
18
|
-
Requires-Dist: universal-pathlib >=0.2.0
|
|
19
|
-
Requires-Dist: zarr
|
|
20
|
-
Requires-Dist: coverage ; extra == 'test'
|
|
21
|
-
Requires-Dist: pytest ; extra == 'test'
|
|
22
|
-
Requires-Dist: pytest-cov ; extra == 'test'
|
|
23
|
-
Requires-Dist: msgpack ; extra == 'test'
|
|
24
|
-
Requires-Dist: lmdb ; extra == 'test'
|
|
25
|
-
Requires-Dist: s3fs ; extra == 'test'
|
|
26
|
-
Requires-Dist: pytest-asyncio ; extra == 'test'
|
|
27
|
-
Requires-Dist: moto[s3] ; extra == 'test'
|
|
28
|
-
Requires-Dist: flask-cors ; extra == 'test'
|
|
29
|
-
Requires-Dist: flask ; extra == 'test'
|
|
30
|
-
Requires-Dist: requests ; extra == 'test'
|
|
31
|
-
Requires-Dist: mypy ; extra == 'test'
|
|
32
|
-
Requires-Dist: hypothesis ; extra == 'test'
|
|
33
|
-
Requires-Dist: pytest-xdist ; extra == 'test'
|
|
34
|
-
Requires-Dist: maturin ; extra == 'dev'
|
|
35
|
-
Requires-Dist: pip ; extra == 'dev'
|
|
36
|
-
Requires-Dist: pre-commit ; extra == 'dev'
|
|
37
|
-
Requires-Dist: sphinx >=7.4.6 ; extra == 'doc'
|
|
38
|
-
Requires-Dist: myst-parser ; extra == 'doc'
|
|
39
|
-
Provides-Extra: test
|
|
40
|
-
Provides-Extra: dev
|
|
41
|
-
Provides-Extra: doc
|
|
8
|
+
Requires-Dist: numpy>=1.24
|
|
9
|
+
Requires-Dist: zarr>=3.1
|
|
42
10
|
License-File: LICENSE
|
|
11
|
+
Summary: A CodecPipeline for zarr-python backed by the zarrs Rust crate
|
|
43
12
|
Author: Ilan Gold, Lachlan Deakin, Philipp Angerer
|
|
44
|
-
License: MIT
|
|
13
|
+
License-Expression: MIT
|
|
45
14
|
Requires-Python: >=3.11
|
|
46
15
|
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
47
16
|
|
|
@@ -50,9 +19,9 @@ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
|
50
19
|
[](https://pypi.org/project/zarrs)
|
|
51
20
|
[](https://pepy.tech/project/zarrs)
|
|
52
21
|
[](https://pepy.tech/project/zarrs)
|
|
53
|
-
[](https://github.com/zarrs/zarrs-python/stargazers)
|
|
23
|
+

|
|
24
|
+

|
|
56
25
|
|
|
57
26
|
This project serves as a bridge between [`zarrs`](https://docs.rs/zarrs/latest/zarrs/) (Rust) and [`zarr`](https://zarr.readthedocs.io/en/latest/index.html) (`zarr-python`) via [`PyO3`](https://pyo3.rs/v0.22.3/). The main goal of the project is to speed up i/o (see [`zarr_benchmarks`](https://github.com/LDeakin/zarr_benchmarks)).
|
|
58
27
|
|
|
@@ -60,7 +29,6 @@ To use the project, simply install our package (which depends on `zarr-python>=3
|
|
|
60
29
|
|
|
61
30
|
```python
|
|
62
31
|
import zarr
|
|
63
|
-
import zarrs
|
|
64
32
|
zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"})
|
|
65
33
|
```
|
|
66
34
|
|
|
@@ -72,12 +40,11 @@ We export a `ZarrsCodecPipeline` class so that `zarr-python` can use the class b
|
|
|
72
40
|
|
|
73
41
|
At the moment, we only support a subset of the `zarr-python` stores:
|
|
74
42
|
|
|
75
|
-
- [
|
|
76
|
-
- [
|
|
77
|
-
|
|
43
|
+
- [`LocalStore`](https://zarr.readthedocs.io/en/latest/api/zarr/storage/#zarr.storage.LocalStore) (local filesystem)
|
|
44
|
+
- [`ObjectStore`](https://zarr.readthedocs.io/en/latest/user-guide/storage/#object-store) (cloud storage)
|
|
45
|
+
- [`HTTPFileSystem`](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.implementations.http.HTTPFileSystem) via [`FsspecStore`](https://zarr.readthedocs.io/en/latest/api/zarr/storage/#zarr.storage.FsspecStore)
|
|
78
46
|
|
|
79
47
|
A `NotImplementedError` will be raised if a store is not supported.
|
|
80
|
-
We intend to support more stores in the future: https://github.com/ilan-gold/zarrs-python/issues/44.
|
|
81
48
|
|
|
82
49
|
### Configuration
|
|
83
50
|
|
|
@@ -95,7 +62,11 @@ The `ZarrsCodecPipeline` specific options are:
|
|
|
95
62
|
- `codec_pipeline.chunk_concurrent_minimum`: the minimum number of chunks retrieved/stored concurrently when balancing chunk/codec concurrency.
|
|
96
63
|
- Defaults to 4 if `None`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#chunk-concurrent-minimum) for more info.
|
|
97
64
|
- `codec_pipeline.validate_checksums`: enable checksum validation (e.g. with the CRC32C codec).
|
|
98
|
-
- Defaults to
|
|
65
|
+
- Defaults to `True`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#validate-checksums) for more info.
|
|
66
|
+
- `codec_pipeline.direct_io`: enable `O_DIRECT` read/write, needs support from the operating system (currently only Linux) and file system.
|
|
67
|
+
- Defaults to `False`.
|
|
68
|
+
- `codec_pipeline.strict`: raise exceptions for unsupported operations instead of falling back to the default codec pipeline of `zarr-python`.
|
|
69
|
+
- Defaults to `False`.
|
|
99
70
|
|
|
100
71
|
For example:
|
|
101
72
|
```python
|
|
@@ -105,14 +76,15 @@ zarr.config.set({
|
|
|
105
76
|
"codec_pipeline": {
|
|
106
77
|
"path": "zarrs.ZarrsCodecPipeline",
|
|
107
78
|
"validate_checksums": True,
|
|
108
|
-
"store_empty_chunks": False,
|
|
109
79
|
"chunk_concurrent_maximum": None,
|
|
110
80
|
"chunk_concurrent_minimum": 4,
|
|
81
|
+
"direct_io": False,
|
|
82
|
+
"strict": False
|
|
111
83
|
}
|
|
112
84
|
})
|
|
113
85
|
```
|
|
114
86
|
|
|
115
|
-
If the `ZarrsCodecPipeline` is pickled, and then un-pickled, and during that time one of `
|
|
87
|
+
If the `ZarrsCodecPipeline` is pickled, and then un-pickled, and during that time one of `chunk_concurrent_minimum`, `chunk_concurrent_maximum`, or `num_threads` has changed, the newly un-pickled version will pick up the new value. However, once a `ZarrsCodecPipeline` object has been instantiated, these values are then fixed. This may change in the future as guidance from the `zarr` community becomes clear.
|
|
116
88
|
|
|
117
89
|
## Concurrency
|
|
118
90
|
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
zarrs-0.2.2.dist-info/METADATA,sha256=8K1AOS_SVQgRLzQ2rWtNbJxCAb12XmTLRrT6QklQgOI,8054
|
|
2
|
+
zarrs-0.2.2.dist-info/WHEEL,sha256=cVaoL47Ex1FxzwnkO_WCjy3a1Wl6mtZbBPTvTiNCHdY,103
|
|
3
|
+
zarrs-0.2.2.dist-info/entry_points.txt,sha256=EzI6yCIUPDHBHzjDdexuGGYbOLXf8x2ICokOJXnuX3k,68
|
|
4
|
+
zarrs-0.2.2.dist-info/licenses/LICENSE,sha256=vwIsJjEfVFehyyqcb7B3dAXAniaFMmk8u7IoiJAfBJ4,1099
|
|
5
|
+
zarrs/__init__.py,sha256=lRVtAPzCzJkGs4vQrW4UgANq-pC-khS0ZF7HTj4__Hg,489
|
|
6
|
+
zarrs/_internal.abi3.so,sha256=LZocl0zbfU1hDhFn6G_V5YpY3SQ8blerNZFcjAQ0iOE,13652224
|
|
7
|
+
zarrs/_internal.pyi,sha256=a_D4yx99r4xeQX1ntY_A_Q4wVmLeLwJZHWAQV_mVu9A,1308
|
|
8
|
+
zarrs/pipeline.py,sha256=YfB13GWNfxELerXVtJ_ipFwSL7bN-YuPys6jCB9lnms,9008
|
|
9
|
+
zarrs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
zarrs/utils.py,sha256=W2XCjJDVrdHYJgtVaRKN533Ljw1MF7o0YwXuz5ZAk2g,7020
|
|
11
|
+
zarrs-0.2.2.dist-info/RECORD,,
|
zarrs-0.1.2.dist-info/RECORD
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
zarrs-0.1.2.dist-info/METADATA,sha256=KRSJHcPn_6Db3Fnf_3hm8zFTfu929LazZRSc8aTGp6Y,8842
|
|
2
|
-
zarrs-0.1.2.dist-info/WHEEL,sha256=8JKaGEbIvFI3ESICMOCnA2alRxR2MhR5AYZnD5AAa6k,103
|
|
3
|
-
zarrs-0.1.2.dist-info/licenses/LICENSE,sha256=vwIsJjEfVFehyyqcb7B3dAXAniaFMmk8u7IoiJAfBJ4,1099
|
|
4
|
-
zarrs/__init__.py,sha256=4oWtWDZO8r7z4Uh7Fy_brmkxXDpULQdgjlA0iFw98eA,573
|
|
5
|
-
zarrs/_internal.pyi,sha256=-2Vy3NqBAS2g-ShALrzknK7Kqxpha17FjO6o35khlyY,1226
|
|
6
|
-
zarrs/utils.py,sha256=-SgsM1P6D8ClRgLdyUEvUUvQ-23i6Ie6SiTQ9aaWjSg,6705
|
|
7
|
-
zarrs/pipeline.py,sha256=-0xLL-Z6p_7nEq-BD8SiOmUy8mp-yjrIsD_lvsL3X68,8610
|
|
8
|
-
zarrs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
zarrs/_internal.abi3.so,sha256=5YxB-4wxOfzOHvp-PSrHDNI-MklBhfx7EYf1m6Hydmo,6197968
|
|
10
|
-
zarrs-0.1.2.dist-info/RECORD,,
|
|
File without changes
|