zarrs 0.1.0__cp311-abi3-macosx_10_12_x86_64.whl → 0.2.2__cp311-abi3-macosx_10_12_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zarrs/__init__.py CHANGED
@@ -1,5 +1,3 @@
1
- from zarr.registry import register_pipeline
2
-
3
1
  from ._internal import __version__
4
2
  from .pipeline import ZarrsCodecPipeline as _ZarrsCodecPipeline
5
3
  from .utils import CollapsedDimensionError, DiscontiguousArrayError
@@ -10,8 +8,6 @@ class ZarrsCodecPipeline(_ZarrsCodecPipeline):
10
8
  pass
11
9
 
12
10
 
13
- register_pipeline(ZarrsCodecPipeline)
14
-
15
11
  __all__ = [
16
12
  "ZarrsCodecPipeline",
17
13
  "DiscontiguousArrayError",
zarrs/_internal.abi3.so CHANGED
Binary file
zarrs/_internal.pyi CHANGED
@@ -1,47 +1,44 @@
1
1
  # This file is automatically generated by pyo3_stub_gen
2
2
  # ruff: noqa: E501, F401
3
3
 
4
+ import builtins
4
5
  import typing
5
6
 
6
- import numpy
7
7
  import numpy.typing
8
+ import zarr.abc.store
8
9
 
10
+ @typing.final
11
+ class ChunkItem:
12
+ def __new__(
13
+ cls,
14
+ key: builtins.str,
15
+ chunk_subset: typing.Sequence[slice],
16
+ chunk_shape: typing.Sequence[builtins.int],
17
+ subset: typing.Sequence[slice],
18
+ shape: typing.Sequence[builtins.int],
19
+ ) -> ChunkItem: ...
20
+
21
+ @typing.final
9
22
  class CodecPipelineImpl:
10
23
  def __new__(
11
24
  cls,
12
- metadata,
25
+ array_metadata: builtins.str,
26
+ store_config: zarr.abc.store.Store,
13
27
  *,
14
- validate_checksums=...,
15
- store_empty_chunks=...,
16
- chunk_concurrent_minimum=...,
17
- chunk_concurrent_maximum=...,
18
- num_threads=...,
19
- ): ...
28
+ validate_checksums: builtins.bool = False,
29
+ chunk_concurrent_minimum: builtins.int | None = None,
30
+ chunk_concurrent_maximum: builtins.int | None = None,
31
+ num_threads: builtins.int | None = None,
32
+ direct_io: builtins.bool = False,
33
+ ) -> CodecPipelineImpl: ...
20
34
  def retrieve_chunks_and_apply_index(
21
35
  self,
22
- chunk_descriptions: typing.Sequence[
23
- tuple[
24
- tuple[str, typing.Sequence[int], str, typing.Sequence[int]],
25
- typing.Sequence[slice],
26
- typing.Sequence[slice],
27
- ]
28
- ],
29
- value: numpy.NDArray[typing.Any],
36
+ chunk_descriptions: typing.Sequence[ChunkItem],
37
+ value: numpy.typing.NDArray[typing.Any],
30
38
  ) -> None: ...
31
- def retrieve_chunks(
32
- self,
33
- chunk_descriptions: typing.Sequence[
34
- tuple[str, typing.Sequence[int], str, typing.Sequence[int]]
35
- ],
36
- ) -> list[numpy.typing.NDArray[numpy.uint8]]: ...
37
39
  def store_chunks_with_indices(
38
40
  self,
39
- chunk_descriptions: typing.Sequence[
40
- tuple[
41
- tuple[str, typing.Sequence[int], str, typing.Sequence[int]],
42
- typing.Sequence[slice],
43
- typing.Sequence[slice],
44
- ]
45
- ],
46
- value: numpy.NDArray[typing.Any],
41
+ chunk_descriptions: typing.Sequence[ChunkItem],
42
+ value: numpy.typing.NDArray[typing.Any],
43
+ write_empty_chunks: builtins.bool,
47
44
  ) -> None: ...
zarrs/pipeline.py CHANGED
@@ -3,68 +3,134 @@ from __future__ import annotations
3
3
  import asyncio
4
4
  import json
5
5
  from dataclasses import dataclass
6
- from typing import TYPE_CHECKING, Any
6
+ from typing import TYPE_CHECKING, TypedDict
7
+ from warnings import warn
7
8
 
8
9
  import numpy as np
9
- from zarr.abc.codec import (
10
- Codec,
11
- CodecPipeline,
12
- )
10
+ from zarr.abc.codec import Codec, CodecPipeline
11
+ from zarr.codecs._v2 import V2Codec
12
+ from zarr.core import BatchedCodecPipeline
13
13
  from zarr.core.config import config
14
+ from zarr.core.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata
14
15
 
15
16
  if TYPE_CHECKING:
16
17
  from collections.abc import Iterable, Iterator
17
18
  from typing import Self
18
19
 
19
- from zarr.abc.store import ByteGetter, ByteSetter
20
+ from zarr.abc.store import ByteGetter, ByteSetter, Store
20
21
  from zarr.core.array_spec import ArraySpec
21
- from zarr.core.buffer import Buffer, NDBuffer
22
+ from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer
22
23
  from zarr.core.chunk_grids import ChunkGrid
23
- from zarr.core.common import ChunkCoords
24
24
  from zarr.core.indexing import SelectorTuple
25
+ from zarr.dtype import ZDType
25
26
 
26
27
  from ._internal import CodecPipelineImpl
27
28
  from .utils import (
28
29
  CollapsedDimensionError,
29
30
  DiscontiguousArrayError,
30
- make_chunk_info_for_rust,
31
+ FillValueNoneError,
31
32
  make_chunk_info_for_rust_with_indices,
32
33
  )
33
34
 
34
35
 
35
- @dataclass(frozen=True)
36
- class ZarrsCodecPipeline(CodecPipeline):
36
+ class UnsupportedDataTypeError(Exception):
37
+ pass
38
+
39
+
40
+ class UnsupportedMetadataError(Exception):
41
+ pass
42
+
43
+
44
+ def get_codec_pipeline_impl(
45
+ metadata: ArrayMetadata, store: Store, *, strict: bool
46
+ ) -> CodecPipelineImpl | None:
47
+ try:
48
+ array_metadata_json = json.dumps(metadata.to_dict())
49
+ # Maintain old behavior: https://github.com/zarrs/zarrs-python/tree/b36ba797cafec77f5f41a25316be02c718a2b4f8?tab=readme-ov-file#configuration
50
+ validate_checksums = config.get("codec_pipeline.validate_checksums", True)
51
+ if validate_checksums is None:
52
+ validate_checksums = True
53
+ return CodecPipelineImpl(
54
+ array_metadata_json,
55
+ store_config=store,
56
+ validate_checksums=validate_checksums,
57
+ chunk_concurrent_minimum=config.get(
58
+ "codec_pipeline.chunk_concurrent_minimum", None
59
+ ),
60
+ chunk_concurrent_maximum=config.get(
61
+ "codec_pipeline.chunk_concurrent_maximum", None
62
+ ),
63
+ num_threads=config.get("threading.max_workers", None),
64
+ direct_io=config.get("codec_pipeline.direct_io", False),
65
+ )
66
+ except TypeError as e:
67
+ if strict:
68
+ raise UnsupportedMetadataError() from e
69
+
70
+ warn(
71
+ f"Array is unsupported by ZarrsCodecPipeline: {e}",
72
+ category=UserWarning,
73
+ )
74
+ return None
75
+
76
+
77
+ def get_codec_pipeline_fallback(
78
+ metadata: ArrayMetadata, *, strict: bool
79
+ ) -> BatchedCodecPipeline | None:
80
+ if strict:
81
+ return None
82
+ else:
83
+ codecs = array_metadata_to_codecs(metadata)
84
+ return BatchedCodecPipeline.from_codecs(codecs)
85
+
86
+
87
+ class ZarrsCodecPipelineState(TypedDict):
88
+ codec_metadata_json: str
37
89
  codecs: tuple[Codec, ...]
38
- impl: CodecPipelineImpl
90
+
91
+
92
+ def array_metadata_to_codecs(metadata: ArrayMetadata) -> list[Codec]:
93
+ if isinstance(metadata, ArrayV3Metadata):
94
+ return metadata.codecs
95
+ elif isinstance(metadata, ArrayV2Metadata):
96
+ v2_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor)
97
+ return [v2_codec]
98
+
99
+
100
+ @dataclass
101
+ class ZarrsCodecPipeline(CodecPipeline):
102
+ metadata: ArrayMetadata
103
+ store: Store
104
+ impl: CodecPipelineImpl | None
105
+ python_impl: BatchedCodecPipeline | None
106
+
107
+ def __getstate__(self) -> ZarrsCodecPipelineState:
108
+ return {"metadata": self.metadata, "store": self.store}
109
+
110
+ def __setstate__(self, state: ZarrsCodecPipelineState):
111
+ self.metadata = state["metadata"]
112
+ self.store = state["store"]
113
+ strict = config.get("codec_pipeline.strict", False)
114
+ self.impl = get_codec_pipeline_impl(self.metadata, self.store, strict=strict)
115
+ self.python_impl = get_codec_pipeline_fallback(self.metadata, strict=strict)
39
116
 
40
117
  def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
41
- raise NotImplementedError("evolve_from_array_spec")
118
+ return self
42
119
 
43
120
  @classmethod
44
121
  def from_codecs(cls, codecs: Iterable[Codec]) -> Self:
45
- codec_metadata = [codec.to_dict() for codec in codecs]
46
- codec_metadata_json = json.dumps(codec_metadata)
47
- # TODO: upstream zarr-python has not settled on how to deal with configs yet
48
- # Should they be checked when an array is created, or when an operation is performed?
49
- # https://github.com/zarr-developers/zarr-python/issues/2409
50
- # https://github.com/zarr-developers/zarr-python/pull/2429
122
+ return BatchedCodecPipeline.from_codecs(codecs)
123
+
124
+ @classmethod
125
+ def from_array_metadata_and_store(
126
+ cls, array_metadata: ArrayMetadata, store: Store
127
+ ) -> Self:
128
+ strict = config.get("codec_pipeline.strict", False)
51
129
  return cls(
52
- codecs=tuple(codecs),
53
- impl=CodecPipelineImpl(
54
- codec_metadata_json,
55
- validate_checksums=config.get(
56
- "codec_pipeline.validate_checksums", None
57
- ),
58
- # TODO: upstream zarr-python array.write_empty_chunks is not merged yet #2429
59
- store_empty_chunks=config.get("array.write_empty_chunks", None),
60
- chunk_concurrent_minimum=config.get(
61
- "codec_pipeline.chunk_concurrent_minimum", None
62
- ),
63
- chunk_concurrent_maximum=config.get(
64
- "codec_pipeline.chunk_concurrent_maximum", None
65
- ),
66
- num_threads=config.get("threading.max_workers", None),
67
- ),
130
+ metadata=array_metadata,
131
+ store=store,
132
+ impl=get_codec_pipeline_impl(array_metadata, store, strict=strict),
133
+ python_impl=get_codec_pipeline_fallback(array_metadata, strict=strict),
68
134
  )
69
135
 
70
136
  @property
@@ -79,7 +145,7 @@ class ZarrsCodecPipeline(CodecPipeline):
79
145
  yield from self.codecs
80
146
 
81
147
  def validate(
82
- self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: ChunkGrid
148
+ self, *, shape: tuple[int, ...], dtype: ZDType, chunk_grid: ChunkGrid
83
149
  ) -> None:
84
150
  raise NotImplementedError("validate")
85
151
 
@@ -101,55 +167,94 @@ class ZarrsCodecPipeline(CodecPipeline):
101
167
  async def read(
102
168
  self,
103
169
  batch_info: Iterable[
104
- tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]
170
+ tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
105
171
  ],
106
- out: NDBuffer,
172
+ out: NDBuffer, # type: ignore
107
173
  drop_axes: tuple[int, ...] = (), # FIXME: unused
108
174
  ) -> None:
109
- out = out.as_ndarray_like() # FIXME: Error if array is not in host memory
175
+ # FIXME: Error if array is not in host memory
110
176
  if not out.dtype.isnative:
111
177
  raise RuntimeError("Non-native byte order not supported")
112
178
  try:
113
- chunks_desc = make_chunk_info_for_rust_with_indices(batch_info, drop_axes)
114
- index_in_rust = True
115
- except (DiscontiguousArrayError, CollapsedDimensionError):
116
- chunks_desc = make_chunk_info_for_rust(batch_info)
117
- index_in_rust = False
118
- if index_in_rust:
179
+ if self.impl is None:
180
+ raise UnsupportedMetadataError()
181
+ self._raise_error_on_unsupported_batch_dtype(batch_info)
182
+ chunks_desc = make_chunk_info_for_rust_with_indices(
183
+ batch_info, drop_axes, out.shape
184
+ )
185
+ except (
186
+ UnsupportedMetadataError,
187
+ DiscontiguousArrayError,
188
+ CollapsedDimensionError,
189
+ UnsupportedDataTypeError,
190
+ FillValueNoneError,
191
+ ):
192
+ if self.python_impl is None:
193
+ raise
194
+ await self.python_impl.read(batch_info, out, drop_axes)
195
+ return None
196
+ else:
197
+ out: NDArrayLike = out.as_ndarray_like()
119
198
  await asyncio.to_thread(
120
199
  self.impl.retrieve_chunks_and_apply_index,
121
- chunks_desc,
200
+ chunks_desc.chunk_info_with_indices,
122
201
  out,
123
202
  )
124
203
  return None
125
- chunks = await asyncio.to_thread(self.impl.retrieve_chunks, chunks_desc)
126
- for chunk, chunk_info in zip(chunks, batch_info):
127
- out_selection = chunk_info[3]
128
- selection = chunk_info[2]
129
- spec = chunk_info[1]
130
- chunk_reshaped = chunk.view(spec.dtype).reshape(spec.shape)
131
- chunk_selected = chunk_reshaped[selection]
132
- if drop_axes:
133
- chunk_selected = np.squeeze(chunk_selected, axis=drop_axes)
134
- out[out_selection] = chunk_selected
135
204
 
136
205
  async def write(
137
206
  self,
138
207
  batch_info: Iterable[
139
- tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
208
+ tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
140
209
  ],
141
- value: NDBuffer,
210
+ value: NDBuffer, # type: ignore
142
211
  drop_axes: tuple[int, ...] = (),
143
212
  ) -> None:
144
- value = value.as_ndarray_like() # FIXME: Error if array is not in host memory
145
- if not value.dtype.isnative:
146
- value = np.ascontiguousarray(value, dtype=value.dtype.newbyteorder("="))
147
- elif not value.flags.c_contiguous:
148
- value = np.ascontiguousarray(value)
149
- chunks_desc = make_chunk_info_for_rust_with_indices(batch_info, drop_axes)
150
- await asyncio.to_thread(
151
- self.impl.store_chunks_with_indices,
152
- chunks_desc,
153
- value,
154
- )
155
- return None
213
+ try:
214
+ if self.impl is None:
215
+ raise UnsupportedMetadataError()
216
+ self._raise_error_on_unsupported_batch_dtype(batch_info)
217
+ chunks_desc = make_chunk_info_for_rust_with_indices(
218
+ batch_info, drop_axes, value.shape
219
+ )
220
+ except (
221
+ UnsupportedMetadataError,
222
+ DiscontiguousArrayError,
223
+ CollapsedDimensionError,
224
+ UnsupportedDataTypeError,
225
+ FillValueNoneError,
226
+ ):
227
+ if self.python_impl is None:
228
+ raise
229
+ await self.python_impl.write(batch_info, value, drop_axes)
230
+ return None
231
+ else:
232
+ # FIXME: Error if array is not in host memory
233
+ value_np: NDArrayLike | np.ndarray = value.as_ndarray_like()
234
+ if not value_np.dtype.isnative:
235
+ value_np = np.ascontiguousarray(
236
+ value_np, dtype=value_np.dtype.newbyteorder("=")
237
+ )
238
+ elif not value_np.flags.c_contiguous:
239
+ value_np = np.ascontiguousarray(value_np)
240
+ await asyncio.to_thread(
241
+ self.impl.store_chunks_with_indices,
242
+ chunks_desc.chunk_info_with_indices,
243
+ value_np,
244
+ chunks_desc.write_empty_chunks,
245
+ )
246
+ return None
247
+
248
+ def _raise_error_on_unsupported_batch_dtype(
249
+ self,
250
+ batch_info: Iterable[
251
+ tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
252
+ ],
253
+ ):
254
+ # https://github.com/LDeakin/zarrs/blob/0532fe983b7b42b59dbf84e50a2fe5e6f7bad4ce/zarrs_metadata/src/v2_to_v3.rs#L289-L293 for VSUMm
255
+ # Further, our pipeline does not support variable-length objects due to limitations on decode_into, so object/np.dtypes.StringDType is also out
256
+ if any(
257
+ info.dtype.to_native_dtype().kind in {"V", "S", "U", "M", "m", "O", "T"}
258
+ for (_, info, _, _, _) in batch_info
259
+ ):
260
+ raise UnsupportedDataTypeError()
zarrs/utils.py CHANGED
@@ -2,19 +2,22 @@ from __future__ import annotations
2
2
 
3
3
  import operator
4
4
  import os
5
+ from dataclasses import dataclass
5
6
  from functools import reduce
6
7
  from typing import TYPE_CHECKING, Any
7
8
 
8
9
  import numpy as np
10
+ from zarr.core.array_spec import ArraySpec
9
11
  from zarr.core.indexing import SelectorTuple, is_integer
10
12
 
13
+ from zarrs._internal import ChunkItem
14
+
11
15
  if TYPE_CHECKING:
12
16
  from collections.abc import Iterable
13
17
  from types import EllipsisType
14
18
 
15
19
  from zarr.abc.store import ByteGetter, ByteSetter
16
- from zarr.core.array_spec import ArraySpec
17
- from zarr.core.common import ChunkCoords
20
+ from zarr.dtype import ZDType
18
21
 
19
22
 
20
23
  # adapted from https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
@@ -30,6 +33,10 @@ class CollapsedDimensionError(Exception):
30
33
  pass
31
34
 
32
35
 
36
+ class FillValueNoneError(Exception):
37
+ pass
38
+
39
+
33
40
  # This is a (mostly) copy of the function from zarr.core.indexing that fixes:
34
41
  # DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated
35
42
  # TODO: Upstream this fix
@@ -62,17 +69,6 @@ def selector_tuple_to_slice_selection(selector_tuple: SelectorTuple) -> list[sli
62
69
  return make_slice_selection(selector_tuple)
63
70
 
64
71
 
65
- def convert_chunk_to_primitive(
66
- byte_getter: ByteGetter | ByteSetter, chunk_spec: ArraySpec
67
- ) -> tuple[str, ChunkCoords, str, Any]:
68
- return (
69
- str(byte_getter),
70
- chunk_spec.shape,
71
- str(chunk_spec.dtype),
72
- chunk_spec.fill_value.tobytes(),
73
- )
74
-
75
-
76
72
  def resulting_shape_from_index(
77
73
  array_shape: tuple[int, ...],
78
74
  index_tuple: tuple[int | slice | EllipsisType | np.ndarray],
@@ -144,15 +140,44 @@ def get_shape_for_selector(
144
140
  return resulting_shape_from_index(shape, selector_tuple, drop_axes, pad=pad)
145
141
 
146
142
 
143
+ def get_implicit_fill_value(dtype: ZDType, fill_value: Any) -> Any:
144
+ if fill_value is None:
145
+ fill_value = dtype.default_scalar()
146
+ return fill_value
147
+
148
+
149
+ @dataclass(frozen=True)
150
+ class RustChunkInfo:
151
+ chunk_info_with_indices: list[ChunkItem]
152
+ write_empty_chunks: bool
153
+
154
+
147
155
  def make_chunk_info_for_rust_with_indices(
148
156
  batch_info: Iterable[
149
- tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
157
+ tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
150
158
  ],
151
159
  drop_axes: tuple[int, ...],
152
- ) -> list[tuple[tuple[str, ChunkCoords, str, Any], list[slice], list[slice]]]:
153
- chunk_info_with_indices = []
154
- for byte_getter, chunk_spec, chunk_selection, out_selection in batch_info:
155
- chunk_info = convert_chunk_to_primitive(byte_getter, chunk_spec)
160
+ shape: tuple[int, ...],
161
+ ) -> RustChunkInfo:
162
+ shape = shape if shape else (1,) # constant array
163
+ chunk_info_with_indices: list[ChunkItem] = []
164
+ write_empty_chunks: bool = True
165
+ for (
166
+ byte_getter,
167
+ chunk_spec,
168
+ chunk_selection,
169
+ out_selection,
170
+ _,
171
+ ) in batch_info:
172
+ write_empty_chunks = chunk_spec.config.write_empty_chunks
173
+ if chunk_spec.fill_value is None:
174
+ chunk_spec = ArraySpec(
175
+ chunk_spec.shape,
176
+ chunk_spec.dtype,
177
+ get_implicit_fill_value(chunk_spec.dtype, chunk_spec.fill_value),
178
+ chunk_spec.config,
179
+ chunk_spec.prototype,
180
+ )
156
181
  out_selection_as_slices = selector_tuple_to_slice_selection(out_selection)
157
182
  chunk_selection_as_slices = selector_tuple_to_slice_selection(chunk_selection)
158
183
  shape_chunk_selection_slices = get_shape_for_selector(
@@ -169,17 +194,12 @@ def make_chunk_info_for_rust_with_indices(
169
194
  f"{shape_chunk_selection} != {shape_chunk_selection_slices}"
170
195
  )
171
196
  chunk_info_with_indices.append(
172
- (chunk_info, out_selection_as_slices, chunk_selection_as_slices)
197
+ ChunkItem(
198
+ key=byte_getter.path,
199
+ chunk_subset=chunk_selection_as_slices,
200
+ chunk_shape=chunk_spec.shape,
201
+ subset=out_selection_as_slices,
202
+ shape=shape,
203
+ )
173
204
  )
174
- return chunk_info_with_indices
175
-
176
-
177
- def make_chunk_info_for_rust(
178
- batch_info: Iterable[
179
- tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
180
- ],
181
- ) -> list[tuple[str, ChunkCoords, str, Any]]:
182
- return list(
183
- convert_chunk_to_primitive(byte_getter, chunk_spec)
184
- for (byte_getter, chunk_spec, _, _) in batch_info
185
- )
205
+ return RustChunkInfo(chunk_info_with_indices, write_empty_chunks)
@@ -1,66 +1,34 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: zarrs
3
- Version: 0.1.0
3
+ Version: 0.2.2
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: Implementation :: CPython
6
6
  Classifier: Programming Language :: Python :: Implementation :: PyPy
7
7
  Classifier: Typing :: Typed
8
- Requires-Dist: asciitree
9
- Requires-Dist: numpy >=1.24
10
- Requires-Dist: fasteners
11
- Requires-Dist: numcodecs[msgpack] >=0.10.0
12
- Requires-Dist: fsspec >2024
13
- Requires-Dist: crc32c
14
- Requires-Dist: zstandard
15
- Requires-Dist: typing-extensions
16
- Requires-Dist: donfig
17
- Requires-Dist: pytest
18
- Requires-Dist: universal-pathlib >=0.2.0
19
- Requires-Dist: zarr >=3.0.0b2
20
- Requires-Dist: coverage ; extra == 'test'
21
- Requires-Dist: pytest ; extra == 'test'
22
- Requires-Dist: pytest-cov ; extra == 'test'
23
- Requires-Dist: msgpack ; extra == 'test'
24
- Requires-Dist: lmdb ; extra == 'test'
25
- Requires-Dist: s3fs ; extra == 'test'
26
- Requires-Dist: pytest-asyncio ; extra == 'test'
27
- Requires-Dist: moto[s3] ; extra == 'test'
28
- Requires-Dist: flask-cors ; extra == 'test'
29
- Requires-Dist: flask ; extra == 'test'
30
- Requires-Dist: requests ; extra == 'test'
31
- Requires-Dist: mypy ; extra == 'test'
32
- Requires-Dist: hypothesis ; extra == 'test'
33
- Requires-Dist: pytest-xdist ; extra == 'test'
34
- Requires-Dist: maturin ; extra == 'dev'
35
- Requires-Dist: pip ; extra == 'dev'
36
- Requires-Dist: pre-commit ; extra == 'dev'
37
- Requires-Dist: sphinx >=7.4.6 ; extra == 'doc'
38
- Requires-Dist: myst-parser ; extra == 'doc'
39
- Provides-Extra: test
40
- Provides-Extra: dev
41
- Provides-Extra: doc
8
+ Requires-Dist: numpy>=1.24
9
+ Requires-Dist: zarr>=3.1
42
10
  License-File: LICENSE
11
+ Summary: A CodecPipeline for zarr-python backed by the zarrs Rust crate
43
12
  Author: Ilan Gold, Lachlan Deakin, Philipp Angerer
44
- License: MIT
13
+ License-Expression: MIT
45
14
  Requires-Python: >=3.11
46
15
  Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
47
16
 
48
17
  # zarrs-python
49
18
 
50
- ```{warning}
51
- ⚠️ The version of `zarr-python` we currently depend on is still in pre-release and this
52
- package is accordingly extremely experimental.
53
- We cannot guarantee any stability or correctness at the moment, although we have
54
- tried to do extensive testing and make clear what we think we support and do not.
55
- ```
19
+ [![PyPI](https://img.shields.io/pypi/v/zarrs.svg)](https://pypi.org/project/zarrs)
20
+ [![Downloads](https://static.pepy.tech/badge/zarrs/month)](https://pepy.tech/project/zarrs)
21
+ [![Downloads](https://static.pepy.tech/badge/zarrs)](https://pepy.tech/project/zarrs)
22
+ [![Stars](https://img.shields.io/github/stars/zarrs/zarrs-python?style=flat&logo=github&color=yellow)](https://github.com/zarrs/zarrs-python/stargazers)
23
+ ![CI](https://github.com/zarrs/zarrs-python/actions/workflows/ci.yml/badge.svg)
24
+ ![CD](https://github.com/zarrs/zarrs-python/actions/workflows/cd.yml/badge.svg)
56
25
 
57
- This project serves as a bridge between [`zarrs`](https://docs.rs/zarrs/latest/zarrs/) and [`zarr`](https://zarr.readthedocs.io/en/latest/index.html) via [`PyO3`](https://pyo3.rs/v0.22.3/). The main goal of the project is to speed up i/o.
26
+ This project serves as a bridge between [`zarrs`](https://docs.rs/zarrs/latest/zarrs/) (Rust) and [`zarr`](https://zarr.readthedocs.io/en/latest/index.html) (`zarr-python`) via [`PyO3`](https://pyo3.rs/v0.22.3/). The main goal of the project is to speed up i/o (see [`zarr_benchmarks`](https://github.com/LDeakin/zarr_benchmarks)).
58
27
 
59
- To use the project, simply install our package (which depends on `zarr-python>3.0.0b0`), and run:
28
+ To use the project, simply install our package (which depends on `zarr-python>=3.1`), and run:
60
29
 
61
30
  ```python
62
31
  import zarr
63
- import zarrs
64
32
  zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"})
65
33
  ```
66
34
 
@@ -68,7 +36,15 @@ You can then use your `zarr` as normal (with some caveats)!
68
36
 
69
37
  ## API
70
38
 
71
- We export a `ZarrsCodecPipeline` class so that `zarr-python` can use the class but it is not meant to be instantiated and we do not guarantee the stability of its API beyond what is required so that `zarr-python` can use it. Therefore, it is not documented here. We also export two errors, `DiscontiguousArrayError` and `CollapsedDimensionError` that can be thrown in the process of converting to indexers that `zarrs` can understand (see below for more details).
39
+ We export a `ZarrsCodecPipeline` class so that `zarr-python` can use the class but it is not meant to be instantiated and we do not guarantee the stability of its API beyond what is required so that `zarr-python` can use it. Therefore, it is not documented here.
40
+
41
+ At the moment, we only support a subset of the `zarr-python` stores:
42
+
43
+ - [`LocalStore`](https://zarr.readthedocs.io/en/latest/api/zarr/storage/#zarr.storage.LocalStore) (local filesystem)
44
+ - [`ObjectStore`](https://zarr.readthedocs.io/en/latest/user-guide/storage/#object-store) (cloud storage)
45
+ - [`HTTPFileSystem`](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.implementations.http.HTTPFileSystem) via [`FsspecStore`](https://zarr.readthedocs.io/en/latest/api/zarr/storage/#zarr.storage.FsspecStore)
46
+
47
+ A `NotImplementedError` will be raised if a store is not supported.
72
48
 
73
49
  ### Configuration
74
50
 
@@ -79,7 +55,6 @@ Standard `zarr.config` options control some functionality (see the defaults in t
79
55
  - Defaults to the number of threads in the global `rayon` thread pool if set to `None`, which is [typically the number of logical CPUs](https://docs.rs/rayon/latest/rayon/struct.ThreadPoolBuilder.html#method.num_threads).
80
56
  - `array.write_empty_chunks`: whether or not to store empty chunks.
81
57
  - Defaults to false if `None`. Note that checking for emptiness has some overhead, see [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#store-empty-chunks) for more info.
82
- - This option name is proposed in [zarr-python #2429](https://github.com/zarr-developers/zarr-python/pull/2429)
83
58
 
84
59
  The `ZarrsCodecPipeline` specific options are:
85
60
  - `codec_pipeline.chunk_concurrent_maximum`: the maximum number of chunks stored/retrieved concurrently.
@@ -87,7 +62,11 @@ The `ZarrsCodecPipeline` specific options are:
87
62
  - `codec_pipeline.chunk_concurrent_minimum`: the minimum number of chunks retrieved/stored concurrently when balancing chunk/codec concurrency.
88
63
  - Defaults to 4 if `None`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#chunk-concurrent-minimum) for more info.
89
64
  - `codec_pipeline.validate_checksums`: enable checksum validation (e.g. with the CRC32C codec).
90
- - Defaults to true if `None`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#validate-checksums) for more info.
65
+ - Defaults to `True`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#validate-checksums) for more info.
66
+ - `codec_pipeline.direct_io`: enable `O_DIRECT` read/write, needs support from the operating system (currently only Linux) and file system.
67
+ - Defaults to `False`.
68
+ - `codec_pipeline.strict`: raise exceptions for unsupported operations instead of falling back to the default codec pipeline of `zarr-python`.
69
+ - Defaults to `False`.
91
70
 
92
71
  For example:
93
72
  ```python
@@ -97,13 +76,16 @@ zarr.config.set({
97
76
  "codec_pipeline": {
98
77
  "path": "zarrs.ZarrsCodecPipeline",
99
78
  "validate_checksums": True,
100
- "store_empty_chunks": False,
101
79
  "chunk_concurrent_maximum": None,
102
80
  "chunk_concurrent_minimum": 4,
81
+ "direct_io": False,
82
+ "strict": False
103
83
  }
104
84
  })
105
85
  ```
106
86
 
87
+ If the `ZarrsCodecPipeline` is pickled, and then un-pickled, and during that time one of `chunk_concurrent_minimum`, `chunk_concurrent_maximum`, or `num_threads` has changed, the newly un-pickled version will pick up the new value. However, once a `ZarrsCodecPipeline` object has been instantiated, these values are then fixed. This may change in the future as guidance from the `zarr` community becomes clear.
88
+
107
89
  ## Concurrency
108
90
 
109
91
  Concurrency can be classified into two types:
@@ -123,7 +105,7 @@ Chunk concurrency is typically favored because:
123
105
 
124
106
  ## Supported Indexing Methods
125
107
 
126
- We **do not** officially support the following indexing methods. Some of these methods may error out, others may not:
108
+ The following indexing methods will fall back to the default `zarr-python` pipeline:
127
109
 
128
110
  1. Any `oindex` or `vindex` integer `np.ndarray` indexing with dimensionality >=3 i.e.,
129
111
 
@@ -153,7 +135,10 @@ We **do not** officially support the following indexing methods. Some of these
153
135
  arr[0:10, ..., 0:5]
154
136
  ```
155
137
 
156
- Otherwise, we believe that we support your indexing case: slices, ints, and all integer `np.ndarray` indices in 2D for reading, contiguous integer `np.ndarray` indices along one axis for writing etc. Please file an issue if you believe we have more holes in our coverage than we are aware of or you wish to contribute! For example, we have an [issue in zarrs for integer-array indexing](https://github.com/LDeakin/zarrs/issues/52) that would unblock a lot of these issues!
157
138
 
158
- That being said, using non-contiguous integer `np.ndarray` indexing for reads may not be as fast as expected given the performance of other supported methods. Until `zarrs` supports integer indexing, only fetching chunks is done in `rust` while indexing then occurs in `python`.
139
+ Furthermore, indexing numeric data with anything other than a contiguous selection (i.e., slices or an `np.ndarray` of consecutive integers) will fall back to the default `zarr-python` implementation.
140
+
141
+ Please file an issue if you believe we have more holes in our coverage than we are aware of or you wish to contribute! For example, we have an [issue in zarrs for integer-array indexing](https://github.com/LDeakin/zarrs/issues/52) that would unblock the use of the Rust pipeline for that use case (very useful for mini-batch training perhaps!).
142
+
143
+ Further, any codecs not supported by `zarrs` will also automatically fall back to the Python implementation.
159
144
 
@@ -0,0 +1,11 @@
1
+ zarrs-0.2.2.dist-info/METADATA,sha256=8K1AOS_SVQgRLzQ2rWtNbJxCAb12XmTLRrT6QklQgOI,8054
2
+ zarrs-0.2.2.dist-info/WHEEL,sha256=N8W3-0eDM6igWj-H12r7VkxoMaJIqJLxUyWCFstEaGg,105
3
+ zarrs-0.2.2.dist-info/entry_points.txt,sha256=EzI6yCIUPDHBHzjDdexuGGYbOLXf8x2ICokOJXnuX3k,68
4
+ zarrs-0.2.2.dist-info/licenses/LICENSE,sha256=vwIsJjEfVFehyyqcb7B3dAXAniaFMmk8u7IoiJAfBJ4,1099
5
+ zarrs/__init__.py,sha256=lRVtAPzCzJkGs4vQrW4UgANq-pC-khS0ZF7HTj4__Hg,489
6
+ zarrs/_internal.abi3.so,sha256=tnP5IiuDmhfwB15cX4yTyu1mVZgShtJy1lnS87TzK1o,14928348
7
+ zarrs/_internal.pyi,sha256=a_D4yx99r4xeQX1ntY_A_Q4wVmLeLwJZHWAQV_mVu9A,1308
8
+ zarrs/pipeline.py,sha256=YfB13GWNfxELerXVtJ_ipFwSL7bN-YuPys6jCB9lnms,9008
9
+ zarrs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ zarrs/utils.py,sha256=W2XCjJDVrdHYJgtVaRKN533Ljw1MF7o0YwXuz5ZAk2g,7020
11
+ zarrs-0.2.2.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: maturin (1.7.4)
2
+ Generator: maturin (1.9.4)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp311-abi3-macosx_10_12_x86_64
@@ -0,0 +1,2 @@
1
+ [zarr.codec_pipeline]
2
+ zarrs.codec_pipeline=zarrs:ZarrsCodecPipeline
@@ -1,10 +0,0 @@
1
- zarrs-0.1.0.dist-info/METADATA,sha256=Qf4O1LEJrZrrnWtO4vqOn4gF3cf882aX0kIf3TMhUMY,7923
2
- zarrs-0.1.0.dist-info/WHEEL,sha256=LZygbeT1PTQw7a9tONPp78bbG4FZc86U59Z0RFJcoR8,105
3
- zarrs-0.1.0.dist-info/licenses/LICENSE,sha256=vwIsJjEfVFehyyqcb7B3dAXAniaFMmk8u7IoiJAfBJ4,1099
4
- zarrs/__init__.py,sha256=4oWtWDZO8r7z4Uh7Fy_brmkxXDpULQdgjlA0iFw98eA,573
5
- zarrs/_internal.pyi,sha256=revBHMbEur_WKTDRtyJqah0e-D6CPy58sIIZLpicRgA,1330
6
- zarrs/utils.py,sha256=qV-__rjVNs7bhvnyY4U2eOtEFESGm-XQUv6t9ECvjcc,6588
7
- zarrs/pipeline.py,sha256=pcZ56LVQ131e5iq_HpEZWOyo-HzDi6GUOSulDHPWyLQ,5576
8
- zarrs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- zarrs/_internal.abi3.so,sha256=uc2IGucmaDNNx0t5K5lETXYmckCIJedoPcPLJhHCfOs,3193056
10
- zarrs-0.1.0.dist-info/RECORD,,