zarrs 0.1.1__cp311-abi3-macosx_10_12_x86_64.whl → 0.2.2__cp311-abi3-macosx_10_12_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zarrs/__init__.py CHANGED
@@ -1,5 +1,3 @@
1
- from zarr.registry import register_pipeline
2
-
3
1
  from ._internal import __version__
4
2
  from .pipeline import ZarrsCodecPipeline as _ZarrsCodecPipeline
5
3
  from .utils import CollapsedDimensionError, DiscontiguousArrayError
@@ -10,8 +8,6 @@ class ZarrsCodecPipeline(_ZarrsCodecPipeline):
10
8
  pass
11
9
 
12
10
 
13
- register_pipeline(ZarrsCodecPipeline)
14
-
15
11
  __all__ = [
16
12
  "ZarrsCodecPipeline",
17
13
  "DiscontiguousArrayError",
zarrs/_internal.abi3.so CHANGED
Binary file
zarrs/_internal.pyi CHANGED
@@ -1,57 +1,44 @@
1
1
  # This file is automatically generated by pyo3_stub_gen
2
2
  # ruff: noqa: E501, F401
3
3
 
4
+ import builtins
4
5
  import typing
5
- from enum import Enum, auto
6
6
 
7
- import numpy
8
7
  import numpy.typing
8
+ import zarr.abc.store
9
9
 
10
- class Basic:
11
- def __new__(cls, byte_interface: typing.Any, chunk_spec: typing.Any): ...
12
- ...
10
+ @typing.final
11
+ class ChunkItem:
12
+ def __new__(
13
+ cls,
14
+ key: builtins.str,
15
+ chunk_subset: typing.Sequence[slice],
16
+ chunk_shape: typing.Sequence[builtins.int],
17
+ subset: typing.Sequence[slice],
18
+ shape: typing.Sequence[builtins.int],
19
+ ) -> ChunkItem: ...
13
20
 
21
+ @typing.final
14
22
  class CodecPipelineImpl:
15
23
  def __new__(
16
24
  cls,
17
- metadata,
25
+ array_metadata: builtins.str,
26
+ store_config: zarr.abc.store.Store,
18
27
  *,
19
- validate_checksums=...,
20
- store_empty_chunks=...,
21
- chunk_concurrent_minimum=...,
22
- chunk_concurrent_maximum=...,
23
- num_threads=...,
24
- ): ...
28
+ validate_checksums: builtins.bool = False,
29
+ chunk_concurrent_minimum: builtins.int | None = None,
30
+ chunk_concurrent_maximum: builtins.int | None = None,
31
+ num_threads: builtins.int | None = None,
32
+ direct_io: builtins.bool = False,
33
+ ) -> CodecPipelineImpl: ...
25
34
  def retrieve_chunks_and_apply_index(
26
35
  self,
27
- chunk_descriptions: typing.Sequence[WithSubset],
36
+ chunk_descriptions: typing.Sequence[ChunkItem],
28
37
  value: numpy.typing.NDArray[typing.Any],
29
38
  ) -> None: ...
30
- def retrieve_chunks(
31
- self, chunk_descriptions: typing.Sequence[Basic]
32
- ) -> list[numpy.typing.NDArray[numpy.uint8]]: ...
33
39
  def store_chunks_with_indices(
34
40
  self,
35
- chunk_descriptions: typing.Sequence[WithSubset],
41
+ chunk_descriptions: typing.Sequence[ChunkItem],
36
42
  value: numpy.typing.NDArray[typing.Any],
43
+ write_empty_chunks: builtins.bool,
37
44
  ) -> None: ...
38
-
39
- class FilesystemStoreConfig:
40
- root: str
41
-
42
- class HttpStoreConfig:
43
- endpoint: str
44
-
45
- class WithSubset:
46
- def __new__(
47
- cls,
48
- item: Basic,
49
- chunk_subset: typing.Sequence[slice],
50
- subset: typing.Sequence[slice],
51
- shape: typing.Sequence[int],
52
- ): ...
53
- ...
54
-
55
- class StoreConfig(Enum):
56
- Filesystem = auto()
57
- Http = auto()
zarrs/pipeline.py CHANGED
@@ -4,44 +4,84 @@ import asyncio
4
4
  import json
5
5
  from dataclasses import dataclass
6
6
  from typing import TYPE_CHECKING, TypedDict
7
+ from warnings import warn
7
8
 
8
9
  import numpy as np
9
10
  from zarr.abc.codec import Codec, CodecPipeline
11
+ from zarr.codecs._v2 import V2Codec
12
+ from zarr.core import BatchedCodecPipeline
10
13
  from zarr.core.config import config
14
+ from zarr.core.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata
11
15
 
12
16
  if TYPE_CHECKING:
13
17
  from collections.abc import Iterable, Iterator
14
- from typing import Any, Self
18
+ from typing import Self
15
19
 
16
- from zarr.abc.store import ByteGetter, ByteSetter
20
+ from zarr.abc.store import ByteGetter, ByteSetter, Store
17
21
  from zarr.core.array_spec import ArraySpec
18
22
  from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer
19
23
  from zarr.core.chunk_grids import ChunkGrid
20
- from zarr.core.common import ChunkCoords
21
24
  from zarr.core.indexing import SelectorTuple
25
+ from zarr.dtype import ZDType
22
26
 
23
27
  from ._internal import CodecPipelineImpl
24
28
  from .utils import (
25
29
  CollapsedDimensionError,
26
30
  DiscontiguousArrayError,
27
- make_chunk_info_for_rust,
31
+ FillValueNoneError,
28
32
  make_chunk_info_for_rust_with_indices,
29
33
  )
30
34
 
31
35
 
32
- def get_codec_pipeline_impl(codec_metadata_json: str) -> CodecPipelineImpl:
33
- return CodecPipelineImpl(
34
- codec_metadata_json,
35
- validate_checksums=config.get("codec_pipeline.validate_checksums", None),
36
- store_empty_chunks=config.get("array.write_empty_chunks", None),
37
- chunk_concurrent_minimum=config.get(
38
- "codec_pipeline.chunk_concurrent_minimum", None
39
- ),
40
- chunk_concurrent_maximum=config.get(
41
- "codec_pipeline.chunk_concurrent_maximum", None
42
- ),
43
- num_threads=config.get("threading.max_workers", None),
44
- )
36
+ class UnsupportedDataTypeError(Exception):
37
+ pass
38
+
39
+
40
+ class UnsupportedMetadataError(Exception):
41
+ pass
42
+
43
+
44
+ def get_codec_pipeline_impl(
45
+ metadata: ArrayMetadata, store: Store, *, strict: bool
46
+ ) -> CodecPipelineImpl | None:
47
+ try:
48
+ array_metadata_json = json.dumps(metadata.to_dict())
49
+ # Maintain old behavior: https://github.com/zarrs/zarrs-python/tree/b36ba797cafec77f5f41a25316be02c718a2b4f8?tab=readme-ov-file#configuration
50
+ validate_checksums = config.get("codec_pipeline.validate_checksums", True)
51
+ if validate_checksums is None:
52
+ validate_checksums = True
53
+ return CodecPipelineImpl(
54
+ array_metadata_json,
55
+ store_config=store,
56
+ validate_checksums=validate_checksums,
57
+ chunk_concurrent_minimum=config.get(
58
+ "codec_pipeline.chunk_concurrent_minimum", None
59
+ ),
60
+ chunk_concurrent_maximum=config.get(
61
+ "codec_pipeline.chunk_concurrent_maximum", None
62
+ ),
63
+ num_threads=config.get("threading.max_workers", None),
64
+ direct_io=config.get("codec_pipeline.direct_io", False),
65
+ )
66
+ except TypeError as e:
67
+ if strict:
68
+ raise UnsupportedMetadataError() from e
69
+
70
+ warn(
71
+ f"Array is unsupported by ZarrsCodecPipeline: {e}",
72
+ category=UserWarning,
73
+ )
74
+ return None
75
+
76
+
77
+ def get_codec_pipeline_fallback(
78
+ metadata: ArrayMetadata, *, strict: bool
79
+ ) -> BatchedCodecPipeline | None:
80
+ if strict:
81
+ return None
82
+ else:
83
+ codecs = array_metadata_to_codecs(metadata)
84
+ return BatchedCodecPipeline.from_codecs(codecs)
45
85
 
46
86
 
47
87
  class ZarrsCodecPipelineState(TypedDict):
@@ -49,35 +89,48 @@ class ZarrsCodecPipelineState(TypedDict):
49
89
  codecs: tuple[Codec, ...]
50
90
 
51
91
 
92
+ def array_metadata_to_codecs(metadata: ArrayMetadata) -> list[Codec]:
93
+ if isinstance(metadata, ArrayV3Metadata):
94
+ return metadata.codecs
95
+ elif isinstance(metadata, ArrayV2Metadata):
96
+ v2_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor)
97
+ return [v2_codec]
98
+
99
+
52
100
  @dataclass
53
101
  class ZarrsCodecPipeline(CodecPipeline):
54
- codecs: tuple[Codec, ...]
55
- impl: CodecPipelineImpl
56
- codec_metadata_json: str
102
+ metadata: ArrayMetadata
103
+ store: Store
104
+ impl: CodecPipelineImpl | None
105
+ python_impl: BatchedCodecPipeline | None
57
106
 
58
107
  def __getstate__(self) -> ZarrsCodecPipelineState:
59
- return {"codec_metadata_json": self.codec_metadata_json, "codecs": self.codecs}
108
+ return {"metadata": self.metadata, "store": self.store}
60
109
 
61
110
  def __setstate__(self, state: ZarrsCodecPipelineState):
62
- self.codecs = state["codecs"]
63
- self.codec_metadata_json = state["codec_metadata_json"]
64
- self.impl = get_codec_pipeline_impl(self.codec_metadata_json)
111
+ self.metadata = state["metadata"]
112
+ self.store = state["store"]
113
+ strict = config.get("codec_pipeline.strict", False)
114
+ self.impl = get_codec_pipeline_impl(self.metadata, self.store, strict=strict)
115
+ self.python_impl = get_codec_pipeline_fallback(self.metadata, strict=strict)
65
116
 
66
117
  def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
67
- raise NotImplementedError("evolve_from_array_spec")
118
+ return self
68
119
 
69
120
  @classmethod
70
121
  def from_codecs(cls, codecs: Iterable[Codec]) -> Self:
71
- codec_metadata = [codec.to_dict() for codec in codecs]
72
- codec_metadata_json = json.dumps(codec_metadata)
73
- # TODO: upstream zarr-python has not settled on how to deal with configs yet
74
- # Should they be checked when an array is created, or when an operation is performed?
75
- # https://github.com/zarr-developers/zarr-python/issues/2409
76
- # https://github.com/zarr-developers/zarr-python/pull/2429#issuecomment-2566976567
122
+ return BatchedCodecPipeline.from_codecs(codecs)
123
+
124
+ @classmethod
125
+ def from_array_metadata_and_store(
126
+ cls, array_metadata: ArrayMetadata, store: Store
127
+ ) -> Self:
128
+ strict = config.get("codec_pipeline.strict", False)
77
129
  return cls(
78
- codec_metadata_json=codec_metadata_json,
79
- codecs=tuple(codecs),
80
- impl=get_codec_pipeline_impl(codec_metadata_json),
130
+ metadata=array_metadata,
131
+ store=store,
132
+ impl=get_codec_pipeline_impl(array_metadata, store, strict=strict),
133
+ python_impl=get_codec_pipeline_fallback(array_metadata, strict=strict),
81
134
  )
82
135
 
83
136
  @property
@@ -92,7 +145,7 @@ class ZarrsCodecPipeline(CodecPipeline):
92
145
  yield from self.codecs
93
146
 
94
147
  def validate(
95
- self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: ChunkGrid
148
+ self, *, shape: tuple[int, ...], dtype: ZDType, chunk_grid: ChunkGrid
96
149
  ) -> None:
97
150
  raise NotImplementedError("validate")
98
151
 
@@ -114,52 +167,94 @@ class ZarrsCodecPipeline(CodecPipeline):
114
167
  async def read(
115
168
  self,
116
169
  batch_info: Iterable[
117
- tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]
170
+ tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
118
171
  ],
119
172
  out: NDBuffer, # type: ignore
120
173
  drop_axes: tuple[int, ...] = (), # FIXME: unused
121
174
  ) -> None:
122
175
  # FIXME: Error if array is not in host memory
123
- out: NDArrayLike = out.as_ndarray_like()
124
176
  if not out.dtype.isnative:
125
177
  raise RuntimeError("Non-native byte order not supported")
126
178
  try:
179
+ if self.impl is None:
180
+ raise UnsupportedMetadataError()
181
+ self._raise_error_on_unsupported_batch_dtype(batch_info)
127
182
  chunks_desc = make_chunk_info_for_rust_with_indices(
128
183
  batch_info, drop_axes, out.shape
129
184
  )
130
- except (DiscontiguousArrayError, CollapsedDimensionError):
131
- chunks_desc = make_chunk_info_for_rust(batch_info)
185
+ except (
186
+ UnsupportedMetadataError,
187
+ DiscontiguousArrayError,
188
+ CollapsedDimensionError,
189
+ UnsupportedDataTypeError,
190
+ FillValueNoneError,
191
+ ):
192
+ if self.python_impl is None:
193
+ raise
194
+ await self.python_impl.read(batch_info, out, drop_axes)
195
+ return None
132
196
  else:
197
+ out: NDArrayLike = out.as_ndarray_like()
133
198
  await asyncio.to_thread(
134
199
  self.impl.retrieve_chunks_and_apply_index,
135
- chunks_desc,
200
+ chunks_desc.chunk_info_with_indices,
136
201
  out,
137
202
  )
138
203
  return None
139
- chunks = await asyncio.to_thread(self.impl.retrieve_chunks, chunks_desc)
140
- for chunk, (_, spec, selection, out_selection) in zip(chunks, batch_info):
141
- chunk_reshaped = chunk.view(spec.dtype).reshape(spec.shape)
142
- chunk_selected = chunk_reshaped[selection]
143
- if drop_axes:
144
- chunk_selected = np.squeeze(chunk_selected, axis=drop_axes)
145
- out[out_selection] = chunk_selected
146
204
 
147
205
  async def write(
148
206
  self,
149
207
  batch_info: Iterable[
150
- tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
208
+ tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
151
209
  ],
152
210
  value: NDBuffer, # type: ignore
153
211
  drop_axes: tuple[int, ...] = (),
154
212
  ) -> None:
155
- # FIXME: Error if array is not in host memory
156
- value: NDArrayLike | np.ndarray = value.as_ndarray_like()
157
- if not value.dtype.isnative:
158
- value = np.ascontiguousarray(value, dtype=value.dtype.newbyteorder("="))
159
- elif not value.flags.c_contiguous:
160
- value = np.ascontiguousarray(value)
161
- chunks_desc = make_chunk_info_for_rust_with_indices(
162
- batch_info, drop_axes, value.shape
163
- )
164
- await asyncio.to_thread(self.impl.store_chunks_with_indices, chunks_desc, value)
165
- return None
213
+ try:
214
+ if self.impl is None:
215
+ raise UnsupportedMetadataError()
216
+ self._raise_error_on_unsupported_batch_dtype(batch_info)
217
+ chunks_desc = make_chunk_info_for_rust_with_indices(
218
+ batch_info, drop_axes, value.shape
219
+ )
220
+ except (
221
+ UnsupportedMetadataError,
222
+ DiscontiguousArrayError,
223
+ CollapsedDimensionError,
224
+ UnsupportedDataTypeError,
225
+ FillValueNoneError,
226
+ ):
227
+ if self.python_impl is None:
228
+ raise
229
+ await self.python_impl.write(batch_info, value, drop_axes)
230
+ return None
231
+ else:
232
+ # FIXME: Error if array is not in host memory
233
+ value_np: NDArrayLike | np.ndarray = value.as_ndarray_like()
234
+ if not value_np.dtype.isnative:
235
+ value_np = np.ascontiguousarray(
236
+ value_np, dtype=value_np.dtype.newbyteorder("=")
237
+ )
238
+ elif not value_np.flags.c_contiguous:
239
+ value_np = np.ascontiguousarray(value_np)
240
+ await asyncio.to_thread(
241
+ self.impl.store_chunks_with_indices,
242
+ chunks_desc.chunk_info_with_indices,
243
+ value_np,
244
+ chunks_desc.write_empty_chunks,
245
+ )
246
+ return None
247
+
248
+ def _raise_error_on_unsupported_batch_dtype(
249
+ self,
250
+ batch_info: Iterable[
251
+ tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
252
+ ],
253
+ ):
254
+ # https://github.com/LDeakin/zarrs/blob/0532fe983b7b42b59dbf84e50a2fe5e6f7bad4ce/zarrs_metadata/src/v2_to_v3.rs#L289-L293 for VSUMm
255
+ # Further, our pipeline does not support variable-length objects due to limitations on decode_into, so object/np.dtypes.StringDType is also out
256
+ if any(
257
+ info.dtype.to_native_dtype().kind in {"V", "S", "U", "M", "m", "O", "T"}
258
+ for (_, info, _, _, _) in batch_info
259
+ ):
260
+ raise UnsupportedDataTypeError()
zarrs/utils.py CHANGED
@@ -2,20 +2,22 @@ from __future__ import annotations
2
2
 
3
3
  import operator
4
4
  import os
5
+ from dataclasses import dataclass
5
6
  from functools import reduce
6
- from typing import TYPE_CHECKING
7
+ from typing import TYPE_CHECKING, Any
7
8
 
8
9
  import numpy as np
10
+ from zarr.core.array_spec import ArraySpec
9
11
  from zarr.core.indexing import SelectorTuple, is_integer
10
12
 
11
- from zarrs._internal import Basic, WithSubset
13
+ from zarrs._internal import ChunkItem
12
14
 
13
15
  if TYPE_CHECKING:
14
16
  from collections.abc import Iterable
15
17
  from types import EllipsisType
16
18
 
17
19
  from zarr.abc.store import ByteGetter, ByteSetter
18
- from zarr.core.array_spec import ArraySpec
20
+ from zarr.dtype import ZDType
19
21
 
20
22
 
21
23
  # adapted from https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
@@ -31,6 +33,10 @@ class CollapsedDimensionError(Exception):
31
33
  pass
32
34
 
33
35
 
36
+ class FillValueNoneError(Exception):
37
+ pass
38
+
39
+
34
40
  # This is a (mostly) copy of the function from zarr.core.indexing that fixes:
35
41
  # DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated
36
42
  # TODO: Upstream this fix
@@ -134,17 +140,44 @@ def get_shape_for_selector(
134
140
  return resulting_shape_from_index(shape, selector_tuple, drop_axes, pad=pad)
135
141
 
136
142
 
143
+ def get_implicit_fill_value(dtype: ZDType, fill_value: Any) -> Any:
144
+ if fill_value is None:
145
+ fill_value = dtype.default_scalar()
146
+ return fill_value
147
+
148
+
149
+ @dataclass(frozen=True)
150
+ class RustChunkInfo:
151
+ chunk_info_with_indices: list[ChunkItem]
152
+ write_empty_chunks: bool
153
+
154
+
137
155
  def make_chunk_info_for_rust_with_indices(
138
156
  batch_info: Iterable[
139
- tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
157
+ tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
140
158
  ],
141
159
  drop_axes: tuple[int, ...],
142
160
  shape: tuple[int, ...],
143
- ) -> list[WithSubset]:
161
+ ) -> RustChunkInfo:
144
162
  shape = shape if shape else (1,) # constant array
145
- chunk_info_with_indices: list[WithSubset] = []
146
- for byte_getter, chunk_spec, chunk_selection, out_selection in batch_info:
147
- chunk_info = Basic(byte_getter, chunk_spec)
163
+ chunk_info_with_indices: list[ChunkItem] = []
164
+ write_empty_chunks: bool = True
165
+ for (
166
+ byte_getter,
167
+ chunk_spec,
168
+ chunk_selection,
169
+ out_selection,
170
+ _,
171
+ ) in batch_info:
172
+ write_empty_chunks = chunk_spec.config.write_empty_chunks
173
+ if chunk_spec.fill_value is None:
174
+ chunk_spec = ArraySpec(
175
+ chunk_spec.shape,
176
+ chunk_spec.dtype,
177
+ get_implicit_fill_value(chunk_spec.dtype, chunk_spec.fill_value),
178
+ chunk_spec.config,
179
+ chunk_spec.prototype,
180
+ )
148
181
  out_selection_as_slices = selector_tuple_to_slice_selection(out_selection)
149
182
  chunk_selection_as_slices = selector_tuple_to_slice_selection(chunk_selection)
150
183
  shape_chunk_selection_slices = get_shape_for_selector(
@@ -161,22 +194,12 @@ def make_chunk_info_for_rust_with_indices(
161
194
  f"{shape_chunk_selection} != {shape_chunk_selection_slices}"
162
195
  )
163
196
  chunk_info_with_indices.append(
164
- WithSubset(
165
- chunk_info,
197
+ ChunkItem(
198
+ key=byte_getter.path,
166
199
  chunk_subset=chunk_selection_as_slices,
200
+ chunk_shape=chunk_spec.shape,
167
201
  subset=out_selection_as_slices,
168
202
  shape=shape,
169
203
  )
170
204
  )
171
- return chunk_info_with_indices
172
-
173
-
174
- def make_chunk_info_for_rust(
175
- batch_info: Iterable[
176
- tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
177
- ],
178
- ) -> list[Basic]:
179
- return [
180
- Basic(byte_interface, chunk_spec)
181
- for (byte_interface, chunk_spec, _, _) in batch_info
182
- ]
205
+ return RustChunkInfo(chunk_info_with_indices, write_empty_chunks)
@@ -1,47 +1,16 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: zarrs
3
- Version: 0.1.1
3
+ Version: 0.2.2
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: Implementation :: CPython
6
6
  Classifier: Programming Language :: Python :: Implementation :: PyPy
7
7
  Classifier: Typing :: Typed
8
- Requires-Dist: asciitree
9
- Requires-Dist: numpy >=1.24
10
- Requires-Dist: fasteners
11
- Requires-Dist: numcodecs[msgpack] >=0.10.0
12
- Requires-Dist: fsspec >2024
13
- Requires-Dist: crc32c
14
- Requires-Dist: zstandard
15
- Requires-Dist: typing-extensions
16
- Requires-Dist: donfig
17
- Requires-Dist: pytest
18
- Requires-Dist: universal-pathlib >=0.2.0
19
- Requires-Dist: zarr >=3.0.0
20
- Requires-Dist: coverage ; extra == 'test'
21
- Requires-Dist: pytest ; extra == 'test'
22
- Requires-Dist: pytest-cov ; extra == 'test'
23
- Requires-Dist: msgpack ; extra == 'test'
24
- Requires-Dist: lmdb ; extra == 'test'
25
- Requires-Dist: s3fs ; extra == 'test'
26
- Requires-Dist: pytest-asyncio ; extra == 'test'
27
- Requires-Dist: moto[s3] ; extra == 'test'
28
- Requires-Dist: flask-cors ; extra == 'test'
29
- Requires-Dist: flask ; extra == 'test'
30
- Requires-Dist: requests ; extra == 'test'
31
- Requires-Dist: mypy ; extra == 'test'
32
- Requires-Dist: hypothesis ; extra == 'test'
33
- Requires-Dist: pytest-xdist ; extra == 'test'
34
- Requires-Dist: maturin ; extra == 'dev'
35
- Requires-Dist: pip ; extra == 'dev'
36
- Requires-Dist: pre-commit ; extra == 'dev'
37
- Requires-Dist: sphinx >=7.4.6 ; extra == 'doc'
38
- Requires-Dist: myst-parser ; extra == 'doc'
39
- Provides-Extra: test
40
- Provides-Extra: dev
41
- Provides-Extra: doc
8
+ Requires-Dist: numpy>=1.24
9
+ Requires-Dist: zarr>=3.1
42
10
  License-File: LICENSE
11
+ Summary: A CodecPipeline for zarr-python backed by the zarrs Rust crate
43
12
  Author: Ilan Gold, Lachlan Deakin, Philipp Angerer
44
- License: MIT
13
+ License-Expression: MIT
45
14
  Requires-Python: >=3.11
46
15
  Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
47
16
 
@@ -50,9 +19,9 @@ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
50
19
  [![PyPI](https://img.shields.io/pypi/v/zarrs.svg)](https://pypi.org/project/zarrs)
51
20
  [![Downloads](https://static.pepy.tech/badge/zarrs/month)](https://pepy.tech/project/zarrs)
52
21
  [![Downloads](https://static.pepy.tech/badge/zarrs)](https://pepy.tech/project/zarrs)
53
- [![Stars](https://img.shields.io/github/stars/ilan-gold/zarrs-python?style=flat&logo=github&color=yellow)](https://github.com/ilan-gold/zarrs-python/stargazers)
54
- ![CI](https://github.com/ilan-gold/zarrs-python/actions/workflows/ci.yml/badge.svg)
55
- ![CD](https://github.com/ilan-gold/zarrs-python/actions/workflows/cd.yml/badge.svg)
22
+ [![Stars](https://img.shields.io/github/stars/zarrs/zarrs-python?style=flat&logo=github&color=yellow)](https://github.com/zarrs/zarrs-python/stargazers)
23
+ ![CI](https://github.com/zarrs/zarrs-python/actions/workflows/ci.yml/badge.svg)
24
+ ![CD](https://github.com/zarrs/zarrs-python/actions/workflows/cd.yml/badge.svg)
56
25
 
57
26
  This project serves as a bridge between [`zarrs`](https://docs.rs/zarrs/latest/zarrs/) (Rust) and [`zarr`](https://zarr.readthedocs.io/en/latest/index.html) (`zarr-python`) via [`PyO3`](https://pyo3.rs/v0.22.3/). The main goal of the project is to speed up i/o (see [`zarr_benchmarks`](https://github.com/LDeakin/zarr_benchmarks)).
58
27
 
@@ -60,7 +29,6 @@ To use the project, simply install our package (which depends on `zarr-python>=3
60
29
 
61
30
  ```python
62
31
  import zarr
63
- import zarrs
64
32
  zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"})
65
33
  ```
66
34
 
@@ -68,16 +36,15 @@ You can then use your `zarr` as normal (with some caveats)!
68
36
 
69
37
  ## API
70
38
 
71
- We export a `ZarrsCodecPipeline` class so that `zarr-python` can use the class but it is not meant to be instantiated and we do not guarantee the stability of its API beyond what is required so that `zarr-python` can use it. Therefore, it is not documented here. We also export two errors, `DiscontiguousArrayError` and `CollapsedDimensionError` that can be thrown in the process of converting to indexers that `zarrs` can understand (see below for more details).
39
+ We export a `ZarrsCodecPipeline` class so that `zarr-python` can use the class but it is not meant to be instantiated and we do not guarantee the stability of its API beyond what is required so that `zarr-python` can use it. Therefore, it is not documented here.
72
40
 
73
41
  At the moment, we only support a subset of the `zarr-python` stores:
74
42
 
75
- - [x] [LocalStore](https://zarr.readthedocs.io/en/latest/_autoapi/zarr/storage/index.html#zarr.storage.LocalStore) (FileSystem)
76
- - [FsspecStore](https://zarr.readthedocs.io/en/latest/_autoapi/zarr/storage/index.html#zarr.storage.FsspecStore)
77
- - [x] [HTTPFileSystem](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.implementations.http.HTTPFileSystem)
43
+ - [`LocalStore`](https://zarr.readthedocs.io/en/latest/api/zarr/storage/#zarr.storage.LocalStore) (local filesystem)
44
+ - [`ObjectStore`](https://zarr.readthedocs.io/en/latest/user-guide/storage/#object-store) (cloud storage)
45
+ - [`HTTPFileSystem`](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.implementations.http.HTTPFileSystem) via [`FsspecStore`](https://zarr.readthedocs.io/en/latest/api/zarr/storage/#zarr.storage.FsspecStore)
78
46
 
79
47
  A `NotImplementedError` will be raised if a store is not supported.
80
- We intend to support more stores in the future: https://github.com/ilan-gold/zarrs-python/issues/44.
81
48
 
82
49
  ### Configuration
83
50
 
@@ -95,7 +62,11 @@ The `ZarrsCodecPipeline` specific options are:
95
62
  - `codec_pipeline.chunk_concurrent_minimum`: the minimum number of chunks retrieved/stored concurrently when balancing chunk/codec concurrency.
96
63
  - Defaults to 4 if `None`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#chunk-concurrent-minimum) for more info.
97
64
  - `codec_pipeline.validate_checksums`: enable checksum validation (e.g. with the CRC32C codec).
98
- - Defaults to true if `None`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#validate-checksums) for more info.
65
+ - Defaults to `True`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#validate-checksums) for more info.
66
+ - `codec_pipeline.direct_io`: enable `O_DIRECT` read/write. This requires support from both the operating system (currently only Linux) and the file system.
67
+ - Defaults to `False`.
68
+ - `codec_pipeline.strict`: raise exceptions for unsupported operations instead of falling back to the default codec pipeline of `zarr-python`.
69
+ - Defaults to `False`.
99
70
 
100
71
  For example:
101
72
  ```python
@@ -105,14 +76,15 @@ zarr.config.set({
105
76
  "codec_pipeline": {
106
77
  "path": "zarrs.ZarrsCodecPipeline",
107
78
  "validate_checksums": True,
108
- "store_empty_chunks": False,
109
79
  "chunk_concurrent_maximum": None,
110
80
  "chunk_concurrent_minimum": 4,
81
+ "direct_io": False,
82
+ "strict": False
111
83
  }
112
84
  })
113
85
  ```
114
86
 
115
- If the `ZarrsCodecPipeline` is pickled, and then un-pickled, and during that time one of `store_empty_chunks`, `chunk_concurrent_minimum`, `chunk_concurrent_maximum`, or `num_threads` has changed, the newly un-pickled version will pick up the new value. However, once a `ZarrsCodecPipeline` object has been instantiated, these values are then fixed. This may change in the future as guidance from the `zarr` community becomes clear.
87
+ If the `ZarrsCodecPipeline` is pickled, and then un-pickled, and during that time one of `chunk_concurrent_minimum`, `chunk_concurrent_maximum`, or `num_threads` has changed, the newly un-pickled version will pick up the new value. However, once a `ZarrsCodecPipeline` object has been instantiated, these values are then fixed. This may change in the future as guidance from the `zarr` community becomes clear.
116
88
 
117
89
  ## Concurrency
118
90
 
@@ -133,7 +105,7 @@ Chunk concurrency is typically favored because:
133
105
 
134
106
  ## Supported Indexing Methods
135
107
 
136
- We **do not** officially support the following indexing methods. Some of these methods may error out, others may not:
108
+ The following indexing methods will trigger a fallback to the default `zarr-python` pipeline:
137
109
 
138
110
  1. Any `oindex` or `vindex` integer `np.ndarray` indexing with dimensionality >=3 i.e.,
139
111
 
@@ -163,7 +135,10 @@ We **do not** officially support the following indexing methods. Some of these
163
135
  arr[0:10, ..., 0:5]
164
136
  ```
165
137
 
166
- Otherwise, we believe that we support your indexing case: slices, ints, and all integer `np.ndarray` indices in 2D for reading, contiguous integer `np.ndarray` indices along one axis for writing etc. Please file an issue if you believe we have more holes in our coverage than we are aware of or you wish to contribute! For example, we have an [issue in zarrs for integer-array indexing](https://github.com/LDeakin/zarrs/issues/52) that would unblock a lot of these issues!
167
138
 
168
- That being said, using non-contiguous integer `np.ndarray` indexing for reads may not be as fast as expected given the performance of other supported methods. Until `zarrs` supports integer indexing, only fetching chunks is done in `rust` while indexing then occurs in `python`.
139
+ Furthermore, indexing numeric data with anything other than a contiguous selection (i.e., slices or an `np.ndarray` of consecutive integers) will fall back to the default `zarr-python` implementation.
140
+
141
+ Please file an issue if you believe we have more holes in our coverage than we are aware of or you wish to contribute! For example, we have an [issue in zarrs for integer-array indexing](https://github.com/LDeakin/zarrs/issues/52) that would unblock the use of the Rust pipeline for that use case (very useful for mini-batch training perhaps!).
142
+
143
+ Further, arrays that use codecs not supported by `zarrs` will also automatically fall back to the Python implementation.
169
144
 
@@ -0,0 +1,11 @@
1
+ zarrs-0.2.2.dist-info/METADATA,sha256=8K1AOS_SVQgRLzQ2rWtNbJxCAb12XmTLRrT6QklQgOI,8054
2
+ zarrs-0.2.2.dist-info/WHEEL,sha256=N8W3-0eDM6igWj-H12r7VkxoMaJIqJLxUyWCFstEaGg,105
3
+ zarrs-0.2.2.dist-info/entry_points.txt,sha256=EzI6yCIUPDHBHzjDdexuGGYbOLXf8x2ICokOJXnuX3k,68
4
+ zarrs-0.2.2.dist-info/licenses/LICENSE,sha256=vwIsJjEfVFehyyqcb7B3dAXAniaFMmk8u7IoiJAfBJ4,1099
5
+ zarrs/__init__.py,sha256=lRVtAPzCzJkGs4vQrW4UgANq-pC-khS0ZF7HTj4__Hg,489
6
+ zarrs/_internal.abi3.so,sha256=tnP5IiuDmhfwB15cX4yTyu1mVZgShtJy1lnS87TzK1o,14928348
7
+ zarrs/_internal.pyi,sha256=a_D4yx99r4xeQX1ntY_A_Q4wVmLeLwJZHWAQV_mVu9A,1308
8
+ zarrs/pipeline.py,sha256=YfB13GWNfxELerXVtJ_ipFwSL7bN-YuPys6jCB9lnms,9008
9
+ zarrs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ zarrs/utils.py,sha256=W2XCjJDVrdHYJgtVaRKN533Ljw1MF7o0YwXuz5ZAk2g,7020
11
+ zarrs-0.2.2.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: maturin (1.7.4)
2
+ Generator: maturin (1.9.4)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp311-abi3-macosx_10_12_x86_64
@@ -0,0 +1,2 @@
1
+ [zarr.codec_pipeline]
2
+ zarrs.codec_pipeline=zarrs:ZarrsCodecPipeline
@@ -1,10 +0,0 @@
1
- zarrs-0.1.1.dist-info/METADATA,sha256=jwXKTiA6MIVeg8i_zrb3I4H7GTsqG5lA-H8GicZUDTE,9215
2
- zarrs-0.1.1.dist-info/WHEEL,sha256=LZygbeT1PTQw7a9tONPp78bbG4FZc86U59Z0RFJcoR8,105
3
- zarrs-0.1.1.dist-info/licenses/LICENSE,sha256=vwIsJjEfVFehyyqcb7B3dAXAniaFMmk8u7IoiJAfBJ4,1099
4
- zarrs/__init__.py,sha256=4oWtWDZO8r7z4Uh7Fy_brmkxXDpULQdgjlA0iFw98eA,573
5
- zarrs/_internal.pyi,sha256=fWWKwTSf8bHFlK6XusyHlDzahTlcjcfth6c1nJyrYdE,1375
6
- zarrs/utils.py,sha256=PNZWAjU46s2_ZjWN4cxr2JsZHl88H2NAiaPzFf7SPUE,6403
7
- zarrs/pipeline.py,sha256=M2r7QIauWIyQrNkd722osMZOeQ1LqYP1aX8SypplaAY,6021
8
- zarrs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- zarrs/_internal.abi3.so,sha256=fUxAxUdypeC3THpaKKO1nppCP0traxV7bFsmGB63oLc,6804296
10
- zarrs-0.1.1.dist-info/RECORD,,