zarrs 0.1.2__cp311-abi3-macosx_11_0_arm64.whl → 0.2.2__cp311-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zarrs/__init__.py CHANGED
@@ -1,5 +1,3 @@
1
- from zarr.registry import register_pipeline
2
-
3
1
  from ._internal import __version__
4
2
  from .pipeline import ZarrsCodecPipeline as _ZarrsCodecPipeline
5
3
  from .utils import CollapsedDimensionError, DiscontiguousArrayError
@@ -10,8 +8,6 @@ class ZarrsCodecPipeline(_ZarrsCodecPipeline):
10
8
  pass
11
9
 
12
10
 
13
- register_pipeline(ZarrsCodecPipeline)
14
-
15
11
  __all__ = [
16
12
  "ZarrsCodecPipeline",
17
13
  "DiscontiguousArrayError",
zarrs/_internal.abi3.so CHANGED
Binary file
zarrs/_internal.pyi CHANGED
@@ -1,53 +1,44 @@
1
1
  # This file is automatically generated by pyo3_stub_gen
2
2
  # ruff: noqa: E501, F401
3
3
 
4
+ import builtins
4
5
  import typing
5
- from enum import Enum, auto
6
6
 
7
7
  import numpy.typing
8
+ import zarr.abc.store
8
9
 
9
- class Basic:
10
- def __new__(cls, byte_interface: typing.Any, chunk_spec: typing.Any): ...
11
- ...
10
+ @typing.final
11
+ class ChunkItem:
12
+ def __new__(
13
+ cls,
14
+ key: builtins.str,
15
+ chunk_subset: typing.Sequence[slice],
16
+ chunk_shape: typing.Sequence[builtins.int],
17
+ subset: typing.Sequence[slice],
18
+ shape: typing.Sequence[builtins.int],
19
+ ) -> ChunkItem: ...
12
20
 
21
+ @typing.final
13
22
  class CodecPipelineImpl:
14
23
  def __new__(
15
24
  cls,
16
- metadata,
25
+ array_metadata: builtins.str,
26
+ store_config: zarr.abc.store.Store,
17
27
  *,
18
- validate_checksums=...,
19
- store_empty_chunks=...,
20
- chunk_concurrent_minimum=...,
21
- chunk_concurrent_maximum=...,
22
- num_threads=...,
23
- ): ...
28
+ validate_checksums: builtins.bool = False,
29
+ chunk_concurrent_minimum: builtins.int | None = None,
30
+ chunk_concurrent_maximum: builtins.int | None = None,
31
+ num_threads: builtins.int | None = None,
32
+ direct_io: builtins.bool = False,
33
+ ) -> CodecPipelineImpl: ...
24
34
  def retrieve_chunks_and_apply_index(
25
35
  self,
26
- chunk_descriptions: typing.Sequence[WithSubset],
36
+ chunk_descriptions: typing.Sequence[ChunkItem],
27
37
  value: numpy.typing.NDArray[typing.Any],
28
38
  ) -> None: ...
29
39
  def store_chunks_with_indices(
30
40
  self,
31
- chunk_descriptions: typing.Sequence[WithSubset],
41
+ chunk_descriptions: typing.Sequence[ChunkItem],
32
42
  value: numpy.typing.NDArray[typing.Any],
43
+ write_empty_chunks: builtins.bool,
33
44
  ) -> None: ...
34
-
35
- class FilesystemStoreConfig:
36
- root: str
37
-
38
- class HttpStoreConfig:
39
- endpoint: str
40
-
41
- class WithSubset:
42
- def __new__(
43
- cls,
44
- item: Basic,
45
- chunk_subset: typing.Sequence[slice],
46
- subset: typing.Sequence[slice],
47
- shape: typing.Sequence[int],
48
- ): ...
49
- ...
50
-
51
- class StoreConfig(Enum):
52
- Filesystem = auto()
53
- Http = auto()
zarrs/pipeline.py CHANGED
@@ -2,27 +2,29 @@ from __future__ import annotations
2
2
 
3
3
  import asyncio
4
4
  import json
5
- import re
6
5
  from dataclasses import dataclass
7
6
  from typing import TYPE_CHECKING, TypedDict
7
+ from warnings import warn
8
8
 
9
9
  import numpy as np
10
10
  from zarr.abc.codec import Codec, CodecPipeline
11
+ from zarr.codecs._v2 import V2Codec
11
12
  from zarr.core import BatchedCodecPipeline
12
13
  from zarr.core.config import config
14
+ from zarr.core.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata
13
15
 
14
16
  if TYPE_CHECKING:
15
- from collections.abc import Generator, Iterable, Iterator
16
- from typing import Any, Self
17
+ from collections.abc import Iterable, Iterator
18
+ from typing import Self
17
19
 
18
- from zarr.abc.store import ByteGetter, ByteSetter
20
+ from zarr.abc.store import ByteGetter, ByteSetter, Store
19
21
  from zarr.core.array_spec import ArraySpec
20
22
  from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer
21
23
  from zarr.core.chunk_grids import ChunkGrid
22
- from zarr.core.common import ChunkCoords
23
24
  from zarr.core.indexing import SelectorTuple
25
+ from zarr.dtype import ZDType
24
26
 
25
- from ._internal import CodecPipelineImpl, codec_metadata_v2_to_v3
27
+ from ._internal import CodecPipelineImpl
26
28
  from .utils import (
27
29
  CollapsedDimensionError,
28
30
  DiscontiguousArrayError,
@@ -39,12 +41,19 @@ class UnsupportedMetadataError(Exception):
39
41
  pass
40
42
 
41
43
 
42
- def get_codec_pipeline_impl(codec_metadata_json: str) -> CodecPipelineImpl | None:
44
+ def get_codec_pipeline_impl(
45
+ metadata: ArrayMetadata, store: Store, *, strict: bool
46
+ ) -> CodecPipelineImpl | None:
43
47
  try:
48
+ array_metadata_json = json.dumps(metadata.to_dict())
49
+ # Maintain old behavior: https://github.com/zarrs/zarrs-python/tree/b36ba797cafec77f5f41a25316be02c718a2b4f8?tab=readme-ov-file#configuration
50
+ validate_checksums = config.get("codec_pipeline.validate_checksums", True)
51
+ if validate_checksums is None:
52
+ validate_checksums = True
44
53
  return CodecPipelineImpl(
45
- codec_metadata_json,
46
- validate_checksums=config.get("codec_pipeline.validate_checksums", None),
47
- store_empty_chunks=config.get("array.write_empty_chunks", None),
54
+ array_metadata_json,
55
+ store_config=store,
56
+ validate_checksums=validate_checksums,
48
57
  chunk_concurrent_minimum=config.get(
49
58
  "codec_pipeline.chunk_concurrent_minimum", None
50
59
  ),
@@ -52,34 +61,27 @@ def get_codec_pipeline_impl(codec_metadata_json: str) -> CodecPipelineImpl | Non
52
61
  "codec_pipeline.chunk_concurrent_maximum", None
53
62
  ),
54
63
  num_threads=config.get("threading.max_workers", None),
64
+ direct_io=config.get("codec_pipeline.direct_io", False),
55
65
  )
56
66
  except TypeError as e:
57
- if re.match(r"codec (delta|zlib) is not supported", str(e)):
58
- return None
59
- else:
60
- raise e
61
-
62
-
63
- def codecs_to_dict(codecs: Iterable[Codec]) -> Generator[dict[str, Any], None, None]:
64
- for codec in codecs:
65
- if codec.__class__.__name__ == "V2Codec":
66
- codec_dict = codec.to_dict()
67
- if codec_dict.get("filters", None) is not None:
68
- filters = [
69
- json.dumps(filter.get_config())
70
- for filter in codec_dict.get("filters")
71
- ]
72
- else:
73
- filters = None
74
- if codec_dict.get("compressor", None) is not None:
75
- compressor = json.dumps(codec_dict.get("compressor").get_config())
76
- else:
77
- compressor = None
78
- codecs_v3 = codec_metadata_v2_to_v3(filters, compressor)
79
- for codec in codecs_v3:
80
- yield json.loads(codec)
81
- else:
82
- yield codec.to_dict()
67
+ if strict:
68
+ raise UnsupportedMetadataError() from e
69
+
70
+ warn(
71
+ f"Array is unsupported by ZarrsCodecPipeline: {e}",
72
+ category=UserWarning,
73
+ )
74
+ return None
75
+
76
+
77
+ def get_codec_pipeline_fallback(
78
+ metadata: ArrayMetadata, *, strict: bool
79
+ ) -> BatchedCodecPipeline | None:
80
+ if strict:
81
+ return None
82
+ else:
83
+ codecs = array_metadata_to_codecs(metadata)
84
+ return BatchedCodecPipeline.from_codecs(codecs)
83
85
 
84
86
 
85
87
  class ZarrsCodecPipelineState(TypedDict):
@@ -87,38 +89,48 @@ class ZarrsCodecPipelineState(TypedDict):
87
89
  codecs: tuple[Codec, ...]
88
90
 
89
91
 
92
+ def array_metadata_to_codecs(metadata: ArrayMetadata) -> list[Codec]:
93
+ if isinstance(metadata, ArrayV3Metadata):
94
+ return metadata.codecs
95
+ elif isinstance(metadata, ArrayV2Metadata):
96
+ v2_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor)
97
+ return [v2_codec]
98
+
99
+
90
100
  @dataclass
91
101
  class ZarrsCodecPipeline(CodecPipeline):
92
- codecs: tuple[Codec, ...]
102
+ metadata: ArrayMetadata
103
+ store: Store
93
104
  impl: CodecPipelineImpl | None
94
- codec_metadata_json: str
95
- python_impl: BatchedCodecPipeline
105
+ python_impl: BatchedCodecPipeline | None
96
106
 
97
107
  def __getstate__(self) -> ZarrsCodecPipelineState:
98
- return {"codec_metadata_json": self.codec_metadata_json, "codecs": self.codecs}
108
+ return {"metadata": self.metadata, "store": self.store}
99
109
 
100
110
  def __setstate__(self, state: ZarrsCodecPipelineState):
101
- self.codecs = state["codecs"]
102
- self.codec_metadata_json = state["codec_metadata_json"]
103
- self.impl = get_codec_pipeline_impl(self.codec_metadata_json)
104
- self.python_impl = BatchedCodecPipeline.from_codecs(self.codecs)
111
+ self.metadata = state["metadata"]
112
+ self.store = state["store"]
113
+ strict = config.get("codec_pipeline.strict", False)
114
+ self.impl = get_codec_pipeline_impl(self.metadata, self.store, strict=strict)
115
+ self.python_impl = get_codec_pipeline_fallback(self.metadata, strict=strict)
105
116
 
106
117
  def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
107
- raise NotImplementedError("evolve_from_array_spec")
118
+ return self
108
119
 
109
120
  @classmethod
110
121
  def from_codecs(cls, codecs: Iterable[Codec]) -> Self:
111
- codec_metadata = list(codecs_to_dict(codecs))
112
- codec_metadata_json = json.dumps(codec_metadata)
113
- # TODO: upstream zarr-python has not settled on how to deal with configs yet
114
- # Should they be checked when an array is created, or when an operation is performed?
115
- # https://github.com/zarr-developers/zarr-python/issues/2409
116
- # https://github.com/zarr-developers/zarr-python/pull/2429#issuecomment-2566976567
122
+ return BatchedCodecPipeline.from_codecs(codecs)
123
+
124
+ @classmethod
125
+ def from_array_metadata_and_store(
126
+ cls, array_metadata: ArrayMetadata, store: Store
127
+ ) -> Self:
128
+ strict = config.get("codec_pipeline.strict", False)
117
129
  return cls(
118
- codec_metadata_json=codec_metadata_json,
119
- codecs=tuple(codecs),
120
- impl=get_codec_pipeline_impl(codec_metadata_json),
121
- python_impl=BatchedCodecPipeline.from_codecs(codecs),
130
+ metadata=array_metadata,
131
+ store=store,
132
+ impl=get_codec_pipeline_impl(array_metadata, store, strict=strict),
133
+ python_impl=get_codec_pipeline_fallback(array_metadata, strict=strict),
122
134
  )
123
135
 
124
136
  @property
@@ -133,7 +145,7 @@ class ZarrsCodecPipeline(CodecPipeline):
133
145
  yield from self.codecs
134
146
 
135
147
  def validate(
136
- self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: ChunkGrid
148
+ self, *, shape: tuple[int, ...], dtype: ZDType, chunk_grid: ChunkGrid
137
149
  ) -> None:
138
150
  raise NotImplementedError("validate")
139
151
 
@@ -155,7 +167,7 @@ class ZarrsCodecPipeline(CodecPipeline):
155
167
  async def read(
156
168
  self,
157
169
  batch_info: Iterable[
158
- tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]
170
+ tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
159
171
  ],
160
172
  out: NDBuffer, # type: ignore
161
173
  drop_axes: tuple[int, ...] = (), # FIXME: unused
@@ -177,13 +189,15 @@ class ZarrsCodecPipeline(CodecPipeline):
177
189
  UnsupportedDataTypeError,
178
190
  FillValueNoneError,
179
191
  ):
192
+ if self.python_impl is None:
193
+ raise
180
194
  await self.python_impl.read(batch_info, out, drop_axes)
181
195
  return None
182
196
  else:
183
197
  out: NDArrayLike = out.as_ndarray_like()
184
198
  await asyncio.to_thread(
185
199
  self.impl.retrieve_chunks_and_apply_index,
186
- chunks_desc,
200
+ chunks_desc.chunk_info_with_indices,
187
201
  out,
188
202
  )
189
203
  return None
@@ -191,7 +205,7 @@ class ZarrsCodecPipeline(CodecPipeline):
191
205
  async def write(
192
206
  self,
193
207
  batch_info: Iterable[
194
- tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
208
+ tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
195
209
  ],
196
210
  value: NDBuffer, # type: ignore
197
211
  drop_axes: tuple[int, ...] = (),
@@ -210,6 +224,8 @@ class ZarrsCodecPipeline(CodecPipeline):
210
224
  UnsupportedDataTypeError,
211
225
  FillValueNoneError,
212
226
  ):
227
+ if self.python_impl is None:
228
+ raise
213
229
  await self.python_impl.write(batch_info, value, drop_axes)
214
230
  return None
215
231
  else:
@@ -222,20 +238,23 @@ class ZarrsCodecPipeline(CodecPipeline):
222
238
  elif not value_np.flags.c_contiguous:
223
239
  value_np = np.ascontiguousarray(value_np)
224
240
  await asyncio.to_thread(
225
- self.impl.store_chunks_with_indices, chunks_desc, value_np
241
+ self.impl.store_chunks_with_indices,
242
+ chunks_desc.chunk_info_with_indices,
243
+ value_np,
244
+ chunks_desc.write_empty_chunks,
226
245
  )
227
246
  return None
228
247
 
229
248
  def _raise_error_on_unsupported_batch_dtype(
230
249
  self,
231
250
  batch_info: Iterable[
232
- tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
251
+ tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
233
252
  ],
234
253
  ):
235
254
  # https://github.com/LDeakin/zarrs/blob/0532fe983b7b42b59dbf84e50a2fe5e6f7bad4ce/zarrs_metadata/src/v2_to_v3.rs#L289-L293 for VSUMm
236
- # Further, our pipeline does not support variable-length objects due to limitations on decode_into, so object is also out
255
+ # Further, our pipeline does not support variable-length objects due to limitations on decode_into, so object/np.dtypes.StringDType is also out
237
256
  if any(
238
- info.dtype.kind in {"V", "S", "U", "M", "m", "O"}
239
- for (_, info, _, _) in batch_info
257
+ info.dtype.to_native_dtype().kind in {"V", "S", "U", "M", "m", "O", "T"}
258
+ for (_, info, _, _, _) in batch_info
240
259
  ):
241
260
  raise UnsupportedDataTypeError()
zarrs/utils.py CHANGED
@@ -2,21 +2,22 @@ from __future__ import annotations
2
2
 
3
3
  import operator
4
4
  import os
5
+ from dataclasses import dataclass
5
6
  from functools import reduce
6
7
  from typing import TYPE_CHECKING, Any
7
8
 
8
9
  import numpy as np
9
10
  from zarr.core.array_spec import ArraySpec
10
11
  from zarr.core.indexing import SelectorTuple, is_integer
11
- from zarr.core.metadata.v2 import _default_fill_value
12
12
 
13
- from zarrs._internal import Basic, WithSubset
13
+ from zarrs._internal import ChunkItem
14
14
 
15
15
  if TYPE_CHECKING:
16
16
  from collections.abc import Iterable
17
17
  from types import EllipsisType
18
18
 
19
19
  from zarr.abc.store import ByteGetter, ByteSetter
20
+ from zarr.dtype import ZDType
20
21
 
21
22
 
22
23
  # adapted from https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
@@ -139,22 +140,36 @@ def get_shape_for_selector(
139
140
  return resulting_shape_from_index(shape, selector_tuple, drop_axes, pad=pad)
140
141
 
141
142
 
142
- def get_implicit_fill_value(dtype: np.dtype, fill_value: Any) -> Any:
143
+ def get_implicit_fill_value(dtype: ZDType, fill_value: Any) -> Any:
143
144
  if fill_value is None:
144
- fill_value = _default_fill_value(dtype)
145
+ fill_value = dtype.default_scalar()
145
146
  return fill_value
146
147
 
147
148
 
149
+ @dataclass(frozen=True)
150
+ class RustChunkInfo:
151
+ chunk_info_with_indices: list[ChunkItem]
152
+ write_empty_chunks: bool
153
+
154
+
148
155
  def make_chunk_info_for_rust_with_indices(
149
156
  batch_info: Iterable[
150
- tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
157
+ tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
151
158
  ],
152
159
  drop_axes: tuple[int, ...],
153
160
  shape: tuple[int, ...],
154
- ) -> list[WithSubset]:
161
+ ) -> RustChunkInfo:
155
162
  shape = shape if shape else (1,) # constant array
156
- chunk_info_with_indices: list[WithSubset] = []
157
- for byte_getter, chunk_spec, chunk_selection, out_selection in batch_info:
163
+ chunk_info_with_indices: list[ChunkItem] = []
164
+ write_empty_chunks: bool = True
165
+ for (
166
+ byte_getter,
167
+ chunk_spec,
168
+ chunk_selection,
169
+ out_selection,
170
+ _,
171
+ ) in batch_info:
172
+ write_empty_chunks = chunk_spec.config.write_empty_chunks
158
173
  if chunk_spec.fill_value is None:
159
174
  chunk_spec = ArraySpec(
160
175
  chunk_spec.shape,
@@ -163,7 +178,6 @@ def make_chunk_info_for_rust_with_indices(
163
178
  chunk_spec.config,
164
179
  chunk_spec.prototype,
165
180
  )
166
- chunk_info = Basic(byte_getter, chunk_spec)
167
181
  out_selection_as_slices = selector_tuple_to_slice_selection(out_selection)
168
182
  chunk_selection_as_slices = selector_tuple_to_slice_selection(chunk_selection)
169
183
  shape_chunk_selection_slices = get_shape_for_selector(
@@ -180,11 +194,12 @@ def make_chunk_info_for_rust_with_indices(
180
194
  f"{shape_chunk_selection} != {shape_chunk_selection_slices}"
181
195
  )
182
196
  chunk_info_with_indices.append(
183
- WithSubset(
184
- chunk_info,
197
+ ChunkItem(
198
+ key=byte_getter.path,
185
199
  chunk_subset=chunk_selection_as_slices,
200
+ chunk_shape=chunk_spec.shape,
186
201
  subset=out_selection_as_slices,
187
202
  shape=shape,
188
203
  )
189
204
  )
190
- return chunk_info_with_indices
205
+ return RustChunkInfo(chunk_info_with_indices, write_empty_chunks)
@@ -1,47 +1,16 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: zarrs
3
- Version: 0.1.2
3
+ Version: 0.2.2
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: Implementation :: CPython
6
6
  Classifier: Programming Language :: Python :: Implementation :: PyPy
7
7
  Classifier: Typing :: Typed
8
- Requires-Dist: asciitree
9
- Requires-Dist: numpy >=1.24
10
- Requires-Dist: fasteners
11
- Requires-Dist: numcodecs[msgpack] >=0.10.0
12
- Requires-Dist: fsspec >2024
13
- Requires-Dist: crc32c
14
- Requires-Dist: zstandard
15
- Requires-Dist: typing-extensions
16
- Requires-Dist: donfig
17
- Requires-Dist: pytest
18
- Requires-Dist: universal-pathlib >=0.2.0
19
- Requires-Dist: zarr
20
- Requires-Dist: coverage ; extra == 'test'
21
- Requires-Dist: pytest ; extra == 'test'
22
- Requires-Dist: pytest-cov ; extra == 'test'
23
- Requires-Dist: msgpack ; extra == 'test'
24
- Requires-Dist: lmdb ; extra == 'test'
25
- Requires-Dist: s3fs ; extra == 'test'
26
- Requires-Dist: pytest-asyncio ; extra == 'test'
27
- Requires-Dist: moto[s3] ; extra == 'test'
28
- Requires-Dist: flask-cors ; extra == 'test'
29
- Requires-Dist: flask ; extra == 'test'
30
- Requires-Dist: requests ; extra == 'test'
31
- Requires-Dist: mypy ; extra == 'test'
32
- Requires-Dist: hypothesis ; extra == 'test'
33
- Requires-Dist: pytest-xdist ; extra == 'test'
34
- Requires-Dist: maturin ; extra == 'dev'
35
- Requires-Dist: pip ; extra == 'dev'
36
- Requires-Dist: pre-commit ; extra == 'dev'
37
- Requires-Dist: sphinx >=7.4.6 ; extra == 'doc'
38
- Requires-Dist: myst-parser ; extra == 'doc'
39
- Provides-Extra: test
40
- Provides-Extra: dev
41
- Provides-Extra: doc
8
+ Requires-Dist: numpy>=1.24
9
+ Requires-Dist: zarr>=3.1
42
10
  License-File: LICENSE
11
+ Summary: A CodecPipeline for zarr-python backed by the zarrs Rust crate
43
12
  Author: Ilan Gold, Lachlan Deakin, Philipp Angerer
44
- License: MIT
13
+ License-Expression: MIT
45
14
  Requires-Python: >=3.11
46
15
  Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
47
16
 
@@ -50,9 +19,9 @@ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
50
19
  [![PyPI](https://img.shields.io/pypi/v/zarrs.svg)](https://pypi.org/project/zarrs)
51
20
  [![Downloads](https://static.pepy.tech/badge/zarrs/month)](https://pepy.tech/project/zarrs)
52
21
  [![Downloads](https://static.pepy.tech/badge/zarrs)](https://pepy.tech/project/zarrs)
53
- [![Stars](https://img.shields.io/github/stars/ilan-gold/zarrs-python?style=flat&logo=github&color=yellow)](https://github.com/ilan-gold/zarrs-python/stargazers)
54
- ![CI](https://github.com/ilan-gold/zarrs-python/actions/workflows/ci.yml/badge.svg)
55
- ![CD](https://github.com/ilan-gold/zarrs-python/actions/workflows/cd.yml/badge.svg)
22
+ [![Stars](https://img.shields.io/github/stars/zarrs/zarrs-python?style=flat&logo=github&color=yellow)](https://github.com/zarrs/zarrs-python/stargazers)
23
+ ![CI](https://github.com/zarrs/zarrs-python/actions/workflows/ci.yml/badge.svg)
24
+ ![CD](https://github.com/zarrs/zarrs-python/actions/workflows/cd.yml/badge.svg)
56
25
 
57
26
  This project serves as a bridge between [`zarrs`](https://docs.rs/zarrs/latest/zarrs/) (Rust) and [`zarr`](https://zarr.readthedocs.io/en/latest/index.html) (`zarr-python`) via [`PyO3`](https://pyo3.rs/v0.22.3/). The main goal of the project is to speed up I/O (see [`zarr_benchmarks`](https://github.com/LDeakin/zarr_benchmarks)).
58
27
 
@@ -60,7 +29,6 @@ To use the project, simply install our package (which depends on `zarr-python>=3
60
29
 
61
30
  ```python
62
31
  import zarr
63
- import zarrs
64
32
  zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"})
65
33
  ```
66
34
 
@@ -72,12 +40,11 @@ We export a `ZarrsCodecPipeline` class so that `zarr-python` can use the class b
72
40
 
73
41
  At the moment, we only support a subset of the `zarr-python` stores:
74
42
 
75
- - [x] [LocalStore](https://zarr.readthedocs.io/en/latest/_autoapi/zarr/storage/index.html#zarr.storage.LocalStore) (FileSystem)
76
- - [FsspecStore](https://zarr.readthedocs.io/en/latest/_autoapi/zarr/storage/index.html#zarr.storage.FsspecStore)
77
- - [x] [HTTPFileSystem](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.implementations.http.HTTPFileSystem)
43
+ - [`LocalStore`](https://zarr.readthedocs.io/en/latest/api/zarr/storage/#zarr.storage.LocalStore) (local filesystem)
44
+ - [`ObjectStore`](https://zarr.readthedocs.io/en/latest/user-guide/storage/#object-store) (cloud storage)
45
+ - [`HTTPFileSystem`](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.implementations.http.HTTPFileSystem) via [`FsspecStore`](https://zarr.readthedocs.io/en/latest/api/zarr/storage/#zarr.storage.FsspecStore)
78
46
 
79
47
  A `NotImplementedError` will be raised if a store is not supported.
80
- We intend to support more stores in the future: https://github.com/ilan-gold/zarrs-python/issues/44.
81
48
 
82
49
  ### Configuration
83
50
 
@@ -95,7 +62,11 @@ The `ZarrsCodecPipeline` specific options are:
95
62
  - `codec_pipeline.chunk_concurrent_minimum`: the minimum number of chunks retrieved/stored concurrently when balancing chunk/codec concurrency.
96
63
  - Defaults to 4 if `None`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#chunk-concurrent-minimum) for more info.
97
64
  - `codec_pipeline.validate_checksums`: enable checksum validation (e.g. with the CRC32C codec).
98
- - Defaults to true if `None`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#validate-checksums) for more info.
65
+ - Defaults to `True`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#validate-checksums) for more info.
66
+ - `codec_pipeline.direct_io`: enable `O_DIRECT` read/write. This requires support from both the operating system (currently only Linux) and the file system.
67
+ - Defaults to `False`.
68
+ - `codec_pipeline.strict`: raise exceptions for unsupported operations instead of falling back to the default codec pipeline of `zarr-python`.
69
+ - Defaults to `False`.
99
70
 
100
71
  For example:
101
72
  ```python
@@ -105,14 +76,15 @@ zarr.config.set({
105
76
  "codec_pipeline": {
106
77
  "path": "zarrs.ZarrsCodecPipeline",
107
78
  "validate_checksums": True,
108
- "store_empty_chunks": False,
109
79
  "chunk_concurrent_maximum": None,
110
80
  "chunk_concurrent_minimum": 4,
81
+ "direct_io": False,
82
+ "strict": False
111
83
  }
112
84
  })
113
85
  ```
114
86
 
115
- If the `ZarrsCodecPipeline` is pickled, and then un-pickled, and during that time one of `store_empty_chunks`, `chunk_concurrent_minimum`, `chunk_concurrent_maximum`, or `num_threads` has changed, the newly un-pickled version will pick up the new value. However, once a `ZarrsCodecPipeline` object has been instantiated, these values are then fixed. This may change in the future as guidance from the `zarr` community becomes clear.
87
+ If a `ZarrsCodecPipeline` is pickled and later un-pickled, and one of `chunk_concurrent_minimum`, `chunk_concurrent_maximum`, or `num_threads` has changed in the meantime, the newly un-pickled instance will pick up the new value. However, once a `ZarrsCodecPipeline` object has been instantiated, these values are fixed for that instance. This may change in the future as guidance from the `zarr` community becomes clear.
116
88
 
117
89
  ## Concurrency
118
90
 
@@ -0,0 +1,11 @@
1
+ zarrs-0.2.2.dist-info/METADATA,sha256=8K1AOS_SVQgRLzQ2rWtNbJxCAb12XmTLRrT6QklQgOI,8054
2
+ zarrs-0.2.2.dist-info/WHEEL,sha256=cVaoL47Ex1FxzwnkO_WCjy3a1Wl6mtZbBPTvTiNCHdY,103
3
+ zarrs-0.2.2.dist-info/entry_points.txt,sha256=EzI6yCIUPDHBHzjDdexuGGYbOLXf8x2ICokOJXnuX3k,68
4
+ zarrs-0.2.2.dist-info/licenses/LICENSE,sha256=vwIsJjEfVFehyyqcb7B3dAXAniaFMmk8u7IoiJAfBJ4,1099
5
+ zarrs/__init__.py,sha256=lRVtAPzCzJkGs4vQrW4UgANq-pC-khS0ZF7HTj4__Hg,489
6
+ zarrs/_internal.abi3.so,sha256=LZocl0zbfU1hDhFn6G_V5YpY3SQ8blerNZFcjAQ0iOE,13652224
7
+ zarrs/_internal.pyi,sha256=a_D4yx99r4xeQX1ntY_A_Q4wVmLeLwJZHWAQV_mVu9A,1308
8
+ zarrs/pipeline.py,sha256=YfB13GWNfxELerXVtJ_ipFwSL7bN-YuPys6jCB9lnms,9008
9
+ zarrs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ zarrs/utils.py,sha256=W2XCjJDVrdHYJgtVaRKN533Ljw1MF7o0YwXuz5ZAk2g,7020
11
+ zarrs-0.2.2.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: maturin (1.7.4)
2
+ Generator: maturin (1.9.4)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp311-abi3-macosx_11_0_arm64
@@ -0,0 +1,2 @@
1
+ [zarr.codec_pipeline]
2
+ zarrs.codec_pipeline=zarrs:ZarrsCodecPipeline
@@ -1,10 +0,0 @@
1
- zarrs-0.1.2.dist-info/METADATA,sha256=KRSJHcPn_6Db3Fnf_3hm8zFTfu929LazZRSc8aTGp6Y,8842
2
- zarrs-0.1.2.dist-info/WHEEL,sha256=8JKaGEbIvFI3ESICMOCnA2alRxR2MhR5AYZnD5AAa6k,103
3
- zarrs-0.1.2.dist-info/licenses/LICENSE,sha256=vwIsJjEfVFehyyqcb7B3dAXAniaFMmk8u7IoiJAfBJ4,1099
4
- zarrs/__init__.py,sha256=4oWtWDZO8r7z4Uh7Fy_brmkxXDpULQdgjlA0iFw98eA,573
5
- zarrs/_internal.pyi,sha256=-2Vy3NqBAS2g-ShALrzknK7Kqxpha17FjO6o35khlyY,1226
6
- zarrs/utils.py,sha256=-SgsM1P6D8ClRgLdyUEvUUvQ-23i6Ie6SiTQ9aaWjSg,6705
7
- zarrs/pipeline.py,sha256=-0xLL-Z6p_7nEq-BD8SiOmUy8mp-yjrIsD_lvsL3X68,8610
8
- zarrs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- zarrs/_internal.abi3.so,sha256=5YxB-4wxOfzOHvp-PSrHDNI-MklBhfx7EYf1m6Hydmo,6197968
10
- zarrs-0.1.2.dist-info/RECORD,,