zarrs 0.2.0__cp311-abi3-manylinux_2_28_aarch64.whl → 0.2.2__cp311-abi3-manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zarrs/__init__.py CHANGED
@@ -1,5 +1,3 @@
1
- from zarr.registry import register_pipeline
2
-
3
1
  from ._internal import __version__
4
2
  from .pipeline import ZarrsCodecPipeline as _ZarrsCodecPipeline
5
3
  from .utils import CollapsedDimensionError, DiscontiguousArrayError
@@ -10,8 +8,6 @@ class ZarrsCodecPipeline(_ZarrsCodecPipeline):
10
8
  pass
11
9
 
12
10
 
13
- register_pipeline(ZarrsCodecPipeline)
14
-
15
11
  __all__ = [
16
12
  "ZarrsCodecPipeline",
17
13
  "DiscontiguousArrayError",
zarrs/_internal.abi3.so CHANGED
Binary file
zarrs/_internal.pyi CHANGED
@@ -3,51 +3,42 @@
3
3
 
4
4
  import builtins
5
5
  import typing
6
- from enum import Enum
7
6
 
8
7
  import numpy.typing
8
+ import zarr.abc.store
9
9
 
10
- class Basic:
11
- def __new__(cls, byte_interface: typing.Any, chunk_spec: typing.Any) -> Basic: ...
10
+ @typing.final
11
+ class ChunkItem:
12
+ def __new__(
13
+ cls,
14
+ key: builtins.str,
15
+ chunk_subset: typing.Sequence[slice],
16
+ chunk_shape: typing.Sequence[builtins.int],
17
+ subset: typing.Sequence[slice],
18
+ shape: typing.Sequence[builtins.int],
19
+ ) -> ChunkItem: ...
12
20
 
21
+ @typing.final
13
22
  class CodecPipelineImpl:
14
23
  def __new__(
15
24
  cls,
16
25
  array_metadata: builtins.str,
17
- store_config: StoreConfig,
26
+ store_config: zarr.abc.store.Store,
18
27
  *,
19
- validate_checksums: builtins.bool | None = None,
28
+ validate_checksums: builtins.bool = False,
20
29
  chunk_concurrent_minimum: builtins.int | None = None,
21
30
  chunk_concurrent_maximum: builtins.int | None = None,
22
31
  num_threads: builtins.int | None = None,
32
+ direct_io: builtins.bool = False,
23
33
  ) -> CodecPipelineImpl: ...
24
34
  def retrieve_chunks_and_apply_index(
25
35
  self,
26
- chunk_descriptions: typing.Sequence[WithSubset],
36
+ chunk_descriptions: typing.Sequence[ChunkItem],
27
37
  value: numpy.typing.NDArray[typing.Any],
28
38
  ) -> None: ...
29
39
  def store_chunks_with_indices(
30
40
  self,
31
- chunk_descriptions: typing.Sequence[WithSubset],
41
+ chunk_descriptions: typing.Sequence[ChunkItem],
32
42
  value: numpy.typing.NDArray[typing.Any],
33
43
  write_empty_chunks: builtins.bool,
34
44
  ) -> None: ...
35
-
36
- class FilesystemStoreConfig:
37
- root: builtins.str
38
-
39
- class HttpStoreConfig:
40
- endpoint: builtins.str
41
-
42
- class WithSubset:
43
- def __new__(
44
- cls,
45
- item: Basic,
46
- chunk_subset: typing.Sequence[slice],
47
- subset: typing.Sequence[slice],
48
- shape: typing.Sequence[builtins.int],
49
- ) -> WithSubset: ...
50
-
51
- class StoreConfig(Enum):
52
- Filesystem = ...
53
- Http = ...
zarrs/pipeline.py CHANGED
@@ -14,18 +14,17 @@ from zarr.core.config import config
14
14
  from zarr.core.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata
15
15
 
16
16
  if TYPE_CHECKING:
17
- from collections.abc import Generator, Iterable, Iterator
18
- from typing import Any, Self
17
+ from collections.abc import Iterable, Iterator
18
+ from typing import Self
19
19
 
20
20
  from zarr.abc.store import ByteGetter, ByteSetter, Store
21
21
  from zarr.core.array_spec import ArraySpec
22
22
  from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer
23
23
  from zarr.core.chunk_grids import ChunkGrid
24
- from zarr.core.common import ChunkCoords
25
24
  from zarr.core.indexing import SelectorTuple
26
25
  from zarr.dtype import ZDType
27
26
 
28
- from ._internal import CodecPipelineImpl, codec_metadata_v2_to_v3
27
+ from ._internal import CodecPipelineImpl
29
28
  from .utils import (
30
29
  CollapsedDimensionError,
31
30
  DiscontiguousArrayError,
@@ -43,14 +42,18 @@ class UnsupportedMetadataError(Exception):
43
42
 
44
43
 
45
44
  def get_codec_pipeline_impl(
46
- metadata: ArrayMetadata, store: Store
45
+ metadata: ArrayMetadata, store: Store, *, strict: bool
47
46
  ) -> CodecPipelineImpl | None:
48
47
  try:
49
48
  array_metadata_json = json.dumps(metadata.to_dict())
49
+ # Maintain old behavior: https://github.com/zarrs/zarrs-python/tree/b36ba797cafec77f5f41a25316be02c718a2b4f8?tab=readme-ov-file#configuration
50
+ validate_checksums = config.get("codec_pipeline.validate_checksums", True)
51
+ if validate_checksums is None:
52
+ validate_checksums = True
50
53
  return CodecPipelineImpl(
51
54
  array_metadata_json,
52
55
  store_config=store,
53
- validate_checksums=config.get("codec_pipeline.validate_checksums", None),
56
+ validate_checksums=validate_checksums,
54
57
  chunk_concurrent_minimum=config.get(
55
58
  "codec_pipeline.chunk_concurrent_minimum", None
56
59
  ),
@@ -58,8 +61,12 @@ def get_codec_pipeline_impl(
58
61
  "codec_pipeline.chunk_concurrent_maximum", None
59
62
  ),
60
63
  num_threads=config.get("threading.max_workers", None),
64
+ direct_io=config.get("codec_pipeline.direct_io", False),
61
65
  )
62
66
  except TypeError as e:
67
+ if strict:
68
+ raise UnsupportedMetadataError() from e
69
+
63
70
  warn(
64
71
  f"Array is unsupported by ZarrsCodecPipeline: {e}",
65
72
  category=UserWarning,
@@ -67,27 +74,14 @@ def get_codec_pipeline_impl(
67
74
  return None
68
75
 
69
76
 
70
- def codecs_to_dict(codecs: Iterable[Codec]) -> Generator[dict[str, Any], None, None]:
71
- for codec in codecs:
72
- if codec.__class__.__name__ == "V2Codec":
73
- codec_dict = codec.to_dict()
74
- if codec_dict.get("filters", None) is not None:
75
- filters = [
76
- json.dumps(filter.get_config())
77
- for filter in codec_dict.get("filters")
78
- ]
79
- else:
80
- filters = None
81
- if codec_dict.get("compressor", None) is not None:
82
- compressor_json = codec_dict.get("compressor").get_config()
83
- compressor = json.dumps(compressor_json)
84
- else:
85
- compressor = None
86
- codecs_v3 = codec_metadata_v2_to_v3(filters, compressor)
87
- for codec in codecs_v3:
88
- yield json.loads(codec)
89
- else:
90
- yield codec.to_dict()
77
+ def get_codec_pipeline_fallback(
78
+ metadata: ArrayMetadata, *, strict: bool
79
+ ) -> BatchedCodecPipeline | None:
80
+ if strict:
81
+ return None
82
+ else:
83
+ codecs = array_metadata_to_codecs(metadata)
84
+ return BatchedCodecPipeline.from_codecs(codecs)
91
85
 
92
86
 
93
87
  class ZarrsCodecPipelineState(TypedDict):
@@ -108,7 +102,7 @@ class ZarrsCodecPipeline(CodecPipeline):
108
102
  metadata: ArrayMetadata
109
103
  store: Store
110
104
  impl: CodecPipelineImpl | None
111
- python_impl: BatchedCodecPipeline
105
+ python_impl: BatchedCodecPipeline | None
112
106
 
113
107
  def __getstate__(self) -> ZarrsCodecPipelineState:
114
108
  return {"metadata": self.metadata, "store": self.store}
@@ -116,9 +110,9 @@ class ZarrsCodecPipeline(CodecPipeline):
116
110
  def __setstate__(self, state: ZarrsCodecPipelineState):
117
111
  self.metadata = state["metadata"]
118
112
  self.store = state["store"]
119
- self.impl = get_codec_pipeline_impl(self.metadata, self.store)
120
- codecs = array_metadata_to_codecs(self.metadata)
121
- self.python_impl = BatchedCodecPipeline.from_codecs(codecs)
113
+ strict = config.get("codec_pipeline.strict", False)
114
+ self.impl = get_codec_pipeline_impl(self.metadata, self.store, strict=strict)
115
+ self.python_impl = get_codec_pipeline_fallback(self.metadata, strict=strict)
122
116
 
123
117
  def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
124
118
  return self
@@ -131,12 +125,12 @@ class ZarrsCodecPipeline(CodecPipeline):
131
125
  def from_array_metadata_and_store(
132
126
  cls, array_metadata: ArrayMetadata, store: Store
133
127
  ) -> Self:
134
- codecs = array_metadata_to_codecs(array_metadata)
128
+ strict = config.get("codec_pipeline.strict", False)
135
129
  return cls(
136
130
  metadata=array_metadata,
137
131
  store=store,
138
- impl=get_codec_pipeline_impl(array_metadata, store),
139
- python_impl=BatchedCodecPipeline.from_codecs(codecs),
132
+ impl=get_codec_pipeline_impl(array_metadata, store, strict=strict),
133
+ python_impl=get_codec_pipeline_fallback(array_metadata, strict=strict),
140
134
  )
141
135
 
142
136
  @property
@@ -151,7 +145,7 @@ class ZarrsCodecPipeline(CodecPipeline):
151
145
  yield from self.codecs
152
146
 
153
147
  def validate(
154
- self, *, shape: ChunkCoords, dtype: ZDType, chunk_grid: ChunkGrid
148
+ self, *, shape: tuple[int, ...], dtype: ZDType, chunk_grid: ChunkGrid
155
149
  ) -> None:
156
150
  raise NotImplementedError("validate")
157
151
 
@@ -195,6 +189,8 @@ class ZarrsCodecPipeline(CodecPipeline):
195
189
  UnsupportedDataTypeError,
196
190
  FillValueNoneError,
197
191
  ):
192
+ if self.python_impl is None:
193
+ raise
198
194
  await self.python_impl.read(batch_info, out, drop_axes)
199
195
  return None
200
196
  else:
@@ -228,6 +224,8 @@ class ZarrsCodecPipeline(CodecPipeline):
228
224
  UnsupportedDataTypeError,
229
225
  FillValueNoneError,
230
226
  ):
227
+ if self.python_impl is None:
228
+ raise
231
229
  await self.python_impl.write(batch_info, value, drop_axes)
232
230
  return None
233
231
  else:
zarrs/utils.py CHANGED
@@ -10,7 +10,7 @@ import numpy as np
10
10
  from zarr.core.array_spec import ArraySpec
11
11
  from zarr.core.indexing import SelectorTuple, is_integer
12
12
 
13
- from zarrs._internal import Basic, WithSubset
13
+ from zarrs._internal import ChunkItem
14
14
 
15
15
  if TYPE_CHECKING:
16
16
  from collections.abc import Iterable
@@ -148,7 +148,7 @@ def get_implicit_fill_value(dtype: ZDType, fill_value: Any) -> Any:
148
148
 
149
149
  @dataclass(frozen=True)
150
150
  class RustChunkInfo:
151
- chunk_info_with_indices: list[WithSubset]
151
+ chunk_info_with_indices: list[ChunkItem]
152
152
  write_empty_chunks: bool
153
153
 
154
154
 
@@ -158,9 +158,9 @@ def make_chunk_info_for_rust_with_indices(
158
158
  ],
159
159
  drop_axes: tuple[int, ...],
160
160
  shape: tuple[int, ...],
161
- ) -> list[WithSubset]:
161
+ ) -> RustChunkInfo:
162
162
  shape = shape if shape else (1,) # constant array
163
- chunk_info_with_indices: list[WithSubset] = []
163
+ chunk_info_with_indices: list[ChunkItem] = []
164
164
  write_empty_chunks: bool = True
165
165
  for (
166
166
  byte_getter,
@@ -178,7 +178,6 @@ def make_chunk_info_for_rust_with_indices(
178
178
  chunk_spec.config,
179
179
  chunk_spec.prototype,
180
180
  )
181
- chunk_info = Basic(byte_getter, chunk_spec)
182
181
  out_selection_as_slices = selector_tuple_to_slice_selection(out_selection)
183
182
  chunk_selection_as_slices = selector_tuple_to_slice_selection(chunk_selection)
184
183
  shape_chunk_selection_slices = get_shape_for_selector(
@@ -195,9 +194,10 @@ def make_chunk_info_for_rust_with_indices(
195
194
  f"{shape_chunk_selection} != {shape_chunk_selection_slices}"
196
195
  )
197
196
  chunk_info_with_indices.append(
198
- WithSubset(
199
- chunk_info,
197
+ ChunkItem(
198
+ key=byte_getter.path,
200
199
  chunk_subset=chunk_selection_as_slices,
200
+ chunk_shape=chunk_spec.shape,
201
201
  subset=out_selection_as_slices,
202
202
  shape=shape,
203
203
  )
@@ -1,15 +1,16 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: zarrs
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: Implementation :: CPython
6
6
  Classifier: Programming Language :: Python :: Implementation :: PyPy
7
7
  Classifier: Typing :: Typed
8
- Requires-Dist: numpy >=1.24
9
- Requires-Dist: zarr >=3.1
8
+ Requires-Dist: numpy>=1.24
9
+ Requires-Dist: zarr>=3.1
10
10
  License-File: LICENSE
11
+ Summary: A CodecPipeline for zarr-python backed by the zarrs Rust crate
11
12
  Author: Ilan Gold, Lachlan Deakin, Philipp Angerer
12
- License: MIT
13
+ License-Expression: MIT
13
14
  Requires-Python: >=3.11
14
15
  Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
15
16
 
@@ -28,7 +29,6 @@ To use the project, simply install our package (which depends on `zarr-python>=3
28
29
 
29
30
  ```python
30
31
  import zarr
31
- import zarrs
32
32
  zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"})
33
33
  ```
34
34
 
@@ -40,12 +40,11 @@ We export a `ZarrsCodecPipeline` class so that `zarr-python` can use the class b
40
40
 
41
41
  At the moment, we only support a subset of the `zarr-python` stores:
42
42
 
43
- - [x] [LocalStore](https://zarr.readthedocs.io/en/latest/_autoapi/zarr/storage/index.html#zarr.storage.LocalStore) (FileSystem)
44
- - [FsspecStore](https://zarr.readthedocs.io/en/latest/_autoapi/zarr/storage/index.html#zarr.storage.FsspecStore)
45
- - [x] [HTTPFileSystem](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.implementations.http.HTTPFileSystem)
43
+ - [`LocalStore`](https://zarr.readthedocs.io/en/latest/api/zarr/storage/#zarr.storage.LocalStore) (local filesystem)
44
+ - [`ObjectStore`](https://zarr.readthedocs.io/en/latest/user-guide/storage/#object-store) (cloud storage)
45
+ - [`HTTPFileSystem`](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.implementations.http.HTTPFileSystem) via [`FsspecStore`](https://zarr.readthedocs.io/en/latest/api/zarr/storage/#zarr.storage.FsspecStore)
46
46
 
47
47
  A `NotImplementedError` will be raised if a store is not supported.
48
- We intend to support more stores in the future: https://github.com/zarrs/zarrs-python/issues/44.
49
48
 
50
49
  ### Configuration
51
50
 
@@ -63,7 +62,11 @@ The `ZarrsCodecPipeline` specific options are:
63
62
  - `codec_pipeline.chunk_concurrent_minimum`: the minimum number of chunks retrieved/stored concurrently when balancing chunk/codec concurrency.
64
63
  - Defaults to 4 if `None`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#chunk-concurrent-minimum) for more info.
65
64
  - `codec_pipeline.validate_checksums`: enable checksum validation (e.g. with the CRC32C codec).
66
- - Defaults to true if `None`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#validate-checksums) for more info.
65
+ - Defaults to `True`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#validate-checksums) for more info.
66
+ - `codec_pipeline.direct_io`: enable `O_DIRECT` read/write. This requires support from both the operating system (currently only Linux) and the file system.
67
+ - Defaults to `False`.
68
+ - `codec_pipeline.strict`: raise exceptions for unsupported operations instead of falling back to the default codec pipeline of `zarr-python`.
69
+ - Defaults to `False`.
67
70
 
68
71
  For example:
69
72
  ```python
@@ -75,6 +78,8 @@ zarr.config.set({
75
78
  "validate_checksums": True,
76
79
  "chunk_concurrent_maximum": None,
77
80
  "chunk_concurrent_minimum": 4,
81
+ "direct_io": False,
82
+ "strict": False
78
83
  }
79
84
  })
80
85
  ```
@@ -0,0 +1,11 @@
1
+ zarrs-0.2.2.dist-info/METADATA,sha256=8K1AOS_SVQgRLzQ2rWtNbJxCAb12XmTLRrT6QklQgOI,8054
2
+ zarrs-0.2.2.dist-info/WHEEL,sha256=pQhpX1zEYym7lHVMSudaqVXS44siTf8XBrjmsFdZ39M,108
3
+ zarrs-0.2.2.dist-info/entry_points.txt,sha256=EzI6yCIUPDHBHzjDdexuGGYbOLXf8x2ICokOJXnuX3k,68
4
+ zarrs-0.2.2.dist-info/licenses/LICENSE,sha256=vwIsJjEfVFehyyqcb7B3dAXAniaFMmk8u7IoiJAfBJ4,1099
5
+ zarrs/__init__.py,sha256=lRVtAPzCzJkGs4vQrW4UgANq-pC-khS0ZF7HTj4__Hg,489
6
+ zarrs/_internal.abi3.so,sha256=2oa6VVFlcQ-C3qa6UK_LKKzRFc36GhmZfzKXqiJnvLA,15343720
7
+ zarrs/_internal.pyi,sha256=a_D4yx99r4xeQX1ntY_A_Q4wVmLeLwJZHWAQV_mVu9A,1308
8
+ zarrs/pipeline.py,sha256=YfB13GWNfxELerXVtJ_ipFwSL7bN-YuPys6jCB9lnms,9008
9
+ zarrs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ zarrs/utils.py,sha256=W2XCjJDVrdHYJgtVaRKN533Ljw1MF7o0YwXuz5ZAk2g,7020
11
+ zarrs-0.2.2.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: maturin (1.7.4)
2
+ Generator: maturin (1.9.4)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp311-abi3-manylinux_2_28_aarch64
@@ -0,0 +1,2 @@
1
+ [zarr.codec_pipeline]
2
+ zarrs.codec_pipeline=zarrs:ZarrsCodecPipeline
@@ -1,10 +0,0 @@
1
- zarrs-0.2.0.dist-info/METADATA,sha256=XxZ7DwO-fwgs0UKqvISPYRv2C58rtWNZTFnUlQ4GN0M,7626
2
- zarrs-0.2.0.dist-info/WHEEL,sha256=vlVK9XRfjbZsffx4VpMMyFV477nGomrs3TkCmK8HoZM,108
3
- zarrs-0.2.0.dist-info/licenses/LICENSE,sha256=vwIsJjEfVFehyyqcb7B3dAXAniaFMmk8u7IoiJAfBJ4,1099
4
- zarrs/__init__.py,sha256=4oWtWDZO8r7z4Uh7Fy_brmkxXDpULQdgjlA0iFw98eA,573
5
- zarrs/pipeline.py,sha256=9cog6mm1BMwxgRsKyO67mn_DyRdqsDrxykrTpyMyclo,9099
6
- zarrs/_internal.pyi,sha256=4WAVy2Upg3y_DX0j8AvdAr0_FAMeReDkS57XFc3Y9xE,1448
7
- zarrs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- zarrs/utils.py,sha256=My01lFsg9ZFxkMajbTKHz7audaSxHJW6CviDjqo2tDs,7030
9
- zarrs/_internal.abi3.so,sha256=J5mngjmu3CE_IywczCJY9MM0NYh3VLXyOqlXN8As41o,9929656
10
- zarrs-0.2.0.dist-info/RECORD,,