zarrs 0.1.5__cp311-abi3-macosx_10_12_x86_64.whl → 0.2.1__cp311-abi3-macosx_10_12_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of zarrs might be problematic. Click here for more details.

zarrs/__init__.py CHANGED
@@ -1,5 +1,3 @@
1
- from zarr.registry import register_pipeline
2
-
3
1
  from ._internal import __version__
4
2
  from .pipeline import ZarrsCodecPipeline as _ZarrsCodecPipeline
5
3
  from .utils import CollapsedDimensionError, DiscontiguousArrayError
@@ -10,8 +8,6 @@ class ZarrsCodecPipeline(_ZarrsCodecPipeline):
10
8
  pass
11
9
 
12
10
 
13
- register_pipeline(ZarrsCodecPipeline)
14
-
15
11
  __all__ = [
16
12
  "ZarrsCodecPipeline",
17
13
  "DiscontiguousArrayError",
zarrs/_internal.abi3.so CHANGED
Binary file
zarrs/_internal.pyi CHANGED
@@ -3,9 +3,9 @@
3
3
 
4
4
  import builtins
5
5
  import typing
6
- from enum import Enum
7
6
 
8
7
  import numpy.typing
8
+ import zarr.abc.store
9
9
 
10
10
  class Basic:
11
11
  def __new__(cls, byte_interface: typing.Any, chunk_spec: typing.Any) -> Basic: ...
@@ -13,13 +13,14 @@ class Basic:
13
13
  class CodecPipelineImpl:
14
14
  def __new__(
15
15
  cls,
16
- metadata: builtins.str,
16
+ array_metadata: builtins.str,
17
+ store_config: zarr.abc.store.Store,
17
18
  *,
18
- validate_checksums: builtins.bool | None = None,
19
- store_empty_chunks: builtins.bool | None = None,
19
+ validate_checksums: builtins.bool = False,
20
20
  chunk_concurrent_minimum: builtins.int | None = None,
21
21
  chunk_concurrent_maximum: builtins.int | None = None,
22
22
  num_threads: builtins.int | None = None,
23
+ direct_io: builtins.bool = False,
23
24
  ) -> CodecPipelineImpl: ...
24
25
  def retrieve_chunks_and_apply_index(
25
26
  self,
@@ -30,14 +31,9 @@ class CodecPipelineImpl:
30
31
  self,
31
32
  chunk_descriptions: typing.Sequence[WithSubset],
32
33
  value: numpy.typing.NDArray[typing.Any],
34
+ write_empty_chunks: builtins.bool,
33
35
  ) -> None: ...
34
36
 
35
- class FilesystemStoreConfig:
36
- root: builtins.str
37
-
38
- class HttpStoreConfig:
39
- endpoint: builtins.str
40
-
41
37
  class WithSubset:
42
38
  def __new__(
43
39
  cls,
@@ -46,7 +42,3 @@ class WithSubset:
46
42
  subset: typing.Sequence[slice],
47
43
  shape: typing.Sequence[builtins.int],
48
44
  ) -> WithSubset: ...
49
-
50
- class StoreConfig(Enum):
51
- Filesystem = ...
52
- Http = ...
zarrs/pipeline.py CHANGED
@@ -2,27 +2,29 @@ from __future__ import annotations
2
2
 
3
3
  import asyncio
4
4
  import json
5
- import re
6
5
  from dataclasses import dataclass
7
6
  from typing import TYPE_CHECKING, TypedDict
7
+ from warnings import warn
8
8
 
9
9
  import numpy as np
10
10
  from zarr.abc.codec import Codec, CodecPipeline
11
+ from zarr.codecs._v2 import V2Codec
11
12
  from zarr.core import BatchedCodecPipeline
12
13
  from zarr.core.config import config
14
+ from zarr.core.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata
13
15
 
14
16
  if TYPE_CHECKING:
15
- from collections.abc import Generator, Iterable, Iterator
16
- from typing import Any, Self
17
+ from collections.abc import Iterable, Iterator
18
+ from typing import Self
17
19
 
18
- from zarr.abc.store import ByteGetter, ByteSetter
20
+ from zarr.abc.store import ByteGetter, ByteSetter, Store
19
21
  from zarr.core.array_spec import ArraySpec
20
22
  from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer
21
23
  from zarr.core.chunk_grids import ChunkGrid
22
- from zarr.core.common import ChunkCoords
23
24
  from zarr.core.indexing import SelectorTuple
25
+ from zarr.dtype import ZDType
24
26
 
25
- from ._internal import CodecPipelineImpl, codec_metadata_v2_to_v3
27
+ from ._internal import CodecPipelineImpl
26
28
  from .utils import (
27
29
  CollapsedDimensionError,
28
30
  DiscontiguousArrayError,
@@ -39,12 +41,19 @@ class UnsupportedMetadataError(Exception):
39
41
  pass
40
42
 
41
43
 
42
- def get_codec_pipeline_impl(codec_metadata_json: str) -> CodecPipelineImpl | None:
44
+ def get_codec_pipeline_impl(
45
+ metadata: ArrayMetadata, store: Store
46
+ ) -> CodecPipelineImpl | None:
43
47
  try:
48
+ array_metadata_json = json.dumps(metadata.to_dict())
49
+ # Maintain old behavior: https://github.com/zarrs/zarrs-python/tree/b36ba797cafec77f5f41a25316be02c718a2b4f8?tab=readme-ov-file#configuration
50
+ validate_checksums = config.get("codec_pipeline.validate_checksums", True)
51
+ if validate_checksums is None:
52
+ validate_checksums = True
44
53
  return CodecPipelineImpl(
45
- codec_metadata_json,
46
- validate_checksums=config.get("codec_pipeline.validate_checksums", None),
47
- store_empty_chunks=config.get("array.write_empty_chunks", None),
54
+ array_metadata_json,
55
+ store_config=store,
56
+ validate_checksums=validate_checksums,
48
57
  chunk_concurrent_minimum=config.get(
49
58
  "codec_pipeline.chunk_concurrent_minimum", None
50
59
  ),
@@ -52,35 +61,14 @@ def get_codec_pipeline_impl(codec_metadata_json: str) -> CodecPipelineImpl | Non
52
61
  "codec_pipeline.chunk_concurrent_maximum", None
53
62
  ),
54
63
  num_threads=config.get("threading.max_workers", None),
64
+ direct_io=config.get("codec_pipeline.direct_io", False),
55
65
  )
56
66
  except TypeError as e:
57
- if re.match(r"codec (delta|zlib) is not supported", str(e)):
58
- return None
59
- else:
60
- raise e
61
-
62
-
63
- def codecs_to_dict(codecs: Iterable[Codec]) -> Generator[dict[str, Any], None, None]:
64
- for codec in codecs:
65
- if codec.__class__.__name__ == "V2Codec":
66
- codec_dict = codec.to_dict()
67
- if codec_dict.get("filters", None) is not None:
68
- filters = [
69
- json.dumps(filter.get_config())
70
- for filter in codec_dict.get("filters")
71
- ]
72
- else:
73
- filters = None
74
- if codec_dict.get("compressor", None) is not None:
75
- compressor_json = codec_dict.get("compressor").get_config()
76
- compressor = json.dumps(compressor_json)
77
- else:
78
- compressor = None
79
- codecs_v3 = codec_metadata_v2_to_v3(filters, compressor)
80
- for codec in codecs_v3:
81
- yield json.loads(codec)
82
- else:
83
- yield codec.to_dict()
67
+ warn(
68
+ f"Array is unsupported by ZarrsCodecPipeline: {e}",
69
+ category=UserWarning,
70
+ )
71
+ return None
84
72
 
85
73
 
86
74
  class ZarrsCodecPipelineState(TypedDict):
@@ -88,37 +76,47 @@ class ZarrsCodecPipelineState(TypedDict):
88
76
  codecs: tuple[Codec, ...]
89
77
 
90
78
 
79
+ def array_metadata_to_codecs(metadata: ArrayMetadata) -> list[Codec]:
80
+ if isinstance(metadata, ArrayV3Metadata):
81
+ return metadata.codecs
82
+ elif isinstance(metadata, ArrayV2Metadata):
83
+ v2_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor)
84
+ return [v2_codec]
85
+
86
+
91
87
  @dataclass
92
88
  class ZarrsCodecPipeline(CodecPipeline):
93
- codecs: tuple[Codec, ...]
89
+ metadata: ArrayMetadata
90
+ store: Store
94
91
  impl: CodecPipelineImpl | None
95
- codec_metadata_json: str
96
92
  python_impl: BatchedCodecPipeline
97
93
 
98
94
  def __getstate__(self) -> ZarrsCodecPipelineState:
99
- return {"codec_metadata_json": self.codec_metadata_json, "codecs": self.codecs}
95
+ return {"metadata": self.metadata, "store": self.store}
100
96
 
101
97
  def __setstate__(self, state: ZarrsCodecPipelineState):
102
- self.codecs = state["codecs"]
103
- self.codec_metadata_json = state["codec_metadata_json"]
104
- self.impl = get_codec_pipeline_impl(self.codec_metadata_json)
105
- self.python_impl = BatchedCodecPipeline.from_codecs(self.codecs)
98
+ self.metadata = state["metadata"]
99
+ self.store = state["store"]
100
+ self.impl = get_codec_pipeline_impl(self.metadata, self.store)
101
+ codecs = array_metadata_to_codecs(self.metadata)
102
+ self.python_impl = BatchedCodecPipeline.from_codecs(codecs)
106
103
 
107
104
  def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
108
- raise NotImplementedError("evolve_from_array_spec")
105
+ return self
109
106
 
110
107
  @classmethod
111
108
  def from_codecs(cls, codecs: Iterable[Codec]) -> Self:
112
- codec_metadata = list(codecs_to_dict(codecs))
113
- codec_metadata_json = json.dumps(codec_metadata)
114
- # TODO: upstream zarr-python has not settled on how to deal with configs yet
115
- # Should they be checked when an array is created, or when an operation is performed?
116
- # https://github.com/zarr-developers/zarr-python/issues/2409
117
- # https://github.com/zarr-developers/zarr-python/pull/2429#issuecomment-2566976567
109
+ return BatchedCodecPipeline.from_codecs(codecs)
110
+
111
+ @classmethod
112
+ def from_array_metadata_and_store(
113
+ cls, array_metadata: ArrayMetadata, store: Store
114
+ ) -> Self:
115
+ codecs = array_metadata_to_codecs(array_metadata)
118
116
  return cls(
119
- codec_metadata_json=codec_metadata_json,
120
- codecs=tuple(codecs),
121
- impl=get_codec_pipeline_impl(codec_metadata_json),
117
+ metadata=array_metadata,
118
+ store=store,
119
+ impl=get_codec_pipeline_impl(array_metadata, store),
122
120
  python_impl=BatchedCodecPipeline.from_codecs(codecs),
123
121
  )
124
122
 
@@ -134,7 +132,7 @@ class ZarrsCodecPipeline(CodecPipeline):
134
132
  yield from self.codecs
135
133
 
136
134
  def validate(
137
- self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: ChunkGrid
135
+ self, *, shape: tuple[int, ...], dtype: ZDType, chunk_grid: ChunkGrid
138
136
  ) -> None:
139
137
  raise NotImplementedError("validate")
140
138
 
@@ -184,7 +182,7 @@ class ZarrsCodecPipeline(CodecPipeline):
184
182
  out: NDArrayLike = out.as_ndarray_like()
185
183
  await asyncio.to_thread(
186
184
  self.impl.retrieve_chunks_and_apply_index,
187
- chunks_desc,
185
+ chunks_desc.chunk_info_with_indices,
188
186
  out,
189
187
  )
190
188
  return None
@@ -223,7 +221,10 @@ class ZarrsCodecPipeline(CodecPipeline):
223
221
  elif not value_np.flags.c_contiguous:
224
222
  value_np = np.ascontiguousarray(value_np)
225
223
  await asyncio.to_thread(
226
- self.impl.store_chunks_with_indices, chunks_desc, value_np
224
+ self.impl.store_chunks_with_indices,
225
+ chunks_desc.chunk_info_with_indices,
226
+ value_np,
227
+ chunks_desc.write_empty_chunks,
227
228
  )
228
229
  return None
229
230
 
@@ -236,7 +237,7 @@ class ZarrsCodecPipeline(CodecPipeline):
236
237
  # https://github.com/LDeakin/zarrs/blob/0532fe983b7b42b59dbf84e50a2fe5e6f7bad4ce/zarrs_metadata/src/v2_to_v3.rs#L289-L293 for VSUMm
237
238
  # Further, our pipeline does not support variable-length objects due to limitations on decode_into, so object/np.dtypes.StringDType is also out
238
239
  if any(
239
- info.dtype.kind in {"V", "S", "U", "M", "m", "O", "T"}
240
+ info.dtype.to_native_dtype().kind in {"V", "S", "U", "M", "m", "O", "T"}
240
241
  for (_, info, _, _, _) in batch_info
241
242
  ):
242
243
  raise UnsupportedDataTypeError()
zarrs/utils.py CHANGED
@@ -2,13 +2,13 @@ from __future__ import annotations
2
2
 
3
3
  import operator
4
4
  import os
5
+ from dataclasses import dataclass
5
6
  from functools import reduce
6
7
  from typing import TYPE_CHECKING, Any
7
8
 
8
9
  import numpy as np
9
10
  from zarr.core.array_spec import ArraySpec
10
11
  from zarr.core.indexing import SelectorTuple, is_integer
11
- from zarr.core.metadata.v2 import _default_fill_value
12
12
 
13
13
  from zarrs._internal import Basic, WithSubset
14
14
 
@@ -17,6 +17,7 @@ if TYPE_CHECKING:
17
17
  from types import EllipsisType
18
18
 
19
19
  from zarr.abc.store import ByteGetter, ByteSetter
20
+ from zarr.dtype import ZDType
20
21
 
21
22
 
22
23
  # adapted from https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
@@ -139,21 +140,28 @@ def get_shape_for_selector(
139
140
  return resulting_shape_from_index(shape, selector_tuple, drop_axes, pad=pad)
140
141
 
141
142
 
142
- def get_implicit_fill_value(dtype: np.dtype, fill_value: Any) -> Any:
143
+ def get_implicit_fill_value(dtype: ZDType, fill_value: Any) -> Any:
143
144
  if fill_value is None:
144
- fill_value = _default_fill_value(dtype)
145
+ fill_value = dtype.default_scalar()
145
146
  return fill_value
146
147
 
147
148
 
149
+ @dataclass(frozen=True)
150
+ class RustChunkInfo:
151
+ chunk_info_with_indices: list[WithSubset]
152
+ write_empty_chunks: bool
153
+
154
+
148
155
  def make_chunk_info_for_rust_with_indices(
149
156
  batch_info: Iterable[
150
157
  tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
151
158
  ],
152
159
  drop_axes: tuple[int, ...],
153
160
  shape: tuple[int, ...],
154
- ) -> list[WithSubset]:
161
+ ) -> RustChunkInfo:
155
162
  shape = shape if shape else (1,) # constant array
156
163
  chunk_info_with_indices: list[WithSubset] = []
164
+ write_empty_chunks: bool = True
157
165
  for (
158
166
  byte_getter,
159
167
  chunk_spec,
@@ -161,6 +169,7 @@ def make_chunk_info_for_rust_with_indices(
161
169
  out_selection,
162
170
  _,
163
171
  ) in batch_info:
172
+ write_empty_chunks = chunk_spec.config.write_empty_chunks
164
173
  if chunk_spec.fill_value is None:
165
174
  chunk_spec = ArraySpec(
166
175
  chunk_spec.shape,
@@ -193,4 +202,4 @@ def make_chunk_info_for_rust_with_indices(
193
202
  shape=shape,
194
203
  )
195
204
  )
196
- return chunk_info_with_indices
205
+ return RustChunkInfo(chunk_info_with_indices, write_empty_chunks)
@@ -1,15 +1,16 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: zarrs
3
- Version: 0.1.5
3
+ Version: 0.2.1
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: Implementation :: CPython
6
6
  Classifier: Programming Language :: Python :: Implementation :: PyPy
7
7
  Classifier: Typing :: Typed
8
- Requires-Dist: numpy >=1.24
9
- Requires-Dist: zarr >=3.0.3, <3.1
8
+ Requires-Dist: numpy>=1.24
9
+ Requires-Dist: zarr>=3.1
10
10
  License-File: LICENSE
11
+ Summary: A CodecPipeline for zarr-python backed by the zarrs Rust crate
11
12
  Author: Ilan Gold, Lachlan Deakin, Philipp Angerer
12
- License: MIT
13
+ License-Expression: MIT
13
14
  Requires-Python: >=3.11
14
15
  Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
15
16
 
@@ -28,7 +29,6 @@ To use the project, simply install our package (which depends on `zarr-python>=3
28
29
 
29
30
  ```python
30
31
  import zarr
31
- import zarrs
32
32
  zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"})
33
33
  ```
34
34
 
@@ -63,7 +63,9 @@ The `ZarrsCodecPipeline` specific options are:
63
63
  - `codec_pipeline.chunk_concurrent_minimum`: the minimum number of chunks retrieved/stored concurrently when balancing chunk/codec concurrency.
64
64
  - Defaults to 4 if `None`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#chunk-concurrent-minimum) for more info.
65
65
  - `codec_pipeline.validate_checksums`: enable checksum validation (e.g. with the CRC32C codec).
66
- - Defaults to true if `None`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#validate-checksums) for more info.
66
+ - Defaults to `True`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#validate-checksums) for more info.
67
+ - `codec_pipeline.direct_io`: enable `O_DIRECT` read/write, needs support from the operating system (currently only Linux) and file system.
68
+ - Defaults to `False`.
67
69
 
68
70
  For example:
69
71
  ```python
@@ -73,14 +75,14 @@ zarr.config.set({
73
75
  "codec_pipeline": {
74
76
  "path": "zarrs.ZarrsCodecPipeline",
75
77
  "validate_checksums": True,
76
- "store_empty_chunks": False,
77
78
  "chunk_concurrent_maximum": None,
78
79
  "chunk_concurrent_minimum": 4,
80
+ "direct_io": False
79
81
  }
80
82
  })
81
83
  ```
82
84
 
83
- If the `ZarrsCodecPipeline` is pickled, and then un-pickled, and during that time one of `store_empty_chunks`, `chunk_concurrent_minimum`, `chunk_concurrent_maximum`, or `num_threads` has changed, the newly un-pickled version will pick up the new value. However, once a `ZarrsCodecPipeline` object has been instantiated, these values are then fixed. This may change in the future as guidance from the `zarr` community becomes clear.
85
+ If the `ZarrsCodecPipeline` is pickled, and then un-pickled, and during that time one of `chunk_concurrent_minimum`, `chunk_concurrent_maximum`, or `num_threads` has changed, the newly un-pickled version will pick up the new value. However, once a `ZarrsCodecPipeline` object has been instantiated, these values are then fixed. This may change in the future as guidance from the `zarr` community becomes clear.
84
86
 
85
87
  ## Concurrency
86
88
 
@@ -0,0 +1,11 @@
1
+ zarrs-0.2.1.dist-info/METADATA,sha256=8Y8S6VPAKwydDwl2oSRsMQlD_VSVUCtbfdm78FM4LZs,7878
2
+ zarrs-0.2.1.dist-info/WHEEL,sha256=N8W3-0eDM6igWj-H12r7VkxoMaJIqJLxUyWCFstEaGg,105
3
+ zarrs-0.2.1.dist-info/entry_points.txt,sha256=EzI6yCIUPDHBHzjDdexuGGYbOLXf8x2ICokOJXnuX3k,68
4
+ zarrs-0.2.1.dist-info/licenses/LICENSE,sha256=vwIsJjEfVFehyyqcb7B3dAXAniaFMmk8u7IoiJAfBJ4,1099
5
+ zarrs/__init__.py,sha256=lRVtAPzCzJkGs4vQrW4UgANq-pC-khS0ZF7HTj4__Hg,489
6
+ zarrs/_internal.abi3.so,sha256=zasHnt8Mym7fh5MkSq0_houhMcW-oYZm_zR1j31eJ5I,10334504
7
+ zarrs/_internal.pyi,sha256=D_sHSSWGoVWLBS3q0dPE3pPJULZZ7cxV-9CErqGp8Z8,1327
8
+ zarrs/pipeline.py,sha256=Ihc-RkEOqDTQvUtuwSJf8TBDVZMn2kYYH2_yATjydTI,8449
9
+ zarrs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ zarrs/utils.py,sha256=mGFsSnYU_jt4QJtlvr2JfxoBDZLe8V3Y8DbVfZSUpmA,7027
11
+ zarrs-0.2.1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: maturin (1.7.4)
2
+ Generator: maturin (1.9.4)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp311-abi3-macosx_10_12_x86_64
@@ -0,0 +1,2 @@
1
+ [zarr.codec_pipeline]
2
+ zarrs.codec_pipeline=zarrs:ZarrsCodecPipeline
@@ -1,10 +0,0 @@
1
- zarrs-0.1.5.dist-info/METADATA,sha256=MraaZvJnr2BaC4xLsvHjNIWkaE_KMYsHfDRJ5CJ4nsY,7693
2
- zarrs-0.1.5.dist-info/WHEEL,sha256=LZygbeT1PTQw7a9tONPp78bbG4FZc86U59Z0RFJcoR8,105
3
- zarrs-0.1.5.dist-info/licenses/LICENSE,sha256=vwIsJjEfVFehyyqcb7B3dAXAniaFMmk8u7IoiJAfBJ4,1099
4
- zarrs/__init__.py,sha256=4oWtWDZO8r7z4Uh7Fy_brmkxXDpULQdgjlA0iFw98eA,573
5
- zarrs/_internal.pyi,sha256=H2afxhX5LDKOSR4bOBoH3mXoVqdM24Rc-xdjqMiOE5Y,1421
6
- zarrs/utils.py,sha256=uKIDi2EKRJRFdlyCElEvZrooVXkbmO4ZoE5fTHFNHGo,6763
7
- zarrs/pipeline.py,sha256=GV6Z1xoMdLQpnp4iX4nCuIYZTK1bPDG9nO0-t23F0_I,8708
8
- zarrs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- zarrs/_internal.abi3.so,sha256=rOHAgLFXNSc3DqJsmPOWY31M02VTg2vpNMhLovY5-TI,9573408
10
- zarrs-0.1.5.dist-info/RECORD,,