zarrs 0.1.5__cp311-abi3-manylinux_2_28_aarch64.whl → 0.2.0__cp311-abi3-manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zarrs/_internal.abi3.so CHANGED
Binary file
zarrs/_internal.pyi CHANGED
@@ -13,10 +13,10 @@ class Basic:
13
13
  class CodecPipelineImpl:
14
14
  def __new__(
15
15
  cls,
16
- metadata: builtins.str,
16
+ array_metadata: builtins.str,
17
+ store_config: StoreConfig,
17
18
  *,
18
19
  validate_checksums: builtins.bool | None = None,
19
- store_empty_chunks: builtins.bool | None = None,
20
20
  chunk_concurrent_minimum: builtins.int | None = None,
21
21
  chunk_concurrent_maximum: builtins.int | None = None,
22
22
  num_threads: builtins.int | None = None,
@@ -30,6 +30,7 @@ class CodecPipelineImpl:
30
30
  self,
31
31
  chunk_descriptions: typing.Sequence[WithSubset],
32
32
  value: numpy.typing.NDArray[typing.Any],
33
+ write_empty_chunks: builtins.bool,
33
34
  ) -> None: ...
34
35
 
35
36
  class FilesystemStoreConfig:
zarrs/pipeline.py CHANGED
@@ -2,25 +2,28 @@ from __future__ import annotations
2
2
 
3
3
  import asyncio
4
4
  import json
5
- import re
6
5
  from dataclasses import dataclass
7
6
  from typing import TYPE_CHECKING, TypedDict
7
+ from warnings import warn
8
8
 
9
9
  import numpy as np
10
10
  from zarr.abc.codec import Codec, CodecPipeline
11
+ from zarr.codecs._v2 import V2Codec
11
12
  from zarr.core import BatchedCodecPipeline
12
13
  from zarr.core.config import config
14
+ from zarr.core.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata
13
15
 
14
16
  if TYPE_CHECKING:
15
17
  from collections.abc import Generator, Iterable, Iterator
16
18
  from typing import Any, Self
17
19
 
18
- from zarr.abc.store import ByteGetter, ByteSetter
20
+ from zarr.abc.store import ByteGetter, ByteSetter, Store
19
21
  from zarr.core.array_spec import ArraySpec
20
22
  from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer
21
23
  from zarr.core.chunk_grids import ChunkGrid
22
24
  from zarr.core.common import ChunkCoords
23
25
  from zarr.core.indexing import SelectorTuple
26
+ from zarr.dtype import ZDType
24
27
 
25
28
  from ._internal import CodecPipelineImpl, codec_metadata_v2_to_v3
26
29
  from .utils import (
@@ -39,12 +42,15 @@ class UnsupportedMetadataError(Exception):
39
42
  pass
40
43
 
41
44
 
42
- def get_codec_pipeline_impl(codec_metadata_json: str) -> CodecPipelineImpl | None:
45
+ def get_codec_pipeline_impl(
46
+ metadata: ArrayMetadata, store: Store
47
+ ) -> CodecPipelineImpl | None:
43
48
  try:
49
+ array_metadata_json = json.dumps(metadata.to_dict())
44
50
  return CodecPipelineImpl(
45
- codec_metadata_json,
51
+ array_metadata_json,
52
+ store_config=store,
46
53
  validate_checksums=config.get("codec_pipeline.validate_checksums", None),
47
- store_empty_chunks=config.get("array.write_empty_chunks", None),
48
54
  chunk_concurrent_minimum=config.get(
49
55
  "codec_pipeline.chunk_concurrent_minimum", None
50
56
  ),
@@ -54,10 +60,11 @@ def get_codec_pipeline_impl(codec_metadata_json: str) -> CodecPipelineImpl | Non
54
60
  num_threads=config.get("threading.max_workers", None),
55
61
  )
56
62
  except TypeError as e:
57
- if re.match(r"codec (delta|zlib) is not supported", str(e)):
58
- return None
59
- else:
60
- raise e
63
+ warn(
64
+ f"Array is unsupported by ZarrsCodecPipeline: {e}",
65
+ category=UserWarning,
66
+ )
67
+ return None
61
68
 
62
69
 
63
70
  def codecs_to_dict(codecs: Iterable[Codec]) -> Generator[dict[str, Any], None, None]:
@@ -88,37 +95,47 @@ class ZarrsCodecPipelineState(TypedDict):
88
95
  codecs: tuple[Codec, ...]
89
96
 
90
97
 
98
+ def array_metadata_to_codecs(metadata: ArrayMetadata) -> list[Codec]:
99
+ if isinstance(metadata, ArrayV3Metadata):
100
+ return metadata.codecs
101
+ elif isinstance(metadata, ArrayV2Metadata):
102
+ v2_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor)
103
+ return [v2_codec]
104
+
105
+
91
106
  @dataclass
92
107
  class ZarrsCodecPipeline(CodecPipeline):
93
- codecs: tuple[Codec, ...]
108
+ metadata: ArrayMetadata
109
+ store: Store
94
110
  impl: CodecPipelineImpl | None
95
- codec_metadata_json: str
96
111
  python_impl: BatchedCodecPipeline
97
112
 
98
113
  def __getstate__(self) -> ZarrsCodecPipelineState:
99
- return {"codec_metadata_json": self.codec_metadata_json, "codecs": self.codecs}
114
+ return {"metadata": self.metadata, "store": self.store}
100
115
 
101
116
  def __setstate__(self, state: ZarrsCodecPipelineState):
102
- self.codecs = state["codecs"]
103
- self.codec_metadata_json = state["codec_metadata_json"]
104
- self.impl = get_codec_pipeline_impl(self.codec_metadata_json)
105
- self.python_impl = BatchedCodecPipeline.from_codecs(self.codecs)
117
+ self.metadata = state["metadata"]
118
+ self.store = state["store"]
119
+ self.impl = get_codec_pipeline_impl(self.metadata, self.store)
120
+ codecs = array_metadata_to_codecs(self.metadata)
121
+ self.python_impl = BatchedCodecPipeline.from_codecs(codecs)
106
122
 
107
123
  def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
108
- raise NotImplementedError("evolve_from_array_spec")
124
+ return self
109
125
 
110
126
  @classmethod
111
127
  def from_codecs(cls, codecs: Iterable[Codec]) -> Self:
112
- codec_metadata = list(codecs_to_dict(codecs))
113
- codec_metadata_json = json.dumps(codec_metadata)
114
- # TODO: upstream zarr-python has not settled on how to deal with configs yet
115
- # Should they be checked when an array is created, or when an operation is performed?
116
- # https://github.com/zarr-developers/zarr-python/issues/2409
117
- # https://github.com/zarr-developers/zarr-python/pull/2429#issuecomment-2566976567
128
+ return BatchedCodecPipeline.from_codecs(codecs)
129
+
130
+ @classmethod
131
+ def from_array_metadata_and_store(
132
+ cls, array_metadata: ArrayMetadata, store: Store
133
+ ) -> Self:
134
+ codecs = array_metadata_to_codecs(array_metadata)
118
135
  return cls(
119
- codec_metadata_json=codec_metadata_json,
120
- codecs=tuple(codecs),
121
- impl=get_codec_pipeline_impl(codec_metadata_json),
136
+ metadata=array_metadata,
137
+ store=store,
138
+ impl=get_codec_pipeline_impl(array_metadata, store),
122
139
  python_impl=BatchedCodecPipeline.from_codecs(codecs),
123
140
  )
124
141
 
@@ -134,7 +151,7 @@ class ZarrsCodecPipeline(CodecPipeline):
134
151
  yield from self.codecs
135
152
 
136
153
  def validate(
137
- self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: ChunkGrid
154
+ self, *, shape: ChunkCoords, dtype: ZDType, chunk_grid: ChunkGrid
138
155
  ) -> None:
139
156
  raise NotImplementedError("validate")
140
157
 
@@ -184,7 +201,7 @@ class ZarrsCodecPipeline(CodecPipeline):
184
201
  out: NDArrayLike = out.as_ndarray_like()
185
202
  await asyncio.to_thread(
186
203
  self.impl.retrieve_chunks_and_apply_index,
187
- chunks_desc,
204
+ chunks_desc.chunk_info_with_indices,
188
205
  out,
189
206
  )
190
207
  return None
@@ -223,7 +240,10 @@ class ZarrsCodecPipeline(CodecPipeline):
223
240
  elif not value_np.flags.c_contiguous:
224
241
  value_np = np.ascontiguousarray(value_np)
225
242
  await asyncio.to_thread(
226
- self.impl.store_chunks_with_indices, chunks_desc, value_np
243
+ self.impl.store_chunks_with_indices,
244
+ chunks_desc.chunk_info_with_indices,
245
+ value_np,
246
+ chunks_desc.write_empty_chunks,
227
247
  )
228
248
  return None
229
249
 
@@ -236,7 +256,7 @@ class ZarrsCodecPipeline(CodecPipeline):
236
256
  # https://github.com/LDeakin/zarrs/blob/0532fe983b7b42b59dbf84e50a2fe5e6f7bad4ce/zarrs_metadata/src/v2_to_v3.rs#L289-L293 for VSUMm
237
257
  # Further, our pipeline does not support variable-length objects due to limitations on decode_into, so object/np.dtypes.StringDType is also out
238
258
  if any(
239
- info.dtype.kind in {"V", "S", "U", "M", "m", "O", "T"}
259
+ info.dtype.to_native_dtype().kind in {"V", "S", "U", "M", "m", "O", "T"}
240
260
  for (_, info, _, _, _) in batch_info
241
261
  ):
242
262
  raise UnsupportedDataTypeError()
zarrs/utils.py CHANGED
@@ -2,13 +2,13 @@ from __future__ import annotations
2
2
 
3
3
  import operator
4
4
  import os
5
+ from dataclasses import dataclass
5
6
  from functools import reduce
6
7
  from typing import TYPE_CHECKING, Any
7
8
 
8
9
  import numpy as np
9
10
  from zarr.core.array_spec import ArraySpec
10
11
  from zarr.core.indexing import SelectorTuple, is_integer
11
- from zarr.core.metadata.v2 import _default_fill_value
12
12
 
13
13
  from zarrs._internal import Basic, WithSubset
14
14
 
@@ -17,6 +17,7 @@ if TYPE_CHECKING:
17
17
  from types import EllipsisType
18
18
 
19
19
  from zarr.abc.store import ByteGetter, ByteSetter
20
+ from zarr.dtype import ZDType
20
21
 
21
22
 
22
23
  # adapted from https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
@@ -139,12 +140,18 @@ def get_shape_for_selector(
139
140
  return resulting_shape_from_index(shape, selector_tuple, drop_axes, pad=pad)
140
141
 
141
142
 
142
- def get_implicit_fill_value(dtype: np.dtype, fill_value: Any) -> Any:
143
+ def get_implicit_fill_value(dtype: ZDType, fill_value: Any) -> Any:
143
144
  if fill_value is None:
144
- fill_value = _default_fill_value(dtype)
145
+ fill_value = dtype.default_scalar()
145
146
  return fill_value
146
147
 
147
148
 
149
+ @dataclass(frozen=True)
150
+ class RustChunkInfo:
151
+ chunk_info_with_indices: list[WithSubset]
152
+ write_empty_chunks: bool
153
+
154
+
148
155
  def make_chunk_info_for_rust_with_indices(
149
156
  batch_info: Iterable[
150
157
  tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
@@ -154,6 +161,7 @@ def make_chunk_info_for_rust_with_indices(
154
161
  ) -> list[WithSubset]:
155
162
  shape = shape if shape else (1,) # constant array
156
163
  chunk_info_with_indices: list[WithSubset] = []
164
+ write_empty_chunks: bool = True
157
165
  for (
158
166
  byte_getter,
159
167
  chunk_spec,
@@ -161,6 +169,7 @@ def make_chunk_info_for_rust_with_indices(
161
169
  out_selection,
162
170
  _,
163
171
  ) in batch_info:
172
+ write_empty_chunks = chunk_spec.config.write_empty_chunks
164
173
  if chunk_spec.fill_value is None:
165
174
  chunk_spec = ArraySpec(
166
175
  chunk_spec.shape,
@@ -193,4 +202,4 @@ def make_chunk_info_for_rust_with_indices(
193
202
  shape=shape,
194
203
  )
195
204
  )
196
- return chunk_info_with_indices
205
+ return RustChunkInfo(chunk_info_with_indices, write_empty_chunks)
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: zarrs
3
- Version: 0.1.5
3
+ Version: 0.2.0
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: Implementation :: CPython
6
6
  Classifier: Programming Language :: Python :: Implementation :: PyPy
7
7
  Classifier: Typing :: Typed
8
8
  Requires-Dist: numpy >=1.24
9
- Requires-Dist: zarr >=3.0.3, <3.1
9
+ Requires-Dist: zarr >=3.1
10
10
  License-File: LICENSE
11
11
  Author: Ilan Gold, Lachlan Deakin, Philipp Angerer
12
12
  License: MIT
@@ -73,14 +73,13 @@ zarr.config.set({
73
73
  "codec_pipeline": {
74
74
  "path": "zarrs.ZarrsCodecPipeline",
75
75
  "validate_checksums": True,
76
- "store_empty_chunks": False,
77
76
  "chunk_concurrent_maximum": None,
78
77
  "chunk_concurrent_minimum": 4,
79
78
  }
80
79
  })
81
80
  ```
82
81
 
83
- If the `ZarrsCodecPipeline` is pickled, and then un-pickled, and during that time one of `store_empty_chunks`, `chunk_concurrent_minimum`, `chunk_concurrent_maximum`, or `num_threads` has changed, the newly un-pickled version will pick up the new value. However, once a `ZarrsCodecPipeline` object has been instantiated, these values are then fixed. This may change in the future as guidance from the `zarr` community becomes clear.
82
+ These settings are read when a `ZarrsCodecPipeline` is instantiated, which includes un-pickling: if a pipeline is pickled and then un-pickled, and one of `chunk_concurrent_minimum`, `chunk_concurrent_maximum`, or `num_threads` has changed in the meantime, the newly un-pickled pipeline will pick up the new value. Once a `ZarrsCodecPipeline` object has been instantiated, however, these values are fixed for that object. This may change in the future as guidance from the `zarr` community becomes clear.
84
83
 
85
84
  ## Concurrency
86
85
 
@@ -0,0 +1,10 @@
1
+ zarrs-0.2.0.dist-info/METADATA,sha256=XxZ7DwO-fwgs0UKqvISPYRv2C58rtWNZTFnUlQ4GN0M,7626
2
+ zarrs-0.2.0.dist-info/WHEEL,sha256=vlVK9XRfjbZsffx4VpMMyFV477nGomrs3TkCmK8HoZM,108
3
+ zarrs-0.2.0.dist-info/licenses/LICENSE,sha256=vwIsJjEfVFehyyqcb7B3dAXAniaFMmk8u7IoiJAfBJ4,1099
4
+ zarrs/__init__.py,sha256=4oWtWDZO8r7z4Uh7Fy_brmkxXDpULQdgjlA0iFw98eA,573
5
+ zarrs/pipeline.py,sha256=9cog6mm1BMwxgRsKyO67mn_DyRdqsDrxykrTpyMyclo,9099
6
+ zarrs/_internal.pyi,sha256=4WAVy2Upg3y_DX0j8AvdAr0_FAMeReDkS57XFc3Y9xE,1448
7
+ zarrs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ zarrs/utils.py,sha256=My01lFsg9ZFxkMajbTKHz7audaSxHJW6CviDjqo2tDs,7030
9
+ zarrs/_internal.abi3.so,sha256=J5mngjmu3CE_IywczCJY9MM0NYh3VLXyOqlXN8As41o,9929656
10
+ zarrs-0.2.0.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- zarrs-0.1.5.dist-info/METADATA,sha256=MraaZvJnr2BaC4xLsvHjNIWkaE_KMYsHfDRJ5CJ4nsY,7693
2
- zarrs-0.1.5.dist-info/WHEEL,sha256=vlVK9XRfjbZsffx4VpMMyFV477nGomrs3TkCmK8HoZM,108
3
- zarrs-0.1.5.dist-info/licenses/LICENSE,sha256=vwIsJjEfVFehyyqcb7B3dAXAniaFMmk8u7IoiJAfBJ4,1099
4
- zarrs/__init__.py,sha256=4oWtWDZO8r7z4Uh7Fy_brmkxXDpULQdgjlA0iFw98eA,573
5
- zarrs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- zarrs/pipeline.py,sha256=GV6Z1xoMdLQpnp4iX4nCuIYZTK1bPDG9nO0-t23F0_I,8708
7
- zarrs/_internal.pyi,sha256=H2afxhX5LDKOSR4bOBoH3mXoVqdM24Rc-xdjqMiOE5Y,1421
8
- zarrs/utils.py,sha256=uKIDi2EKRJRFdlyCElEvZrooVXkbmO4ZoE5fTHFNHGo,6763
9
- zarrs/_internal.abi3.so,sha256=cYjWNpNHDGGY71MVDv6Z7oE-3k2PrhLUHR1dtwByln4,9704088
10
- zarrs-0.1.5.dist-info/RECORD,,
File without changes