zarrs 0.1.5__tar.gz → 0.2.0__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.

This version of zarrs has been flagged as potentially problematic.

Files changed (50)
  1. {zarrs-0.1.5 → zarrs-0.2.0}/Cargo.lock +15 -15
  2. {zarrs-0.1.5 → zarrs-0.2.0}/Cargo.toml +1 -1
  3. {zarrs-0.1.5 → zarrs-0.2.0}/PKG-INFO +3 -4
  4. {zarrs-0.1.5 → zarrs-0.2.0}/README.md +1 -2
  5. {zarrs-0.1.5 → zarrs-0.2.0}/pyproject.toml +2 -1
  6. {zarrs-0.1.5 → zarrs-0.2.0}/python/zarrs/_internal.pyi +3 -2
  7. {zarrs-0.1.5 → zarrs-0.2.0}/python/zarrs/pipeline.py +50 -30
  8. {zarrs-0.1.5 → zarrs-0.2.0}/python/zarrs/utils.py +13 -4
  9. {zarrs-0.1.5 → zarrs-0.2.0}/src/chunk_item.rs +2 -11
  10. {zarrs-0.1.5 → zarrs-0.2.0}/src/lib.rs +37 -24
  11. {zarrs-0.1.5 → zarrs-0.2.0}/src/store/filesystem.rs +1 -1
  12. {zarrs-0.1.5 → zarrs-0.2.0}/src/store/http.rs +1 -1
  13. {zarrs-0.1.5 → zarrs-0.2.0}/src/store.rs +1 -3
  14. {zarrs-0.1.5 → zarrs-0.2.0}/tests/test_codecs.py +1 -82
  15. {zarrs-0.1.5 → zarrs-0.2.0}/tests/test_pipeline.py +25 -0
  16. {zarrs-0.1.5 → zarrs-0.2.0}/tests/test_sharding.py +0 -30
  17. {zarrs-0.1.5 → zarrs-0.2.0}/tests/test_transpose.py +0 -18
  18. zarrs-0.2.0/tests/test_v2.py +328 -0
  19. zarrs-0.2.0/tests/test_vlen.py +85 -0
  20. zarrs-0.1.5/src/store/manager.rs +0 -61
  21. zarrs-0.1.5/tests/test_blosc.py +0 -57
  22. zarrs-0.1.5/tests/test_v2.py +0 -346
  23. zarrs-0.1.5/tests/test_vlen.py +0 -100
  24. {zarrs-0.1.5 → zarrs-0.2.0}/.github/workflows/cd.yml +0 -0
  25. {zarrs-0.1.5 → zarrs-0.2.0}/.github/workflows/ci.yml +0 -0
  26. {zarrs-0.1.5 → zarrs-0.2.0}/.github/workflows/version-cmp.py +0 -0
  27. {zarrs-0.1.5 → zarrs-0.2.0}/.gitignore +0 -0
  28. {zarrs-0.1.5 → zarrs-0.2.0}/.pre-commit-config.yaml +0 -0
  29. {zarrs-0.1.5 → zarrs-0.2.0}/.readthedocs.yml +0 -0
  30. {zarrs-0.1.5 → zarrs-0.2.0}/LICENSE +0 -0
  31. {zarrs-0.1.5 → zarrs-0.2.0}/docs/Makefile +0 -0
  32. {zarrs-0.1.5 → zarrs-0.2.0}/docs/conf.py +0 -0
  33. {zarrs-0.1.5 → zarrs-0.2.0}/docs/contributing.md +0 -0
  34. {zarrs-0.1.5 → zarrs-0.2.0}/docs/index.md +0 -0
  35. {zarrs-0.1.5 → zarrs-0.2.0}/docs/make.bat +0 -0
  36. {zarrs-0.1.5 → zarrs-0.2.0}/hatch.toml +0 -0
  37. {zarrs-0.1.5 → zarrs-0.2.0}/python/zarrs/__init__.py +0 -0
  38. {zarrs-0.1.5 → zarrs-0.2.0}/python/zarrs/py.typed +0 -0
  39. {zarrs-0.1.5 → zarrs-0.2.0}/src/bin/stub_gen.rs +0 -0
  40. {zarrs-0.1.5 → zarrs-0.2.0}/src/concurrency.rs +0 -0
  41. {zarrs-0.1.5 → zarrs-0.2.0}/src/metadata_v2.rs +0 -0
  42. {zarrs-0.1.5 → zarrs-0.2.0}/src/runtime.rs +0 -0
  43. {zarrs-0.1.5 → zarrs-0.2.0}/src/tests.rs +0 -0
  44. {zarrs-0.1.5 → zarrs-0.2.0}/src/utils.rs +0 -0
  45. {zarrs-0.1.5 → zarrs-0.2.0}/tests/conftest.py +0 -0
  46. {zarrs-0.1.5 → zarrs-0.2.0}/tests/test_endian.py +0 -0
  47. {zarrs-0.1.5 → zarrs-0.2.0}/tests/test_gzip.py +0 -0
  48. {zarrs-0.1.5 → zarrs-0.2.0}/tests/test_version.py +0 -0
  49. {zarrs-0.1.5 → zarrs-0.2.0}/tests/test_zarrs_http.py +0 -0
  50. {zarrs-0.1.5 → zarrs-0.2.0}/tests/test_zstd.py +0 -0

{zarrs-0.1.5 → zarrs-0.2.0}/Cargo.lock
@@ -244,9 +244,9 @@ dependencies = [
 
  [[package]]
  name = "cc"
- version = "1.2.28"
+ version = "1.2.29"
  source = "registry+https://github.com/rust-lang/crates.io-index"
- checksum = "4ad45f4f74e4e20eaa392913b7b33a7091c87e59628f4dd27888205ad888843c"
+ checksum = "5c1599538de2394445747c8cf7935946e3cc27e9625f889d979bfb2aaf569362"
  dependencies = [
  "jobserver",
  "libc",
@@ -305,9 +305,9 @@ dependencies = [
 
  [[package]]
  name = "crc32fast"
- version = "1.4.2"
+ version = "1.5.0"
  source = "registry+https://github.com/rust-lang/crates.io-index"
- checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
+ checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
  dependencies = [
  "cfg-if",
  ]
@@ -759,9 +759,9 @@ dependencies = [
 
  [[package]]
  name = "hyper-util"
- version = "0.1.14"
+ version = "0.1.15"
  source = "registry+https://github.com/rust-lang/crates.io-index"
- checksum = "dc2fdfdbff08affe55bb779f33b053aa1fe5dd5b54c257343c17edfa55711bdb"
+ checksum = "7f66d5bd4c6f02bf0542fad85d626775bab9258cf795a4256dcaf3161114d1df"
  dependencies = [
  "base64",
  "bytes",
@@ -1940,9 +1940,9 @@ dependencies = [
 
  [[package]]
  name = "rustls"
- version = "0.23.28"
+ version = "0.23.29"
  source = "registry+https://github.com/rust-lang/crates.io-index"
- checksum = "7160e3e10bf4535308537f3c4e1641468cd0e485175d6163087c0393c7d46643"
+ checksum = "2491382039b29b9b11ff08b76ff6c97cf287671dbb74f0be44bda389fffe9bd1"
  dependencies = [
  "once_cell",
  "ring",
@@ -1964,9 +1964,9 @@ dependencies = [
 
  [[package]]
  name = "rustls-webpki"
- version = "0.103.3"
+ version = "0.103.4"
  source = "registry+https://github.com/rust-lang/crates.io-index"
- checksum = "e4a72fe2bcf7a6ac6fd7d0b9e5cb68aeb7d4c0a0271730218b3e92d43b4eb435"
+ checksum = "0a17884ae0c1b773f1ccd2bd4a8c72f16da897310a98b0e84bf349ad5ead92fc"
  dependencies = [
  "ring",
  "rustls-pki-types",
@@ -2258,9 +2258,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 
  [[package]]
  name = "tokio"
- version = "1.46.0"
+ version = "1.46.1"
  source = "registry+https://github.com/rust-lang/crates.io-index"
- checksum = "1140bb80481756a8cbe10541f37433b459c5aa1e727b4c020fbfebdc25bf3ec4"
+ checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17"
  dependencies = [
  "backtrace",
  "bytes",
@@ -2893,9 +2893,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 
  [[package]]
  name = "winnow"
- version = "0.7.11"
+ version = "0.7.12"
  source = "registry+https://github.com/rust-lang/crates.io-index"
- checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd"
+ checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95"
  dependencies = [
  "memchr",
  ]
@@ -2984,7 +2984,7 @@ dependencies = [
 
  [[package]]
  name = "zarrs-python"
- version = "0.1.5"
+ version = "0.2.0"
  dependencies = [
  "itertools 0.14.0",
  "numpy",

{zarrs-0.1.5 → zarrs-0.2.0}/Cargo.toml
@@ -1,6 +1,6 @@
  [package]
  name = "zarrs-python"
- version = "0.1.5"
+ version = "0.2.0"
  edition = "2021"
  publish = false
 

{zarrs-0.1.5 → zarrs-0.2.0}/PKG-INFO
@@ -1,12 +1,12 @@
  Metadata-Version: 2.3
  Name: zarrs
- Version: 0.1.5
+ Version: 0.2.0
  Classifier: Programming Language :: Rust
  Classifier: Programming Language :: Python :: Implementation :: CPython
  Classifier: Programming Language :: Python :: Implementation :: PyPy
  Classifier: Typing :: Typed
  Requires-Dist: numpy >=1.24
- Requires-Dist: zarr >=3.0.3, <3.1
+ Requires-Dist: zarr >=3.1
  License-File: LICENSE
  Author: Ilan Gold, Lachlan Deakin, Philipp Angerer
  License: MIT
@@ -73,14 +73,13 @@ zarr.config.set({
  "codec_pipeline": {
  "path": "zarrs.ZarrsCodecPipeline",
  "validate_checksums": True,
- "store_empty_chunks": False,
  "chunk_concurrent_maximum": None,
  "chunk_concurrent_minimum": 4,
  }
  })
  ```
 
- If the `ZarrsCodecPipeline` is pickled, and then un-pickled, and during that time one of `store_empty_chunks`, `chunk_concurrent_minimum`, `chunk_concurrent_maximum`, or `num_threads` has changed, the newly un-pickled version will pick up the new value. However, once a `ZarrsCodecPipeline` object has been instantiated, these values are then fixed. This may change in the future as guidance from the `zarr` community becomes clear.
+ If the `ZarrsCodecPipeline` is pickled, and then un-pickled, and during that time one of `chunk_concurrent_minimum`, `chunk_concurrent_maximum`, or `num_threads` has changed, the newly un-pickled version will pick up the new value. However, once a `ZarrsCodecPipeline` object has been instantiated, these values are then fixed. This may change in the future as guidance from the `zarr` community becomes clear.
 
  ## Concurrency
 

{zarrs-0.1.5 → zarrs-0.2.0}/README.md
@@ -58,14 +58,13 @@ zarr.config.set({
  "codec_pipeline": {
  "path": "zarrs.ZarrsCodecPipeline",
  "validate_checksums": True,
- "store_empty_chunks": False,
  "chunk_concurrent_maximum": None,
  "chunk_concurrent_minimum": 4,
  }
  })
  ```
 
- If the `ZarrsCodecPipeline` is pickled, and then un-pickled, and during that time one of `store_empty_chunks`, `chunk_concurrent_minimum`, `chunk_concurrent_maximum`, or `num_threads` has changed, the newly un-pickled version will pick up the new value. However, once a `ZarrsCodecPipeline` object has been instantiated, these values are then fixed. This may change in the future as guidance from the `zarr` community becomes clear.
+ If the `ZarrsCodecPipeline` is pickled, and then un-pickled, and during that time one of `chunk_concurrent_minimum`, `chunk_concurrent_maximum`, or `num_threads` has changed, the newly un-pickled version will pick up the new value. However, once a `ZarrsCodecPipeline` object has been instantiated, these values are then fixed. This may change in the future as guidance from the `zarr` community becomes clear.
 
  ## Concurrency
 
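
The PKG-INFO and README hunks above drop `store_empty_chunks` from the example pipeline configuration. A minimal sketch of the updated configuration follows, under the assumption (suggested by the removed `config.get("array.write_empty_chunks", None)` call and the new `chunk_spec.config.write_empty_chunks` plumbing later in this diff) that empty-chunk writing is now governed by zarr-python's own `array.write_empty_chunks` setting rather than by a codec-pipeline option:

```python
import zarr

# Sketch of a zarrs 0.2.0 config: "store_empty_chunks" is no longer a
# codec_pipeline key. Whether empty chunks are written is assumed to follow
# zarr-python's "array.write_empty_chunks" setting, which the new code threads
# through each chunk's ArraySpec to the Rust pipeline.
zarr.config.set({
    "codec_pipeline": {
        "path": "zarrs.ZarrsCodecPipeline",
        "validate_checksums": True,
        "chunk_concurrent_maximum": None,
        "chunk_concurrent_minimum": 4,
    },
    "array": {"write_empty_chunks": False},
})
```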

{zarrs-0.1.5 → zarrs-0.2.0}/pyproject.toml
@@ -20,7 +20,7 @@ classifiers = [
  dynamic = ["version"]
  dependencies = [
  "numpy>=1.24",
- "zarr>=3.0.3,<3.1",
+ "zarr>=3.1",
  ]
 
  [dependency-groups]
@@ -31,6 +31,7 @@ test = [
  "pytest",
  "pytest-asyncio",
  "pytest-xdist",
+ "pytest-mock",
  ]
  doc = ["sphinx>=7.4.6", "myst-parser"]
  dev = [

{zarrs-0.1.5 → zarrs-0.2.0}/python/zarrs/_internal.pyi
@@ -13,10 +13,10 @@ class Basic:
  class CodecPipelineImpl:
  def __new__(
  cls,
- metadata: builtins.str,
+ array_metadata: builtins.str,
+ store_config: StoreConfig,
  *,
  validate_checksums: builtins.bool | None = None,
- store_empty_chunks: builtins.bool | None = None,
  chunk_concurrent_minimum: builtins.int | None = None,
  chunk_concurrent_maximum: builtins.int | None = None,
  num_threads: builtins.int | None = None,
@@ -30,6 +30,7 @@ class CodecPipelineImpl:
  self,
  chunk_descriptions: typing.Sequence[WithSubset],
  value: numpy.typing.NDArray[typing.Any],
+ write_empty_chunks: builtins.bool,
  ) -> None: ...
 
  class FilesystemStoreConfig:

{zarrs-0.1.5 → zarrs-0.2.0}/python/zarrs/pipeline.py
@@ -2,25 +2,28 @@ from __future__ import annotations
 
  import asyncio
  import json
- import re
  from dataclasses import dataclass
  from typing import TYPE_CHECKING, TypedDict
+ from warnings import warn
 
  import numpy as np
  from zarr.abc.codec import Codec, CodecPipeline
+ from zarr.codecs._v2 import V2Codec
  from zarr.core import BatchedCodecPipeline
  from zarr.core.config import config
+ from zarr.core.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata
 
  if TYPE_CHECKING:
  from collections.abc import Generator, Iterable, Iterator
  from typing import Any, Self
 
- from zarr.abc.store import ByteGetter, ByteSetter
+ from zarr.abc.store import ByteGetter, ByteSetter, Store
  from zarr.core.array_spec import ArraySpec
  from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer
  from zarr.core.chunk_grids import ChunkGrid
  from zarr.core.common import ChunkCoords
  from zarr.core.indexing import SelectorTuple
+ from zarr.dtype import ZDType
 
  from ._internal import CodecPipelineImpl, codec_metadata_v2_to_v3
  from .utils import (
@@ -39,12 +42,15 @@ class UnsupportedMetadataError(Exception):
  pass
 
 
- def get_codec_pipeline_impl(codec_metadata_json: str) -> CodecPipelineImpl | None:
+ def get_codec_pipeline_impl(
+ metadata: ArrayMetadata, store: Store
+ ) -> CodecPipelineImpl | None:
  try:
+ array_metadata_json = json.dumps(metadata.to_dict())
  return CodecPipelineImpl(
- codec_metadata_json,
+ array_metadata_json,
+ store_config=store,
  validate_checksums=config.get("codec_pipeline.validate_checksums", None),
- store_empty_chunks=config.get("array.write_empty_chunks", None),
  chunk_concurrent_minimum=config.get(
  "codec_pipeline.chunk_concurrent_minimum", None
  ),
@@ -54,10 +60,11 @@ def get_codec_pipeline_impl(codec_metadata_json: str) -> CodecPipelineImpl | Non
  num_threads=config.get("threading.max_workers", None),
  )
  except TypeError as e:
- if re.match(r"codec (delta|zlib) is not supported", str(e)):
- return None
- else:
- raise e
+ warn(
+ f"Array is unsupported by ZarrsCodecPipeline: {e}",
+ category=UserWarning,
+ )
+ return None
 
 
  def codecs_to_dict(codecs: Iterable[Codec]) -> Generator[dict[str, Any], None, None]:
@@ -88,37 +95,47 @@ class ZarrsCodecPipelineState(TypedDict):
  codecs: tuple[Codec, ...]
 
 
+ def array_metadata_to_codecs(metadata: ArrayMetadata) -> list[Codec]:
+ if isinstance(metadata, ArrayV3Metadata):
+ return metadata.codecs
+ elif isinstance(metadata, ArrayV2Metadata):
+ v2_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor)
+ return [v2_codec]
+
+
  @dataclass
  class ZarrsCodecPipeline(CodecPipeline):
- codecs: tuple[Codec, ...]
+ metadata: ArrayMetadata
+ store: Store
  impl: CodecPipelineImpl | None
- codec_metadata_json: str
  python_impl: BatchedCodecPipeline
 
  def __getstate__(self) -> ZarrsCodecPipelineState:
- return {"codec_metadata_json": self.codec_metadata_json, "codecs": self.codecs}
+ return {"metadata": self.metadata, "store": self.store}
 
  def __setstate__(self, state: ZarrsCodecPipelineState):
- self.codecs = state["codecs"]
- self.codec_metadata_json = state["codec_metadata_json"]
- self.impl = get_codec_pipeline_impl(self.codec_metadata_json)
- self.python_impl = BatchedCodecPipeline.from_codecs(self.codecs)
+ self.metadata = state["metadata"]
+ self.store = state["store"]
+ self.impl = get_codec_pipeline_impl(self.metadata, self.store)
+ codecs = array_metadata_to_codecs(self.metadata)
+ self.python_impl = BatchedCodecPipeline.from_codecs(codecs)
 
  def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
- raise NotImplementedError("evolve_from_array_spec")
+ return self
 
  @classmethod
  def from_codecs(cls, codecs: Iterable[Codec]) -> Self:
- codec_metadata = list(codecs_to_dict(codecs))
- codec_metadata_json = json.dumps(codec_metadata)
- # TODO: upstream zarr-python has not settled on how to deal with configs yet
- # Should they be checked when an array is created, or when an operation is performed?
- # https://github.com/zarr-developers/zarr-python/issues/2409
- # https://github.com/zarr-developers/zarr-python/pull/2429#issuecomment-2566976567
+ return BatchedCodecPipeline.from_codecs(codecs)
+
+ @classmethod
+ def from_array_metadata_and_store(
+ cls, array_metadata: ArrayMetadata, store: Store
+ ) -> Self:
+ codecs = array_metadata_to_codecs(array_metadata)
  return cls(
- codec_metadata_json=codec_metadata_json,
- codecs=tuple(codecs),
- impl=get_codec_pipeline_impl(codec_metadata_json),
+ metadata=array_metadata,
+ store=store,
+ impl=get_codec_pipeline_impl(array_metadata, store),
  python_impl=BatchedCodecPipeline.from_codecs(codecs),
  )
 
@@ -134,7 +151,7 @@ class ZarrsCodecPipeline(CodecPipeline):
  yield from self.codecs
 
  def validate(
- self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: ChunkGrid
+ self, *, shape: ChunkCoords, dtype: ZDType, chunk_grid: ChunkGrid
  ) -> None:
  raise NotImplementedError("validate")
 
@@ -184,7 +201,7 @@ class ZarrsCodecPipeline(CodecPipeline):
  out: NDArrayLike = out.as_ndarray_like()
  await asyncio.to_thread(
  self.impl.retrieve_chunks_and_apply_index,
- chunks_desc,
+ chunks_desc.chunk_info_with_indices,
  out,
  )
  return None
@@ -223,7 +240,10 @@ class ZarrsCodecPipeline(CodecPipeline):
  elif not value_np.flags.c_contiguous:
  value_np = np.ascontiguousarray(value_np)
  await asyncio.to_thread(
- self.impl.store_chunks_with_indices, chunks_desc, value_np
+ self.impl.store_chunks_with_indices,
+ chunks_desc.chunk_info_with_indices,
+ value_np,
+ chunks_desc.write_empty_chunks,
  )
  return None
 
@@ -236,7 +256,7 @@ class ZarrsCodecPipeline(CodecPipeline):
  # https://github.com/LDeakin/zarrs/blob/0532fe983b7b42b59dbf84e50a2fe5e6f7bad4ce/zarrs_metadata/src/v2_to_v3.rs#L289-L293 for VSUMm
  # Further, our pipeline does not support variable-length objects due to limitations on decode_into, so object/np.dtypes.StringDType is also out
  if any(
- info.dtype.kind in {"V", "S", "U", "M", "m", "O", "T"}
+ info.dtype.to_native_dtype().kind in {"V", "S", "U", "M", "m", "O", "T"}
  for (_, info, _, _, _) in batch_info
  ):
  raise UnsupportedDataTypeError()
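
The pipeline.py changes above replace the codec-JSON constructor with one driven by array metadata plus a store: `from_array_metadata_and_store` builds the Rust `CodecPipelineImpl` (falling back to `None` with a `UserWarning` when the array is unsupported), while `from_codecs` now simply returns a `BatchedCodecPipeline`. Below is a hypothetical sketch of exercising the new constructor directly; zarr-python >= 3.1 is expected to call it itself when the pipeline is configured, and the store path, shape, chunking, and dtype are illustrative only:

```python
import zarr
from zarr.storage import LocalStore

from zarrs import ZarrsCodecPipeline

# Illustrative array on a local filesystem store.
store = LocalStore("example.zarr")
arr = zarr.create_array(
    store=store, shape=(100, 100), chunks=(10, 10), dtype="float32"
)

# Build the pipeline from the array's metadata and its store. `impl` is the
# Rust CodecPipelineImpl, or None (after a UserWarning) when the array is
# unsupported, in which case operations fall back to the pure-Python pipeline.
pipeline = ZarrsCodecPipeline.from_array_metadata_and_store(arr.metadata, store)
print(pipeline.impl is not None)
```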

{zarrs-0.1.5 → zarrs-0.2.0}/python/zarrs/utils.py
@@ -2,13 +2,13 @@ from __future__ import annotations
 
  import operator
  import os
+ from dataclasses import dataclass
  from functools import reduce
  from typing import TYPE_CHECKING, Any
 
  import numpy as np
  from zarr.core.array_spec import ArraySpec
  from zarr.core.indexing import SelectorTuple, is_integer
- from zarr.core.metadata.v2 import _default_fill_value
 
  from zarrs._internal import Basic, WithSubset
 
@@ -17,6 +17,7 @@ if TYPE_CHECKING:
  from types import EllipsisType
 
  from zarr.abc.store import ByteGetter, ByteSetter
+ from zarr.dtype import ZDType
 
 
  # adapted from https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
@@ -139,12 +140,18 @@ def get_shape_for_selector(
  return resulting_shape_from_index(shape, selector_tuple, drop_axes, pad=pad)
 
 
- def get_implicit_fill_value(dtype: np.dtype, fill_value: Any) -> Any:
+ def get_implicit_fill_value(dtype: ZDType, fill_value: Any) -> Any:
  if fill_value is None:
- fill_value = _default_fill_value(dtype)
+ fill_value = dtype.default_scalar()
  return fill_value
 
 
+ @dataclass(frozen=True)
+ class RustChunkInfo:
+ chunk_info_with_indices: list[WithSubset]
+ write_empty_chunks: bool
+
+
  def make_chunk_info_for_rust_with_indices(
  batch_info: Iterable[
  tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]
@@ -154,6 +161,7 @@ def make_chunk_info_for_rust_with_indices(
  ) -> list[WithSubset]:
  shape = shape if shape else (1,) # constant array
  chunk_info_with_indices: list[WithSubset] = []
+ write_empty_chunks: bool = True
  for (
  byte_getter,
  chunk_spec,
@@ -161,6 +169,7 @@ def make_chunk_info_for_rust_with_indices(
  out_selection,
  _,
  ) in batch_info:
+ write_empty_chunks = chunk_spec.config.write_empty_chunks
  if chunk_spec.fill_value is None:
  chunk_spec = ArraySpec(
  chunk_spec.shape,
@@ -193,4 +202,4 @@ def make_chunk_info_for_rust_with_indices(
  shape=shape,
  )
  )
- return chunk_info_with_indices
+ return RustChunkInfo(chunk_info_with_indices, write_empty_chunks)

{zarrs-0.1.5 → zarrs-0.2.0}/src/chunk_item.rs
@@ -14,10 +14,9 @@ use zarrs::{
  storage::StoreKey,
  };
 
- use crate::{store::StoreConfig, utils::PyErrExt};
+ use crate::utils::PyErrExt;
 
  pub(crate) trait ChunksItem {
- fn store_config(&self) -> StoreConfig;
  fn key(&self) -> &StoreKey;
  fn representation(&self) -> &ChunkRepresentation;
  }
@@ -26,7 +25,6 @@ pub(crate) trait ChunksItem {
  #[gen_stub_pyclass]
  #[pyclass]
  pub(crate) struct Basic {
- store: StoreConfig,
  key: StoreKey,
  representation: ChunkRepresentation,
  }
@@ -62,12 +60,12 @@ fn fill_value_to_bytes(dtype: &str, fill_value: &Bound<'_, PyAny>) -> PyResult<V
  impl Basic {
  #[new]
  fn new(byte_interface: &Bound<'_, PyAny>, chunk_spec: &Bound<'_, PyAny>) -> PyResult<Self> {
- let store: StoreConfig = byte_interface.getattr("store")?.extract()?;
  let path: String = byte_interface.getattr("path")?.extract()?;
 
  let chunk_shape = chunk_spec.getattr("shape")?.extract()?;
  let mut dtype: String = chunk_spec
  .getattr("dtype")?
+ .call_method0("to_native_dtype")?
  .call_method0("__str__")?
  .extract()?;
  if dtype == "object" {
@@ -78,7 +76,6 @@ impl Basic {
  let fill_value: Bound<'_, PyAny> = chunk_spec.getattr("fill_value")?;
  let fill_value_bytes = fill_value_to_bytes(&dtype, &fill_value)?;
  Ok(Self {
- store,
  key: StoreKey::new(path).map_py_err::<PyValueError>()?,
  representation: get_chunk_representation(chunk_shape, &dtype, fill_value_bytes)?,
  })
@@ -117,9 +114,6 @@ impl WithSubset {
  }
 
  impl ChunksItem for Basic {
- fn store_config(&self) -> StoreConfig {
- self.store.clone()
- }
  fn key(&self) -> &StoreKey {
  &self.key
  }
@@ -129,9 +123,6 @@ impl ChunksItem for Basic {
  }
 
  impl ChunksItem for WithSubset {
- fn store_config(&self) -> StoreConfig {
- self.item.store.clone()
- }
  fn key(&self) -> &StoreKey {
  &self.item.key
  }

{zarrs-0.1.5 → zarrs-0.2.0}/src/lib.rs
@@ -20,14 +20,15 @@ use unsafe_cell_slice::UnsafeCellSlice;
  use utils::is_whole_chunk;
  use zarrs::array::codec::{
  ArrayPartialDecoderTraits, ArrayToBytesCodecTraits, CodecOptions, CodecOptionsBuilder,
+ StoragePartialDecoder,
  };
  use zarrs::array::{
- copy_fill_value_into, update_array_bytes, ArrayBytes, ArrayBytesFixedDisjointView, ArraySize,
- CodecChain, FillValue,
+ copy_fill_value_into, update_array_bytes, Array, ArrayBytes, ArrayBytesFixedDisjointView,
+ ArrayMetadata, ArraySize, CodecChain, FillValue,
  };
  use zarrs::array_subset::ArraySubset;
- use zarrs::metadata::v3::MetadataV3;
- use zarrs::storage::StoreKey;
+ use zarrs::storage::store::MemoryStore;
+ use zarrs::storage::{ReadableWritableListableStorage, StorageHandle, StoreKey};
 
  mod chunk_item;
  mod concurrency;
@@ -41,14 +42,14 @@ mod utils;
  use crate::chunk_item::ChunksItem;
  use crate::concurrency::ChunkConcurrentLimitAndCodecOptions;
  use crate::metadata_v2::codec_metadata_v2_to_v3;
- use crate::store::StoreManager;
+ use crate::store::StoreConfig;
  use crate::utils::{PyErrExt as _, PyUntypedArrayExt as _};
 
  // TODO: Use a OnceLock for store with get_or_try_init when stabilised?
  #[gen_stub_pyclass]
  #[pyclass]
  pub struct CodecPipelineImpl {
- pub(crate) stores: StoreManager,
+ pub(crate) store: ReadableWritableListableStorage,
  pub(crate) codec_chain: Arc<CodecChain>,
  pub(crate) codec_options: CodecOptions,
  pub(crate) chunk_concurrent_minimum: usize,
@@ -63,7 +64,7 @@ impl CodecPipelineImpl {
  codec_chain: &CodecChain,
  codec_options: &CodecOptions,
  ) -> PyResult<ArrayBytes<'a>> {
- let value_encoded = self.stores.get(item)?;
+ let value_encoded = self.store.get(item.key()).map_py_err::<PyRuntimeError>()?;
  let value_decoded = if let Some(value_encoded) = value_encoded {
  let value_encoded: Vec<u8> = value_encoded.into(); // zero-copy in this case
  codec_chain
@@ -94,7 +95,7 @@ impl CodecPipelineImpl {
  .map_py_err::<PyValueError>()?;
 
  if value_decoded.is_fill_value(item.representation().fill_value()) {
- self.stores.erase(item)
+ self.store.erase(item.key()).map_py_err::<PyRuntimeError>()
  } else {
  let value_encoded = codec_chain
  .encode(value_decoded, item.representation(), codec_options)
@@ -102,7 +103,9 @@ impl CodecPipelineImpl {
  .map_py_err::<PyRuntimeError>()?;
 
  // Store the encoded chunk
- self.stores.set(item, value_encoded.into())
+ self.store
+ .set(item.key(), value_encoded.into())
+ .map_py_err::<PyRuntimeError>()
  }
  }
 
@@ -204,34 +207,35 @@ impl CodecPipelineImpl {
  #[pymethods]
  impl CodecPipelineImpl {
  #[pyo3(signature = (
- metadata,
+ array_metadata,
+ store_config,
  *,
  validate_checksums=None,
- store_empty_chunks=None,
  chunk_concurrent_minimum=None,
  chunk_concurrent_maximum=None,
  num_threads=None,
  ))]
  #[new]
  fn new(
- metadata: &str,
+ array_metadata: &str,
+ store_config: StoreConfig,
  validate_checksums: Option<bool>,
- store_empty_chunks: Option<bool>,
  chunk_concurrent_minimum: Option<usize>,
  chunk_concurrent_maximum: Option<usize>,
  num_threads: Option<usize>,
  ) -> PyResult<Self> {
- let metadata: Vec<MetadataV3> =
- serde_json::from_str(metadata).map_py_err::<PyTypeError>()?;
- let codec_chain =
- Arc::new(CodecChain::from_metadata(&metadata).map_py_err::<PyTypeError>()?);
+ let metadata: ArrayMetadata =
+ serde_json::from_str(array_metadata).map_py_err::<PyTypeError>()?;
+
+ // TODO: Add a direct metadata -> codec chain method to zarrs
+ let store = Arc::new(MemoryStore::new());
+ let array = Array::new_with_metadata(store, "/", metadata).map_py_err::<PyTypeError>()?;
+ let codec_chain = Arc::new(array.codecs().clone());
+
  let mut codec_options = CodecOptionsBuilder::new();
  if let Some(validate_checksums) = validate_checksums {
  codec_options = codec_options.validate_checksums(validate_checksums);
  }
- if let Some(store_empty_chunks) = store_empty_chunks {
- codec_options = codec_options.store_empty_chunks(store_empty_chunks);
- }
  let codec_options = codec_options.build();
 
  let chunk_concurrent_minimum = chunk_concurrent_minimum
@@ -240,8 +244,11 @@
  chunk_concurrent_maximum.unwrap_or(rayon::current_num_threads());
  let num_threads = num_threads.unwrap_or(rayon::current_num_threads());
 
+ let store: ReadableWritableListableStorage =
+ (&store_config).try_into().map_py_err::<PyTypeError>()?;
+
  Ok(Self {
- stores: StoreManager::default(),
+ store,
  codec_chain,
  codec_options,
  chunk_concurrent_minimum,
@@ -281,7 +288,9 @@ impl CodecPipelineImpl {
  partial_chunk_descriptions,
  map,
  |item| {
- let input_handle = self.stores.decoder(item)?;
+ let storage_handle = Arc::new(StorageHandle::new(self.store.clone()));
+ let input_handle =
+ StoragePartialDecoder::new(storage_handle, item.key().clone());
  let partial_decoder = self
  .codec_chain
  .clone()
@@ -331,7 +340,9 @@ impl CodecPipelineImpl {
  && chunk_subset.shape() == item.representation().shape_u64()
  {
  // See zarrs::array::Array::retrieve_chunk_into
- if let Some(chunk_encoded) = self.stores.get(&item)? {
+ if let Some(chunk_encoded) =
+ self.store.get(item.key()).map_py_err::<PyRuntimeError>()?
+ {
  // Decode the encoded data into the output buffer
  let chunk_encoded: Vec<u8> = chunk_encoded.into();
  self.codec_chain.decode_into(
@@ -378,6 +389,7 @@ impl CodecPipelineImpl {
  py: Python,
  chunk_descriptions: Vec<chunk_item::WithSubset>,
  value: &Bound<'_, PyUntypedArray>,
+ write_empty_chunks: bool,
  ) -> PyResult<()> {
  enum InputValue<'a> {
  Array(ArrayBytes<'a>),
@@ -395,11 +407,12 @@ impl CodecPipelineImpl {
  let input_shape: Vec<u64> = value.shape_zarr()?;
 
  // Adjust the concurrency based on the codec chain and the first chunk description
- let Some((chunk_concurrent_limit, codec_options)) =
+ let Some((chunk_concurrent_limit, mut codec_options)) =
  chunk_descriptions.get_chunk_concurrent_limit_and_codec_options(self)?
  else {
  return Ok(());
  };
+ codec_options.set_store_empty_chunks(write_empty_chunks);
 
  py.allow_threads(move || {
  let store_chunk = |item: chunk_item::WithSubset| match &input {

{zarrs-0.1.5 → zarrs-0.2.0}/src/store/filesystem.rs
@@ -6,7 +6,7 @@ use zarrs::{filesystem::FilesystemStore, storage::ReadableWritableListableStorag
 
  use crate::utils::PyErrExt;
 
- #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+ #[derive(Debug, Clone)]
  #[gen_stub_pyclass]
  #[pyclass]
  pub struct FilesystemStoreConfig {

{zarrs-0.1.5 → zarrs-0.2.0}/src/store/http.rs
@@ -6,7 +6,7 @@ use zarrs::storage::ReadableWritableListableStorage;
 
  use super::opendal_builder_to_sync_store;
 
- #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+ #[derive(Debug, Clone)]
  #[gen_stub_pyclass]
  #[pyclass]
  pub struct HttpStoreConfig {