zarrs 0.1.5__tar.gz → 0.2.0__tar.gz
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of zarrs might be problematic (details not included in this diff).
- {zarrs-0.1.5 → zarrs-0.2.0}/Cargo.lock +15 -15
- {zarrs-0.1.5 → zarrs-0.2.0}/Cargo.toml +1 -1
- {zarrs-0.1.5 → zarrs-0.2.0}/PKG-INFO +3 -4
- {zarrs-0.1.5 → zarrs-0.2.0}/README.md +1 -2
- {zarrs-0.1.5 → zarrs-0.2.0}/pyproject.toml +2 -1
- {zarrs-0.1.5 → zarrs-0.2.0}/python/zarrs/_internal.pyi +3 -2
- {zarrs-0.1.5 → zarrs-0.2.0}/python/zarrs/pipeline.py +50 -30
- {zarrs-0.1.5 → zarrs-0.2.0}/python/zarrs/utils.py +13 -4
- {zarrs-0.1.5 → zarrs-0.2.0}/src/chunk_item.rs +2 -11
- {zarrs-0.1.5 → zarrs-0.2.0}/src/lib.rs +37 -24
- {zarrs-0.1.5 → zarrs-0.2.0}/src/store/filesystem.rs +1 -1
- {zarrs-0.1.5 → zarrs-0.2.0}/src/store/http.rs +1 -1
- {zarrs-0.1.5 → zarrs-0.2.0}/src/store.rs +1 -3
- {zarrs-0.1.5 → zarrs-0.2.0}/tests/test_codecs.py +1 -82
- {zarrs-0.1.5 → zarrs-0.2.0}/tests/test_pipeline.py +25 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/tests/test_sharding.py +0 -30
- {zarrs-0.1.5 → zarrs-0.2.0}/tests/test_transpose.py +0 -18
- zarrs-0.2.0/tests/test_v2.py +328 -0
- zarrs-0.2.0/tests/test_vlen.py +85 -0
- zarrs-0.1.5/src/store/manager.rs +0 -61
- zarrs-0.1.5/tests/test_blosc.py +0 -57
- zarrs-0.1.5/tests/test_v2.py +0 -346
- zarrs-0.1.5/tests/test_vlen.py +0 -100
- {zarrs-0.1.5 → zarrs-0.2.0}/.github/workflows/cd.yml +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/.github/workflows/ci.yml +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/.github/workflows/version-cmp.py +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/.gitignore +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/.pre-commit-config.yaml +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/.readthedocs.yml +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/LICENSE +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/docs/Makefile +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/docs/conf.py +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/docs/contributing.md +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/docs/index.md +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/docs/make.bat +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/hatch.toml +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/python/zarrs/__init__.py +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/python/zarrs/py.typed +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/src/bin/stub_gen.rs +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/src/concurrency.rs +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/src/metadata_v2.rs +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/src/runtime.rs +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/src/tests.rs +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/src/utils.rs +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/tests/conftest.py +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/tests/test_endian.py +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/tests/test_gzip.py +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/tests/test_version.py +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/tests/test_zarrs_http.py +0 -0
- {zarrs-0.1.5 → zarrs-0.2.0}/tests/test_zstd.py +0 -0
{zarrs-0.1.5 → zarrs-0.2.0}/Cargo.lock

@@ -244,9 +244,9 @@ dependencies = [
 
 [[package]]
 name = "cc"
-version = "1.2.
+version = "1.2.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "
+checksum = "5c1599538de2394445747c8cf7935946e3cc27e9625f889d979bfb2aaf569362"
 dependencies = [
  "jobserver",
  "libc",

@@ -305,9 +305,9 @@ dependencies = [
 
 [[package]]
 name = "crc32fast"
-version = "1.
+version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "
+checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
 dependencies = [
  "cfg-if",
 ]

@@ -759,9 +759,9 @@ dependencies = [
 
 [[package]]
 name = "hyper-util"
-version = "0.1.
+version = "0.1.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "
+checksum = "7f66d5bd4c6f02bf0542fad85d626775bab9258cf795a4256dcaf3161114d1df"
 dependencies = [
  "base64",
  "bytes",

@@ -1940,9 +1940,9 @@ dependencies = [
 
 [[package]]
 name = "rustls"
-version = "0.23.
+version = "0.23.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "
+checksum = "2491382039b29b9b11ff08b76ff6c97cf287671dbb74f0be44bda389fffe9bd1"
 dependencies = [
  "once_cell",
  "ring",

@@ -1964,9 +1964,9 @@ dependencies = [
 
 [[package]]
 name = "rustls-webpki"
-version = "0.103.
+version = "0.103.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "
+checksum = "0a17884ae0c1b773f1ccd2bd4a8c72f16da897310a98b0e84bf349ad5ead92fc"
 dependencies = [
  "ring",
  "rustls-pki-types",

@@ -2258,9 +2258,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 
 [[package]]
 name = "tokio"
-version = "1.46.
+version = "1.46.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "
+checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17"
 dependencies = [
  "backtrace",
  "bytes",

@@ -2893,9 +2893,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 
 [[package]]
 name = "winnow"
-version = "0.7.
+version = "0.7.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "
+checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95"
 dependencies = [
  "memchr",
 ]

@@ -2984,7 +2984,7 @@ dependencies = [
 
 [[package]]
 name = "zarrs-python"
-version = "0.
+version = "0.2.0"
 dependencies = [
  "itertools 0.14.0",
  "numpy",
{zarrs-0.1.5 → zarrs-0.2.0}/PKG-INFO

@@ -1,12 +1,12 @@
 Metadata-Version: 2.3
 Name: zarrs
-Version: 0.
+Version: 0.2.0
 Classifier: Programming Language :: Rust
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
 Classifier: Typing :: Typed
 Requires-Dist: numpy >=1.24
-Requires-Dist: zarr >=3.
+Requires-Dist: zarr >=3.1
 License-File: LICENSE
 Author: Ilan Gold, Lachlan Deakin, Philipp Angerer
 License: MIT

@@ -73,14 +73,13 @@ zarr.config.set({
     "codec_pipeline": {
         "path": "zarrs.ZarrsCodecPipeline",
         "validate_checksums": True,
-        "store_empty_chunks": False,
         "chunk_concurrent_maximum": None,
         "chunk_concurrent_minimum": 4,
     }
 })
 ```
 
-If the `ZarrsCodecPipeline` is pickled, and then un-pickled, and during that time one of `
+If the `ZarrsCodecPipeline` is pickled, and then un-pickled, and during that time one of `chunk_concurrent_minimum`, `chunk_concurrent_maximum`, or `num_threads` has changed, the newly un-pickled version will pick up the new value. However, once a `ZarrsCodecPipeline` object has been instantiated, these values are then fixed. This may change in the future as guidance from the `zarr` community becomes clear.
 
 ## Concurrency
 
{zarrs-0.1.5 → zarrs-0.2.0}/README.md

@@ -58,14 +58,13 @@ zarr.config.set({
     "codec_pipeline": {
         "path": "zarrs.ZarrsCodecPipeline",
         "validate_checksums": True,
-        "store_empty_chunks": False,
         "chunk_concurrent_maximum": None,
         "chunk_concurrent_minimum": 4,
     }
 })
 ```
 
-If the `ZarrsCodecPipeline` is pickled, and then un-pickled, and during that time one of `
+If the `ZarrsCodecPipeline` is pickled, and then un-pickled, and during that time one of `chunk_concurrent_minimum`, `chunk_concurrent_maximum`, or `num_threads` has changed, the newly un-pickled version will pick up the new value. However, once a `ZarrsCodecPipeline` object has been instantiated, these values are then fixed. This may change in the future as guidance from the `zarr` community becomes clear.
 
 ## Concurrency
 
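For reference, a minimal sketch of the configuration block as it stands after this release; it assumes zarr-python >= 3.1. The `store_empty_chunks` key is gone from the `codec_pipeline` section, and empty-chunk handling now follows zarr's own `array.write_empty_chunks` setting, which the 0.2.0 pipeline forwards to Rust per write batch (see the `utils.py` and `lib.rs` hunks further down).

```python
import zarr

# Sketch of the 0.2.0 configuration from the README diff above; the removed
# "store_empty_chunks" key is intentionally absent. Empty-chunk behaviour is
# now controlled by zarr's own "array.write_empty_chunks" setting instead.
zarr.config.set({
    "codec_pipeline": {
        "path": "zarrs.ZarrsCodecPipeline",
        "validate_checksums": True,
        "chunk_concurrent_maximum": None,
        "chunk_concurrent_minimum": 4,
    }
})
```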
{zarrs-0.1.5 → zarrs-0.2.0}/pyproject.toml

@@ -20,7 +20,7 @@ classifiers = [
 dynamic = ["version"]
 dependencies = [
     "numpy>=1.24",
-    "zarr>=3.
+    "zarr>=3.1",
 ]
 
 [dependency-groups]

@@ -31,6 +31,7 @@ test = [
     "pytest",
     "pytest-asyncio",
     "pytest-xdist",
+    "pytest-mock",
 ]
 doc = ["sphinx>=7.4.6", "myst-parser"]
 dev = [
{zarrs-0.1.5 → zarrs-0.2.0}/python/zarrs/_internal.pyi

@@ -13,10 +13,10 @@ class Basic:
 class CodecPipelineImpl:
     def __new__(
         cls,
-
+        array_metadata: builtins.str,
+        store_config: StoreConfig,
         *,
         validate_checksums: builtins.bool | None = None,
-        store_empty_chunks: builtins.bool | None = None,
         chunk_concurrent_minimum: builtins.int | None = None,
         chunk_concurrent_maximum: builtins.int | None = None,
         num_threads: builtins.int | None = None,

@@ -30,6 +30,7 @@ class CodecPipelineImpl:
         self,
         chunk_descriptions: typing.Sequence[WithSubset],
         value: numpy.typing.NDArray[typing.Any],
+        write_empty_chunks: builtins.bool,
     ) -> None: ...
 
 class FilesystemStoreConfig:
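To make the new stub signature concrete, here is a hedged sketch of driving the native constructor directly, mirroring how `get_codec_pipeline_impl` in `pipeline.py` calls it later in this diff. The temporary array and `LocalStore` are illustrative assumptions; in normal use `ZarrsCodecPipeline` performs this construction for you.

```python
import json

import zarr
from zarrs._internal import CodecPipelineImpl

# Illustrative setup (not part of this diff): a small on-disk array provides
# real metadata and a store to feed the new constructor.
store = zarr.storage.LocalStore("example.zarr")
arr = zarr.create_array(store=store, shape=(8, 8), chunks=(4, 4), dtype="float32")

impl = CodecPipelineImpl(
    json.dumps(arr.metadata.to_dict()),  # array metadata now travels as JSON
    store_config=store,                  # the store is handed over up front
    validate_checksums=True,
    chunk_concurrent_minimum=4,
    chunk_concurrent_maximum=None,
    num_threads=None,
)
# store_chunks_with_indices now takes write_empty_chunks per call instead of a
# pipeline-wide store_empty_chunks option.
```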
{zarrs-0.1.5 → zarrs-0.2.0}/python/zarrs/pipeline.py

@@ -2,25 +2,28 @@ from __future__ import annotations
 
 import asyncio
 import json
-import re
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, TypedDict
+from warnings import warn
 
 import numpy as np
 from zarr.abc.codec import Codec, CodecPipeline
+from zarr.codecs._v2 import V2Codec
 from zarr.core import BatchedCodecPipeline
 from zarr.core.config import config
+from zarr.core.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata
 
 if TYPE_CHECKING:
     from collections.abc import Generator, Iterable, Iterator
     from typing import Any, Self
 
-    from zarr.abc.store import ByteGetter, ByteSetter
+    from zarr.abc.store import ByteGetter, ByteSetter, Store
     from zarr.core.array_spec import ArraySpec
     from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer
     from zarr.core.chunk_grids import ChunkGrid
     from zarr.core.common import ChunkCoords
     from zarr.core.indexing import SelectorTuple
+    from zarr.dtype import ZDType
 
 from ._internal import CodecPipelineImpl, codec_metadata_v2_to_v3
 from .utils import (

@@ -39,12 +42,15 @@ class UnsupportedMetadataError(Exception):
     pass
 
 
-def get_codec_pipeline_impl(
+def get_codec_pipeline_impl(
+    metadata: ArrayMetadata, store: Store
+) -> CodecPipelineImpl | None:
     try:
+        array_metadata_json = json.dumps(metadata.to_dict())
         return CodecPipelineImpl(
-
+            array_metadata_json,
+            store_config=store,
             validate_checksums=config.get("codec_pipeline.validate_checksums", None),
-            store_empty_chunks=config.get("array.write_empty_chunks", None),
             chunk_concurrent_minimum=config.get(
                 "codec_pipeline.chunk_concurrent_minimum", None
             ),

@@ -54,10 +60,11 @@ def get_codec_pipeline_impl(codec_metadata_json: str) -> CodecPipelineImpl | Non
             num_threads=config.get("threading.max_workers", None),
         )
     except TypeError as e:
-
-
-
-
+        warn(
+            f"Array is unsupported by ZarrsCodecPipeline: {e}",
+            category=UserWarning,
+        )
+        return None
 
 
 def codecs_to_dict(codecs: Iterable[Codec]) -> Generator[dict[str, Any], None, None]:
@@ -88,37 +95,47 @@ class ZarrsCodecPipelineState(TypedDict):
     codecs: tuple[Codec, ...]
 
 
+def array_metadata_to_codecs(metadata: ArrayMetadata) -> list[Codec]:
+    if isinstance(metadata, ArrayV3Metadata):
+        return metadata.codecs
+    elif isinstance(metadata, ArrayV2Metadata):
+        v2_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor)
+        return [v2_codec]
+
+
 @dataclass
 class ZarrsCodecPipeline(CodecPipeline):
-
+    metadata: ArrayMetadata
+    store: Store
     impl: CodecPipelineImpl | None
-    codec_metadata_json: str
     python_impl: BatchedCodecPipeline
 
     def __getstate__(self) -> ZarrsCodecPipelineState:
-        return {"
+        return {"metadata": self.metadata, "store": self.store}
 
     def __setstate__(self, state: ZarrsCodecPipelineState):
-        self.
-        self.
-        self.impl = get_codec_pipeline_impl(self.
-
+        self.metadata = state["metadata"]
+        self.store = state["store"]
+        self.impl = get_codec_pipeline_impl(self.metadata, self.store)
+        codecs = array_metadata_to_codecs(self.metadata)
+        self.python_impl = BatchedCodecPipeline.from_codecs(codecs)
 
     def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
-
+        return self
 
     @classmethod
     def from_codecs(cls, codecs: Iterable[Codec]) -> Self:
-
-
-
-
-
-
+        return BatchedCodecPipeline.from_codecs(codecs)
+
+    @classmethod
+    def from_array_metadata_and_store(
+        cls, array_metadata: ArrayMetadata, store: Store
+    ) -> Self:
+        codecs = array_metadata_to_codecs(array_metadata)
         return cls(
-
-
-            impl=get_codec_pipeline_impl(
+            metadata=array_metadata,
+            store=store,
+            impl=get_codec_pipeline_impl(array_metadata, store),
            python_impl=BatchedCodecPipeline.from_codecs(codecs),
         )
 
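A short usage sketch of the reworked class, assuming zarr-python >= 3.1 and a local store: the pipeline is now built from array metadata plus a store via `from_array_metadata_and_store`, and pickling round-trips through that pair, with `__setstate__` rebuilding the Rust `impl`.

```python
import pickle

import zarr
from zarrs import ZarrsCodecPipeline

# Illustrative array and store (assumptions, not part of this diff).
store = zarr.storage.LocalStore("example.zarr")
arr = zarr.create_array(store=store, shape=(8, 8), chunks=(4, 4), dtype="float32")

pipeline = ZarrsCodecPipeline.from_array_metadata_and_store(arr.metadata, store)
restored = pickle.loads(pickle.dumps(pipeline))  # __setstate__ re-creates the Rust impl
```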
@@ -134,7 +151,7 @@ class ZarrsCodecPipeline(CodecPipeline):
         yield from self.codecs
 
     def validate(
-        self, *, shape: ChunkCoords, dtype:
+        self, *, shape: ChunkCoords, dtype: ZDType, chunk_grid: ChunkGrid
     ) -> None:
         raise NotImplementedError("validate")
 

@@ -184,7 +201,7 @@ class ZarrsCodecPipeline(CodecPipeline):
             out: NDArrayLike = out.as_ndarray_like()
             await asyncio.to_thread(
                 self.impl.retrieve_chunks_and_apply_index,
-                chunks_desc,
+                chunks_desc.chunk_info_with_indices,
                 out,
             )
             return None

@@ -223,7 +240,10 @@ class ZarrsCodecPipeline(CodecPipeline):
         elif not value_np.flags.c_contiguous:
             value_np = np.ascontiguousarray(value_np)
         await asyncio.to_thread(
-            self.impl.store_chunks_with_indices,
+            self.impl.store_chunks_with_indices,
+            chunks_desc.chunk_info_with_indices,
+            value_np,
+            chunks_desc.write_empty_chunks,
         )
         return None
 

@@ -236,7 +256,7 @@ class ZarrsCodecPipeline(CodecPipeline):
         # https://github.com/LDeakin/zarrs/blob/0532fe983b7b42b59dbf84e50a2fe5e6f7bad4ce/zarrs_metadata/src/v2_to_v3.rs#L289-L293 for VSUMm
         # Further, our pipeline does not support variable-length objects due to limitations on decode_into, so object/np.dtypes.StringDType is also out
         if any(
-            info.dtype.kind in {"V", "S", "U", "M", "m", "O", "T"}
+            info.dtype.to_native_dtype().kind in {"V", "S", "U", "M", "m", "O", "T"}
             for (_, info, _, _, _) in batch_info
         ):
             raise UnsupportedDataTypeError()
{zarrs-0.1.5 → zarrs-0.2.0}/python/zarrs/utils.py

@@ -2,13 +2,13 @@ from __future__ import annotations
 
 import operator
 import os
+from dataclasses import dataclass
 from functools import reduce
 from typing import TYPE_CHECKING, Any
 
 import numpy as np
 from zarr.core.array_spec import ArraySpec
 from zarr.core.indexing import SelectorTuple, is_integer
-from zarr.core.metadata.v2 import _default_fill_value
 
 from zarrs._internal import Basic, WithSubset
 

@@ -17,6 +17,7 @@ if TYPE_CHECKING:
     from types import EllipsisType
 
     from zarr.abc.store import ByteGetter, ByteSetter
+    from zarr.dtype import ZDType
 
 
 # adapted from https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor

@@ -139,12 +140,18 @@ def get_shape_for_selector(
     return resulting_shape_from_index(shape, selector_tuple, drop_axes, pad=pad)
 
 
-def get_implicit_fill_value(dtype:
+def get_implicit_fill_value(dtype: ZDType, fill_value: Any) -> Any:
     if fill_value is None:
-        fill_value =
+        fill_value = dtype.default_scalar()
     return fill_value
 
 
+@dataclass(frozen=True)
+class RustChunkInfo:
+    chunk_info_with_indices: list[WithSubset]
+    write_empty_chunks: bool
+
+
 def make_chunk_info_for_rust_with_indices(
     batch_info: Iterable[
         tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]

@@ -154,6 +161,7 @@ def make_chunk_info_for_rust_with_indices(
 ) -> list[WithSubset]:
     shape = shape if shape else (1,)  # constant array
     chunk_info_with_indices: list[WithSubset] = []
+    write_empty_chunks: bool = True
     for (
         byte_getter,
         chunk_spec,

@@ -161,6 +169,7 @@ def make_chunk_info_for_rust_with_indices(
         out_selection,
         _,
     ) in batch_info:
+        write_empty_chunks = chunk_spec.config.write_empty_chunks
         if chunk_spec.fill_value is None:
             chunk_spec = ArraySpec(
                 chunk_spec.shape,

@@ -193,4 +202,4 @@ def make_chunk_info_for_rust_with_indices(
                 shape=shape,
             )
         )
-    return chunk_info_with_indices
+    return RustChunkInfo(chunk_info_with_indices, write_empty_chunks)
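A tiny sketch of the new return type: the helper now hands back a frozen `RustChunkInfo` pair instead of a bare list, so the per-batch `write_empty_chunks` flag travels alongside the chunk descriptions to `store_chunks_with_indices` (as in the `pipeline.py` hunk above). The empty list below is only a placeholder; in practice it holds the `WithSubset` descriptions built by `make_chunk_info_for_rust_with_indices`.

```python
from zarrs.utils import RustChunkInfo

# Placeholder contents, for illustration only.
info = RustChunkInfo(chunk_info_with_indices=[], write_empty_chunks=False)
assert info.write_empty_chunks is False
```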
{zarrs-0.1.5 → zarrs-0.2.0}/src/chunk_item.rs

@@ -14,10 +14,9 @@ use zarrs::{
     storage::StoreKey,
 };
 
-use crate::
+use crate::utils::PyErrExt;
 
 pub(crate) trait ChunksItem {
-    fn store_config(&self) -> StoreConfig;
     fn key(&self) -> &StoreKey;
     fn representation(&self) -> &ChunkRepresentation;
 }

@@ -26,7 +25,6 @@ pub(crate) trait ChunksItem {
 #[gen_stub_pyclass]
 #[pyclass]
 pub(crate) struct Basic {
-    store: StoreConfig,
     key: StoreKey,
     representation: ChunkRepresentation,
 }

@@ -62,12 +60,12 @@ fn fill_value_to_bytes(dtype: &str, fill_value: &Bound<'_, PyAny>) -> PyResult<V
 impl Basic {
     #[new]
     fn new(byte_interface: &Bound<'_, PyAny>, chunk_spec: &Bound<'_, PyAny>) -> PyResult<Self> {
-        let store: StoreConfig = byte_interface.getattr("store")?.extract()?;
         let path: String = byte_interface.getattr("path")?.extract()?;
 
         let chunk_shape = chunk_spec.getattr("shape")?.extract()?;
         let mut dtype: String = chunk_spec
             .getattr("dtype")?
+            .call_method0("to_native_dtype")?
             .call_method0("__str__")?
             .extract()?;
         if dtype == "object" {

@@ -78,7 +76,6 @@ impl Basic {
         let fill_value: Bound<'_, PyAny> = chunk_spec.getattr("fill_value")?;
         let fill_value_bytes = fill_value_to_bytes(&dtype, &fill_value)?;
         Ok(Self {
-            store,
             key: StoreKey::new(path).map_py_err::<PyValueError>()?,
             representation: get_chunk_representation(chunk_shape, &dtype, fill_value_bytes)?,
         })

@@ -117,9 +114,6 @@ impl WithSubset {
 }
 
 impl ChunksItem for Basic {
-    fn store_config(&self) -> StoreConfig {
-        self.store.clone()
-    }
     fn key(&self) -> &StoreKey {
         &self.key
     }

@@ -129,9 +123,6 @@ impl ChunksItem for Basic {
 }
 
 impl ChunksItem for WithSubset {
-    fn store_config(&self) -> StoreConfig {
-        self.item.store.clone()
-    }
     fn key(&self) -> &StoreKey {
         &self.item.key
     }
{zarrs-0.1.5 → zarrs-0.2.0}/src/lib.rs

@@ -20,14 +20,15 @@ use unsafe_cell_slice::UnsafeCellSlice;
 use utils::is_whole_chunk;
 use zarrs::array::codec::{
     ArrayPartialDecoderTraits, ArrayToBytesCodecTraits, CodecOptions, CodecOptionsBuilder,
+    StoragePartialDecoder,
 };
 use zarrs::array::{
-    copy_fill_value_into, update_array_bytes, ArrayBytes, ArrayBytesFixedDisjointView,
-    CodecChain, FillValue,
+    copy_fill_value_into, update_array_bytes, Array, ArrayBytes, ArrayBytesFixedDisjointView,
+    ArrayMetadata, ArraySize, CodecChain, FillValue,
 };
 use zarrs::array_subset::ArraySubset;
-use zarrs::
-use zarrs::storage::StoreKey;
+use zarrs::storage::store::MemoryStore;
+use zarrs::storage::{ReadableWritableListableStorage, StorageHandle, StoreKey};
 
 mod chunk_item;
 mod concurrency;

@@ -41,14 +42,14 @@ mod utils;
 use crate::chunk_item::ChunksItem;
 use crate::concurrency::ChunkConcurrentLimitAndCodecOptions;
 use crate::metadata_v2::codec_metadata_v2_to_v3;
-use crate::store::
+use crate::store::StoreConfig;
 use crate::utils::{PyErrExt as _, PyUntypedArrayExt as _};
 
 // TODO: Use a OnceLock for store with get_or_try_init when stabilised?
 #[gen_stub_pyclass]
 #[pyclass]
 pub struct CodecPipelineImpl {
-    pub(crate)
+    pub(crate) store: ReadableWritableListableStorage,
     pub(crate) codec_chain: Arc<CodecChain>,
     pub(crate) codec_options: CodecOptions,
     pub(crate) chunk_concurrent_minimum: usize,

@@ -63,7 +64,7 @@ impl CodecPipelineImpl {
         codec_chain: &CodecChain,
         codec_options: &CodecOptions,
     ) -> PyResult<ArrayBytes<'a>> {
-        let value_encoded = self.
+        let value_encoded = self.store.get(item.key()).map_py_err::<PyRuntimeError>()?;
         let value_decoded = if let Some(value_encoded) = value_encoded {
             let value_encoded: Vec<u8> = value_encoded.into(); // zero-copy in this case
             codec_chain

@@ -94,7 +95,7 @@ impl CodecPipelineImpl {
             .map_py_err::<PyValueError>()?;
 
         if value_decoded.is_fill_value(item.representation().fill_value()) {
-            self.
+            self.store.erase(item.key()).map_py_err::<PyRuntimeError>()
         } else {
             let value_encoded = codec_chain
                 .encode(value_decoded, item.representation(), codec_options)

@@ -102,7 +103,9 @@ impl CodecPipelineImpl {
                 .map_py_err::<PyRuntimeError>()?;
 
             // Store the encoded chunk
-            self.
+            self.store
+                .set(item.key(), value_encoded.into())
+                .map_py_err::<PyRuntimeError>()
         }
     }
 
@@ -204,34 +207,35 @@ impl CodecPipelineImpl {
 #[pymethods]
 impl CodecPipelineImpl {
     #[pyo3(signature = (
-
+        array_metadata,
+        store_config,
         *,
         validate_checksums=None,
-        store_empty_chunks=None,
         chunk_concurrent_minimum=None,
         chunk_concurrent_maximum=None,
         num_threads=None,
     ))]
     #[new]
     fn new(
-
+        array_metadata: &str,
+        store_config: StoreConfig,
         validate_checksums: Option<bool>,
-        store_empty_chunks: Option<bool>,
         chunk_concurrent_minimum: Option<usize>,
         chunk_concurrent_maximum: Option<usize>,
         num_threads: Option<usize>,
     ) -> PyResult<Self> {
-        let metadata:
-            serde_json::from_str(
-
-
+        let metadata: ArrayMetadata =
+            serde_json::from_str(array_metadata).map_py_err::<PyTypeError>()?;
+
+        // TODO: Add a direct metadata -> codec chain method to zarrs
+        let store = Arc::new(MemoryStore::new());
+        let array = Array::new_with_metadata(store, "/", metadata).map_py_err::<PyTypeError>()?;
+        let codec_chain = Arc::new(array.codecs().clone());
+
         let mut codec_options = CodecOptionsBuilder::new();
         if let Some(validate_checksums) = validate_checksums {
             codec_options = codec_options.validate_checksums(validate_checksums);
         }
-        if let Some(store_empty_chunks) = store_empty_chunks {
-            codec_options = codec_options.store_empty_chunks(store_empty_chunks);
-        }
         let codec_options = codec_options.build();
 
         let chunk_concurrent_minimum = chunk_concurrent_minimum

@@ -240,8 +244,11 @@ impl CodecPipelineImpl {
             chunk_concurrent_maximum.unwrap_or(rayon::current_num_threads());
         let num_threads = num_threads.unwrap_or(rayon::current_num_threads());
 
+        let store: ReadableWritableListableStorage =
+            (&store_config).try_into().map_py_err::<PyTypeError>()?;
+
         Ok(Self {
-
+            store,
             codec_chain,
             codec_options,
             chunk_concurrent_minimum,

@@ -281,7 +288,9 @@ impl CodecPipelineImpl {
             partial_chunk_descriptions,
             map,
             |item| {
-                let
+                let storage_handle = Arc::new(StorageHandle::new(self.store.clone()));
+                let input_handle =
+                    StoragePartialDecoder::new(storage_handle, item.key().clone());
                 let partial_decoder = self
                     .codec_chain
                     .clone()

@@ -331,7 +340,9 @@ impl CodecPipelineImpl {
                 && chunk_subset.shape() == item.representation().shape_u64()
             {
                 // See zarrs::array::Array::retrieve_chunk_into
-                if let Some(chunk_encoded) =
+                if let Some(chunk_encoded) =
+                    self.store.get(item.key()).map_py_err::<PyRuntimeError>()?
+                {
                     // Decode the encoded data into the output buffer
                     let chunk_encoded: Vec<u8> = chunk_encoded.into();
                     self.codec_chain.decode_into(

@@ -378,6 +389,7 @@ impl CodecPipelineImpl {
         py: Python,
         chunk_descriptions: Vec<chunk_item::WithSubset>,
         value: &Bound<'_, PyUntypedArray>,
+        write_empty_chunks: bool,
     ) -> PyResult<()> {
         enum InputValue<'a> {
             Array(ArrayBytes<'a>),

@@ -395,11 +407,12 @@ impl CodecPipelineImpl {
         let input_shape: Vec<u64> = value.shape_zarr()?;
 
         // Adjust the concurrency based on the codec chain and the first chunk description
-        let Some((chunk_concurrent_limit, codec_options)) =
+        let Some((chunk_concurrent_limit, mut codec_options)) =
             chunk_descriptions.get_chunk_concurrent_limit_and_codec_options(self)?
         else {
             return Ok(());
         };
+        codec_options.set_store_empty_chunks(write_empty_chunks);
 
         py.allow_threads(move || {
             let store_chunk = |item: chunk_item::WithSubset| match &input {
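The user-visible effect of this plumbing, sketched under the assumption that the `ZarrsCodecPipeline` is configured as in the README above and a local store is used: with zarr's `array.write_empty_chunks` setting left at (or set to) `False`, chunks whose contents equal the fill value are not stored, and an existing chunk key is erased when the chunk becomes all fill value.

```python
import zarr

# Assumed setup: zarrs.ZarrsCodecPipeline registered via zarr.config as shown above.
zarr.config.set({"array.write_empty_chunks": False})

store = zarr.storage.LocalStore("example_empty.zarr")
arr = zarr.create_array(
    store=store, shape=(8, 8), chunks=(4, 4), dtype="float32", fill_value=0.0
)
arr[:4, :4] = 0.0  # all fill value: no chunk key is written for this chunk
arr[4:, 4:] = 1.0  # real data: this chunk is encoded and stored
```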
{zarrs-0.1.5 → zarrs-0.2.0}/src/store/filesystem.rs

@@ -6,7 +6,7 @@ use zarrs::{filesystem::FilesystemStore, storage::ReadableWritableListableStorag
 
 use crate::utils::PyErrExt;
 
-#[derive(Debug, Clone
+#[derive(Debug, Clone)]
 #[gen_stub_pyclass]
 #[pyclass]
 pub struct FilesystemStoreConfig {