zarrs 0.1.0__cp311-abi3-win_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of zarrs might be problematic. Click here for more details.
- zarrs/__init__.py +20 -0
- zarrs/_internal.pyd +0 -0
- zarrs/_internal.pyi +47 -0
- zarrs/pipeline.py +155 -0
- zarrs/py.typed +0 -0
- zarrs/utils.py +185 -0
- zarrs-0.1.0.dist-info/METADATA +159 -0
- zarrs-0.1.0.dist-info/RECORD +10 -0
- zarrs-0.1.0.dist-info/WHEEL +4 -0
- zarrs-0.1.0.dist-info/licenses/LICENSE +21 -0
zarrs/__init__.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Public package interface for ``zarrs``.

Importing this package registers the Rust-backed ``ZarrsCodecPipeline``
with zarr-python's codec-pipeline registry.
"""

from zarr.registry import register_pipeline

from ._internal import __version__
from .pipeline import ZarrsCodecPipeline as _ZarrsCodecPipeline
from .utils import CollapsedDimensionError, DiscontiguousArrayError


# Need to do this redirection so people can access the pipeline as `zarrs.ZarrsCodecPipeline` instead of `zarrs.pipeline.ZarrsCodecPipeline`
class ZarrsCodecPipeline(_ZarrsCodecPipeline):
    """Re-export of ``zarrs.pipeline.ZarrsCodecPipeline`` at the package root."""

    pass


# Make the pipeline discoverable via zarr's `codec_pipeline.path` config setting.
register_pipeline(ZarrsCodecPipeline)

__all__ = [
    "ZarrsCodecPipeline",
    "DiscontiguousArrayError",
    "CollapsedDimensionError",
    "__version__",
]
|
zarrs/_internal.pyd
ADDED
|
Binary file
|
zarrs/_internal.pyi
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# This file is automatically generated by pyo3_stub_gen
|
|
2
|
+
# ruff: noqa: E501, F401
|
|
3
|
+
|
|
4
|
+
import typing
|
|
5
|
+
|
|
6
|
+
import numpy
|
|
7
|
+
import numpy.typing
|
|
8
|
+
|
|
9
|
+
class CodecPipelineImpl:
    """Type stub for the Rust-side codec pipeline (the `_internal` extension).

    Chunk descriptions are primitive tuples of
    ``(store_path, chunk_shape, dtype_str, fill_value_bytes)``, optionally
    paired with output-selection and chunk-selection slice lists.

    Fix: the ``value`` parameters were annotated ``numpy.NDArray`` — a name
    that does not exist in the ``numpy`` namespace. They now use
    ``numpy.typing.NDArray``, consistent with the ``retrieve_chunks`` return
    annotation in this same stub.
    """

    def __new__(
        cls,
        metadata,
        *,
        validate_checksums=...,
        store_empty_chunks=...,
        chunk_concurrent_minimum=...,
        chunk_concurrent_maximum=...,
        num_threads=...,
    ): ...
    def retrieve_chunks_and_apply_index(
        self,
        chunk_descriptions: typing.Sequence[
            tuple[
                tuple[str, typing.Sequence[int], str, typing.Sequence[int]],
                typing.Sequence[slice],
                typing.Sequence[slice],
            ]
        ],
        value: numpy.typing.NDArray[typing.Any],
    ) -> None: ...
    def retrieve_chunks(
        self,
        chunk_descriptions: typing.Sequence[
            tuple[str, typing.Sequence[int], str, typing.Sequence[int]]
        ],
    ) -> list[numpy.typing.NDArray[numpy.uint8]]: ...
    def store_chunks_with_indices(
        self,
        chunk_descriptions: typing.Sequence[
            tuple[
                tuple[str, typing.Sequence[int], str, typing.Sequence[int]],
                typing.Sequence[slice],
                typing.Sequence[slice],
            ]
        ],
        value: numpy.typing.NDArray[typing.Any],
    ) -> None: ...
|
zarrs/pipeline.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
from zarr.abc.codec import (
|
|
10
|
+
Codec,
|
|
11
|
+
CodecPipeline,
|
|
12
|
+
)
|
|
13
|
+
from zarr.core.config import config
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from collections.abc import Iterable, Iterator
|
|
17
|
+
from typing import Self
|
|
18
|
+
|
|
19
|
+
from zarr.abc.store import ByteGetter, ByteSetter
|
|
20
|
+
from zarr.core.array_spec import ArraySpec
|
|
21
|
+
from zarr.core.buffer import Buffer, NDBuffer
|
|
22
|
+
from zarr.core.chunk_grids import ChunkGrid
|
|
23
|
+
from zarr.core.common import ChunkCoords
|
|
24
|
+
from zarr.core.indexing import SelectorTuple
|
|
25
|
+
|
|
26
|
+
from ._internal import CodecPipelineImpl
|
|
27
|
+
from .utils import (
|
|
28
|
+
CollapsedDimensionError,
|
|
29
|
+
DiscontiguousArrayError,
|
|
30
|
+
make_chunk_info_for_rust,
|
|
31
|
+
make_chunk_info_for_rust_with_indices,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(frozen=True)
class ZarrsCodecPipeline(CodecPipeline):
    """Codec pipeline backed by the Rust ``zarrs`` crate via ``CodecPipelineImpl``.

    Implements the batched ``read``/``write`` entry points by handing
    primitive chunk descriptions to the Rust side; the per-chunk
    ``decode``/``encode`` methods are intentionally unimplemented.

    Fixes over the original:
    - ``from_codecs`` consumed the ``codecs`` iterable twice, which produced
      an empty ``codecs`` tuple when a one-shot iterator was passed.
    - ``read`` iterated ``batch_info`` up to three times (slice conversion,
      fallback conversion after an exception, and the final ``zip``), which
      silently dropped chunks when a one-shot iterator was passed.
    """

    codecs: tuple[Codec, ...]  # codec objects this pipeline was constructed from
    impl: CodecPipelineImpl  # handle to the Rust-side pipeline

    def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
        raise NotImplementedError("evolve_from_array_spec")

    @classmethod
    def from_codecs(cls, codecs: Iterable[Codec]) -> Self:
        """Construct the pipeline from zarr codec objects.

        Serializes the codec metadata to JSON for the Rust side and reads
        runtime options from ``zarr.config``.
        """
        # Materialize first: `codecs` may be a one-shot iterator, and it is
        # consumed twice below (metadata extraction and the `codecs=` field).
        codecs = tuple(codecs)
        codec_metadata = [codec.to_dict() for codec in codecs]
        codec_metadata_json = json.dumps(codec_metadata)
        # TODO: upstream zarr-python has not settled on how to deal with configs yet
        # Should they be checked when an array is created, or when an operation is performed?
        # https://github.com/zarr-developers/zarr-python/issues/2409
        # https://github.com/zarr-developers/zarr-python/pull/2429
        return cls(
            codecs=codecs,
            impl=CodecPipelineImpl(
                codec_metadata_json,
                validate_checksums=config.get(
                    "codec_pipeline.validate_checksums", None
                ),
                # TODO: upstream zarr-python array.write_empty_chunks is not merged yet #2429
                store_empty_chunks=config.get("array.write_empty_chunks", None),
                chunk_concurrent_minimum=config.get(
                    "codec_pipeline.chunk_concurrent_minimum", None
                ),
                chunk_concurrent_maximum=config.get(
                    "codec_pipeline.chunk_concurrent_maximum", None
                ),
                num_threads=config.get("threading.max_workers", None),
            ),
        )

    @property
    def supports_partial_decode(self) -> bool:
        return False

    @property
    def supports_partial_encode(self) -> bool:
        return False

    def __iter__(self) -> Iterator[Codec]:
        yield from self.codecs

    def validate(
        self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: ChunkGrid
    ) -> None:
        raise NotImplementedError("validate")

    def compute_encoded_size(self, byte_length: int, array_spec: ArraySpec) -> int:
        raise NotImplementedError("compute_encoded_size")

    async def decode(
        self,
        chunk_bytes_and_specs: Iterable[tuple[Buffer | None, ArraySpec]],
    ) -> Iterable[NDBuffer | None]:
        raise NotImplementedError("decode")

    async def encode(
        self,
        chunk_arrays_and_specs: Iterable[tuple[NDBuffer | None, ArraySpec]],
    ) -> Iterable[Buffer | None]:
        raise NotImplementedError("encode")

    async def read(
        self,
        batch_info: Iterable[
            tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]
        ],
        out: NDBuffer,
        drop_axes: tuple[int, ...] = (),  # FIXME: unused
    ) -> None:
        """Read the described chunks into ``out``.

        Prefers applying the selections on the Rust side; falls back to
        fetching whole chunks and indexing in Python when the selections
        cannot be expressed as contiguous slices.
        """
        out = out.as_ndarray_like()  # FIXME: Error if array is not in host memory
        if not out.dtype.isnative:
            raise RuntimeError("Non-native byte order not supported")
        # Materialize: `batch_info` may be a one-shot iterator, and it can be
        # iterated up to three times below (slice conversion, the fallback
        # conversion after a conversion error, and the final `zip`).
        batch_info = tuple(batch_info)
        try:
            chunks_desc = make_chunk_info_for_rust_with_indices(batch_info, drop_axes)
            index_in_rust = True
        except (DiscontiguousArrayError, CollapsedDimensionError):
            chunks_desc = make_chunk_info_for_rust(batch_info)
            index_in_rust = False
        if index_in_rust:
            await asyncio.to_thread(
                self.impl.retrieve_chunks_and_apply_index,
                chunks_desc,
                out,
            )
            return None
        # Fallback: whole chunks were fetched; apply the selections in Python.
        chunks = await asyncio.to_thread(self.impl.retrieve_chunks, chunks_desc)
        for chunk, chunk_info in zip(chunks, batch_info):
            out_selection = chunk_info[3]
            selection = chunk_info[2]
            spec = chunk_info[1]
            chunk_reshaped = chunk.view(spec.dtype).reshape(spec.shape)
            chunk_selected = chunk_reshaped[selection]
            if drop_axes:
                chunk_selected = np.squeeze(chunk_selected, axis=drop_axes)
            out[out_selection] = chunk_selected

    async def write(
        self,
        batch_info: Iterable[
            tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
        ],
        value: NDBuffer,
        drop_axes: tuple[int, ...] = (),
    ) -> None:
        """Write ``value`` into the described chunks via the Rust pipeline."""
        value = value.as_ndarray_like()  # FIXME: Error if array is not in host memory
        # The Rust side needs native byte order and C-contiguous memory.
        if not value.dtype.isnative:
            value = np.ascontiguousarray(value, dtype=value.dtype.newbyteorder("="))
        elif not value.flags.c_contiguous:
            value = np.ascontiguousarray(value)
        chunks_desc = make_chunk_info_for_rust_with_indices(batch_info, drop_axes)
        await asyncio.to_thread(
            self.impl.store_chunks_with_indices,
            chunks_desc,
            value,
        )
        return None
|
zarrs/py.typed
ADDED
|
File without changes
|
zarrs/utils.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import operator
|
|
4
|
+
import os
|
|
5
|
+
from functools import reduce
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
from zarr.core.indexing import SelectorTuple, is_integer
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from collections.abc import Iterable
|
|
13
|
+
from types import EllipsisType
|
|
14
|
+
|
|
15
|
+
from zarr.abc.store import ByteGetter, ByteSetter
|
|
16
|
+
from zarr.core.array_spec import ArraySpec
|
|
17
|
+
from zarr.core.common import ChunkCoords
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# adapted from https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
|
|
21
|
+
def get_max_threads() -> int:
    """Upper bound on worker threads: the CPU count (at least 1) plus 4."""
    cpu_count = os.cpu_count()
    if cpu_count is None:
        cpu_count = 1
    return cpu_count + 4
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class DiscontiguousArrayError(Exception):
    """Raised when a selection cannot be represented as contiguous slices
    (an integer-array selection with gaps, or a slice with step > 1)."""

    pass
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class CollapsedDimensionError(Exception):
    """Raised when converting a selection to slices changes the number of
    selected elements (a dimension was collapsed by the conversion)."""

    pass
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# This is a (mostly) copy of the function from zarr.core.indexing that fixes:
|
|
34
|
+
# DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated
|
|
35
|
+
# TODO: Upstream this fix
|
|
36
|
+
def make_slice_selection(selection: tuple[np.ndarray | float]) -> list[slice]:
    """Convert a per-dimension selection into a list of contiguous slices.

    Integer selections become single-element slices; integer ``np.ndarray``
    selections become the slice spanning their first through last element.
    Other selectors (e.g. slices) pass through unchanged.

    Raises:
        DiscontiguousArrayError: if an array selection contains step sizes
            other than 0 and 1 and so cannot be one contiguous slice.
    """
    ls: list[slice] = []
    for dim_selection in selection:
        if is_integer(dim_selection):
            # A scalar index selects exactly one element.
            ls.append(slice(int(dim_selection), int(dim_selection) + 1, 1))
        elif isinstance(dim_selection, np.ndarray):
            dim_selection = dim_selection.ravel()
            if len(dim_selection) == 1:
                # .item() avoids the numpy deprecation warning for scalar
                # conversion of ndim > 0 arrays (the reason this copy exists).
                ls.append(
                    slice(int(dim_selection.item()), int(dim_selection.item()) + 1, 1)
                )
            else:
                diff = np.diff(dim_selection)
                # NOTE(review): this raises only when steps both != 1 and
                # != 0 are present; an all-zero diff (a repeated index)
                # slips through and collapses to a one-element slice —
                # presumably caught later by the CollapsedDimensionError
                # size check. Confirm against callers.
                if (diff != 1).any() and (diff != 0).any():
                    raise DiscontiguousArrayError(diff)
                ls.append(slice(dim_selection[0], dim_selection[-1] + 1, 1))
        else:
            # Pass non-integer, non-array selectors through unchanged.
            ls.append(dim_selection)
    return ls
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def selector_tuple_to_slice_selection(selector_tuple: SelectorTuple) -> list[slice]:
    """Normalize a selector into a list of slices.

    A bare slice is wrapped in a list; a tuple of slices is converted
    directly; anything else is handed to ``make_slice_selection``.
    """
    if isinstance(selector_tuple, slice):
        return [selector_tuple]
    needs_conversion = any(
        not isinstance(dim_selector, slice) for dim_selector in selector_tuple
    )
    if needs_conversion:
        return make_slice_selection(selector_tuple)
    return list(selector_tuple)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def convert_chunk_to_primitive(
    byte_getter: ByteGetter | ByteSetter, chunk_spec: ArraySpec
) -> tuple[str, ChunkCoords, str, Any]:
    """Reduce a chunk's store handle and spec to primitives for the Rust side.

    Returns ``(store_path, chunk_shape, dtype_name, fill_value_bytes)``.
    """
    store_path = str(byte_getter)
    dtype_name = str(chunk_spec.dtype)
    fill_value_bytes = chunk_spec.fill_value.tobytes()
    return (store_path, chunk_spec.shape, dtype_name, fill_value_bytes)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def resulting_shape_from_index(
    array_shape: tuple[int, ...],
    index_tuple: tuple[int | slice | EllipsisType | np.ndarray],
    drop_axes: tuple[int, ...],
    *,
    pad: bool,
) -> tuple[int, ...]:
    """Compute the shape produced by applying ``index_tuple`` to an array of
    shape ``array_shape``.

    Advanced (ndarray) indices are broadcast together and placed first in
    the result; integer indices drop a dimension; slices keep a dimension
    with an adjusted extent; ``...`` expands over the remaining axes.

    Args:
        array_shape: shape of the array being indexed.
        index_tuple: one selector per dimension (int, slice, Ellipsis, or
            integer ndarray).
        drop_axes: positions in the *result* shape to omit from the return.
        pad: if True, append the sizes of trailing unindexed dimensions.

    Raises:
        DiscontiguousArrayError: for slices with step > 1 (such a slice is
            not a single contiguous run of elements).
        ValueError: for an unsupported selector type.
    """
    result_shape = []
    advanced_index_shapes = [
        idx.shape for idx in index_tuple if isinstance(idx, np.ndarray)
    ]
    basic_shape_index = 0

    # Broadcast all advanced indices, if any
    if advanced_index_shapes:
        result_shape += np.broadcast_shapes(*advanced_index_shapes)
        # Consume dimensions from array_shape
        # (assumes each advanced index consumes exactly one array dimension).
        basic_shape_index += len(advanced_index_shapes)

    # Process each remaining index in index_tuple
    for idx in index_tuple:
        if isinstance(idx, int):
            # Integer index reduces dimension, so skip this dimension in array_shape
            basic_shape_index += 1
        elif isinstance(idx, slice):
            if idx.step is not None and idx.step > 1:
                # NOTE(review): negative steps are not rejected here — TODO
                # confirm callers never pass them.
                raise DiscontiguousArrayError(
                    "Step size greater than 1 is not supported"
                )
            # Slice keeps dimension, adjust size accordingly
            start, stop, _ = idx.indices(array_shape[basic_shape_index])
            result_shape.append(stop - start)
            basic_shape_index += 1
        elif idx is Ellipsis:
            # Calculate number of dimensions that Ellipsis should fill
            num_to_fill = len(array_shape) - len(index_tuple) + 1
            result_shape += array_shape[
                basic_shape_index : basic_shape_index + num_to_fill
            ]
            basic_shape_index += num_to_fill
        elif not isinstance(idx, np.ndarray):
            raise ValueError(f"Invalid index type: {type(idx)}")

    # Step 4: Append remaining dimensions from array_shape if fewer indices were used
    if basic_shape_index < len(array_shape) and pad:
        result_shape += array_shape[basic_shape_index:]

    return tuple(size for idx, size in enumerate(result_shape) if idx not in drop_axes)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def prod_op(x: Iterable[int]) -> int:
    """Return the product of all integers in ``x`` (1 for an empty iterable)."""
    product = 1
    for factor in x:
        product *= factor
    return product
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def get_shape_for_selector(
    selector_tuple: SelectorTuple,
    shape: tuple[int, ...],
    *,
    pad: bool,
    drop_axes: tuple[int, ...] = (),
) -> tuple[int, ...]:
    """Return the result shape of applying ``selector_tuple`` to ``shape``.

    A bare slice or ndarray selector is wrapped into a one-element tuple
    before delegating to ``resulting_shape_from_index``.
    """
    if isinstance(selector_tuple, (slice, np.ndarray)):
        index_tuple = (selector_tuple,)
    else:
        index_tuple = selector_tuple
    return resulting_shape_from_index(shape, index_tuple, drop_axes, pad=pad)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def make_chunk_info_for_rust_with_indices(
    batch_info: Iterable[
        tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
    ],
    drop_axes: tuple[int, ...],
) -> list[tuple[tuple[str, ChunkCoords, str, Any], list[slice], list[slice]]]:
    """Convert a batch of chunk descriptions to the primitive form expected
    by the Rust pipeline, with per-chunk output and chunk selections
    expressed as slice lists.

    Raises:
        DiscontiguousArrayError: if a selection cannot be expressed as
            contiguous slices (propagated from the slice conversion).
        CollapsedDimensionError: if converting a chunk selection to slices
            changes the number of selected elements — i.e. the conversion
            lost (collapsed) elements and is not safe to use.
    """
    chunk_info_with_indices = []
    for byte_getter, chunk_spec, chunk_selection, out_selection in batch_info:
        chunk_info = convert_chunk_to_primitive(byte_getter, chunk_spec)
        out_selection_as_slices = selector_tuple_to_slice_selection(out_selection)
        chunk_selection_as_slices = selector_tuple_to_slice_selection(chunk_selection)
        # Compare the element count implied by the slice conversion with the
        # element count of the original selection; a mismatch means the
        # conversion collapsed elements and must not be trusted.
        shape_chunk_selection_slices = get_shape_for_selector(
            tuple(chunk_selection_as_slices),
            chunk_spec.shape,
            pad=True,
            drop_axes=drop_axes,
        )
        shape_chunk_selection = get_shape_for_selector(
            chunk_selection, chunk_spec.shape, pad=True, drop_axes=drop_axes
        )
        if prod_op(shape_chunk_selection) != prod_op(shape_chunk_selection_slices):
            raise CollapsedDimensionError(
                f"{shape_chunk_selection} != {shape_chunk_selection_slices}"
            )
        chunk_info_with_indices.append(
            (chunk_info, out_selection_as_slices, chunk_selection_as_slices)
        )
    return chunk_info_with_indices
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def make_chunk_info_for_rust(
    batch_info: Iterable[
        tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]
    ],
) -> list[tuple[str, ChunkCoords, str, Any]]:
    """Convert each chunk description to its primitive tuple form, ignoring
    the selections (used for the whole-chunk fallback path)."""
    descriptions = []
    for byte_getter, chunk_spec, _, _ in batch_info:
        descriptions.append(convert_chunk_to_primitive(byte_getter, chunk_spec))
    return descriptions
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: zarrs
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Classifier: Programming Language :: Rust
|
|
5
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
6
|
+
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
7
|
+
Classifier: Typing :: Typed
|
|
8
|
+
Requires-Dist: asciitree
|
|
9
|
+
Requires-Dist: numpy >=1.24
|
|
10
|
+
Requires-Dist: fasteners
|
|
11
|
+
Requires-Dist: numcodecs[msgpack] >=0.10.0
|
|
12
|
+
Requires-Dist: fsspec >2024
|
|
13
|
+
Requires-Dist: crc32c
|
|
14
|
+
Requires-Dist: zstandard
|
|
15
|
+
Requires-Dist: typing-extensions
|
|
16
|
+
Requires-Dist: donfig
|
|
17
|
+
Requires-Dist: pytest
|
|
18
|
+
Requires-Dist: universal-pathlib >=0.2.0
|
|
19
|
+
Requires-Dist: zarr >=3.0.0b2
|
|
20
|
+
Requires-Dist: coverage ; extra == 'test'
|
|
21
|
+
Requires-Dist: pytest ; extra == 'test'
|
|
22
|
+
Requires-Dist: pytest-cov ; extra == 'test'
|
|
23
|
+
Requires-Dist: msgpack ; extra == 'test'
|
|
24
|
+
Requires-Dist: lmdb ; extra == 'test'
|
|
25
|
+
Requires-Dist: s3fs ; extra == 'test'
|
|
26
|
+
Requires-Dist: pytest-asyncio ; extra == 'test'
|
|
27
|
+
Requires-Dist: moto[s3] ; extra == 'test'
|
|
28
|
+
Requires-Dist: flask-cors ; extra == 'test'
|
|
29
|
+
Requires-Dist: flask ; extra == 'test'
|
|
30
|
+
Requires-Dist: requests ; extra == 'test'
|
|
31
|
+
Requires-Dist: mypy ; extra == 'test'
|
|
32
|
+
Requires-Dist: hypothesis ; extra == 'test'
|
|
33
|
+
Requires-Dist: pytest-xdist ; extra == 'test'
|
|
34
|
+
Requires-Dist: maturin ; extra == 'dev'
|
|
35
|
+
Requires-Dist: pip ; extra == 'dev'
|
|
36
|
+
Requires-Dist: pre-commit ; extra == 'dev'
|
|
37
|
+
Requires-Dist: sphinx >=7.4.6 ; extra == 'doc'
|
|
38
|
+
Requires-Dist: myst-parser ; extra == 'doc'
|
|
39
|
+
Provides-Extra: test
|
|
40
|
+
Provides-Extra: dev
|
|
41
|
+
Provides-Extra: doc
|
|
42
|
+
License-File: LICENSE
|
|
43
|
+
Author: Ilan Gold, Lachlan Deakin, Philipp Angerer
|
|
44
|
+
License: MIT
|
|
45
|
+
Requires-Python: >=3.11
|
|
46
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
47
|
+
|
|
48
|
+
# zarrs-python
|
|
49
|
+
|
|
50
|
+
```{warning}
|
|
51
|
+
⚠️ The version of `zarr-python` we currently depend on is still in pre-release and this
|
|
52
|
+
package is accordingly extremely experimental.
|
|
53
|
+
We cannot guarantee any stability or correctness at the moment, although we have
|
|
54
|
+
tried to do extensive testing and make clear what we think we support and do not.
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
This project serves as a bridge between [`zarrs`](https://docs.rs/zarrs/latest/zarrs/) and [`zarr`](https://zarr.readthedocs.io/en/latest/index.html) via [`PyO3`](https://pyo3.rs/v0.22.3/). The main goal of the project is to speed up I/O.
|
|
58
|
+
|
|
59
|
+
To use the project, simply install our package (which depends on `zarr-python>3.0.0b0`), and run:
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
import zarr
|
|
63
|
+
import zarrs
|
|
64
|
+
zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"})
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
You can then use your `zarr` as normal (with some caveats)!
|
|
68
|
+
|
|
69
|
+
## API
|
|
70
|
+
|
|
71
|
+
We export a `ZarrsCodecPipeline` class so that `zarr-python` can use the class but it is not meant to be instantiated and we do not guarantee the stability of its API beyond what is required so that `zarr-python` can use it. Therefore, it is not documented here. We also export two errors, `DiscontiguousArrayError` and `CollapsedDimensionError` that can be thrown in the process of converting to indexers that `zarrs` can understand (see below for more details).
|
|
72
|
+
|
|
73
|
+
### Configuration
|
|
74
|
+
|
|
75
|
+
`ZarrsCodecPipeline` options are exposed through `zarr.config`.
|
|
76
|
+
|
|
77
|
+
Standard `zarr.config` options control some functionality (see the defaults in the [config.py](https://github.com/zarr-developers/zarr-python/blob/main/src/zarr/core/config.py) of `zarr-python`):
|
|
78
|
+
- `threading.max_workers`: the maximum number of threads used internally by the `ZarrsCodecPipeline` on the Rust side.
|
|
79
|
+
- Defaults to the number of threads in the global `rayon` thread pool if set to `None`, which is [typically the number of logical CPUs](https://docs.rs/rayon/latest/rayon/struct.ThreadPoolBuilder.html#method.num_threads).
|
|
80
|
+
- `array.write_empty_chunks`: whether or not to store empty chunks.
|
|
81
|
+
- Defaults to false if `None`. Note that checking for emptiness has some overhead, see [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#store-empty-chunks) for more info.
|
|
82
|
+
- This option name is proposed in [zarr-python #2429](https://github.com/zarr-developers/zarr-python/pull/2429)
|
|
83
|
+
|
|
84
|
+
The `ZarrsCodecPipeline` specific options are:
|
|
85
|
+
- `codec_pipeline.chunk_concurrent_maximum`: the maximum number of chunks stored/retrieved concurrently.
|
|
86
|
+
- Defaults to the number of logical CPUs if `None`. It is constrained by `threading.max_workers` as well.
|
|
87
|
+
- `codec_pipeline.chunk_concurrent_minimum`: the minimum number of chunks retrieved/stored concurrently when balancing chunk/codec concurrency.
|
|
88
|
+
- Defaults to 4 if `None`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#chunk-concurrent-minimum) for more info.
|
|
89
|
+
- `codec_pipeline.validate_checksums`: enable checksum validation (e.g. with the CRC32C codec).
|
|
90
|
+
- Defaults to true if `None`. See [here](https://docs.rs/zarrs/latest/zarrs/config/struct.Config.html#validate-checksums) for more info.
|
|
91
|
+
|
|
92
|
+
For example:
|
|
93
|
+
```python
|
|
94
|
+
zarr.config.set({
|
|
95
|
+
"threading.max_workers": None,
|
|
96
|
+
"array.write_empty_chunks": False,
|
|
97
|
+
"codec_pipeline": {
|
|
98
|
+
"path": "zarrs.ZarrsCodecPipeline",
|
|
99
|
+
"validate_checksums": True,
|
|
100
|
+
"store_empty_chunks": False,
|
|
101
|
+
"chunk_concurrent_maximum": None,
|
|
102
|
+
"chunk_concurrent_minimum": 4,
|
|
103
|
+
}
|
|
104
|
+
})
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Concurrency
|
|
108
|
+
|
|
109
|
+
Concurrency can be classified into two types:
|
|
110
|
+
- chunk (outer) concurrency: the number of chunks retrieved/stored concurrently.
|
|
111
|
+
- This is chosen automatically based on various factors, such as the chunk size and codecs.
|
|
112
|
+
- It is constrained between `codec_pipeline.chunk_concurrent_minimum` and `codec_pipeline.chunk_concurrent_maximum` for operations involving multiple chunks.
|
|
113
|
+
- codec (inner) concurrency: the number of threads encoding/decoding a chunk.
|
|
114
|
+
- This is chosen automatically in combination with the chunk concurrency.
|
|
115
|
+
|
|
116
|
+
The product of the chunk and codec concurrency will approximately match `threading.max_workers`.
|
|
117
|
+
|
|
118
|
+
Chunk concurrency is typically favored because:
|
|
119
|
+
- parallel encoding/decoding can have a high overhead with some codecs, especially with small chunks, and
|
|
120
|
+
- it is advantageous to retrieve/store multiple chunks concurrently, especially with high latency stores.
|
|
121
|
+
|
|
122
|
+
`zarrs-python` will often favor codec concurrency with sharded arrays, as they are well suited to codec concurrency.
|
|
123
|
+
|
|
124
|
+
## Supported Indexing Methods
|
|
125
|
+
|
|
126
|
+
We **do not** officially support the following indexing methods. Some of these methods may error out, others may not:
|
|
127
|
+
|
|
128
|
+
1. Any `oindex` or `vindex` integer `np.ndarray` indexing with dimensionality >=3 i.e.,
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
arr[np.array([...]), :, np.array([...])]
|
|
132
|
+
arr[np.array([...]), np.array([...]), np.array([...])]
|
|
133
|
+
arr[np.array([...]), np.array([...]), np.array([...])] = ...
|
|
134
|
+
arr.oindex[np.array([...]), np.array([...]), np.array([...])] = ...
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
2. Any `vindex` or `oindex` discontinuous integer `np.ndarray` indexing for writes in 2D
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
arr[np.array([0, 5]), :] = ...
|
|
141
|
+
arr.oindex[np.array([0, 5]), :] = ...
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
3. `vindex` writes in 2D where both indexers are integer `np.ndarray` indices i.e.,
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
arr[np.array([...]), np.array([...])] = ...
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
4. Ellipsis indexing. We have tested some, but others fail even with `zarr-python`'s default codec pipeline. Thus for now we advise proceeding with caution here.
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
arr[0:10, ..., 0:5]
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Otherwise, we believe that we support your indexing case: slices, ints, and all integer `np.ndarray` indices in 2D for reading, contiguous integer `np.ndarray` indices along one axis for writing etc. Please file an issue if you believe we have more holes in our coverage than we are aware of or you wish to contribute! For example, we have an [issue in zarrs for integer-array indexing](https://github.com/LDeakin/zarrs/issues/52) that would unblock a lot of these issues!
|
|
157
|
+
|
|
158
|
+
That being said, using non-contiguous integer `np.ndarray` indexing for reads may not be as fast as expected given the performance of other supported methods. Until `zarrs` supports integer indexing, only the fetching of chunks is done in Rust, while the indexing itself then occurs in Python.
|
|
159
|
+
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
zarrs-0.1.0.dist-info/METADATA,sha256=VVcA3Tg_ZiPJbKKRGHcdkh8EBF6FhNccFYCNYSsVaMg,8034
|
|
2
|
+
zarrs-0.1.0.dist-info/WHEEL,sha256=IIOEIeL6JkNdHLaHuqnvZYoUNEEUPYODpDuSBpZNzPA,95
|
|
3
|
+
zarrs-0.1.0.dist-info/licenses/LICENSE,sha256=JRFtUrB6PKdja3SAsuPOyvJ25Z9zGYOQqpcq8vGH4VI,1120
|
|
4
|
+
zarrs/pipeline.py,sha256=U2JgDsUZnqthtlo_tNkXBAsEu8duzNYgdja5xQiz5r8,5731
|
|
5
|
+
zarrs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
zarrs/utils.py,sha256=Qk36lsjwXFPPhzTUDvV655bAaXEq_eDSU_4BadnRZfw,6773
|
|
7
|
+
zarrs/_internal.pyi,sha256=2pUSpGTgC8ZWe4P_Yj8tLFiJIaqLk6dBhu_8FLQZ8zk,1377
|
|
8
|
+
zarrs/__init__.py,sha256=sH-kAXq8toCGbdGm1IOxr2JambnwnzqNxwrSjmv5hlk,593
|
|
9
|
+
zarrs/_internal.pyd,sha256=zCX5Z7xqG0BHH8-1lBYLV9mScmB1i_DSMhXVyYCw4Pk,2421760
|
|
10
|
+
zarrs-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Ilan Gold, Lachlan Deakin, Philipp Angerer
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|