tensogram-zarr 0.14.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tensogram_zarr-0.14.0/.gitignore +26 -0
- tensogram_zarr-0.14.0/PKG-INFO +22 -0
- tensogram_zarr-0.14.0/pyproject.toml +46 -0
- tensogram_zarr-0.14.0/src/tensogram_zarr/__init__.py +34 -0
- tensogram_zarr-0.14.0/src/tensogram_zarr/mapping.py +370 -0
- tensogram_zarr-0.14.0/src/tensogram_zarr/store.py +690 -0
- tensogram_zarr-0.14.0/tests/__init__.py +8 -0
- tensogram_zarr-0.14.0/tests/conftest.py +124 -0
- tensogram_zarr-0.14.0/tests/test_coverage.py +1295 -0
- tensogram_zarr-0.14.0/tests/test_edge_cases.py +478 -0
- tensogram_zarr-0.14.0/tests/test_mapping.py +314 -0
- tensogram_zarr-0.14.0/tests/test_remote.py +240 -0
- tensogram_zarr-0.14.0/tests/test_roundtrip.py +223 -0
- tensogram_zarr-0.14.0/tests/test_store_read.py +217 -0
- tensogram_zarr-0.14.0/tests/test_store_write.py +159 -0
- tensogram_zarr-0.14.0/tests/test_zarr_integration.py +203 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
.claude/*
|
|
2
|
+
!.claude/commands/
|
|
3
|
+
.weave/
|
|
4
|
+
.sisyphus/
|
|
5
|
+
.coverage
|
|
6
|
+
*.dylib.dSYM/
|
|
7
|
+
**/target
|
|
8
|
+
**/pkg
|
|
9
|
+
**/build/
|
|
10
|
+
python/bindings/Cargo.lock
|
|
11
|
+
|
|
12
|
+
/docs/book
|
|
13
|
+
**/.venv
|
|
14
|
+
**/.ruff_cache
|
|
15
|
+
**/__pycache__
|
|
16
|
+
*.so
|
|
17
|
+
*.dylib
|
|
18
|
+
*.pyd
|
|
19
|
+
*.swp
|
|
20
|
+
*.swo
|
|
21
|
+
*~
|
|
22
|
+
.DS_Store
|
|
23
|
+
.idea/
|
|
24
|
+
rust/tensogram-grib/Cargo.lock
|
|
25
|
+
rust/tensogram-netcdf/Cargo.lock
|
|
26
|
+
rust/tensogram-wasm/Cargo.lock
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tensogram-zarr
|
|
3
|
+
Version: 0.14.0
|
|
4
|
+
Summary: Zarr v3 store backend for tensogram .tgm files
|
|
5
|
+
Project-URL: Homepage, https://sites.ecmwf.int/docs/tensogram/main
|
|
6
|
+
Project-URL: Repository, https://github.com/ecmwf/tensogram
|
|
7
|
+
Project-URL: Documentation, https://sites.ecmwf.int/docs/tensogram/main
|
|
8
|
+
Author-email: ECMWF <software@ecmwf.int>
|
|
9
|
+
License-Expression: Apache-2.0
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Atmospheric Science
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Requires-Dist: numpy
|
|
17
|
+
Requires-Dist: tensogram<0.15,>=0.14.0
|
|
18
|
+
Requires-Dist: zarr>=3.0
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
21
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
22
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tensogram-zarr"
|
|
7
|
+
version = "0.14.0"
|
|
8
|
+
description = "Zarr v3 store backend for tensogram .tgm files"
|
|
9
|
+
requires-python = ">=3.10"
|
|
10
|
+
license = "Apache-2.0"
|
|
11
|
+
authors = [{name = "ECMWF", email = "software@ecmwf.int"}]
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Development Status :: 4 - Beta",
|
|
14
|
+
"License :: OSI Approved :: Apache Software License",
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"Topic :: Scientific/Engineering",
|
|
17
|
+
"Topic :: Scientific/Engineering :: Atmospheric Science",
|
|
18
|
+
]
|
|
19
|
+
dependencies = [
|
|
20
|
+
"tensogram>=0.14.0,<0.15",
|
|
21
|
+
"zarr>=3.0",
|
|
22
|
+
"numpy",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[project.urls]
|
|
26
|
+
Homepage = "https://sites.ecmwf.int/docs/tensogram/main"
|
|
27
|
+
Repository = "https://github.com/ecmwf/tensogram"
|
|
28
|
+
Documentation = "https://sites.ecmwf.int/docs/tensogram/main"
|
|
29
|
+
|
|
30
|
+
[project.optional-dependencies]
|
|
31
|
+
dev = ["pytest>=7.0", "pytest-asyncio>=0.23", "ruff>=0.4"]
|
|
32
|
+
|
|
33
|
+
[tool.hatch.build.targets.wheel]
|
|
34
|
+
packages = ["src/tensogram_zarr"]
|
|
35
|
+
|
|
36
|
+
[tool.ruff]
|
|
37
|
+
line-length = 99
|
|
38
|
+
target-version = "py310"
|
|
39
|
+
|
|
40
|
+
[tool.ruff.lint]
|
|
41
|
+
select = ["E", "W", "F", "I", "N", "UP", "B", "SIM", "PT", "RUF"]
|
|
42
|
+
ignore = ["RUF012", "SIM117"]
|
|
43
|
+
|
|
44
|
+
[tool.pytest.ini_options]
|
|
45
|
+
testpaths = ["tests"]
|
|
46
|
+
asyncio_mode = "auto"
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# (C) Copyright 2026- ECMWF and individual contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation nor
|
|
7
|
+
# does it submit to any jurisdiction.
|
|
8
|
+
|
|
9
|
+
"""Zarr v3 store backend for tensogram .tgm files.
|
|
10
|
+
|
|
11
|
+
Provides ``TensogramStore`` — a Zarr v3 ``Store`` that reads and writes
|
|
12
|
+
Tensogram wire-format (``.tgm``) files through the standard Zarr API.
|
|
13
|
+
|
|
14
|
+
Usage::
|
|
15
|
+
|
|
16
|
+
import zarr
|
|
17
|
+
from tensogram_zarr import TensogramStore
|
|
18
|
+
|
|
19
|
+
# Read existing .tgm through Zarr
|
|
20
|
+
store = TensogramStore.open_tgm("data.tgm")
|
|
21
|
+
root = zarr.open_group(store=store, mode="r")
|
|
22
|
+
arr = root["temperature"][:]
|
|
23
|
+
|
|
24
|
+
# Write new .tgm through Zarr
|
|
25
|
+
import numpy as np
|
|
26
|
+
store = TensogramStore("output.tgm", mode="w")
|
|
27
|
+
root = zarr.open_group(store=store, mode="w")
|
|
28
|
+
root.create_array("temperature", data=np.random.rand(100, 200).astype(np.float32))
|
|
29
|
+
store.close()
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
from tensogram_zarr.store import TensogramStore
|
|
33
|
+
|
|
34
|
+
__all__ = ["TensogramStore"]
|
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
# (C) Copyright 2026- ECMWF and individual contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation nor
|
|
7
|
+
# does it submit to any jurisdiction.
|
|
8
|
+
|
|
9
|
+
"""Bidirectional mapping between Tensogram and Zarr v3 metadata.
|
|
10
|
+
|
|
11
|
+
Converts TGM dtypes, descriptors, and global metadata into Zarr v3
|
|
12
|
+
``zarr.json`` structures and back.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
import math
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
import numpy as np
|
|
22
|
+
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
# Dtype mapping
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
# TGM dtype string → (Zarr v3 data_type, numpy dtype string)
|
|
28
|
+
# Each value is a pair: (Zarr v3 ``data_type`` name, numpy dtype code).
# ``tgm_dtype_to_zarr`` reads element 0, ``tgm_dtype_to_numpy`` element 1.
_TGM_TO_ZARR_DTYPE: dict[str, tuple[str, str]] = {
    # Floating point and complex: explicit little-endian ("<") numpy codes.
    "float16": ("float16", "<f2"),
    "bfloat16": ("bfloat16", "<V2"),  # no native numpy; raw 2-byte
    "float32": ("float32", "<f4"),
    "float64": ("float64", "<f8"),
    "complex64": ("complex64", "<c8"),
    "complex128": ("complex128", "<c16"),
    # Signed integers; the single-byte type uses "|" (no byte order).
    "int8": ("int8", "|i1"),
    "int16": ("int16", "<i2"),
    "int32": ("int32", "<i4"),
    "int64": ("int64", "<i8"),
    # Unsigned integers; the single-byte type uses "|" (no byte order).
    "uint8": ("uint8", "|u1"),
    "uint16": ("uint16", "<u2"),
    "uint32": ("uint32", "<u4"),
    "uint64": ("uint64", "<u8"),
    # Not a Zarr dtype of its own: shares the uint8 mapping, so the reverse
    # table cannot map "uint8" back to "bitmask".
    "bitmask": ("uint8", "|u1"),  # bitmask exposed as uint8
}
|
|
45
|
+
|
|
46
|
+
# Zarr v3 data_type → TGM dtype string
|
|
47
|
+
_ZARR_TO_TGM_DTYPE: dict[str, str] = {
|
|
48
|
+
"float16": "float16",
|
|
49
|
+
"bfloat16": "bfloat16",
|
|
50
|
+
"float32": "float32",
|
|
51
|
+
"float64": "float64",
|
|
52
|
+
"complex64": "complex64",
|
|
53
|
+
"complex128": "complex128",
|
|
54
|
+
"int8": "int8",
|
|
55
|
+
"int16": "int16",
|
|
56
|
+
"int32": "int32",
|
|
57
|
+
"int64": "int64",
|
|
58
|
+
"uint8": "uint8",
|
|
59
|
+
"uint16": "uint16",
|
|
60
|
+
"uint32": "uint32",
|
|
61
|
+
"uint64": "uint64",
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
# numpy dtype → TGM dtype string (used on write path)
|
|
65
|
+
_NP_TO_TGM_DTYPE: dict[np.dtype, str] = {
|
|
66
|
+
np.dtype("float16"): "float16",
|
|
67
|
+
np.dtype("float32"): "float32",
|
|
68
|
+
np.dtype("float64"): "float64",
|
|
69
|
+
np.dtype("complex64"): "complex64",
|
|
70
|
+
np.dtype("complex128"): "complex128",
|
|
71
|
+
np.dtype("int8"): "int8",
|
|
72
|
+
np.dtype("int16"): "int16",
|
|
73
|
+
np.dtype("int32"): "int32",
|
|
74
|
+
np.dtype("int64"): "int64",
|
|
75
|
+
np.dtype("uint8"): "uint8",
|
|
76
|
+
np.dtype("uint16"): "uint16",
|
|
77
|
+
np.dtype("uint32"): "uint32",
|
|
78
|
+
np.dtype("uint64"): "uint64",
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def tgm_dtype_to_zarr(tgm_dtype: str) -> str:
    """Return the Zarr v3 ``data_type`` name registered for a TGM dtype.

    Parameters
    ----------
    tgm_dtype : str
        TGM dtype string, looked up in ``_TGM_TO_ZARR_DTYPE``.

    Returns
    -------
    str
        The first element of the table entry (the Zarr v3 data_type).

    Raises
    ------
    ValueError
        If ``tgm_dtype`` has no entry in the table.
    """
    if tgm_dtype not in _TGM_TO_ZARR_DTYPE:
        raise ValueError(f"unsupported TGM dtype: {tgm_dtype!r}")
    zarr_name, _numpy_code = _TGM_TO_ZARR_DTYPE[tgm_dtype]
    return zarr_name
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def tgm_dtype_to_numpy(tgm_dtype: str) -> np.dtype:
    """Return the numpy dtype registered for a TGM dtype string.

    Parameters
    ----------
    tgm_dtype : str
        TGM dtype string, looked up in ``_TGM_TO_ZARR_DTYPE``.

    Returns
    -------
    numpy.dtype
        ``np.dtype`` built from the second element of the table entry.

    Raises
    ------
    ValueError
        If ``tgm_dtype`` has no entry in the table.
    """
    if tgm_dtype not in _TGM_TO_ZARR_DTYPE:
        raise ValueError(f"unsupported TGM dtype: {tgm_dtype!r}")
    _zarr_name, numpy_code = _TGM_TO_ZARR_DTYPE[tgm_dtype]
    return np.dtype(numpy_code)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def zarr_dtype_to_tgm(zarr_dtype: str) -> str:
    """Return the TGM dtype string for a Zarr v3 ``data_type`` name.

    Parameters
    ----------
    zarr_dtype : str
        Zarr v3 data_type, looked up in ``_ZARR_TO_TGM_DTYPE``.

    Returns
    -------
    str
        The matching TGM dtype string.

    Raises
    ------
    ValueError
        If ``zarr_dtype`` has no entry in the table.
    """
    if zarr_dtype not in _ZARR_TO_TGM_DTYPE:
        raise ValueError(f"unsupported Zarr dtype: {zarr_dtype!r}")
    return _ZARR_TO_TGM_DTYPE[zarr_dtype]
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def numpy_dtype_to_tgm(dtype: np.dtype) -> str:
    """Return the TGM dtype string for a numpy dtype.

    Parameters
    ----------
    dtype : numpy.dtype
        Dtype object used directly as the key into ``_NP_TO_TGM_DTYPE``.

    Returns
    -------
    str
        The matching TGM dtype string.

    Raises
    ------
    ValueError
        If ``dtype`` is not a key of the table.
    """
    if dtype not in _NP_TO_TGM_DTYPE:
        raise ValueError(f"unsupported numpy dtype: {dtype!r}")
    return _NP_TO_TGM_DTYPE[dtype]
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# ---------------------------------------------------------------------------
|
|
115
|
+
# Zarr v3 metadata synthesis (read path: TGM → zarr.json)
|
|
116
|
+
# ---------------------------------------------------------------------------
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def build_group_zarr_json(
    meta: Any,
    variable_names: list[str],
) -> dict[str, Any]:
    """Build the Zarr v3 group ``zarr.json`` dict for a TGM message.

    Parameters
    ----------
    meta : tensogram.Metadata
        Decoded TGM global metadata.  ``meta.version`` is always read;
        ``meta.extra`` is merged into the attributes when it exists and is
        non-empty.
    variable_names : list[str]
        Names of the arrays in this group, stored as an informational
        attribute.

    Returns
    -------
    dict
        Group metadata with ``zarr_format`` 3, ``node_type`` ``"group"`` and
        the collected ``attributes``.
    """
    attributes: dict[str, Any] = {}

    # Message-level annotations go in first so the reserved ``_tensogram_*``
    # entries written below win on a key clash.
    extra = getattr(meta, "extra", None)
    if extra:
        attributes.update(extra)

    attributes.update(
        {
            "_tensogram_version": meta.version,
            "_tensogram_variables": variable_names,
        }
    )

    return {"zarr_format": 3, "node_type": "group", "attributes": attributes}
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def build_array_zarr_json(
    desc: Any,
    per_object_meta: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build the Zarr v3 array ``zarr.json`` dict for one TGM data object.

    Each TGM data object is a monolithic tensor, so the array is described
    as a single chunk whose shape equals the array shape.

    Parameters
    ----------
    desc : tensogram.DataObjectDescriptor
        The decoded object descriptor.  Its ``shape``, ``dtype``, ``params``,
        ``encoding``, ``filter``, ``compression`` and ``hash`` attributes are
        read.
    per_object_meta : dict, optional
        Per-object metadata from ``meta.base[i]``; copied into the array
        attributes when non-empty.

    Returns
    -------
    dict
        A Zarr v3 array metadata dict ready for JSON serialization.

    Raises
    ------
    ValueError
        Propagated from ``tgm_dtype_to_zarr`` when ``desc.dtype`` is not a
        supported TGM dtype.
    """
    dims = list(desc.shape)
    data_type = tgm_dtype_to_zarr(desc.dtype)

    # One chunk spanning the whole array; an empty shape falls back to [1].
    # NOTE(review): in that fallback "shape" stays [] while chunk_shape has
    # one dimension — confirm Zarr readers accept the rank mismatch.
    whole_array_chunk = list(dims) or [1]

    # Per-object metadata goes in first so the reserved ``_tensogram_*``
    # entries written below win on a key clash.
    attributes: dict[str, Any] = {}
    if per_object_meta:
        attributes.update(per_object_meta)

    encoding_params = desc.params
    if encoding_params:
        # Kept under a ``_tensogram`` prefixed key to avoid clashing with
        # user attributes.
        attributes["_tensogram_params"] = dict(encoding_params)

    attributes["_tensogram_encoding"] = desc.encoding
    attributes["_tensogram_filter"] = desc.filter
    attributes["_tensogram_compression"] = desc.compression

    payload_hash = desc.hash
    if payload_hash:
        attributes["_tensogram_hash"] = payload_hash

    return {
        "zarr_format": 3,
        "node_type": "array",
        "shape": dims,
        "data_type": data_type,
        "chunk_grid": {
            "name": "regular",
            "configuration": {"chunk_shape": whole_array_chunk},
        },
        "chunk_key_encoding": {
            "name": "default",
            "configuration": {"separator": "/"},
        },
        # Only the ``bytes`` codec: no Zarr-level compression, because the
        # data is already encoded/compressed inside TGM.
        "codecs": [
            {
                "name": "bytes",
                "configuration": {"endian": "little"},
            },
        ],
        "fill_value": _default_fill_value(data_type),
        "attributes": attributes,
    }
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
_FLOAT_LIKE_PREFIXES = ("float", "bfloat", "complex")
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def _default_fill_value(zarr_dtype: str) -> Any:
|
|
226
|
+
"""Return a sensible default fill value for a Zarr dtype."""
|
|
227
|
+
if any(zarr_dtype.startswith(p) for p in _FLOAT_LIKE_PREFIXES):
|
|
228
|
+
return float("nan")
|
|
229
|
+
return 0
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
# ---------------------------------------------------------------------------
|
|
233
|
+
# TGM metadata reconstruction (write path: zarr.json → TGM)
|
|
234
|
+
# ---------------------------------------------------------------------------
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def parse_array_zarr_json(zarr_meta: dict[str, Any]) -> dict[str, Any]:
    """Pull the TGM-relevant fields out of a Zarr v3 array ``zarr.json``.

    Parameters
    ----------
    zarr_meta : dict
        Parsed array metadata.  ``shape`` and ``data_type`` are required;
        ``codecs`` and ``attributes`` are optional.  The dict is not mutated.

    Returns
    -------
    dict
        Keys ``shape``, ``dtype``, ``byte_order``, ``encoding``, ``filter``,
        ``compression`` and ``attrs``.  ``attrs`` holds the attributes whose
        key does not start with ``_tensogram_``.

    Raises
    ------
    KeyError
        If ``shape`` or ``data_type`` is missing.
    ValueError
        Propagated from ``zarr_dtype_to_tgm`` for an unsupported data type.
    """
    shape = zarr_meta["shape"]
    tgm_dtype = zarr_dtype_to_tgm(zarr_meta["data_type"])

    # Byte order comes from the "bytes" codec; when several are listed the
    # last one wins, and "little" is used when none declares an endianness.
    declared_endians = [
        codec.get("configuration", {}).get("endian", "little")
        for codec in zarr_meta.get("codecs", [])
        if codec.get("name") == "bytes"
    ]
    byte_order = declared_endians[-1] if declared_endians else "little"

    # Shallow copy so the caller's attribute mapping is only read.
    attributes = dict(zarr_meta.get("attributes", {}))
    user_attrs = {
        key: value for key, value in attributes.items() if not key.startswith("_tensogram_")
    }

    return {
        "shape": shape,
        "dtype": tgm_dtype,
        "byte_order": byte_order,
        "encoding": attributes.get("_tensogram_encoding", "none"),
        "filter": attributes.get("_tensogram_filter", "none"),
        "compression": attributes.get("_tensogram_compression", "none"),
        "attrs": user_attrs,
    }
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def serialize_zarr_json(meta: dict[str, Any]) -> bytes:
    """Encode a zarr.json dict as compact, key-sorted UTF-8 JSON bytes.

    The dict is first passed through ``_json_safe_metadata`` so that
    non-finite floats (NaN, Infinity, -Infinity) become their Zarr v3 string
    sentinels; ``allow_nan=False`` then makes ``json.dumps`` reject any that
    remain, keeping the output valid RFC 8259 JSON.
    """
    text = json.dumps(
        _json_safe_metadata(meta),
        sort_keys=True,
        separators=(",", ":"),
        allow_nan=False,
    )
    return text.encode("utf-8")
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def _json_safe_metadata(obj: Any) -> Any:
|
|
282
|
+
"""Recursively replace non-finite floats with Zarr v3 string sentinels.
|
|
283
|
+
|
|
284
|
+
RFC 8259 forbids bare ``NaN`` / ``Infinity`` tokens in JSON. Zarr v3
|
|
285
|
+
uses the string values ``"NaN"``, ``"Infinity"``, ``"-Infinity"`` for
|
|
286
|
+
fill_value and similar fields.
|
|
287
|
+
"""
|
|
288
|
+
if isinstance(obj, float):
|
|
289
|
+
if math.isnan(obj):
|
|
290
|
+
return "NaN"
|
|
291
|
+
if math.isinf(obj):
|
|
292
|
+
return "Infinity" if obj > 0 else "-Infinity"
|
|
293
|
+
return obj
|
|
294
|
+
if isinstance(obj, dict):
|
|
295
|
+
return {k: _json_safe_metadata(v) for k, v in obj.items()}
|
|
296
|
+
if isinstance(obj, (list, tuple)):
|
|
297
|
+
return [_json_safe_metadata(v) for v in obj]
|
|
298
|
+
return obj
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def deserialize_zarr_json(data: bytes) -> dict[str, Any]:
    """Parse UTF-8 JSON bytes and return the decoded value.

    Parameters
    ----------
    data : bytes
        Raw ``zarr.json`` content.

    Returns
    -------
    dict
        Whatever ``json.loads`` produces for ``data``; the top-level type is
        not checked here.

    Raises
    ------
    ValueError
        If ``data`` is not valid JSON or not decodable text.  The message
        carries the payload length and a hex dump of its first 80 bytes, and
        the original exception is chained as the cause.
    """
    try:
        parsed = json.loads(data)
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        # Hex keeps the preview printable even for binary garbage.
        preview = data[:80].hex()
        message = f"invalid zarr.json content ({len(data)} bytes, starts {preview!s}): {exc}"
        raise ValueError(message) from exc
    return parsed
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
# ---------------------------------------------------------------------------
|
|
316
|
+
# Variable naming
|
|
317
|
+
# ---------------------------------------------------------------------------
|
|
318
|
+
|
|
319
|
+
# Dotted-path metadata keys to try for variable naming, in priority order.
|
|
320
|
+
_VARIABLE_NAME_KEYS = [
    "name",
    "mars.param",
    "param",
    "mars.shortName",
    "shortName",
]


def resolve_variable_name(
    obj_index: int,
    per_object_meta: dict[str, Any] | None,
    common_meta: dict[str, Any] | None = None,
    variable_key: str | None = None,
) -> str:
    """Choose the Zarr variable name for one TGM data object.

    Candidate keys are tried in order — ``variable_key`` first when given,
    then each entry of ``_VARIABLE_NAME_KEYS`` — and the first one that
    resolves to a non-``None`` value is returned as a string.  When nothing
    matches, the name is ``object_<index>``.

    Only ``per_object_meta`` (from ``meta.base[i]``) is searched.
    ``common_meta`` (from ``meta.extra``) is accepted for API compatibility
    but deliberately ignored, so that all objects of a message do not end up
    sharing one name.

    Parameters
    ----------
    obj_index : int
        Index of the object, used only for the fallback name.
    per_object_meta : dict or None
        Per-object metadata; ``None`` or empty means no lookup can succeed.
    common_meta : dict, optional
        Unused.
    variable_key : str, optional
        Dotted key path to try before the built-in candidates.

    Returns
    -------
    str
        The resolved variable name.
    """
    lookup = per_object_meta if per_object_meta else {}
    preferred = [variable_key] if variable_key else []

    for candidate in (*preferred, *_VARIABLE_NAME_KEYS):
        found = _dotted_get(lookup, candidate)
        if found is not None:
            return str(found)

    return f"object_{obj_index}"
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def _dotted_get(d: dict[str, Any], path: str) -> Any:
|
|
361
|
+
"""Resolve a dotted key path like ``mars.param`` in a nested dict."""
|
|
362
|
+
parts = path.split(".")
|
|
363
|
+
current: Any = d
|
|
364
|
+
for part in parts:
|
|
365
|
+
if not isinstance(current, dict):
|
|
366
|
+
return None
|
|
367
|
+
current = current.get(part)
|
|
368
|
+
if current is None:
|
|
369
|
+
return None
|
|
370
|
+
return current
|