tensogram-xarray 0.16.1__tar.gz → 0.18.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tensogram_xarray-0.18.0/.gitignore +24 -0
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/PKG-INFO +3 -3
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/pyproject.toml +3 -3
- tensogram_xarray-0.18.0/src/tensogram_xarray/mapping.py +263 -0
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/src/tensogram_xarray/merge.py +191 -55
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/src/tensogram_xarray/store.py +166 -120
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/tests/test_backend.py +1 -1
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/tests/test_coverage.py +306 -72
- tensogram_xarray-0.18.0/tests/test_edge_cases.py +1130 -0
- tensogram_xarray-0.18.0/tests/test_issue_67_descriptor_name_fallback.py +96 -0
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/tests/test_merge.py +1 -1
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/tests/test_nd_range.py +2 -2
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/tests/test_remote.py +1 -1
- tensogram_xarray-0.16.1/.gitignore +0 -29
- tensogram_xarray-0.16.1/src/tensogram_xarray/mapping.py +0 -91
- tensogram_xarray-0.16.1/tests/test_edge_cases.py +0 -487
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/README.md +0 -0
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/src/tensogram_xarray/__init__.py +0 -0
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/src/tensogram_xarray/array.py +0 -0
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/src/tensogram_xarray/backend.py +0 -0
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/src/tensogram_xarray/coords.py +0 -0
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/src/tensogram_xarray/scanner.py +0 -0
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/tests/__init__.py +0 -0
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/tests/conftest.py +0 -0
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/tests/test_array.py +0 -0
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/tests/test_coords.py +0 -0
- {tensogram_xarray-0.16.1 → tensogram_xarray-0.18.0}/tests/test_mapping.py +0 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
.DS_Store
|
|
2
|
+
.claude/*
|
|
3
|
+
!.claude/commands/
|
|
4
|
+
.weave/
|
|
5
|
+
.sisyphus/
|
|
6
|
+
.coverage
|
|
7
|
+
*.dylib.dSYM/
|
|
8
|
+
**/target
|
|
9
|
+
**/pkg
|
|
10
|
+
**/build/
|
|
11
|
+
docs/book/
|
|
12
|
+
python/**/dist/
|
|
13
|
+
python/bindings/Cargo.lock
|
|
14
|
+
rust/tensogram-grib/Cargo.lock
|
|
15
|
+
rust/tensogram-netcdf/Cargo.lock
|
|
16
|
+
rust/tensogram-wasm/Cargo.lock
|
|
17
|
+
# Python virtualenv, caches, and maturin-installed extension modules
|
|
18
|
+
.venv/
|
|
19
|
+
**/__pycache__/
|
|
20
|
+
*.pyc
|
|
21
|
+
python/bindings/python/tensogram/tensogram*.so
|
|
22
|
+
# TODO do we want to have uv.locks ignored?
|
|
23
|
+
**/uv.lock
|
|
24
|
+
examples/jupyter/tensogram_jupyter_examples.egg-info/
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tensogram-xarray
|
|
3
|
-
Version: 0.16.1
|
|
3
|
+
Version: 0.18.0
|
|
4
4
|
Summary: xarray backend engine for tensogram .tgm files
|
|
5
5
|
Project-URL: Homepage, https://sites.ecmwf.int/docs/tensogram/main
|
|
6
6
|
Project-URL: Repository, https://github.com/ecmwf/tensogram
|
|
@@ -12,9 +12,9 @@ Classifier: License :: OSI Approved :: Apache Software License
|
|
|
12
12
|
Classifier: Programming Language :: Python :: 3
|
|
13
13
|
Classifier: Topic :: Scientific/Engineering
|
|
14
14
|
Classifier: Topic :: Scientific/Engineering :: Atmospheric Science
|
|
15
|
-
Requires-Python: >=3.
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
16
|
Requires-Dist: numpy
|
|
17
|
-
Requires-Dist: tensogram<0.
|
|
17
|
+
Requires-Dist: tensogram<0.19,>=0.18.0
|
|
18
18
|
Requires-Dist: xarray>=2022.06
|
|
19
19
|
Provides-Extra: dask
|
|
20
20
|
Requires-Dist: dask[array]; extra == 'dask'
|
|
@@ -4,10 +4,10 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "tensogram-xarray"
|
|
7
|
-
version = "0.16.1"
|
|
7
|
+
version = "0.18.0"
|
|
8
8
|
description = "xarray backend engine for tensogram .tgm files"
|
|
9
9
|
readme = "README.md"
|
|
10
|
-
requires-python = ">=3.
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
11
|
license = "Apache-2.0"
|
|
12
12
|
authors = [{name = "ECMWF", email = "software@ecmwf.int"}]
|
|
13
13
|
classifiers = [
|
|
@@ -18,7 +18,7 @@ classifiers = [
|
|
|
18
18
|
"Topic :: Scientific/Engineering :: Atmospheric Science",
|
|
19
19
|
]
|
|
20
20
|
dependencies = [
|
|
21
|
-
"tensogram>=0.
|
|
21
|
+
"tensogram>=0.18.0,<0.19",
|
|
22
22
|
"xarray>=2022.06",
|
|
23
23
|
"numpy",
|
|
24
24
|
]
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
# (C) Copyright 2026- ECMWF and individual contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation nor
|
|
7
|
+
# does it submit to any jurisdiction.
|
|
8
|
+
|
|
9
|
+
"""Dimension and variable naming for the xarray backend.
|
|
10
|
+
|
|
11
|
+
Handles the ``dim_names`` and ``variable_key`` parameters that let callers
|
|
12
|
+
control how tensogram data maps to xarray dimensions and variable names,
|
|
13
|
+
including the per-object ``base[i]["dim_names"]`` opt-in convention.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import logging
|
|
19
|
+
from collections.abc import Mapping, Sequence
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
# Module-level logger; the hint parsers below report ignored hints at DEBUG.
logger = logging.getLogger(__name__)

# Key of the per-object hint, i.e. ``base[i]["dim_names"]``.
PER_OBJECT_DIM_NAMES_KEY = "dim_names"
# Key of the legacy hint stored under ``_extra_``, i.e. ``_extra_["dim_names"]``.
# Note that both constants currently hold the same literal.
EXTRA_DIM_NAMES_KEY = "dim_names"

# Metadata keys that encode xarray structure rather than user attributes.
# These are read by the backend to shape the Dataset but must not leak into
# :attr:`xarray.Variable.attrs` or participate in the :mod:`merge` path's
# hypercube grouping (otherwise hint-like keys could become outer dims).
# Only the per-object key is listed here.
STRUCTURAL_META_KEYS: frozenset[str] = frozenset({PER_OBJECT_DIM_NAMES_KEY})
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def strip_structural_keys(meta: Mapping[str, Any]) -> dict[str, Any]:
    """Build a new dict from *meta*, leaving out :data:`STRUCTURAL_META_KEYS`.

    *meta* itself is not modified; values are carried over by reference.
    """
    kept: dict[str, Any] = {}
    for key, value in meta.items():
        if key in STRUCTURAL_META_KEYS:
            # Structural hint, not a user attribute: drop it.
            continue
        kept[key] = value
    return kept
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def resolve_dim_names(
|
|
40
|
+
ndim: int,
|
|
41
|
+
user_dim_names: Sequence[str] | None,
|
|
42
|
+
) -> list[str]:
|
|
43
|
+
"""Return dimension names for a tensor with *ndim* axes.
|
|
44
|
+
|
|
45
|
+
If *user_dim_names* is provided, it must have exactly *ndim* entries.
|
|
46
|
+
Otherwise generic ``dim_0``, ``dim_1``, ... names are generated.
|
|
47
|
+
"""
|
|
48
|
+
if user_dim_names is not None:
|
|
49
|
+
names = list(user_dim_names)
|
|
50
|
+
if len(names) != ndim:
|
|
51
|
+
msg = (
|
|
52
|
+
f"dim_names has {len(names)} entries but tensor has {ndim} "
|
|
53
|
+
f"dimensions. Provide exactly {ndim} names."
|
|
54
|
+
)
|
|
55
|
+
raise ValueError(msg)
|
|
56
|
+
return names
|
|
57
|
+
return [f"dim_{i}" for i in range(ndim)]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _looks_like_string_sequence(raw: Any) -> bool:
|
|
61
|
+
"""True if *raw* is a list/tuple/sequence of items, excluding ``str``/``bytes``."""
|
|
62
|
+
if isinstance(raw, (str, bytes, bytearray)):
|
|
63
|
+
return False
|
|
64
|
+
return isinstance(raw, Sequence)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def parse_per_object_dim_names(
    ndim: int,
    obj_meta: Mapping[str, Any] | None,
) -> list[str] | None:
    """Extract the per-object dim-name hint, or ``None`` if unusable.

    The hint is read from ``obj_meta["dim_names"]`` (the
    ``base[i]["dim_names"]`` convention).  It is accepted only when it is a
    non-``str`` sequence holding exactly *ndim* non-empty, pairwise distinct
    strings.  A missing hint returns ``None`` silently; a malformed one
    returns ``None`` after a DEBUG log entry, so the caller simply moves on
    to the next naming source instead of failing.
    """
    hint = obj_meta.get(PER_OBJECT_DIM_NAMES_KEY) if obj_meta else None
    if hint is None:
        return None

    if not _looks_like_string_sequence(hint):
        logger.debug(
            "per-object %s hint is not a list/sequence (got %s); ignoring",
            PER_OBJECT_DIM_NAMES_KEY,
            type(hint).__name__,
        )
        return None

    candidates = list(hint)
    count = len(candidates)
    if count != ndim:
        logger.debug(
            "per-object %s hint has %d entries but ndim=%d; ignoring",
            PER_OBJECT_DIM_NAMES_KEY,
            count,
            ndim,
        )
        return None

    # Every entry must be a non-empty string.
    if any(not isinstance(entry, str) or not entry for entry in candidates):
        logger.debug(
            "per-object %s hint contains non-string or empty entries; ignoring",
            PER_OBJECT_DIM_NAMES_KEY,
        )
        return None

    # Two axes may not share a name.
    if len(set(candidates)) != ndim:
        logger.debug(
            "per-object %s hint contains duplicate entries %r; ignoring",
            PER_OBJECT_DIM_NAMES_KEY,
            candidates,
        )
        return None

    return candidates
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def parse_extra_dim_names_hint(
|
|
117
|
+
ndim: int,
|
|
118
|
+
raw: Any,
|
|
119
|
+
) -> list[str] | dict[int, str]:
|
|
120
|
+
"""Return parsed ``_extra_["dim_names"]`` hint.
|
|
121
|
+
|
|
122
|
+
Accepts two legacy formats:
|
|
123
|
+
|
|
124
|
+
* list (preferred) — axis-ordered names, length must equal *ndim*
|
|
125
|
+
* dict — size-to-name mapping (string keys coerced to int)
|
|
126
|
+
|
|
127
|
+
Invalid hints yield an empty dict so callers can iterate uniformly.
|
|
128
|
+
"""
|
|
129
|
+
if raw is None:
|
|
130
|
+
return {}
|
|
131
|
+
if isinstance(raw, list):
|
|
132
|
+
try:
|
|
133
|
+
names = [str(n) for n in raw]
|
|
134
|
+
except (TypeError, ValueError):
|
|
135
|
+
return {}
|
|
136
|
+
if len(names) == ndim:
|
|
137
|
+
return names
|
|
138
|
+
return {}
|
|
139
|
+
if isinstance(raw, dict):
|
|
140
|
+
try:
|
|
141
|
+
return {int(k): str(v) for k, v in raw.items()}
|
|
142
|
+
except (TypeError, ValueError):
|
|
143
|
+
return {}
|
|
144
|
+
return {}
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def resolve_dims_for_axes(
    shape: tuple[int, ...],
    *,
    user_dim_names: Sequence[str] | None,
    coord_dim_sizes: Mapping[str, int],
    per_object_meta: Mapping[str, Any] | None,
    extra_dim_names_hint: Any,
) -> list[tuple[str, bool]]:
    """Name every axis of *shape*, returning ``(name, is_generic_fallback)`` pairs.

    Sources are consulted in this order (first hit wins):

    1. ``user_dim_names`` — explicit caller kwarg; when given it decides all
       axes at once and nothing else is consulted.
    2. Coord size-match — the first not-yet-used coord dim whose size equals
       the axis size.
    3. Per-object ``base[i]["dim_names"]`` — validated by
       :func:`parse_per_object_dim_names`.
    4. ``_extra_["dim_names"]`` — list (indexed by axis) or size-to-name dict,
       parsed by :func:`parse_extra_dim_names_hint`.
    5. Generic ``dim_{axis}`` — the only case flagged
       ``is_generic_fallback=True``.

    A name from sources 2-4 is skipped for an axis if an earlier axis already
    took it.  Generic names are not recorded as taken; resolving a clash
    between a hinted and a generic name is left to the caller.
    """
    rank = len(shape)

    if user_dim_names is not None:
        explicit = resolve_dim_names(rank, user_dim_names)
        return [(label, False) for label in explicit]

    object_names = parse_per_object_dim_names(rank, per_object_meta)

    # Group coord dims by length so each axis can look up its candidates.
    coords_by_size: dict[int, list[str]] = {}
    for coord_name, coord_len in coord_dim_sizes.items():
        if coord_len not in coords_by_size:
            coords_by_size[coord_len] = []
        coords_by_size[coord_len].append(coord_name)

    hint = parse_extra_dim_names_hint(rank, extra_dim_names_hint)
    hint_by_axis = hint if isinstance(hint, list) else None
    hint_by_size = hint if isinstance(hint, dict) else None

    resolved: list[tuple[str, bool]] = []
    taken: set[str] = set()
    for index, length in enumerate(shape):
        chosen: str | None = None

        # Source 2: first free coord dim of matching size.
        for coord_name in coords_by_size.get(length, ()):
            if coord_name not in taken:
                chosen = coord_name
                break

        # Source 3: per-object hint for this axis.
        if chosen is None and object_names is not None:
            if object_names[index] not in taken:
                chosen = object_names[index]

        # Source 4a: list-style extra hint, indexed by axis.
        if chosen is None and hint_by_axis is not None:
            if hint_by_axis[index] not in taken:
                chosen = hint_by_axis[index]

        # Source 4b: dict-style extra hint, keyed by axis size.
        if chosen is None and hint_by_size is not None and length in hint_by_size:
            if hint_by_size[length] not in taken:
                chosen = hint_by_size[length]

        if chosen is None:
            # Source 5: nothing matched.
            resolved.append((f"dim_{index}", True))
        else:
            resolved.append((chosen, False))
            taken.add(chosen)
    return resolved
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _resolve_dotted(meta: dict[str, Any], dotted_key: str) -> Any:
|
|
215
|
+
"""Resolve a dotted key path like ``mars.param`` in a nested dict."""
|
|
216
|
+
parts = dotted_key.split(".")
|
|
217
|
+
current: Any = meta
|
|
218
|
+
for part in parts:
|
|
219
|
+
if not isinstance(current, dict) or part not in current:
|
|
220
|
+
return None
|
|
221
|
+
current = current[part]
|
|
222
|
+
return current
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
# Dotted-path metadata keys to try for variable naming, in priority order
# (earlier entries win).  A dotted entry such as ``mars.param`` addresses a
# nested dict, i.e. ``meta["mars"]["param"]``.
# Must match the priority chain in tensogram-zarr's mapping.py.
_VARIABLE_NAME_KEYS = [
    "name",
    "mars.param",
    "param",
    "mars.shortName",
    "shortName",
]
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def resolve_variable_name(
    obj_index: int,
    per_object_meta: dict[str, Any],
    variable_key: str | None,
) -> str:
    """Pick the xarray variable name for one data object.

    Lookup order:

    1. *variable_key* (a dotted path such as ``"mars.param"``), when given.
    2. Each entry of ``_VARIABLE_NAME_KEYS`` in turn; this also applies when
       *variable_key* is given but resolves to nothing.
    3. The generic name ``"object_<index>"``.

    The first lookup that yields a value other than ``None`` wins and is
    returned via ``str()``.  A falsy *per_object_meta* is treated as empty.

    The priority chain matches ``tensogram-zarr``'s ``resolve_variable_name``
    so that the same ``.tgm`` file produces consistent variable names
    regardless of which backend opens it.
    """
    metadata = per_object_meta or {}

    # Explicit key (if any) goes in front of the standard chain.
    if variable_key:
        lookup_order = [variable_key, *_VARIABLE_NAME_KEYS]
    else:
        lookup_order = list(_VARIABLE_NAME_KEYS)

    for dotted in lookup_order:
        found = _resolve_dotted(metadata, dotted)
        if found is not None:
            return str(found)

    return f"object_{obj_index}"
|