tensogram-xarray 0.16.1__tar.gz → 0.17.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/.gitignore +8 -15
  2. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/PKG-INFO +3 -3
  3. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/pyproject.toml +3 -3
  4. tensogram_xarray-0.17.0/src/tensogram_xarray/mapping.py +263 -0
  5. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/src/tensogram_xarray/merge.py +191 -55
  6. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/src/tensogram_xarray/store.py +150 -119
  7. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/tests/test_coverage.py +304 -72
  8. tensogram_xarray-0.17.0/tests/test_edge_cases.py +1130 -0
  9. tensogram_xarray-0.17.0/tests/test_issue_67_descriptor_name_fallback.py +96 -0
  10. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/tests/test_merge.py +1 -1
  11. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/tests/test_nd_range.py +2 -2
  12. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/tests/test_remote.py +1 -1
  13. tensogram_xarray-0.16.1/src/tensogram_xarray/mapping.py +0 -91
  14. tensogram_xarray-0.16.1/tests/test_edge_cases.py +0 -487
  15. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/README.md +0 -0
  16. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/src/tensogram_xarray/__init__.py +0 -0
  17. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/src/tensogram_xarray/array.py +0 -0
  18. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/src/tensogram_xarray/backend.py +0 -0
  19. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/src/tensogram_xarray/coords.py +0 -0
  20. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/src/tensogram_xarray/scanner.py +0 -0
  21. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/tests/__init__.py +0 -0
  22. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/tests/conftest.py +0 -0
  23. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/tests/test_array.py +0 -0
  24. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/tests/test_backend.py +0 -0
  25. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/tests/test_coords.py +0 -0
  26. {tensogram_xarray-0.16.1 → tensogram_xarray-0.17.0}/tests/test_mapping.py +0 -0
@@ -7,23 +7,16 @@
7
7
  **/target
8
8
  **/pkg
9
9
  **/build/
10
+ docs/book/
10
11
  python/**/dist/
11
12
  python/bindings/Cargo.lock
12
-
13
- /docs/book
14
- **/.venv
15
- **/.ruff_cache
16
- **/__pycache__
17
- **/.pytest_cache
18
- **/.ipynb_checkpoints
19
- *.so
20
- *.dylib
21
- *.pyd
22
- *.swp
23
- *.swo
24
- *~
25
- .DS_Store
26
- .idea/
27
13
  rust/tensogram-grib/Cargo.lock
28
14
  rust/tensogram-netcdf/Cargo.lock
29
15
  rust/tensogram-wasm/Cargo.lock
16
+ # Python virtualenv, caches, and maturin-installed extension modules
17
+ .venv/
18
+ **/__pycache__/
19
+ *.pyc
20
+ python/bindings/python/tensogram/tensogram*.so
21
+ # TODO: decide whether uv.lock files should be ignored.
22
+ **/uv.lock
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tensogram-xarray
3
- Version: 0.16.1
3
+ Version: 0.17.0
4
4
  Summary: xarray backend engine for tensogram .tgm files
5
5
  Project-URL: Homepage, https://sites.ecmwf.int/docs/tensogram/main
6
6
  Project-URL: Repository, https://github.com/ecmwf/tensogram
@@ -12,9 +12,9 @@ Classifier: License :: OSI Approved :: Apache Software License
12
12
  Classifier: Programming Language :: Python :: 3
13
13
  Classifier: Topic :: Scientific/Engineering
14
14
  Classifier: Topic :: Scientific/Engineering :: Atmospheric Science
15
- Requires-Python: >=3.9
15
+ Requires-Python: >=3.11
16
16
  Requires-Dist: numpy
17
- Requires-Dist: tensogram<0.17,>=0.16.1
17
+ Requires-Dist: tensogram<0.18,>=0.17.0
18
18
  Requires-Dist: xarray>=2022.06
19
19
  Provides-Extra: dask
20
20
  Requires-Dist: dask[array]; extra == 'dask'
@@ -4,10 +4,10 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "tensogram-xarray"
7
- version = "0.16.1"
7
+ version = "0.17.0"
8
8
  description = "xarray backend engine for tensogram .tgm files"
9
9
  readme = "README.md"
10
- requires-python = ">=3.9"
10
+ requires-python = ">=3.11"
11
11
  license = "Apache-2.0"
12
12
  authors = [{name = "ECMWF", email = "software@ecmwf.int"}]
13
13
  classifiers = [
@@ -18,7 +18,7 @@ classifiers = [
18
18
  "Topic :: Scientific/Engineering :: Atmospheric Science",
19
19
  ]
20
20
  dependencies = [
21
- "tensogram>=0.16.1,<0.17",
21
+ "tensogram>=0.17.0,<0.18",
22
22
  "xarray>=2022.06",
23
23
  "numpy",
24
24
  ]
@@ -0,0 +1,263 @@
1
+ # (C) Copyright 2026- ECMWF and individual contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation nor
7
+ # does it submit to any jurisdiction.
8
+
9
+ """Dimension and variable naming for the xarray backend.
10
+
11
+ Handles the ``dim_names`` and ``variable_key`` parameters that let callers
12
+ control how tensogram data maps to xarray dimensions and variable names,
13
+ including the per-object ``base[i]["dim_names"]`` opt-in convention.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import logging
19
+ from collections.abc import Mapping, Sequence
20
+ from typing import Any
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
PER_OBJECT_DIM_NAMES_KEY = "dim_names"
EXTRA_DIM_NAMES_KEY = "dim_names"

# Keys that describe xarray structure instead of user-facing attributes.
# The backend reads them to shape the Dataset; they must stay out of
# :attr:`xarray.Variable.attrs` and out of the :mod:`merge` path's hypercube
# grouping (otherwise hint-like keys could become outer dims).
STRUCTURAL_META_KEYS: frozenset[str] = frozenset((PER_OBJECT_DIM_NAMES_KEY,))


def strip_structural_keys(meta: Mapping[str, Any]) -> dict[str, Any]:
    """Copy *meta* into a new dict, leaving out :data:`STRUCTURAL_META_KEYS`.

    The input mapping is not modified; key order of the remaining entries
    follows the iteration order of *meta*.
    """
    kept: dict[str, Any] = {}
    for key, value in meta.items():
        if key in STRUCTURAL_META_KEYS:
            continue
        kept[key] = value
    return kept
37
+
38
+
39
def resolve_dim_names(
    ndim: int,
    user_dim_names: Sequence[str] | None,
) -> list[str]:
    """Produce one dimension name per axis of an *ndim*-dimensional tensor.

    When *user_dim_names* is ``None`` the generic names ``dim_0``,
    ``dim_1``, ... are generated.  Otherwise the supplied names are copied
    into a new list and returned unchanged.

    Raises
    ------
    ValueError
        If *user_dim_names* is given but does not contain exactly *ndim*
        entries.
    """
    if user_dim_names is None:
        return [f"dim_{axis}" for axis in range(ndim)]

    supplied = list(user_dim_names)
    if len(supplied) == ndim:
        return supplied

    msg = (
        f"dim_names has {len(supplied)} entries but tensor has {ndim} "
        f"dimensions. Provide exactly {ndim} names."
    )
    raise ValueError(msg)
58
+
59
+
60
def _looks_like_string_sequence(raw: Any) -> bool:
    """Tell whether *raw* is a sequence other than ``str``/``bytes``/``bytearray``.

    Only the container type is inspected; the element types are not checked.
    """
    is_text_like = isinstance(raw, (str, bytes, bytearray))
    return not is_text_like and isinstance(raw, Sequence)
65
+
66
+
67
def parse_per_object_dim_names(
    ndim: int,
    obj_meta: Mapping[str, Any] | None,
) -> list[str] | None:
    """Extract and validate the per-object dim-name hint from *obj_meta*.

    The hint is read from ``obj_meta["dim_names"]`` (the per-object
    ``base[i]["dim_names"]`` convention).  It is accepted only when it is a
    non-``str`` sequence holding exactly *ndim* non-empty, pairwise distinct
    strings; in that case a new list with those names is returned.

    A missing hint, empty/``None`` metadata, or any malformed hint yields
    ``None``.  Malformed hints are reported at DEBUG level only, so callers
    fall through to the next naming source instead of failing.
    """
    hint = obj_meta.get(PER_OBJECT_DIM_NAMES_KEY) if obj_meta else None
    if hint is None:
        return None

    if not _looks_like_string_sequence(hint):
        logger.debug(
            "per-object %s hint is not a list/sequence (got %s); ignoring",
            PER_OBJECT_DIM_NAMES_KEY,
            type(hint).__name__,
        )
        return None

    candidates = list(hint)
    count = len(candidates)
    if count != ndim:
        logger.debug(
            "per-object %s hint has %d entries but ndim=%d; ignoring",
            PER_OBJECT_DIM_NAMES_KEY,
            count,
            ndim,
        )
        return None

    # Every entry has to be a non-empty string before uniqueness is checked.
    if any(not isinstance(entry, str) or not entry for entry in candidates):
        logger.debug(
            "per-object %s hint contains non-string or empty entries; ignoring",
            PER_OBJECT_DIM_NAMES_KEY,
        )
        return None

    if len(set(candidates)) != ndim:
        logger.debug(
            "per-object %s hint contains duplicate entries %r; ignoring",
            PER_OBJECT_DIM_NAMES_KEY,
            candidates,
        )
        return None

    return candidates
114
+
115
+
116
def parse_extra_dim_names_hint(
    ndim: int,
    raw: Any,
) -> list[str] | dict[int, str]:
    """Normalise the ``_extra_["dim_names"]`` hint into a list or a dict.

    Two legacy layouts are understood:

    * ``list`` (preferred) — axis-ordered names; every entry is converted
      with ``str`` and the list is accepted only if it has *ndim* entries.
    * ``dict`` — size-to-name mapping; keys are converted with ``int`` and
      values with ``str``.

    Anything else (including ``None``, a list of the wrong length, or a dict
    whose keys cannot be converted to ``int``) produces an empty dict, so
    callers can treat the result uniformly.
    """
    if isinstance(raw, list):
        try:
            axis_names = list(map(str, raw))
        except (TypeError, ValueError):
            return {}
        return axis_names if len(axis_names) == ndim else {}

    if isinstance(raw, dict):
        by_size: dict[int, str] = {}
        try:
            for size, label in raw.items():
                index = int(size)
                by_size[index] = str(label)
        except (TypeError, ValueError):
            return {}
        return by_size

    # ``None`` and every unsupported type end up here.
    return {}
145
+
146
+
147
def resolve_dims_for_axes(
    shape: tuple[int, ...],
    *,
    user_dim_names: Sequence[str] | None,
    coord_dim_sizes: Mapping[str, int],
    per_object_meta: Mapping[str, Any] | None,
    extra_dim_names_hint: Any,
) -> list[tuple[str, bool]]:
    """Name every axis of *shape*, returning ``(name, is_generic_fallback)`` pairs.

    Sources are consulted in this order (highest priority first):

    1. ``user_dim_names`` — explicit caller kwarg, validated by
       :func:`resolve_dim_names`; when given, nothing else is consulted.
    2. Coord size-match — a name from ``coord_dim_sizes`` whose size equals
       the axis size.
    3. Per-object ``base[i]["dim_names"]`` — validated by
       :func:`parse_per_object_dim_names`.
    4. ``_extra_["dim_names"]`` — list or size-to-name dict, parsed by
       :func:`parse_extra_dim_names_hint`.
    5. Generic ``dim_{axis}`` — the only case flagged with
       ``is_generic_fallback=True``.

    For sources 2-4 a name already assigned to an earlier axis is skipped,
    and the next source is tried.  Generic fallback names are not recorded
    as used; resolving collisions that involve them is left to the caller.
    """
    ndim = len(shape)

    # An explicit caller kwarg wins outright.
    if user_dim_names is not None:
        explicit = resolve_dim_names(ndim, user_dim_names)
        return [(label, False) for label in explicit]

    per_obj = parse_per_object_dim_names(ndim, per_object_meta)

    # Group coord names by their size so each axis can look up matches.
    coords_by_size: dict[int, list[str]] = {}
    for coord_name, coord_size in coord_dim_sizes.items():
        coords_by_size.setdefault(coord_size, []).append(coord_name)

    extra = parse_extra_dim_names_hint(ndim, extra_dim_names_hint)

    resolved: list[tuple[str, bool]] = []
    taken: set[str] = set()
    for axis, axis_size in enumerate(shape):
        # Collect this axis' candidates in priority order.
        candidates: list[str] = list(coords_by_size.get(axis_size, ()))
        if per_obj is not None:
            candidates.append(per_obj[axis])
        if isinstance(extra, list):
            candidates.append(extra[axis])
        elif isinstance(extra, dict) and axis_size in extra:
            candidates.append(extra[axis_size])

        chosen = next((name for name in candidates if name not in taken), None)
        if chosen is None:
            resolved.append((f"dim_{axis}", True))
        else:
            resolved.append((chosen, False))
            taken.add(chosen)
    return resolved
212
+
213
+
214
def _resolve_dotted(meta: Mapping[str, Any], dotted_key: str) -> Any:
    """Look up a dotted key path such as ``mars.param`` in nested mappings.

    *dotted_key* is split on ``"."`` and each part is used as a key one
    level deeper.  Any :class:`~collections.abc.Mapping` is traversed, not
    only ``dict``, so read-only or custom mapping types resolve the same way
    as plain dicts.

    Returns the value found at the end of the path, or ``None`` when a part
    is missing or an intermediate value is not a mapping.
    """
    current: Any = meta
    for part in dotted_key.split("."):
        # Stop as soon as the path cannot be followed any further.
        if not isinstance(current, Mapping) or part not in current:
            return None
        current = current[part]
    return current
223
+
224
+
225
# Dotted-path metadata keys consulted for variable naming, highest priority
# first.  Must match the priority chain in tensogram-zarr's mapping.py.
_VARIABLE_NAME_KEYS = [
    "name",
    "mars.param",
    "param",
    "mars.shortName",
    "shortName",
]


def resolve_variable_name(
    obj_index: int,
    per_object_meta: dict[str, Any],
    variable_key: str | None,
) -> str:
    """Pick the xarray variable name for the data object at *obj_index*.

    A non-empty *variable_key* (e.g. ``"mars.param"``) is looked up first as
    a dotted path in the per-object metadata.  After that the entries of
    ``_VARIABLE_NAME_KEYS`` are tried in order.  The first lookup that yields
    a value other than ``None`` is returned, converted with ``str``.  If no
    lookup succeeds the generic name ``"object_<index>"`` is returned.

    The priority chain matches ``tensogram-zarr``'s ``resolve_variable_name``
    so that the same ``.tgm`` file produces consistent variable names
    regardless of which backend opens it.
    """
    metadata = per_object_meta or {}

    # The explicit key, when present, goes ahead of the standard chain.
    explicit = [variable_key] if variable_key else []
    candidates = explicit + _VARIABLE_NAME_KEYS

    for dotted in candidates:
        found = _resolve_dotted(metadata, dotted)
        if found is not None:
            return str(found)

    return f"object_{obj_index}"