tensogram-xarray 0.20.0__tar.gz → 0.21.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/PKG-INFO +2 -2
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/pyproject.toml +2 -2
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/src/tensogram_xarray/array.py +16 -3
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/src/tensogram_xarray/backend.py +23 -5
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/src/tensogram_xarray/merge.py +7 -9
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/src/tensogram_xarray/store.py +4 -6
- tensogram_xarray-0.21.0/tests/test_verify_hash.py +123 -0
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/.gitignore +0 -0
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/README.md +0 -0
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/src/tensogram_xarray/__init__.py +0 -0
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/src/tensogram_xarray/coords.py +0 -0
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/src/tensogram_xarray/mapping.py +0 -0
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/src/tensogram_xarray/scanner.py +0 -0
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/tests/__init__.py +0 -0
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/tests/conftest.py +0 -0
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/tests/test_array.py +0 -0
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/tests/test_backend.py +0 -0
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/tests/test_coords.py +0 -0
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/tests/test_coverage.py +0 -0
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/tests/test_edge_cases.py +0 -0
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/tests/test_issue_67_descriptor_name_fallback.py +0 -0
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/tests/test_mapping.py +0 -0
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/tests/test_merge.py +0 -0
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/tests/test_nd_range.py +0 -0
- {tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/tests/test_remote.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tensogram-xarray
|
|
3
|
-
Version: 0.20.0
|
|
3
|
+
Version: 0.21.0
|
|
4
4
|
Summary: xarray backend engine for tensogram .tgm files
|
|
5
5
|
Project-URL: Homepage, https://sites.ecmwf.int/docs/tensogram/main
|
|
6
6
|
Project-URL: Repository, https://github.com/ecmwf/tensogram
|
|
@@ -14,7 +14,7 @@ Classifier: Topic :: Scientific/Engineering
|
|
|
14
14
|
Classifier: Topic :: Scientific/Engineering :: Atmospheric Science
|
|
15
15
|
Requires-Python: >=3.11
|
|
16
16
|
Requires-Dist: numpy
|
|
17
|
-
Requires-Dist: tensogram<0.21,>=0.20.0
|
|
17
|
+
Requires-Dist: tensogram<0.22,>=0.21.0
|
|
18
18
|
Requires-Dist: xarray>=2022.06
|
|
19
19
|
Provides-Extra: dask
|
|
20
20
|
Requires-Dist: dask[array]; extra == 'dask'
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "tensogram-xarray"
|
|
7
|
-
version = "0.20.0"
|
|
7
|
+
version = "0.21.0"
|
|
8
8
|
description = "xarray backend engine for tensogram .tgm files"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -18,7 +18,7 @@ classifiers = [
|
|
|
18
18
|
"Topic :: Scientific/Engineering :: Atmospheric Science",
|
|
19
19
|
]
|
|
20
20
|
dependencies = [
|
|
21
|
-
"tensogram>=0.20.0,<0.21",
|
|
21
|
+
"tensogram>=0.21.0,<0.22",
|
|
22
22
|
"xarray>=2022.06",
|
|
23
23
|
"numpy",
|
|
24
24
|
]
|
|
@@ -200,11 +200,11 @@ class TensogramBackendArray(BackendArray):
|
|
|
200
200
|
dtype: np.dtype,
|
|
201
201
|
supports_range: bool,
|
|
202
202
|
*,
|
|
203
|
-
verify_hash: bool = False,
|
|
204
203
|
range_threshold: float = DEFAULT_RANGE_THRESHOLD,
|
|
205
204
|
lock: threading.Lock | None = None,
|
|
206
205
|
storage_options: dict[str, Any] | None = None,
|
|
207
206
|
shared_file: Any | None = None,
|
|
207
|
+
verify_hash: bool = False,
|
|
208
208
|
):
|
|
209
209
|
import tensogram
|
|
210
210
|
|
|
@@ -215,10 +215,10 @@ class TensogramBackendArray(BackendArray):
|
|
|
215
215
|
self.shape = shape
|
|
216
216
|
self.dtype = dtype
|
|
217
217
|
self.supports_range = supports_range
|
|
218
|
-
self.verify_hash = verify_hash
|
|
219
218
|
self.range_threshold = range_threshold
|
|
220
219
|
self.storage_options = storage_options
|
|
221
220
|
self._shared_file = shared_file
|
|
221
|
+
self.verify_hash = verify_hash
|
|
222
222
|
|
|
223
223
|
# -- pickle support (no open handles stored) ----------------------------
|
|
224
224
|
|
|
@@ -267,12 +267,25 @@ class TensogramBackendArray(BackendArray):
|
|
|
267
267
|
total_elements = math.prod(self.shape)
|
|
268
268
|
|
|
269
269
|
if total_elements > 0 and total_requested / total_elements <= self.range_threshold:
|
|
270
|
+
# Per the decode-time verification contract
|
|
271
|
+
# (PLAN_DECODE_HASH_VERIFICATION §6, Q6): the
|
|
272
|
+
# range-decode fast path does *not* verify
|
|
273
|
+
# hashes — verifying a whole-frame hash would
|
|
274
|
+
# require reading every byte the optimisation
|
|
275
|
+
# is designed to avoid. When `verify_hash` is
|
|
276
|
+
# True, callers who care about integrity should
|
|
277
|
+
# set ``range_threshold=0`` to force every read
|
|
278
|
+
# through the full-decode path below. We
|
|
279
|
+
# *do* allow this fast path even under
|
|
280
|
+
# ``verify_hash=True`` so the user keeps the
|
|
281
|
+
# remote-fetch cost characteristics they
|
|
282
|
+
# opted in to via ``range_threshold``; the
|
|
283
|
+
# verification simply does not apply.
|
|
270
284
|
arr = f.file_decode_range(
|
|
271
285
|
self.msg_index,
|
|
272
286
|
obj_index=self.obj_index,
|
|
273
287
|
ranges=flat_ranges,
|
|
274
288
|
join=True,
|
|
275
|
-
verify_hash=self.verify_hash,
|
|
276
289
|
native_byte_order=True,
|
|
277
290
|
)
|
|
278
291
|
return np.asarray(arr).reshape(out_shape)
|
|
@@ -55,9 +55,9 @@ class TensogramBackendEntrypoint(BackendEntrypoint):
|
|
|
55
55
|
variable_key: str | None = None,
|
|
56
56
|
message_index: int = 0,
|
|
57
57
|
merge_objects: bool = False,
|
|
58
|
-
verify_hash: bool = False,
|
|
59
58
|
range_threshold: float = 0.5,
|
|
60
59
|
storage_options: dict[str, Any] | None = None,
|
|
60
|
+
verify_hash: bool = False,
|
|
61
61
|
) -> xr.Dataset:
|
|
62
62
|
"""Open a single tensogram message as an :class:`xr.Dataset`.
|
|
63
63
|
|
|
@@ -79,8 +79,6 @@ class TensogramBackendEntrypoint(BackendEntrypoint):
|
|
|
79
79
|
If *True*, attempt to merge objects across messages by stacking
|
|
80
80
|
along metadata dimensions that vary. When *False* (default),
|
|
81
81
|
only the single message at *message_index* is opened.
|
|
82
|
-
verify_hash
|
|
83
|
-
Whether to verify xxh3 hashes during decode.
|
|
84
82
|
range_threshold
|
|
85
83
|
Maximum fraction of total array elements (0.0-1.0) for which
|
|
86
84
|
partial ``decode_range()`` is used instead of a full
|
|
@@ -88,6 +86,26 @@ class TensogramBackendEntrypoint(BackendEntrypoint):
|
|
|
88
86
|
storage_options
|
|
89
87
|
Key-value pairs forwarded to the object store backend when
|
|
90
88
|
the path is a remote URL. Ignored for local files.
|
|
89
|
+
verify_hash
|
|
90
|
+
When *True*, every full ``decode_object`` (or
|
|
91
|
+
``file_decode_object``) call materialised by this Dataset's
|
|
92
|
+
lazy backing arrays is verified against its inline xxh3
|
|
93
|
+
hash; ``MissingHashError`` / ``HashMismatchError`` from
|
|
94
|
+
the underlying tensogram bindings propagate to the
|
|
95
|
+
caller's first read.
|
|
96
|
+
|
|
97
|
+
**Caveat — partial-range fast path is unverified.** Per
|
|
98
|
+
the decode-time verification contract (see
|
|
99
|
+
``plans/DESIGN.md`` §"Integrity Hashing" and
|
|
100
|
+
``plans/WIRE_FORMAT.md`` §11.1), ``decode_range`` reads
|
|
101
|
+
only a slice of the encoded payload and cannot meaningfully
|
|
102
|
+
verify a whole-frame hash. When ``verify_hash=True`` and
|
|
103
|
+
the lazy reader chooses ``file_decode_range`` (because
|
|
104
|
+
the requested slice is below ``range_threshold``), no
|
|
105
|
+
verification happens for that read. Set
|
|
106
|
+
``range_threshold=0`` to force every read through the
|
|
107
|
+
full-decode path if you need consistent integrity
|
|
108
|
+
coverage.
|
|
91
109
|
|
|
92
110
|
Returns
|
|
93
111
|
-------
|
|
@@ -107,9 +125,9 @@ class TensogramBackendEntrypoint(BackendEntrypoint):
|
|
|
107
125
|
file_path,
|
|
108
126
|
dim_names=dim_names,
|
|
109
127
|
variable_key=variable_key,
|
|
110
|
-
verify_hash=verify_hash,
|
|
111
128
|
range_threshold=range_threshold,
|
|
112
129
|
storage_options=storage_options,
|
|
130
|
+
verify_hash=verify_hash,
|
|
113
131
|
)
|
|
114
132
|
if not datasets:
|
|
115
133
|
return xr.Dataset()
|
|
@@ -120,9 +138,9 @@ class TensogramBackendEntrypoint(BackendEntrypoint):
|
|
|
120
138
|
msg_index=message_index,
|
|
121
139
|
dim_names=dim_names,
|
|
122
140
|
variable_key=variable_key,
|
|
123
|
-
verify_hash=verify_hash,
|
|
124
141
|
range_threshold=range_threshold,
|
|
125
142
|
storage_options=storage_options,
|
|
143
|
+
verify_hash=verify_hash,
|
|
126
144
|
)
|
|
127
145
|
|
|
128
146
|
drop_set = set(drop_variables) if drop_variables else None
|
|
@@ -51,8 +51,8 @@ def open_datasets(
|
|
|
51
51
|
*,
|
|
52
52
|
dim_names: Sequence[str] | None = None,
|
|
53
53
|
variable_key: str | None = None,
|
|
54
|
-
verify_hash: bool = False,
|
|
55
54
|
range_threshold: float = 0.5,
|
|
55
|
+
verify_hash: bool = False,
|
|
56
56
|
storage_options: dict[str, Any] | None = None,
|
|
57
57
|
) -> list[xr.Dataset]:
|
|
58
58
|
"""Open a ``.tgm`` file, auto-grouping into compatible Datasets.
|
|
@@ -69,8 +69,6 @@ def open_datasets(
|
|
|
69
69
|
Explicit dimension names for the innermost tensor axes.
|
|
70
70
|
variable_key
|
|
71
71
|
Dotted metadata key path for variable naming.
|
|
72
|
-
verify_hash
|
|
73
|
-
Whether to verify hashes on decode.
|
|
74
72
|
range_threshold
|
|
75
73
|
Maximum fraction of total array elements for which partial
|
|
76
74
|
``decode_range()`` is used. Default ``0.5``.
|
|
@@ -114,8 +112,8 @@ def open_datasets(
|
|
|
114
112
|
shape=shape,
|
|
115
113
|
dtype=np_dtype,
|
|
116
114
|
supports_range=_supports_range_decode(obj.descriptor),
|
|
117
|
-
verify_hash=verify_hash,
|
|
118
115
|
range_threshold=range_threshold,
|
|
116
|
+
verify_hash=verify_hash,
|
|
119
117
|
lock=lock,
|
|
120
118
|
storage_options=storage_options,
|
|
121
119
|
shared_file=shared_file,
|
|
@@ -479,8 +477,8 @@ def _single_object_dataset(
|
|
|
479
477
|
shape=shape,
|
|
480
478
|
dtype=np_dtype,
|
|
481
479
|
supports_range=_supports_range_decode(obj.descriptor),
|
|
482
|
-
verify_hash=verify_hash,
|
|
483
480
|
range_threshold=range_threshold,
|
|
481
|
+
verify_hash=verify_hash,
|
|
484
482
|
lock=lock,
|
|
485
483
|
storage_options=storage_options,
|
|
486
484
|
shared_file=shared_file,
|
|
@@ -532,8 +530,8 @@ def _flat_group_dataset(
|
|
|
532
530
|
shape=obj.shape,
|
|
533
531
|
dtype=np_dtype,
|
|
534
532
|
supports_range=_supports_range_decode(obj.descriptor),
|
|
535
|
-
verify_hash=verify_hash,
|
|
536
533
|
range_threshold=range_threshold,
|
|
534
|
+
verify_hash=verify_hash,
|
|
537
535
|
lock=lock,
|
|
538
536
|
storage_options=storage_options,
|
|
539
537
|
shared_file=shared_file,
|
|
@@ -696,8 +694,8 @@ def _build_multi_variable_dataset(
|
|
|
696
694
|
shape=inner_shape,
|
|
697
695
|
dtype=np_dtype,
|
|
698
696
|
supports_range=_supports_range_decode(obj.descriptor),
|
|
699
|
-
verify_hash=verify_hash,
|
|
700
697
|
range_threshold=range_threshold,
|
|
698
|
+
verify_hash=verify_hash,
|
|
701
699
|
lock=lock,
|
|
702
700
|
storage_options=storage_options,
|
|
703
701
|
shared_file=shared_file,
|
|
@@ -786,8 +784,8 @@ def _build_multi_variable_dataset(
|
|
|
786
784
|
shape=inner_shape,
|
|
787
785
|
dtype=np_dtype,
|
|
788
786
|
supports_range=_supports_range_decode(obj.descriptor),
|
|
789
|
-
verify_hash=verify_hash,
|
|
790
787
|
range_threshold=range_threshold,
|
|
788
|
+
verify_hash=verify_hash,
|
|
791
789
|
lock=lock,
|
|
792
790
|
storage_options=storage_options,
|
|
793
791
|
shared_file=shared_file,
|
|
@@ -816,8 +814,8 @@ def _build_multi_variable_dataset(
|
|
|
816
814
|
shape=inner_shape,
|
|
817
815
|
dtype=np_dtype,
|
|
818
816
|
supports_range=_supports_range_decode(obj.descriptor),
|
|
819
|
-
verify_hash=verify_hash,
|
|
820
817
|
range_threshold=range_threshold,
|
|
818
|
+
verify_hash=verify_hash,
|
|
821
819
|
lock=lock,
|
|
822
820
|
storage_options=storage_options,
|
|
823
821
|
shared_file=shared_file,
|
|
@@ -196,8 +196,6 @@ class TensogramDataStore:
|
|
|
196
196
|
Optional user-specified dimension names for data variables.
|
|
197
197
|
variable_key
|
|
198
198
|
Optional dotted metadata path for variable naming.
|
|
199
|
-
verify_hash
|
|
200
|
-
Whether to verify object hashes on decode.
|
|
201
199
|
range_threshold
|
|
202
200
|
Maximum fraction of total array elements (0.0-1.0) for which
|
|
203
201
|
partial ``decode_range()`` is used. Default ``0.5``.
|
|
@@ -214,9 +212,9 @@ class TensogramDataStore:
|
|
|
214
212
|
msg_index: int = 0,
|
|
215
213
|
dim_names: Sequence[str] | None = None,
|
|
216
214
|
variable_key: str | None = None,
|
|
217
|
-
verify_hash: bool = False,
|
|
218
215
|
range_threshold: float = 0.5,
|
|
219
216
|
storage_options: dict[str, Any] | None = None,
|
|
217
|
+
verify_hash: bool = False,
|
|
220
218
|
):
|
|
221
219
|
import tensogram
|
|
222
220
|
|
|
@@ -225,9 +223,9 @@ class TensogramDataStore:
|
|
|
225
223
|
self.msg_index = msg_index
|
|
226
224
|
self.dim_names = dim_names
|
|
227
225
|
self.variable_key = variable_key
|
|
228
|
-
self.verify_hash = verify_hash
|
|
229
226
|
self.range_threshold = range_threshold
|
|
230
227
|
self.storage_options = storage_options
|
|
228
|
+
self.verify_hash = verify_hash
|
|
231
229
|
self._lock = threading.Lock()
|
|
232
230
|
self._backend_arrays: list[TensogramBackendArray] = []
|
|
233
231
|
|
|
@@ -347,11 +345,11 @@ class TensogramDataStore:
|
|
|
347
345
|
shape=shape,
|
|
348
346
|
dtype=np_dtype,
|
|
349
347
|
supports_range=_supports_range_decode(desc),
|
|
350
|
-
verify_hash=self.verify_hash,
|
|
351
348
|
range_threshold=self.range_threshold,
|
|
352
349
|
lock=self._lock,
|
|
353
350
|
storage_options=self.storage_options,
|
|
354
351
|
shared_file=self._file,
|
|
352
|
+
verify_hash=self.verify_hash,
|
|
355
353
|
)
|
|
356
354
|
self._backend_arrays.append(backend_array)
|
|
357
355
|
lazy_data = indexing.LazilyIndexedArray(backend_array)
|
|
@@ -387,11 +385,11 @@ class TensogramDataStore:
|
|
|
387
385
|
shape=shape,
|
|
388
386
|
dtype=np_dtype,
|
|
389
387
|
supports_range=_supports_range_decode(desc),
|
|
390
|
-
verify_hash=self.verify_hash,
|
|
391
388
|
range_threshold=self.range_threshold,
|
|
392
389
|
lock=self._lock,
|
|
393
390
|
storage_options=self.storage_options,
|
|
394
391
|
shared_file=self._file,
|
|
392
|
+
verify_hash=self.verify_hash,
|
|
395
393
|
)
|
|
396
394
|
self._backend_arrays.append(backend_array)
|
|
397
395
|
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# (C) Copyright 2026- ECMWF and individual contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation nor
|
|
7
|
+
# does it submit to any jurisdiction.
|
|
8
|
+
|
|
9
|
+
"""verify_hash threading through the xarray backend.
|
|
10
|
+
|
|
11
|
+
The xarray backend re-exposes the upstream
|
|
12
|
+
``DecodeOptions::verify_hash`` flag via the ``open_dataset(...,
|
|
13
|
+
verify_hash=True)`` keyword. When the lazy backing arrays
|
|
14
|
+
materialise data via ``file_decode_object`` /
|
|
15
|
+
``decode_object``, the kwarg propagates and integrity errors
|
|
16
|
+
(``MissingHashError`` / ``HashMismatchError``) bubble up to
|
|
17
|
+
the caller's first read.
|
|
18
|
+
|
|
19
|
+
Per Q6 in ``PLAN_DECODE_HASH_VERIFICATION.md``: the partial-
|
|
20
|
+
range fast path silently does *not* verify (range decode
|
|
21
|
+
does not accept ``verify_hash``). Set ``range_threshold=0``
|
|
22
|
+
to force every read through the full-decode path if you
|
|
23
|
+
need uniform coverage.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import numpy as np
|
|
29
|
+
import pytest
|
|
30
|
+
import tensogram
|
|
31
|
+
import xarray as xr
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _build_unhashed_message(tmp_path) -> str:
|
|
35
|
+
"""Encode a 1-object f32 message with hashing off + write to disk.
|
|
36
|
+
|
|
37
|
+
Returns the file path. The unhashed encoding is what makes
|
|
38
|
+
cell C (`verify_hash=True` → MissingHashError) testable.
|
|
39
|
+
"""
|
|
40
|
+
meta = {"version": 3}
|
|
41
|
+
desc = {
|
|
42
|
+
"type": "ntensor",
|
|
43
|
+
"ndim": 1,
|
|
44
|
+
"shape": [4],
|
|
45
|
+
"strides": [1],
|
|
46
|
+
"dtype": "float32",
|
|
47
|
+
"encoding": "none",
|
|
48
|
+
"filter": "none",
|
|
49
|
+
"compression": "none",
|
|
50
|
+
}
|
|
51
|
+
data = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
|
|
52
|
+
msg = bytes(tensogram.encode(meta, [(desc, data)], hash=None))
|
|
53
|
+
path = tmp_path / "unhashed.tgm"
|
|
54
|
+
path.write_bytes(msg)
|
|
55
|
+
return str(path)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _build_hashed_message(tmp_path) -> str:
|
|
59
|
+
"""Encode a 1-object f32 message with hashing on + write to disk."""
|
|
60
|
+
meta = {"version": 3}
|
|
61
|
+
desc = {
|
|
62
|
+
"type": "ntensor",
|
|
63
|
+
"ndim": 1,
|
|
64
|
+
"shape": [4],
|
|
65
|
+
"strides": [1],
|
|
66
|
+
"dtype": "float32",
|
|
67
|
+
"encoding": "none",
|
|
68
|
+
"filter": "none",
|
|
69
|
+
"compression": "none",
|
|
70
|
+
}
|
|
71
|
+
data = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
|
|
72
|
+
msg = bytes(tensogram.encode(meta, [(desc, data)], hash="xxh3"))
|
|
73
|
+
path = tmp_path / "hashed.tgm"
|
|
74
|
+
path.write_bytes(msg)
|
|
75
|
+
return str(path)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class TestOpenDatasetVerifyHash:
|
|
79
|
+
def test_verify_hash_default_is_false(self, tmp_path):
|
|
80
|
+
"""Default ``verify_hash=False`` decodes both hashed and
|
|
81
|
+
unhashed fixtures cleanly."""
|
|
82
|
+
ds_hashed = xr.open_dataset(_build_hashed_message(tmp_path), engine="tensogram")
|
|
83
|
+
# Force materialisation.
|
|
84
|
+
np.asarray(ds_hashed[next(iter(ds_hashed.data_vars))].values)
|
|
85
|
+
ds_hashed.close()
|
|
86
|
+
|
|
87
|
+
ds_unhashed = xr.open_dataset(_build_unhashed_message(tmp_path), engine="tensogram")
|
|
88
|
+
np.asarray(ds_unhashed[next(iter(ds_unhashed.data_vars))].values)
|
|
89
|
+
ds_unhashed.close()
|
|
90
|
+
|
|
91
|
+
def test_verify_hash_true_succeeds_on_hashed_dataset(self, tmp_path):
|
|
92
|
+
"""Cell B equivalent on the xarray surface: opening a
|
|
93
|
+
hashed file with ``verify_hash=True`` and pulling data
|
|
94
|
+
materialises cleanly."""
|
|
95
|
+
ds = xr.open_dataset(
|
|
96
|
+
_build_hashed_message(tmp_path),
|
|
97
|
+
engine="tensogram",
|
|
98
|
+
verify_hash=True,
|
|
99
|
+
# Force the full-decode path so the verification fires.
|
|
100
|
+
range_threshold=0.0,
|
|
101
|
+
)
|
|
102
|
+
arr = np.asarray(ds[next(iter(ds.data_vars))].values)
|
|
103
|
+
np.testing.assert_array_equal(arr, [1.0, 2.0, 3.0, 4.0])
|
|
104
|
+
ds.close()
|
|
105
|
+
|
|
106
|
+
def test_verify_hash_true_raises_missing_hash_on_unhashed(self, tmp_path):
|
|
107
|
+
"""Cell C on xarray: open a hashless file with
|
|
108
|
+
``verify_hash=True`` and the first read raises
|
|
109
|
+
``MissingHashError`` from the underlying
|
|
110
|
+
:mod:`tensogram` bindings."""
|
|
111
|
+
ds = xr.open_dataset(
|
|
112
|
+
_build_unhashed_message(tmp_path),
|
|
113
|
+
engine="tensogram",
|
|
114
|
+
verify_hash=True,
|
|
115
|
+
# Force the full-decode path so the verification fires.
|
|
116
|
+
range_threshold=0.0,
|
|
117
|
+
)
|
|
118
|
+
try:
|
|
119
|
+
with pytest.raises(tensogram.MissingHashError) as excinfo:
|
|
120
|
+
_ = np.asarray(ds[next(iter(ds.data_vars))].values)
|
|
121
|
+
assert excinfo.value.object_index == 0
|
|
122
|
+
finally:
|
|
123
|
+
ds.close()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tensogram_xarray-0.20.0 → tensogram_xarray-0.21.0}/tests/test_issue_67_descriptor_name_fallback.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|