tensogram-xarray 0.21.0__tar.gz → 0.22.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/.gitignore +7 -0
  2. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/PKG-INFO +2 -2
  3. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/pyproject.toml +2 -2
  4. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/tests/test_coverage.py +268 -0
  5. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/README.md +0 -0
  6. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/src/tensogram_xarray/__init__.py +0 -0
  7. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/src/tensogram_xarray/array.py +0 -0
  8. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/src/tensogram_xarray/backend.py +0 -0
  9. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/src/tensogram_xarray/coords.py +0 -0
  10. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/src/tensogram_xarray/mapping.py +0 -0
  11. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/src/tensogram_xarray/merge.py +0 -0
  12. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/src/tensogram_xarray/scanner.py +0 -0
  13. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/src/tensogram_xarray/store.py +0 -0
  14. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/tests/__init__.py +0 -0
  15. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/tests/conftest.py +0 -0
  16. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/tests/test_array.py +0 -0
  17. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/tests/test_backend.py +0 -0
  18. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/tests/test_coords.py +0 -0
  19. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/tests/test_edge_cases.py +0 -0
  20. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/tests/test_issue_67_descriptor_name_fallback.py +0 -0
  21. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/tests/test_mapping.py +0 -0
  22. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/tests/test_merge.py +0 -0
  23. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/tests/test_nd_range.py +0 -0
  24. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/tests/test_remote.py +0 -0
  25. {tensogram_xarray-0.21.0 → tensogram_xarray-0.22.0}/tests/test_verify_hash.py +0 -0
@@ -10,6 +10,8 @@
10
10
  **/build/
11
11
  docs/book/
12
12
  python/**/dist/
13
+ # Release wheel staging from `make python-dist-extras` / `make release-check`
14
+ /dist/
13
15
  python/bindings/Cargo.lock
14
16
  rust/tensogram-grib/Cargo.lock
15
17
  rust/tensogram-netcdf/Cargo.lock
@@ -18,6 +20,11 @@ rust/tensogram-wasm/Cargo.lock
18
20
  .venv/
19
21
  **/__pycache__/
20
22
  *.pyc
23
+
24
+ # cargo-mutants working directory + per-run logs
25
+ mutants.out/
26
+ mutants.out.*/
27
+ mutants*.log
21
28
  python/bindings/python/tensogram/tensogram*.so
22
29
  # TODO do we want to have uv.locks ignored?
23
30
  **/uv.lock
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tensogram-xarray
3
- Version: 0.21.0
3
+ Version: 0.22.0
4
4
  Summary: xarray backend engine for tensogram .tgm files
5
5
  Project-URL: Homepage, https://sites.ecmwf.int/docs/tensogram/main
6
6
  Project-URL: Repository, https://github.com/ecmwf/tensogram
@@ -14,7 +14,7 @@ Classifier: Topic :: Scientific/Engineering
14
14
  Classifier: Topic :: Scientific/Engineering :: Atmospheric Science
15
15
  Requires-Python: >=3.11
16
16
  Requires-Dist: numpy
17
- Requires-Dist: tensogram<0.22,>=0.21.0
17
+ Requires-Dist: tensogram<0.23,>=0.22.0
18
18
  Requires-Dist: xarray>=2022.06
19
19
  Provides-Extra: dask
20
20
  Requires-Dist: dask[array]; extra == 'dask'
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "tensogram-xarray"
7
- version = "0.21.0"
7
+ version = "0.22.0"
8
8
  description = "xarray backend engine for tensogram .tgm files"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -18,7 +18,7 @@ classifiers = [
18
18
  "Topic :: Scientific/Engineering :: Atmospheric Science",
19
19
  ]
20
20
  dependencies = [
21
- "tensogram>=0.21.0,<0.22",
21
+ "tensogram>=0.22.0,<0.23",
22
22
  "xarray>=2022.06",
23
23
  "numpy",
24
24
  ]
@@ -1782,3 +1782,271 @@ class TestMergePathDimResolution:
1782
1782
  "merge path must not misread message-level _extra_['dim_names'] as a per-object hint"
1783
1783
  )
1784
1784
  assert datasets, "expected at least one Dataset despite inconsistent hints"
1785
+
1786
+
1787
+ # ---------------------------------------------------------------------------
1788
+ # Coverage pass 4: remaining audit gaps (deterministic, no-network)
1789
+ # ---------------------------------------------------------------------------
1790
+
1791
+
1792
+ def _serve_tgm_bytes(msg: bytes):
1793
+ """Serve raw .tgm *msg* bytes over a local loopback HTTP server.
1794
+
1795
+ Returns ``(url, shutdown)``. ``shutdown`` must be called to stop the
1796
+ server. Supports Range requests so the tensogram object-store backend
1797
+ can byte-range fetch descriptors/objects.
1798
+ """
1799
+ import http.server
1800
+ import threading
1801
+
1802
+ class Handler(http.server.BaseHTTPRequestHandler):
1803
+ def log_message(self, fmt, *args):
1804
+ pass
1805
+
1806
+ def do_HEAD(self):
1807
+ self.send_response(200)
1808
+ self.send_header("Content-Length", str(len(msg)))
1809
+ self.send_header("Accept-Ranges", "bytes")
1810
+ self.end_headers()
1811
+
1812
+ def do_GET(self):
1813
+ range_header = self.headers.get("Range")
1814
+ if range_header and range_header.startswith("bytes="):
1815
+ spec = range_header[6:]
1816
+ if spec.startswith("-"):
1817
+ n = int(spec[1:])
1818
+ s, e = max(0, len(msg) - n), len(msg)
1819
+ else:
1820
+ parts = spec.split("-")
1821
+ s = int(parts[0])
1822
+ e = int(parts[1]) + 1 if parts[1] else len(msg)
1823
+ e = min(e, len(msg))
1824
+ if s >= len(msg):
1825
+ self.send_response(416)
1826
+ self.end_headers()
1827
+ return
1828
+ chunk = msg[s:e]
1829
+ self.send_response(206)
1830
+ self.send_header("Content-Range", f"bytes {s}-{e - 1}/{len(msg)}")
1831
+ self.send_header("Content-Length", str(len(chunk)))
1832
+ self.end_headers()
1833
+ self.wfile.write(chunk)
1834
+ else:
1835
+ self.send_response(200)
1836
+ self.send_header("Content-Length", str(len(msg)))
1837
+ self.end_headers()
1838
+ self.wfile.write(msg)
1839
+
1840
+ server = http.server.ThreadingHTTPServer(("127.0.0.1", 0), Handler)
1841
+ port = server.server_address[1]
1842
+ thread = threading.Thread(target=server.serve_forever, daemon=True)
1843
+ thread.start()
1844
+
1845
+ def shutdown():
1846
+ server.shutdown()
1847
+ server.server_close()
1848
+ thread.join(timeout=5)
1849
+
1850
+ return f"http://127.0.0.1:{port}/test.tgm", shutdown
1851
+
1852
+
1853
+ class TestArrayGetFile:
1854
+ """Cover array.py ``_get_file`` branches."""
1855
+
1856
+ def test_get_file_returns_shared(self):
1857
+ """``_get_file`` short-circuits to the shared handle."""
1858
+ from tensogram_xarray.array import TensogramBackendArray
1859
+
1860
+ sentinel = object()
1861
+ ba = TensogramBackendArray(
1862
+ "/nowhere.tgm",
1863
+ 0,
1864
+ 0,
1865
+ (2,),
1866
+ np.dtype("float32"),
1867
+ supports_range=False,
1868
+ shared_file=sentinel,
1869
+ )
1870
+ assert ba._get_file() is sentinel
1871
+
1872
+ def test_get_file_remote_opens_remote(self):
1873
+ """``_get_file`` calls ``open_remote`` for a remote URL."""
1874
+ from unittest.mock import patch
1875
+
1876
+ from tensogram_xarray.array import TensogramBackendArray
1877
+
1878
+ with (
1879
+ patch("tensogram.is_remote_url", return_value=True),
1880
+ patch("tensogram.TensogramFile") as mock_file,
1881
+ ):
1882
+ ba = TensogramBackendArray(
1883
+ "s3://bucket/x.tgm",
1884
+ 0,
1885
+ 0,
1886
+ (2,),
1887
+ np.dtype("float32"),
1888
+ supports_range=False,
1889
+ storage_options={"region": "eu"},
1890
+ )
1891
+ ba._get_file()
1892
+ mock_file.open_remote.assert_called_once_with("s3://bucket/x.tgm", {"region": "eu"})
1893
+
1894
+
1895
+ class TestArrayDecodeRangeFallbackReal:
1896
+ """Cover array.py decode_range exception fallback."""
1897
+
1898
+ def test_file_decode_range_failure_falls_back(self, tmp_path: Path):
1899
+ """When ``f.file_decode_range`` raises, full decode is used instead."""
1900
+ from tensogram_xarray.array import TensogramBackendArray
1901
+
1902
+ data = np.arange(12, dtype=np.float32).reshape(3, 4)
1903
+ path = str(tmp_path / "range_fail.tgm")
1904
+ with tensogram.TensogramFile.create(path) as f:
1905
+ f.append({"version": 3}, [(_desc([3, 4]), data)])
1906
+
1907
+ class _FailingRange:
1908
+ """Wraps a real file but raises in the range-decode fast path."""
1909
+
1910
+ def __init__(self, inner):
1911
+ self._inner = inner
1912
+
1913
+ def file_decode_range(self, *args, **kwargs):
1914
+ raise RuntimeError("range decode unavailable")
1915
+
1916
+ def read_message(self, *args, **kwargs):
1917
+ return self._inner.read_message(*args, **kwargs)
1918
+
1919
+ with tensogram.TensogramFile.open(path) as inner:
1920
+ wrapper = _FailingRange(inner)
1921
+ ba = TensogramBackendArray(
1922
+ path,
1923
+ 0,
1924
+ 0,
1925
+ (3, 4),
1926
+ np.dtype("float32"),
1927
+ supports_range=True,
1928
+ range_threshold=1.0,
1929
+ shared_file=wrapper,
1930
+ )
1931
+ result = ba._raw_indexing_method((slice(0, 1), slice(0, 2)))
1932
+ np.testing.assert_array_equal(result, data[0:1, 0:2])
1933
+
1934
+
1935
+ class TestExpandKeyIntBranch:
1936
+ """Cover array.py ``_expand_key_to_indices`` int branch."""
1937
+
1938
+ def test_integer_key_wrapped_in_list(self):
1939
+ from tensogram_xarray.array import _expand_key_to_indices
1940
+
1941
+ result = _expand_key_to_indices((3, slice(1, 3)), (5, 10))
1942
+ assert result[0] == [3]
1943
+ assert result[1] == [1, 2]
1944
+
1945
+
1946
+ class TestMappingExtraHintErrorPaths:
1947
+ """Cover mapping.py ``parse_extra_dim_names_hint`` error branches."""
1948
+
1949
+ def test_list_element_str_raises_returns_empty(self):
1950
+ """A list whose elements fail ``str()`` yields ``{}``."""
1951
+ from tensogram_xarray.mapping import parse_extra_dim_names_hint
1952
+
1953
+ class _Unstringable:
1954
+ def __str__(self):
1955
+ raise ValueError("cannot stringify")
1956
+
1957
+ assert parse_extra_dim_names_hint(2, [_Unstringable(), _Unstringable()]) == {}
1958
+
1959
+ def test_dict_non_int_key_returns_empty(self):
1960
+ """A dict with a non-integer key yields ``{}``."""
1961
+ from tensogram_xarray.mapping import parse_extra_dim_names_hint
1962
+
1963
+ assert parse_extra_dim_names_hint(2, {"not_an_int": "values"}) == {}
1964
+
1965
+
1966
+ class TestScannerRemote:
1967
+ """Cover scanner.py remote scan path."""
1968
+
1969
+ def test_scan_file_remote(self):
1970
+ """``scan_file`` opens remote URLs and decodes descriptors lazily."""
1971
+ data = np.arange(6, dtype=np.float32)
1972
+ msg = bytes(tensogram.encode({"version": 3, "source": "remote"}, [(_desc([6]), data)]))
1973
+ url, shutdown = _serve_tgm_bytes(msg)
1974
+ try:
1975
+ index = scan_file(url)
1976
+ finally:
1977
+ shutdown()
1978
+ assert len(index.objects) == 1
1979
+ assert index.objects[0].shape == (6,)
1980
+ assert index.objects[0].common_meta.get("source") == "remote"
1981
+
1982
+
1983
+ class TestMergeRemote:
1984
+ """Cover merge.py remote shared-file open and close."""
1985
+
1986
+ def test_open_datasets_remote_and_close(self):
1987
+ """``open_datasets`` opens a remote shared file and closes cleanly."""
1988
+ data = np.arange(6, dtype=np.float32)
1989
+ msg = bytes(tensogram.encode({"version": 3}, [(_desc([6]), data)]))
1990
+ url, shutdown = _serve_tgm_bytes(msg)
1991
+ try:
1992
+ datasets = open_datasets(url)
1993
+ assert len(datasets) >= 1
1994
+ var = next(iter(datasets[0].data_vars.values()))
1995
+ np.testing.assert_array_equal(var.values, data)
1996
+ # Closing triggers the remote `_close_shared` callback.
1997
+ for ds in datasets:
1998
+ ds.close()
1999
+ finally:
2000
+ shutdown()
2001
+
2002
+
2003
+ class TestPartitionKeysUnhashable:
2004
+ """Cover merge.py ``_partition_keys`` unhashable fallback."""
2005
+
2006
+ def test_unhashable_values_treated_as_constant(self):
2007
+ """Values that remain unhashable after ``_make_hashable`` -> constant."""
2008
+ # ``set`` passes through ``_make_hashable`` unchanged and a set of
2009
+ # sets raises TypeError, exercising the except branch.
2010
+ kv = {"flags": [{1, 2}, {3, 4}]}
2011
+ const, vary = _partition_keys(kv)
2012
+ assert "flags" in const
2013
+ assert const["flags"] == {1, 2}
2014
+ assert vary == {}
2015
+
2016
+
2017
+ class TestHypercubeMissingEntry:
2018
+ """Cover merge.py missing-hypercube-entry guards."""
2019
+
2020
+ def test_missing_entry_without_variable_key(self, tmp_path: Path):
2021
+ """A duplicate + a missing grid corner raises in ``_hypercube_dataset``."""
2022
+ path = str(tmp_path / "missing_entry.tgm")
2023
+ # 4 objects: (a,1),(a,2),(b,1),(b,1) -> 2x2 grid count matches (4) yet
2024
+ # (b,2) is absent and (b,1) is duplicated.
2025
+ combos = [("a", "1"), ("a", "2"), ("b", "1"), ("b", "1")]
2026
+ with tensogram.TensogramFile.create(path) as f:
2027
+ for k1, k2 in combos:
2028
+ f.append(
2029
+ {"version": 3, "base": [{"k1": k1, "k2": k2}]},
2030
+ [(_desc([2, 3]), np.ones((2, 3), dtype=np.float32))],
2031
+ )
2032
+ with pytest.raises(ValueError, match="hypercube has a missing entry"):
2033
+ open_datasets(path)
2034
+
2035
+ def test_missing_entry_with_variable_key(self, tmp_path: Path):
2036
+ """Same defect inside a ``variable_key`` sub-group raises."""
2037
+ path = str(tmp_path / "missing_entry_var.tgm")
2038
+ rows = [
2039
+ ("2t", "d1", "L1"),
2040
+ ("2t", "d1", "L2"),
2041
+ ("2t", "d2", "L1"),
2042
+ ("2t", "d2", "L1"), # duplicate; (d2, L2) missing
2043
+ ("10u", "d1", "L1"),
2044
+ ]
2045
+ with tensogram.TensogramFile.create(path) as f:
2046
+ for param, date, level in rows:
2047
+ f.append(
2048
+ {"version": 3, "base": [{"param": param, "date": date, "level": level}]},
2049
+ [(_desc([2, 3]), np.ones((2, 3), dtype=np.float32))],
2050
+ )
2051
+ with pytest.raises(ValueError, match="hypercube has a missing entry"):
2052
+ open_datasets(path, variable_key="param")