views-frames 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
views_frames/index.py ADDED
@@ -0,0 +1,309 @@
1
+ """`SpatioTemporalIndex` — the genuinely-reused alignment primitive.
2
+
3
+ `{time, unit, level}` integer arrays plus **same-level** pure-numpy alignment
4
+ (intersect / reindex / is_superset_of / argsort / searchsorted). Cross-level
5
+ (cm↔pgm) alignment is exposed via `cross_level_align`, whose mapping is
6
+ **injected by the consumer** and never embedded or fetched here (ADR-014,
7
+ register C-14).
8
+
9
+ The same-level join is the pure-numpy unwrap of the proven
10
+ `pd.Index.get_indexer` pattern in `views-faoapi/.../data/handlers.py`.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from collections.abc import Mapping
16
+
17
+ import numpy as np
18
+ from numpy.typing import NDArray
19
+
20
+ from views_frames._typing import IntArray
21
+ from views_frames._validation import validate_identifiers
22
+ from views_frames.spatial_level import SpatialLevel
23
+
24
+
25
class SpatioTemporalIndex:
    """An immutable ``{time, unit, level}`` row index with same-level alignment.

    **Row-uniqueness stance (register C-21).** A frame *may* contain duplicate
    ``(time, unit)`` rows — ``cross_level_align`` deliberately produces them (many
    pgm cells map to one country, to be summed by ``aggregate_distributions``), so
    uniqueness is **not** a global invariant and is **not** validated at
    construction. The **same-level joins** (``searchsorted``/``reindex``/
    ``intersect``/``is_superset_of``), however, assume one row per ``(time, unit)``
    and give undefined results on duplicates. A consumer that needs that guarantee
    should check :meth:`has_unique_rows` before joining; the default path stays
    allocation-free.

    Equality compares the level and the identifier *values*; the hash is computed
    from the int64-normalised identifiers so that two indexes that compare equal
    always hash equal, whatever integer dtype backs them.
    """

    def __init__(
        self,
        time: IntArray,
        unit: IntArray,
        level: SpatialLevel,
    ) -> None:
        """Validate and store the identifier arrays and the spatial level.

        Args:
            time: Time identifier per row.
            unit: Unit identifier per row.
            level: The spatial level of the rows.

        Raises:
            TypeError: ``level`` is not a ``SpatialLevel``.

        Whatever ``validate_identifiers`` raises for bad identifier arrays
        propagates unchanged.
        """
        if not isinstance(level, SpatialLevel):
            raise TypeError(
                f"level must be a SpatialLevel, got {type(level).__name__}"
            )
        # Non-array (or 0-d) input cannot supply a row count; -1 is handed to the
        # validator in that case so it can reject the input itself.
        is_array = isinstance(time, np.ndarray) and time.ndim >= 1
        n = int(time.shape[0]) if is_array else -1
        validate_identifiers({"time": time, "unit": unit}, n_rows=n)
        # store as read-only views so the value object cannot be mutated in place
        # NOTE(review): np.ascontiguousarray may return the input array itself when
        # it is already contiguous, in which case clearing the write flag below
        # also makes the caller's array read-only — confirm this is intended.
        self._time = np.ascontiguousarray(time)
        self._unit = np.ascontiguousarray(unit)
        self._time.setflags(write=False)
        self._unit.setflags(write=False)
        self._level = level

    # ---- core surface -------------------------------------------------------

    @property
    def time(self) -> IntArray:
        """The time identifier array (read-only)."""
        return self._time

    @property
    def unit(self) -> IntArray:
        """The unit identifier array (read-only)."""
        return self._unit

    @property
    def level(self) -> SpatialLevel:
        """The spatial level (cm/pgm) of these rows."""
        return self._level

    @property
    def n_rows(self) -> int:
        """Number of rows (the first axis length)."""
        return int(self._time.shape[0])

    @property
    def identifiers(self) -> dict[str, IntArray]:
        """The integer identifier arrays, keyed by name."""
        return {"time": self._time, "unit": self._unit}

    def __len__(self) -> int:
        return self.n_rows

    def __eq__(self, other: object) -> bool:
        """Equal iff same level and element-wise equal ``time`` and ``unit``."""
        if not isinstance(other, SpatioTemporalIndex):
            return NotImplemented
        return (
            self._level == other._level
            and np.array_equal(self._time, other._time)
            and np.array_equal(self._unit, other._unit)
        )

    def __hash__(self) -> int:  # value objects are immutable; hash by identity surface
        """Hash consistent with :meth:`__eq__`.

        ``__eq__`` compares values irrespective of integer dtype, so the raw
        buffers cannot be hashed directly (an int32 and an int64 index holding the
        same ids would be equal yet hash differently). Both arrays are normalised
        to int64 — the same key dtype ``_keys`` uses — before taking their bytes.
        """
        time_bytes = self._time.astype(np.int64, copy=False).tobytes()
        unit_bytes = self._unit.astype(np.int64, copy=False).tobytes()
        return hash((self._level, time_bytes, unit_bytes))

    # ---- internal key representation ---------------------------------------

    def _keys(self) -> NDArray[np.int64]:
        """A contiguous ``(N, 2)`` int64 ``(time, unit)`` key array."""
        return np.ascontiguousarray(
            np.stack([self._time.astype(np.int64), self._unit.astype(np.int64)], axis=1)
        )

    @staticmethod
    def _row_view(keys: NDArray[np.int64]) -> NDArray[np.void]:
        """View each ``(time, unit)`` row as a single void scalar for set ops."""
        # One void element spans a whole row (itemsize * n_columns bytes), so
        # sorting / searching / set operations treat a row as one opaque key.
        return np.ascontiguousarray(keys).view(
            np.dtype((np.void, keys.dtype.itemsize * keys.shape[1]))
        ).reshape(-1)

    def _require_same_level(self, other: SpatioTemporalIndex) -> None:
        """Raise ``ValueError`` unless ``other`` has the same spatial level."""
        if self._level != other._level:
            raise ValueError(
                "same-level operation requires equal SpatialLevel; "
                f"got {self._level} and {other._level}. Use cross_level_align."
            )

    # ---- same-level alignment ----------------------------------------------

    def argsort(self) -> NDArray[np.intp]:
        """Positions that sort the rows by ``(time, unit)`` (time-major)."""
        # np.lexsort sorts by the LAST key first, hence (unit, time).
        return np.asarray(np.lexsort((self._unit, self._time)), dtype=np.intp)

    def searchsorted(self, other: SpatioTemporalIndex) -> NDArray[np.intp]:
        """For each row of ``other``, its position in ``self`` (-1 if absent).

        The pure-numpy analogue of ``pd.Index.get_indexer``: a same-level join.

        Raises:
            ValueError: ``other`` is at a different spatial level.
        """
        self._require_same_level(other)
        if self.n_rows == 0:
            # Nothing can be found in an empty index. Without this guard the clip
            # below would produce position -1 and index into an empty array.
            return np.full(other.n_rows, -1, dtype=np.intp)
        self_rows = self._row_view(self._keys())
        other_rows = self._row_view(other._keys())
        order = np.argsort(self_rows, kind="stable")
        sorted_rows = self_rows[order]
        pos = np.searchsorted(sorted_rows, other_rows)
        # A key past the last sorted row yields pos == len; clip so the equality
        # probe below stays in bounds (it then simply reports "not found").
        pos = np.clip(pos, 0, len(sorted_rows) - 1)
        found = sorted_rows[pos] == other_rows
        result = np.where(found, order[pos], -1)
        return result.astype(np.intp)

    def reindex(self, other: SpatioTemporalIndex) -> NDArray[np.intp]:
        """Alias of :meth:`searchsorted` — positions to align ``self`` to ``other``."""
        return self.searchsorted(other)

    def is_superset_of(self, other: SpatioTemporalIndex) -> bool:
        """True iff every row of ``other`` is present in ``self`` (same level)."""
        self._require_same_level(other)
        self_rows = self._row_view(self._keys())
        other_rows = self._row_view(other._keys())
        return bool(np.isin(other_rows, self_rows).all())

    def intersect(self, other: SpatioTemporalIndex) -> SpatioTemporalIndex:
        """A new index of the rows present in **both** ``self`` and ``other``."""
        self._require_same_level(other)
        common = np.intersect1d(
            self._row_view(self._keys()), self._row_view(other._keys())
        )
        # Undo the void row view: each 16-byte key is a (time, unit) int64 pair.
        keys = common.view(np.int64).reshape(-1, 2)
        return SpatioTemporalIndex(
            time=keys[:, 0].copy(), unit=keys[:, 1].copy(), level=self._level
        )

    def has_unique_rows(self) -> bool:
        """True iff every ``(time, unit)`` row is unique (register C-21).

        Duplicates are **allowed** in a frame, but the same-level joins
        (``searchsorted``/``reindex``/``intersect``/``is_superset_of``) assume
        uniqueness. Call this before joining if the caller cannot otherwise
        guarantee it. ``O(n log n)``; not run by default.
        """
        rows = self._row_view(self._keys())
        return bool(len(np.unique(rows)) == rows.shape[0])

    def select(
        self, indexer: IntArray | NDArray[np.bool_]
    ) -> SpatioTemporalIndex:
        """A new index of the rows at integer positions **or** a boolean mask.

        The row-selection primitive the frame-level ``select``/``reindex`` build on:
        ``indexer`` is applied to ``time`` and ``unit`` by numpy fancy indexing
        (so an integer position array reorders/repeats, a boolean mask filters).
        """
        return SpatioTemporalIndex(
            time=self._time[indexer], unit=self._unit[indexer], level=self._level
        )

    # ---- cross-level alignment (ADR-014) -----------------------------------

    def cross_level_align(
        self,
        mapping: Mapping[tuple[int, int], int],
        target_level: SpatialLevel,
    ) -> SpatioTemporalIndex:
        """Remap each row's ``unit`` to ``target_level`` using an injected mapping.

        The cross-level (cm↔pgm) join needs an external, **time-varying**
        ``(time, unit) -> target_unit`` mapping (e.g. ``(month_id, priogrid_id) ->
        country_id``): a cell's country assignment changes by month, so the key is
        ``(time, unit)``, not ``unit`` alone (ADR-014; register C-20). The leaf owns
        this **operation**; the **mapping is supplied by the caller** and is never
        embedded or fetched here. Time is preserved.

        The remap is vectorized — ``(time, unit)`` keys are viewed as void scalars
        and matched with a single ``searchsorted`` against the sorted mapping keys —
        so it scales to the full grid (no per-row Python loop; register C-22).

        Args:
            mapping: A ``{(time, unit): target_unit}`` mapping injected by the
                consumer, keyed by the ``(time, unit)`` pair.
            target_level: The ``SpatialLevel`` of the produced index.

        Raises:
            ValueError: ``mapping`` is missing/empty, is not keyed by ``(time,
                unit)`` pairs, or a row's ``(time, unit)`` has no entry in
                ``mapping`` (the leaf never guesses a mapping).
            TypeError: ``target_level`` is not a ``SpatialLevel``.
        """
        if not isinstance(target_level, SpatialLevel):
            got = type(target_level).__name__
            raise TypeError(f"target_level must be a SpatialLevel, got {got}")
        if mapping is None or len(mapping) == 0:
            raise ValueError(
                "cross_level_align requires an injected (time, unit)->target_unit "
                "mapping; the leaf never embeds or fetches it (ADR-014)."
            )
        map_keys = np.array(list(mapping.keys()), dtype=np.int64)
        # Keys that are 2-tuples stack into an (M, 2) array; anything else does not.
        if map_keys.ndim != 2 or map_keys.shape[1] != 2:
            raise ValueError(
                "cross_level_align mapping must be keyed by (time, unit) pairs "
                "(register C-20); got keys that are not 2-tuples."
            )
        map_vals = np.array(list(mapping.values()), dtype=self._unit.dtype)
        return self._remap(map_keys, map_vals, target_level)

    def cross_level_align_arrays(
        self,
        map_keys: IntArray,
        map_vals: IntArray,
        target_level: SpatialLevel,
    ) -> SpatioTemporalIndex:
        """Columnar form of :meth:`cross_level_align` for grid-scale mappings.

        Identical semantics, but the ``(time, unit) -> target_unit`` mapping is
        injected as **parallel arrays** — ``map_keys`` of shape ``(M, 2)`` and
        ``map_vals`` of shape ``(M,)`` — rather than a Python ``dict``. At full-grid
        scale building and materializing a ~10.5M-key dict is the dominant cost
        (~30× slower, ~10× the memory of the columnar form; register C-26); a
        producer that already holds the mapping columnar passes it straight through.

        Raises:
            ValueError: ``map_keys`` is not ``(M, 2)``, ``map_vals`` is not length
                ``M``, the mapping is empty, or a row's ``(time, unit)`` is absent.
            TypeError: ``target_level`` is not a ``SpatialLevel``.
        """
        if not isinstance(target_level, SpatialLevel):
            got = type(target_level).__name__
            raise TypeError(f"target_level must be a SpatialLevel, got {got}")
        keys = np.ascontiguousarray(map_keys, dtype=np.int64)
        vals = np.asarray(map_vals)
        if keys.ndim != 2 or keys.shape[1] != 2:
            raise ValueError(
                "cross_level_align_arrays map_keys must be an (M, 2) array of "
                "(time, unit) rows (register C-20/C-26)."
            )
        if vals.shape != (keys.shape[0],):
            raise ValueError(
                "cross_level_align_arrays map_vals must be a length-M array "
                "aligned to map_keys."
            )
        if keys.shape[0] == 0:
            raise ValueError(
                "cross_level_align_arrays requires a non-empty mapping; the leaf "
                "never embeds or fetches it (ADR-014)."
            )
        return self._remap(keys, vals, target_level)

    def _remap(
        self, map_keys: IntArray, map_vals: IntArray, target_level: SpatialLevel
    ) -> SpatioTemporalIndex:
        """The vectorized ``(time, unit) -> target`` remap shared by both entries.

        ``map_keys`` is coerced to a contiguous int64 ``(M, 2)`` so the void-view
        keys match ``self``'s; a single ``searchsorted`` does the lookup; a missing
        ``(time, unit)`` fails loud. Both public entry points reject an empty
        mapping before calling this, so ``M >= 1`` here.

        Raises:
            ValueError: a row's ``(time, unit)`` has no entry in the mapping.
        """
        keys = np.ascontiguousarray(map_keys, dtype=np.int64)
        map_rows = self._row_view(keys)
        order = np.argsort(map_rows, kind="stable")
        sorted_rows = map_rows[order]

        self_rows = self._row_view(self._keys())
        # Clip so a key beyond the last mapping row probes a valid slot; the
        # equality check below then flags it as missing.
        pos = np.clip(
            np.searchsorted(sorted_rows, self_rows), 0, len(sorted_rows) - 1
        )
        found = sorted_rows[pos] == self_rows
        if not bool(found.all()):
            # Report the first unmapped row.
            miss = int(np.argmax(~found))
            t, u = int(self._time[miss]), int(self._unit[miss])
            raise ValueError(
                f"(time, unit) ({t}, {u}) has no entry in the injected mapping"
            )
        mapped = np.asarray(map_vals[order[pos]], dtype=self._unit.dtype)
        return SpatioTemporalIndex(
            time=self._time.copy(), unit=mapped, level=target_level
        )
@@ -0,0 +1,13 @@
1
+ """Serialization adapters — frame ↔ bytes *format*, never *transport* (ADR-009).
2
+
3
+ Two scalable formats; list-in-cell object-dtype is banned (README §7):
4
+
5
+ - `npz` — native ``values.npy`` + ``identifiers.npz`` (mmap-capable).
6
+ - `arrow` — flat-columnar parquet (the scalable interchange format).
7
+
8
+ `pyarrow` is imported only inside this subpackage, never in the core frames.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ __all__: list[str] = []
@@ -0,0 +1,103 @@
1
+ """Flat-columnar (parquet) serialization — the scalable interchange format.
2
+
3
+ One scalar cell per ``(time, unit, sample)`` (features become columns for a 3-D
4
+ feature frame); the scalable replacement for the banned list-in-cell encoding
5
+ (README §7). This is the **only** module permitted to import ``pyarrow`` (the
6
+ optional ``[arrow]`` extra). Operates on a frame's state dict (register C-09).
7
+
8
+ The reconstruction shape (``n_features`` / ``n_samples``) and the header (level,
9
+ metadata, feature_names) ride in the parquet schema key-value metadata so the
10
+ round-trip is exact.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ from pathlib import Path
17
+ from typing import Any
18
+
19
+ import numpy as np
20
+ import pyarrow as pa
21
+ import pyarrow.parquet as pq
22
+ from numpy.typing import NDArray
23
+
24
+ from views_frames._typing import IntArray
25
+
26
+
27
def save(
    path: Path | str,
    *,
    values: NDArray[np.float32],
    time: IntArray,
    unit: IntArray,
    level: str,
    metadata: dict[str, Any],
    feature_names: list[str] | None = None,
) -> None:
    """Write a frame's state to ``path`` as a flat-columnar parquet table.

    Each ``(row, sample)`` pair becomes one table row: ``time`` and ``unit`` are
    repeated once per sample and ``sample`` enumerates ``0..S-1`` within every
    frame row. A 2-D ``values`` array is flattened into a single ``value`` column;
    a 3-D ``(N, F, S)`` array contributes one column ``f0..f{F-1}`` per feature.
    The header (level, metadata, feature names and the shape information needed
    to rebuild the array) is JSON-encoded into the schema metadata under the
    ``views_frames`` key.

    Raises:
        ValueError: ``values`` is neither 2-D nor 3-D.
    """
    rank = values.ndim
    if rank not in (2, 3):
        raise ValueError(f"unsupported values.ndim={values.ndim}")

    if rank == 2:
        n_rows, n_samples = values.shape
        n_features = 0
    else:
        n_rows, n_features, n_samples = values.shape
    n_cells = n_rows * n_samples

    # Row-major flattening: all samples of row 0, then all samples of row 1, ...
    columns: dict[str, NDArray[Any]] = {}
    columns["time"] = np.repeat(time, n_samples)
    columns["unit"] = np.repeat(unit, n_samples)
    columns["sample"] = np.tile(np.arange(n_samples, dtype=np.int32), n_rows)
    if rank == 2:
        columns["value"] = values.reshape(n_cells)
    else:
        for feature in range(n_features):
            plane = np.ascontiguousarray(values[:, feature, :])
            columns[f"f{feature}"] = plane.reshape(n_cells)

    header = {
        "level": level,
        "metadata": metadata,
        "feature_names": feature_names,
        "n_features": n_features,
        "n_samples": n_samples,
        "ndim": int(values.ndim),
    }
    encoded_header = json.dumps(header)
    table = pa.table(columns).replace_schema_metadata({"views_frames": encoded_header})
    pq.write_table(table, str(path))
71
+
72
+
73
def load(path: Path | str) -> dict[str, Any]:
    """Read back a frame state dict from parquet written by :func:`save`.

    The header stored under the ``views_frames`` schema-metadata key supplies the
    sample count and array rank; the flat columns are folded back to one entry
    per frame row (identifiers) and to ``(N, S)`` or ``(N, F, S)`` float32
    (values).

    Returns:
        A dict with keys ``values``, ``time``, ``unit``, ``level``, ``metadata``
        and ``feature_names``.
    """
    table = pq.read_table(str(path))
    schema_meta = table.schema.metadata or {}
    header = json.loads(schema_meta[b"views_frames"].decode())
    n_samples = int(header["n_samples"])
    rank = int(header["ndim"])

    flat_time = table.column("time").to_numpy()
    flat_unit = table.column("unit").to_numpy()
    n_rows = flat_time.shape[0] // n_samples
    # Identifiers were repeated once per sample; the first of each run suffices.
    time = flat_time.reshape(n_rows, n_samples)[:, 0]
    unit = flat_unit.reshape(n_rows, n_samples)[:, 0]

    if rank == 2:
        flat_values = table.column("value").to_numpy()
        values = flat_values.reshape(n_rows, n_samples).astype(np.float32)
    else:
        planes = []
        for feature in range(int(header["n_features"])):
            column = table.column(f"f{feature}").to_numpy()
            planes.append(column.reshape(n_rows, n_samples))
        # Features go back onto axis 1, giving (N, F, S).
        values = np.stack(planes, axis=1).astype(np.float32)

    state: dict[str, Any] = {
        "values": values,
        "time": np.ascontiguousarray(time),
        "unit": np.ascontiguousarray(unit),
        "level": header["level"],
        "metadata": header.get("metadata", {}),
        "feature_names": header.get("feature_names"),
    }
    return state
views_frames/io/npz.py ADDED
@@ -0,0 +1,59 @@
1
+ """Native serialization: ``values.npy`` + ``identifiers.npz`` (+ JSON header).
2
+
3
+ Operates on a frame's **state dict** — it carries no per-frame schema (register
4
+ C-09); each frame maps its fields to/from the state. The ``mmap`` path returns a
5
+ read-only memmap and preserves the subclass so peak RAM stays the working set
6
+ (register C-07, README §7) — the proven ``PredictionFrame`` idiom.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ from pathlib import Path
13
+ from typing import Any, Literal
14
+
15
+ import numpy as np
16
+ from numpy.typing import NDArray
17
+
18
+ from views_frames._typing import IntArray
19
+
20
+
21
def save(
    directory: Path | str,
    *,
    values: NDArray[np.float32],
    time: IntArray,
    unit: IntArray,
    level: str,
    metadata: dict[str, Any],
    feature_names: list[str] | None = None,
) -> None:
    """Persist a frame's state into ``directory`` as three files.

    ``values.npy`` holds the value array, ``identifiers.npz`` the ``time`` and
    ``unit`` arrays, and ``header.json`` the level, the metadata and — only when
    given — the feature names. The directory (and any missing parents) is
    created if needed.
    """
    target = Path(directory)
    target.mkdir(parents=True, exist_ok=True)

    np.save(target / "values.npy", values)
    np.savez(target / "identifiers.npz", time=time, unit=unit)

    # feature_names is left out of the header entirely when not supplied.
    optional = {} if feature_names is None else {"feature_names": feature_names}
    header: dict[str, Any] = {"level": level, "metadata": metadata, **optional}
    # default=str: header values json cannot encode natively are stringified.
    (target / "header.json").write_text(
        json.dumps(header, sort_keys=True, default=str)
    )
41
+
42
+
43
def load(directory: Path | str, *, mmap: bool = False) -> dict[str, Any]:
    """Load a frame state dict from the files under ``directory``.

    Reads ``values.npy``, ``identifiers.npz`` and ``header.json``. With
    ``mmap=True`` the value array is opened with ``mmap_mode="r"`` (a read-only
    memory map) instead of being read into memory; the identifier arrays are
    always read eagerly.

    Returns:
        A dict with keys ``values``, ``time``, ``unit``, ``level``, ``metadata``
        (``{}`` when the header has none) and ``feature_names`` (``None`` when
        the header has none).
    """
    root = Path(directory)

    mode: Literal["r"] | None = None
    if mmap:
        mode = "r"
    values = np.load(root / "values.npy", mmap_mode=mode)

    # Pull both arrays out before the archive handle is closed.
    with np.load(root / "identifiers.npz") as archive:
        time, unit = archive["time"], archive["unit"]

    header = json.loads((root / "header.json").read_text())
    state: dict[str, Any] = {
        "values": values,
        "time": time,
        "unit": unit,
        "level": header["level"],
        "metadata": header.get("metadata", {}),
        "feature_names": header.get("feature_names"),
    }
    return state
@@ -0,0 +1,36 @@
1
+ """`FrameMetadata` — the typed, optional-extensible provenance header (ADR-013).
2
+
3
+ Not a free-form dict: a frozen dataclass with all-optional, validated fields, so
4
+ adding a field is a MINOR change and consumers cannot diverge on key names (the
5
+ store-side cause of reporting's C-48). It is the typed home for run/eval identity.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections.abc import Mapping
11
+ from dataclasses import dataclass, fields
12
+ from typing import Any
13
+
14
+
15
@dataclass(frozen=True)
class FrameMetadata:
    """Frozen provenance header for a frame; every field is optional (``None``)."""

    model: str | None = None
    run_type: str | None = None
    timestamp: int | None = None
    seed: int | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return the fields that are set (not ``None``) as a plain dict."""
        out: dict[str, Any] = {}
        for spec in fields(self):
            value = getattr(self, spec.name)
            if value is None:
                continue
            out[spec.name] = value
        return out

    @classmethod
    def from_dict(cls, data: Mapping[str, Any]) -> FrameMetadata:
        """Build an instance from ``data``, dropping keys that are not fields.

        Unknown keys are skipped rather than rejected, so a dict written with
        extra fields still loads.
        """
        accepted = frozenset(spec.name for spec in fields(cls))
        kwargs: dict[str, Any] = {}
        for key, value in data.items():
            if key in accepted:
                kwargs[key] = value
        return cls(**kwargs)
@@ -0,0 +1,143 @@
1
+ """`PredictionFrame` — model outputs (ŷ samples): ``y_pred (N, S)`` float32.
2
+
3
+ A sibling frame (no shared base; ADR-011 Option C). Relocated from
4
+ views-pipeline-core and rewritten **numpy-only** — the original imports pandas
5
+ (``pd.isna``); here identifier validation is the integer-dtype check in
6
+ ``_validation`` (register C-17). The sample axis is always explicit (`S >= 1`;
7
+ ADR-012).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from pathlib import Path
13
+
14
+ import numpy as np
15
+ from numpy.typing import NDArray
16
+
17
+ from views_frames._typing import IntArray
18
+ from views_frames._validation import coerce_values, validate_values
19
+ from views_frames.index import SpatioTemporalIndex
20
+ from views_frames.io import npz
21
+ from views_frames.metadata import FrameMetadata
22
+ from views_frames.spatial_level import SpatialLevel
23
+
24
+
25
class PredictionFrame:
    """Model-output frame pairing ``(N, S)`` values with a spatiotemporal index."""

    def __init__(
        self,
        y_pred: object,
        index: SpatioTemporalIndex,
        metadata: FrameMetadata | None = None,
    ) -> None:
        """Coerce and validate ``y_pred`` and bind it to ``index``.

        Args:
            y_pred: The predictions; passed through ``coerce_values`` and
                ``validate_values`` and required to be 2-D afterwards.
            index: The row index; must have as many rows as ``y_pred``.
            metadata: Optional provenance; an empty ``FrameMetadata`` if omitted.

        Raises:
            ValueError: the coerced array is not 2-D, or its row count differs
                from ``index.n_rows``.
        """
        arr = coerce_values(y_pred)
        validate_values(arr)
        if arr.ndim != 2:
            raise ValueError(
                f"PredictionFrame y_pred must be 2D (N, S), got ndim={arr.ndim}"
            )
        n_value_rows = arr.shape[0]
        if n_value_rows != index.n_rows:
            raise ValueError(
                f"y_pred has {n_value_rows} rows but index has {index.n_rows}"
            )
        self._values = arr
        self._index = index
        self._metadata = FrameMetadata() if metadata is None else metadata

    # ---- core surface -------------------------------------------------------

    @property
    def values(self) -> NDArray[np.float32]:
        """The stored ``(N, S)`` value array."""
        return self._values

    @property
    def index(self) -> SpatioTemporalIndex:
        """The row index of this frame."""
        return self._index

    @property
    def metadata(self) -> FrameMetadata:
        """The provenance header attached to this frame."""
        return self._metadata

    @property
    def n_rows(self) -> int:
        """Length ``N`` of the first (row) axis of the values."""
        row_axis_len = self._values.shape[0]
        return int(row_axis_len)

    @property
    def identifiers(self) -> dict[str, IntArray]:
        """The index's identifier arrays, keyed by name."""
        return self._index.identifiers

    @property
    def sample_count(self) -> int:
        """Length ``S`` of the last (sample) axis of the values."""
        sample_axis_len = self._values.shape[-1]
        return int(sample_axis_len)

    @property
    def is_sample(self) -> bool:
        """Whether the frame carries more than one sample per row."""
        return self.sample_count > 1

    # ---- operations ---------------------------------------------------------

    def with_metadata(self, metadata: FrameMetadata) -> PredictionFrame:
        """Return a new frame carrying ``metadata`` but the **same** values/index.

        The constructor is bypassed, so the values buffer is shared rather than
        coerced or validated again.
        """
        clone = PredictionFrame.__new__(PredictionFrame)
        clone._values, clone._index, clone._metadata = (
            self._values,
            self._index,
            metadata,
        )
        return clone

    def select(self, indexer: IntArray | NDArray[np.bool_]) -> PredictionFrame:
        """Return a new frame holding the rows picked by ``indexer``.

        ``indexer`` is applied to the values and to the index alike: an integer
        position array reorders or repeats rows, a boolean mask filters them.
        The metadata object is carried over unchanged, and the new frame is built
        through the regular constructor.
        """
        picked_values = self._values[indexer]
        picked_index = self._index.select(indexer)
        return PredictionFrame(picked_values, picked_index, self._metadata)

    def reindex(self, other: SpatioTemporalIndex) -> PredictionFrame:
        """Return a new frame whose rows follow ``other``.

        Every row of ``other`` must already be present in this frame's index;
        the positions from the index's ``searchsorted`` are then fed to
        :meth:`select`.

        Raises:
            ValueError: this frame's index is not a superset of ``other``.
        """
        if self._index.is_superset_of(other):
            positions = self._index.searchsorted(other)
            return self.select(positions)
        raise ValueError(
            "reindex requires this frame's index to be a superset of `other`; "
            "some target rows are absent"
        )

    # ---- persistence --------------------------------------------------------

    def save(self, directory: Path | str) -> None:
        """Write the frame to ``directory`` through the ``npz`` adapter."""
        idx = self._index
        npz.save(
            directory,
            values=self._values,
            time=idx.time,
            unit=idx.unit,
            level=idx.level.value,
            metadata=self._metadata.to_dict(),
        )

    @classmethod
    def load(cls, directory: Path | str, mmap: bool = False) -> PredictionFrame:
        """Rebuild a frame from ``directory``; ``mmap`` is forwarded to the adapter."""
        state = npz.load(directory, mmap=mmap)
        restored_index = SpatioTemporalIndex(
            time=state["time"],
            unit=state["unit"],
            level=SpatialLevel(state["level"]),
        )
        restored_metadata = FrameMetadata.from_dict(state["metadata"])
        return cls(state["values"], restored_index, restored_metadata)