views-frames 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Simon Polichinel von der Maase
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,29 @@
1
+ """views_frames_summarize — posterior / sample-axis summarization over frames.
2
+
3
+ A sibling package to `views_frames` (ADR-017): it operates on frames and owns the
4
+ volatile statistics the leaf must not. Depends on `views_frames` + numpy only;
5
+ never the reverse (enforced by ``tests/test_import_enforcement.py``).
6
+
7
+ Conventions (ADR-017): point estimates (mean/median/MAP, generic ``collapse``)
8
+ return a `(N, …, 1)` **frame**; interval estimates (HDI, quantiles) return numpy
9
+ arrays **aligned to the input frame's index** (the caller holds the index).
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from views_frames_summarize.aggregate import (
15
+ aggregate_distributions,
16
+ aggregate_distributions_arrays,
17
+ )
18
+ from views_frames_summarize.collapse import collapse
19
+ from views_frames_summarize.interval import hdi, quantiles
20
+ from views_frames_summarize.point import map_estimate
21
+
22
+ __all__ = [
23
+ "aggregate_distributions",
24
+ "aggregate_distributions_arrays",
25
+ "collapse",
26
+ "hdi",
27
+ "map_estimate",
28
+ "quantiles",
29
+ ]
@@ -0,0 +1,68 @@
1
+ """Shared helpers for the summarize package.
2
+
3
+ Rebuilds a frame of the same concrete type with new values, preserving the index
4
+ and metadata — the structural plumbing every reducer needs.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from collections.abc import Callable
10
+
11
+ import numpy as np
12
+ from numpy.typing import NDArray
13
+
14
+ from views_frames import (
15
+ FeatureFrame,
16
+ PredictionFrame,
17
+ SpatioTemporalIndex,
18
+ TargetFrame,
19
+ )
20
+
21
+ AnyFrame = PredictionFrame | FeatureFrame | TargetFrame
22
+
23
+ # Default row-block size for the memory-bounded estimators. Blocking caps the peak
24
+ # memory of an estimator at ``O(block * …)`` regardless of row count, so the
25
+ # full-grid reduction path stays well under the #181 OOM (register C-22, C-25).
26
+ ROW_BLOCK = 1 << 16
27
+
28
+
29
def block_apply(
    values: NDArray[np.float32],
    block_rows: int,
    fn: Callable[[NDArray[np.float32]], NDArray[np.float32]],
) -> NDArray[np.float32]:
    """Apply ``fn`` to row-blocks of ``values`` (over axis 0), concatenating results.

    ``fn`` maps a ``(block, …)`` slice to a ``(block, …)`` result (same axis-0
    length). Inputs with at most ``block_rows`` rows are handed to ``fn`` in one
    call, with no slicing and no concatenation. Larger inputs are cut into
    consecutive slices of ``block_rows`` rows (the last slice may be shorter)
    and the per-slice results are joined along axis 0.

    Args:
        values: Array whose axis 0 is the row axis.
        block_rows: Maximum number of rows passed to ``fn`` per call.
        fn: Callable applied to each row-block.

    Returns:
        ``fn(values)`` on the single-shot path, otherwise the per-block results
        concatenated along axis 0.

    Raises:
        ValueError: ``values`` has more rows than ``block_rows`` and
            ``block_rows`` is not a positive integer.
    """
    n = values.shape[0]
    if n <= block_rows:
        return fn(values)
    # Checked only on the blocked path so every input that used to work still
    # does; without this guard a zero/negative size fails inside ``range`` or
    # ``np.concatenate`` with a message that never mentions ``block_rows``.
    if block_rows < 1:
        raise ValueError(f"block_rows must be a positive integer, got {block_rows}")
    parts = [
        fn(values[start : start + block_rows]) for start in range(0, n, block_rows)
    ]
    return np.concatenate(parts, axis=0)
47
+
48
+
49
def rebuild(
    frame: AnyFrame,
    values: NDArray[np.float32],
    index: SpatioTemporalIndex | None = None,
) -> AnyFrame:
    """Build a new frame of ``frame``'s concrete type around ``values``.

    The input frame's ``metadata`` is passed through unchanged, and a
    `FeatureFrame` additionally keeps its ``feature_names``. Construction goes
    through the frame class's own constructor.

    Args:
        frame: Template frame supplying the type, metadata and default index.
        values: Values for the new frame.
        index: Index for the new frame; ``None`` (the default) reuses
            ``frame.index``.

    Returns:
        A `FeatureFrame`, `PredictionFrame` or `TargetFrame`, matching ``frame``.

    Raises:
        TypeError: ``frame`` is none of the three supported frame types.
    """
    target_index = index if index is not None else frame.index

    # FeatureFrame is tested first and separately: its constructor takes the
    # extra ``feature_names`` argument.
    if isinstance(frame, FeatureFrame):
        return FeatureFrame(values, target_index, frame.feature_names, frame.metadata)

    for frame_cls in (PredictionFrame, TargetFrame):
        if isinstance(frame, frame_cls):
            return frame_cls(values, target_index, frame.metadata)

    raise TypeError(f"unsupported frame type: {type(frame).__name__}")
@@ -0,0 +1,83 @@
1
+ """Conservation-correct cross-level aggregation of sample distributions (ADR-017).
2
+
3
+ Sum the per-cell sample arrays across the cells of each coarser unit **preserving the
4
+ sample index** (joint sampling), so the aggregated uncertainty is correct —
5
+ ``HDI(sum) != sum(HDI)`` (the faoapi C-70 concern). The ``(time, unit) ->
6
+ target_unit`` mapping is **injected** by the caller (the same map the leaf's
7
+ ``cross_level_align`` takes — time-varying, register C-20); no geography is
8
+ embedded here.
9
+
10
+ The mapping may be a Python ``dict`` (``aggregate_distributions``) or parallel
11
+ arrays (``aggregate_distributions_arrays``) — the columnar form avoids building a
12
+ ~10.5M-key dict at grid scale (register C-26).
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from collections.abc import Mapping
18
+ from typing import Any
19
+
20
+ import numpy as np
21
+ from numpy.typing import NDArray
22
+
23
+ from views_frames import SpatialLevel, SpatioTemporalIndex
24
+ from views_frames_summarize._common import AnyFrame, rebuild
25
+
26
+
27
def aggregate_distributions(
    frame: AnyFrame,
    mapping: Mapping[tuple[int, int], int],
    target_level: SpatialLevel,
) -> AnyFrame:
    """Sum a frame's sample distributions up to ``target_level`` (dict mapping).

    The frame's index is first re-expressed at ``target_level`` by
    ``frame.index.cross_level_align(mapping, target_level)``; rows that land on
    the same ``(time, target_unit)`` pair are then summed element-wise by
    ``_aggregate_to``, so sample ``j`` of the result is the sum of sample ``j``
    of every contributing row.

    Args:
        frame: Frame to aggregate.
        mapping: Caller-supplied ``(time, unit) -> target_unit`` lookup.
        target_level: Spatial level of the resulting frame.

    Returns:
        A frame of the same type as ``frame``, one row per distinct
        ``(time, target_unit)`` pair.

    Raises:
        ValueError: propagated from ``cross_level_align`` — per its contract,
            when ``mapping`` is missing/empty, is not keyed by ``(time, unit)``
            pairs, or lacks an entry for one of the frame's rows.
    """
    coarse_index = frame.index.cross_level_align(mapping, target_level)
    aggregated = _aggregate_to(frame, coarse_index, target_level)
    return aggregated
46
+
47
+
48
def aggregate_distributions_arrays(
    frame: AnyFrame,
    map_keys: NDArray[np.integer[Any]],
    map_vals: NDArray[np.integer[Any]],
    target_level: SpatialLevel,
) -> AnyFrame:
    """Sum a frame's sample distributions up to ``target_level`` (array mapping).

    Same aggregation as ``aggregate_distributions``, but the
    ``(time, unit) -> target_unit`` lookup arrives as two parallel arrays
    instead of a ``dict``: ``map_keys`` of shape ``(M, 2)`` and ``map_vals`` of
    shape ``(M,)``. The index is remapped by
    ``frame.index.cross_level_align_arrays`` and the rows are then summed per
    ``(time, target_unit)`` group by ``_aggregate_to``.

    Args:
        frame: Frame to aggregate.
        map_keys: ``(time, unit)`` pairs, one per row.
        map_vals: Target unit for the matching row of ``map_keys``.
        target_level: Spatial level of the resulting frame.

    Returns:
        A frame of the same type as ``frame``, one row per distinct
        ``(time, target_unit)`` pair.
    """
    coarse_index = frame.index.cross_level_align_arrays(
        map_keys, map_vals, target_level
    )
    aggregated = _aggregate_to(frame, coarse_index, target_level)
    return aggregated
63
+
64
+
65
def _aggregate_to(
    frame: AnyFrame, remapped: SpatioTemporalIndex, target_level: SpatialLevel
) -> AnyFrame:
    """Sum ``frame``'s rows within each ``(time, unit)`` group of ``remapped``.

    Args:
        frame: Frame whose ``values`` rows are summed.
        remapped: Index giving, row by row, the ``(time, unit)`` group each row
            of ``frame.values`` belongs to.
        target_level: Level recorded on the index of the returned frame.

    Returns:
        A frame of the same type as ``frame`` with one row per distinct
        ``(time, unit)`` pair, in the sorted order produced by ``np.unique``.
    """
    times = remapped.time.astype(np.int64)
    units = remapped.unit.astype(np.int64)
    pairs = np.stack([times, units], axis=1)

    groups, row_to_group = np.unique(pairs, axis=0, return_inverse=True)
    # Flatten defensively: the inverse must be a 1-D row -> group lookup for the
    # scatter-add below.
    row_to_group = np.asarray(row_to_group).reshape(-1)

    trailing_shape = frame.values.shape[1:]
    totals = np.zeros((groups.shape[0], *trailing_shape), dtype=np.float32)
    # Unbuffered add: rows sharing a group index all accumulate into that group.
    np.add.at(totals, row_to_group, frame.values)

    group_index = SpatioTemporalIndex(
        time=np.asarray(groups[:, 0], dtype=np.int64),
        unit=np.asarray(groups[:, 1], dtype=np.int64),
        level=target_level,
    )
    return rebuild(frame, totals, group_index)
@@ -0,0 +1,37 @@
1
+ """`collapse` — the generic sample-axis fold (a point estimate over the samples).
2
+
3
+ The statistic is **injected** by the caller (e.g. ``np.mean``, ``np.median``); this
4
+ package owns the *mechanism* (reduce the trailing axis, rebuild a valid frame), not
5
+ a menu of statistics. This is the operation that was removed from the leaf (ADR-017).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections.abc import Callable
11
+ from typing import Any
12
+
13
+ import numpy as np
14
+
15
+ from views_frames_summarize._common import AnyFrame, rebuild
16
+
17
+ # A reducer is applied as ``reducer(values, axis=-1)`` and reduces the sample axis.
18
+ Reducer = Callable[..., Any]
19
+
20
+
21
def collapse(frame: AnyFrame, reducer: Reducer) -> AnyFrame:
    """Fold the trailing axis of ``frame.values`` with ``reducer``.

    The reducer is invoked exactly once, as ``reducer(frame.values, axis=-1)``.
    Its result is converted to ``float32`` and given a new trailing axis of
    size 1 before the frame is rebuilt, e.g. `(N, S) → (N, 1)` or
    `(N, F, S) → (N, F, 1)`.

    Args:
        frame: Frame whose trailing axis is to be reduced.
        reducer: Numpy-style reduction accepting ``(values, axis=-1)``, such as
            ``np.mean``, ``np.median`` or ``np.max``.

    Returns:
        A frame of the same type as ``frame`` holding the reduced values.
    """
    point = reducer(frame.values, axis=-1)
    point = np.asarray(point, dtype=np.float32)
    # Re-introduce the reduced axis so the result keeps the input's rank.
    return rebuild(frame, point[..., None])
@@ -0,0 +1,40 @@
1
+ """Conformance checks for the summarize package (ADR-016/017).
2
+
3
+ A consumer can re-run these against its own frame factories to confirm the
4
+ summarizers behave: point estimates return same-type `(N, …, 1)` frames; interval
5
+ estimates return arrays aligned to the input frame's rows.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import numpy as np
11
+
12
+ from views_frames_summarize._common import AnyFrame
13
+ from views_frames_summarize.collapse import collapse
14
+ from views_frames_summarize.interval import hdi, quantiles
15
+ from views_frames_summarize.point import map_estimate
16
+
17
+
18
def assert_summarizer_contract(frame: AnyFrame) -> None:
    """Check that every summarizer honours its output contract on ``frame``.

    Runs ``collapse`` (with ``np.mean``), ``map_estimate``, ``hdi`` (mass 0.9)
    and ``quantiles`` (levels 0.1 / 0.5 / 0.9) on ``frame`` and verifies the
    type and shape of each result.

    The checks raise ``AssertionError`` explicitly rather than through
    ``assert`` statements, so they still run when Python is started with ``-O``
    (which strips ``assert``).

    Args:
        frame: Frame to run the summarizers on.

    Raises:
        AssertionError: a summarizer violates its output contract.
    """

    def _require(condition: bool, message: str) -> None:
        # Explicit raise: unlike ``assert``, this is not removed under ``-O``.
        if not condition:
            raise AssertionError(message)

    n = frame.n_rows

    point = collapse(frame, np.mean)
    _require(type(point) is type(frame), "collapse must return the same frame type")
    _require(
        point.values.shape[-1] == 1, "collapse must reduce the sample axis to 1"
    )
    _require(point.n_rows == n, "collapse must preserve rows")

    mode = map_estimate(frame)
    _require(
        mode.values.shape[-1] == 1 and mode.n_rows == n, "map_estimate → (N,…,1)"
    )

    lo_hi = hdi(frame, mass=0.9)
    _require(lo_hi.shape[0] == n, "hdi must be aligned to the frame's rows")
    _require(lo_hi.shape[-1] == 2, "hdi must produce (lower, upper)")

    qs = quantiles(frame, [0.1, 0.5, 0.9])
    _require(qs.shape[0] == n, "quantiles must be aligned to the frame's rows")
    _require(qs.shape[-1] == 3, "quantiles must produce one column per quantile")
@@ -0,0 +1,62 @@
1
+ """Interval estimates over the sample axis (ADR-017).
2
+
3
+ Return numpy arrays **aligned to the input frame's index** (the caller holds the
4
+ index): `hdi` → `(N, …, 2)` lower/upper; `quantiles` → `(N, …, len(qs))`.
5
+
6
+ Both reduce the **trailing** sample axis and are vectorized (no per-row Python
7
+ loop). They run in **row-blocks** (`block_rows`) so peak memory is bounded by one
8
+ block's working set rather than a full-grid sorted copy — the same discipline as
9
+ `map_estimate`, so the whole reduction family stays under the #181 OOM (register
10
+ C-25).
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from collections.abc import Sequence
16
+
17
+ import numpy as np
18
+ from numpy.typing import NDArray
19
+
20
+ from views_frames_summarize._common import ROW_BLOCK, AnyFrame, block_apply
21
+
22
+
23
def hdi(
    frame: AnyFrame, mass: float = 0.9, *, block_rows: int = ROW_BLOCK
) -> NDArray[np.float32]:
    """Per-row highest-density interval over the sample axis → `(N, …, 2)`.

    Empirical HDI: with ``S`` samples per row and ``k = floor(mass * S)``, the
    result is the narrowest window ``[sorted[i], sorted[i + k]]`` over all valid
    starts ``i`` (it spans ``k + 1`` consecutive sorted samples). Ties go to
    the lowest ``i``. When ``k < 1`` the interval degenerates to
    ``(min, min)``. ``mass == 1.0`` yields the full sample range
    ``(min, max)``.

    The computation is vectorized over the trailing axis and run through
    ``block_apply`` in row-blocks of ``block_rows``.

    Args:
        frame: Frame whose ``values`` carry the samples on the trailing axis.
        mass: Fraction of the samples the interval should cover; at most 1.0.
        block_rows: Row-block size handed to ``block_apply``.

    Returns:
        ``float32`` array shaped like ``frame.values`` with the trailing axis
        replaced by ``(lower, upper)``.

    Raises:
        ValueError: ``mass`` is greater than 1.0 or is NaN.
    """
    # ``not (mass <= 1.0)`` also rejects NaN. Such inputs always failed with a
    # ValueError further down (empty argmin / broadcasting); fail early instead.
    if not mass <= 1.0:
        raise ValueError(f"mass must be at most 1.0, got {mass!r}")

    s = frame.values.shape[-1]
    # Clamp to S - 1: with mass == 1.0 the unclamped k equals S, which leaves no
    # candidate window and makes ``argmin`` fail on an empty axis. k = S - 1 is
    # the single window covering every sample.
    k = min(int(np.floor(mass * s)), s - 1)

    def _hdi_block(vals: NDArray[np.float32]) -> NDArray[np.float32]:
        srt = np.sort(vals, axis=-1)
        if k < 1:
            lower = srt[..., 0]
            return np.stack([lower, lower], axis=-1)
        # One candidate per start i: width = srt[i + k] - srt[i]. The narrowest
        # candidate is the HDI; argmin returns the first minimum.
        widths = srt[..., k:] - srt[..., : s - k]
        i = np.argmin(widths, axis=-1)
        lower = np.take_along_axis(srt, i[..., np.newaxis], axis=-1)[..., 0]
        upper = np.take_along_axis(srt, (i + k)[..., np.newaxis], axis=-1)[..., 0]
        return np.stack([lower, upper], axis=-1)

    out = block_apply(frame.values, block_rows, _hdi_block)
    return np.asarray(out, dtype=np.float32)
49
+
50
+
51
def quantiles(
    frame: AnyFrame, qs: Sequence[float], *, block_rows: int = ROW_BLOCK
) -> NDArray[np.float32]:
    """Per-row quantiles over the sample axis → `(N, …, len(qs))`.

    ``np.quantile`` is evaluated on the trailing axis of ``frame.values`` in
    row-blocks of ``block_rows`` (via ``block_apply``). Its leading
    quantile-level axis is moved to the end, so each input row lines up with
    one output row.

    Args:
        frame: Frame whose ``values`` carry the samples on the trailing axis.
        qs: Quantile levels, passed to ``np.quantile`` as ``float64``.
        block_rows: Row-block size handed to ``block_apply``.

    Returns:
        ``float32`` array with one trailing entry per level in ``qs``.
    """
    levels = np.asarray(qs, dtype=np.float64)

    def _per_block(chunk: NDArray[np.float32]) -> NDArray[np.float32]:
        # np.quantile puts the level axis first; callers want it last.
        per_level = np.asarray(
            np.quantile(chunk, levels, axis=-1), dtype=np.float32
        )
        return np.moveaxis(per_level, 0, -1)

    blocked = block_apply(frame.values, block_rows, _per_block)
    return np.asarray(blocked, dtype=np.float32)
@@ -0,0 +1,104 @@
1
+ """Point estimates over the sample axis (ADR-017) — return a `(N, …, 1)` frame.
2
+
3
+ `map_estimate` is the maximum-a-posteriori estimate as faoapi/reporting compute it:
4
+ the empirical density peak (histogram), with a zero-mass→0 rule for the
5
+ zero-inflated conflict distributions. The mechanism reduces the **trailing** axis;
6
+ the leaf guarantees that axis is the sample axis (ADR-012).
7
+
8
+ The histogram is computed **batched in row-blocks** (no per-row Python loop) so
9
+ it scales to the full grid (register C-22). Blocking caps peak memory at
10
+ ``O(block * bins)`` regardless of row count — a whole-grid batch would allocate a
11
+ ``rows × bins`` counts matrix and re-introduce the #181 OOM. The batched binning
12
+ reproduces ``numpy.histogram``'s uniform-bin **counts** and breaks ties on the
13
+ integer counts (lowest-index), so the selected bin is **deterministic and
14
+ identical on every numpy version** (register C-24); the bin centre matches the
15
+ per-row reference to float32 precision (proven by `test_summarize_scale.py`).
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import numpy as np
21
+ from numpy.typing import NDArray
22
+
23
+ from views_frames_summarize._common import ROW_BLOCK, AnyFrame, rebuild
24
+
25
+
26
def map_estimate(
    frame: AnyFrame,
    *,
    bins: int = 100,
    zero_mass_threshold: float = 0.3,
    block_rows: int = ROW_BLOCK,
) -> AnyFrame:
    """Per-row MAP estimate over the sample axis → a `(N, …, 1)` frame.

    All leading axes of ``frame.values`` are flattened into rows and processed
    ``block_rows`` rows at a time. For each row the estimate is ``0.0`` when
    the share of samples within ``1e-8`` of zero is at least
    ``zero_mass_threshold``; otherwise it is the centre of the fullest of
    ``bins`` histogram bins, as computed by ``_batched_map``.

    Args:
        frame: Frame whose ``values`` carry the samples on the trailing axis.
        bins: Number of histogram bins per row.
        zero_mass_threshold: Minimum share of ~zero samples that forces ``0.0``.
        block_rows: Number of flattened rows processed per iteration.

    Returns:
        A frame of the same type as ``frame`` with the trailing axis reduced to
        size 1.
    """
    values = frame.values
    leading_shape = values.shape[:-1]
    n_samples = values.shape[-1]
    # No dtype change here: the histogram is built in the input dtype, because
    # upcasting to float64 would move the bin edges and could select another bin.
    rows = np.ascontiguousarray(values).reshape(-1, n_samples)
    n_rows = rows.shape[0]

    estimates = np.empty(n_rows, dtype=np.float32)
    for lo in range(0, n_rows, block_rows):
        hi = lo + block_rows
        chunk = rows[lo:hi]
        modes = _batched_map(chunk, bins)
        zero_share = np.isclose(chunk, 0.0, atol=1e-8).mean(axis=1)
        estimates[lo:hi] = np.where(zero_share >= zero_mass_threshold, 0.0, modes)

    # Restore the leading axes and append the size-1 axis of a point estimate.
    return rebuild(frame, estimates.reshape(leading_shape)[..., np.newaxis])
57
+
58
+
59
+ def _batched_map(flat: NDArray[np.float32], bins: int) -> NDArray[np.float32]:
60
+ """Centre of the densest histogram bin for each row of a row-block ``(M, S)``.
61
+
62
+ Reproduces ``numpy.histogram``'s uniform-bin path row-by-row but vectorized:
63
+ same dtype, same edges (``linspace``), same float-rounding correction, so the
64
+ per-row bin counts — and therefore the argmax and bin centre — are identical.
65
+ """
66
+ m = flat.shape[0]
67
+ dtype = flat.dtype
68
+ first = flat.min(axis=1)
69
+ last = flat.max(axis=1)
70
+ # all-equal rows: numpy widens the range to (v - 0.5, v + 0.5).
71
+ degenerate = first == last
72
+ half = np.array(0.5, dtype=dtype)
73
+ first = np.where(degenerate, first - half, first)
74
+ last = np.where(degenerate, last + half, last)
75
+ span = last - first
76
+
77
+ # Per-row bin edges — numpy.histogram builds these with linspace at bin dtype.
78
+ edges = np.linspace(first, last, bins + 1, axis=1).astype(dtype) # (M, bins + 1)
79
+
80
+ # numpy's uniform-bin index: ((a - first) / span) * bins, then the exact
81
+ # float-rounding correction against the gathered edges.
82
+ f_idx = ((flat - first[:, None]) / span[:, None]) * bins
83
+ idx = f_idx.astype(np.intp)
84
+ idx[idx == bins] = bins - 1
85
+ left = np.take_along_axis(edges, idx, axis=1)
86
+ idx[flat < left] -= 1
87
+ right = np.take_along_axis(edges, idx + 1, axis=1)
88
+ idx[(flat >= right) & (idx != bins - 1)] += 1
89
+
90
+ # Batched bincount: offset each row into its own length-``bins`` block.
91
+ offsets = idx + (np.arange(m)[:, None] * bins)
92
+ counts = np.bincount(offsets.ravel(), minlength=m * bins).reshape(m, bins)
93
+
94
+ # The densest bin = the one with the most samples. Tie-break on the **integer
95
+ # counts** (lowest-index wins), not on ``counts / width`` density: the bins are
96
+ # uniform so density and counts agree on the winner — *except* on ties, where
97
+ # the float64 bin widths differ by ~1 ulp across numpy versions and flip the
98
+ # argmax (register C-24). Integer ``argmax`` is deterministic and identical on
99
+ # every numpy build, so ``map_estimate`` is portable and reproducible.
100
+ densest = np.argmax(counts, axis=1)
101
+
102
+ lo = np.take_along_axis(edges, densest[:, None], axis=1)[:, 0]
103
+ hi = np.take_along_axis(edges, (densest + 1)[:, None], axis=1)[:, 0]
104
+ return np.asarray((lo + hi) / 2.0, dtype=np.float32)
File without changes