views-frames 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- views_frames/__init__.py +41 -0
- views_frames/_typing.py +24 -0
- views_frames/_validation.py +114 -0
- views_frames/conformance/__init__.py +123 -0
- views_frames/feature_frame.py +188 -0
- views_frames/index.py +309 -0
- views_frames/io/__init__.py +13 -0
- views_frames/io/arrow.py +103 -0
- views_frames/io/npz.py +59 -0
- views_frames/metadata.py +36 -0
- views_frames/prediction_frame.py +143 -0
- views_frames/protocols.py +82 -0
- views_frames/py.typed +0 -0
- views_frames/spatial_level.py +41 -0
- views_frames/target_frame.py +138 -0
- views_frames-1.0.0.dist-info/METADATA +624 -0
- views_frames-1.0.0.dist-info/RECORD +27 -0
- views_frames-1.0.0.dist-info/WHEEL +4 -0
- views_frames-1.0.0.dist-info/licenses/LICENSE +21 -0
- views_frames_summarize/__init__.py +29 -0
- views_frames_summarize/_common.py +68 -0
- views_frames_summarize/aggregate.py +83 -0
- views_frames_summarize/collapse.py +37 -0
- views_frames_summarize/conformance.py +40 -0
- views_frames_summarize/interval.py +62 -0
- views_frames_summarize/point.py +104 -0
- views_frames_summarize/py.typed +0 -0
views_frames/__init__.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""views-frames — the VIEWS platform data-contract layer (numpy only).
|
|
2
|
+
|
|
3
|
+
Immutable array+identifier value objects at the root of the platform dependency
|
|
4
|
+
DAG. Explicit re-exports only (no ``import *``) so the public API is statically
|
|
5
|
+
analyzable (README §6).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
11
|
+
|
|
12
|
+
from views_frames.feature_frame import FeatureFrame
|
|
13
|
+
from views_frames.index import SpatioTemporalIndex
|
|
14
|
+
from views_frames.metadata import FrameMetadata
|
|
15
|
+
from views_frames.prediction_frame import PredictionFrame
|
|
16
|
+
from views_frames.protocols import (
|
|
17
|
+
Frame,
|
|
18
|
+
Persistable,
|
|
19
|
+
Sampled,
|
|
20
|
+
SpatioTemporalIndexed,
|
|
21
|
+
)
|
|
22
|
+
from views_frames.spatial_level import SpatialLevel
|
|
23
|
+
from views_frames.target_frame import TargetFrame
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
"FeatureFrame",
|
|
27
|
+
"Frame",
|
|
28
|
+
"FrameMetadata",
|
|
29
|
+
"Persistable",
|
|
30
|
+
"PredictionFrame",
|
|
31
|
+
"Sampled",
|
|
32
|
+
"SpatialLevel",
|
|
33
|
+
"SpatioTemporalIndex",
|
|
34
|
+
"SpatioTemporalIndexed",
|
|
35
|
+
"TargetFrame",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
# Expose the installed distribution's version as ``__version__``. When no
# installed distribution named "views-frames" can be found, fall back to the
# placeholder "0.0.0" instead of failing at import time.
try:
    __version__ = version("views-frames")
except PackageNotFoundError:  # pragma: no cover
    __version__ = "0.0.0"
|
views_frames/_typing.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Internal type aliases for the leaf's array surface (register C-19).
|
|
2
|
+
|
|
3
|
+
`NDArray[np.integer]` is a generic with an unbound parameter: under
|
|
4
|
+
``mypy --strict`` (``disallow_any_generics``) it is a ``[type-arg]`` error at the
|
|
5
|
+
declared numpy floor (``numpy==1.26.4``), even though newer stubs let it slide.
|
|
6
|
+
Parameterising once here — ``np.integer[Any]`` — keeps every call site green at
|
|
7
|
+
the floor and gives the integer identifier arrays a single, named contract.
|
|
8
|
+
|
|
9
|
+
These are private (underscore module): the public surface is the frames, not the
|
|
10
|
+
array aliases.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
from numpy.typing import NDArray
|
|
19
|
+
|
|
20
|
+
# Integer identifier arrays (``time``, ``unit``): any integer width is accepted.
# ``np.integer[Any]`` binds the generic's parameter explicitly so the alias is a
# fully parameterised type (see the module docstring for the mypy rationale).
IntArray = NDArray[np.integer[Any]]

# Float32 value arrays: the leaf's canonical value dtype (ADR-009).
Float32Array = NDArray[np.float32]
|
|
views_frames/_validation.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""Shared construction-time invariant checks (ADR-008, ADR-009).
|
|
2
|
+
|
|
3
|
+
Fail loud at construction. The guarantee is **structural, not temporal**: the
|
|
4
|
+
leaf validates integer dtype / length-N / completeness, but ``time`` is an opaque
|
|
5
|
+
integer — epoch, range, and monotonicity are a producer concern (register C-11).
|
|
6
|
+
|
|
7
|
+
This is the numpy-only replacement for the ``pd.isna`` check used today in
|
|
8
|
+
``views-pipeline-core/.../data/prediction_frame.py`` (register C-17): identifiers
|
|
9
|
+
are required to be **integer** dtype, which makes them complete by construction
|
|
10
|
+
(integers cannot be NaN).
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from typing import cast
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
from numpy.typing import NDArray
|
|
19
|
+
|
|
20
|
+
from views_frames._typing import IntArray
|
|
21
|
+
|
|
22
|
+
# Identifier keys that every identifiers mapping must contain; checked by
# ``validate_identifiers`` before any per-array validation.
REQUIRED_IDENTIFIERS = ("time", "unit")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def validate_identifiers(
    identifiers: dict[str, IntArray], n_rows: int
) -> None:
    """Check that every identifier is a 1-D integer array with ``n_rows`` entries.

    The required keys are verified first. Afterwards every supplied array
    (required or not) is checked for container type, dtype, rank and length,
    in that order; the first violation found is raised.

    Args:
        identifiers: Identifier name mapped to its 1-D integer array.
        n_rows: The length every identifier array must have.

    Raises:
        ValueError: A required identifier is absent, an array is not 1-D, or
            an array's length differs from ``n_rows``.
        TypeError: An identifier is not a numpy array or is not integer-typed.
    """
    # Report the first required key (in declaration order) that is absent.
    absent = next(
        (name for name in REQUIRED_IDENTIFIERS if name not in identifiers),
        None,
    )
    if absent is not None:
        raise ValueError(f"Missing required identifier: '{absent}'")

    for name, column in identifiers.items():
        if not isinstance(column, np.ndarray):
            found = type(column).__name__
            raise TypeError(
                f"Identifier '{name}' must be a numpy array, got {found}"
            )

        dtype = column.dtype
        if not np.issubdtype(dtype, np.integer):
            raise TypeError(
                f"Identifier '{name}' must be an integer dtype "
                f"(integers cannot be NaN); got {dtype}"
            )

        rank = column.ndim
        if rank != 1:
            raise ValueError(f"Identifier '{name}' must be 1-D, got ndim={rank}")

        length = column.shape[0]
        if length != n_rows:
            raise ValueError(
                f"Identifier '{name}' has length {length} but expected {n_rows}"
            )
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def coerce_values(values: object) -> NDArray[np.float32]:
    """Return ``values`` as a ``float32`` array, rejecting object dtype.

    Input that is already ``float32`` is handed back as the very same array
    object (no copy), which keeps ``np.memmap`` inputs memory-mapped
    (register C-07). Any other non-object dtype is converted to ``float32``.

    Args:
        values: Anything ``np.asanyarray`` accepts.

    Returns:
        The ``float32`` array.

    Raises:
        ValueError: the input is object dtype (the banned list-in-cell encoding).
    """
    # asanyarray (rather than asarray) keeps ndarray subclasses such as
    # np.memmap intact, so the float32 pass-through below stays zero-copy.
    candidate = np.asanyarray(values)
    dtype = candidate.dtype
    if dtype == np.dtype(object):
        raise ValueError(
            "values must not be object dtype — list-in-cell is banned (README §7)"
        )
    if dtype != np.float32:
        return np.asarray(candidate, dtype=np.float32)
    return cast("NDArray[np.float32]", candidate)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def validate_values(values: NDArray[np.float32]) -> None:
    """Confirm the stored invariants of a frame's value array.

    The caller is expected to have coerced the input to ``float32`` already;
    this function only verifies the final array: it is a numpy array, is not
    object dtype, is ``float32``, and has at least two axes.

    Args:
        values: The frame's value array (first axis = rows; last axis = samples).

    Raises:
        TypeError: ``values`` is not a numpy array or not ``float32``.
        ValueError: ``values`` is object dtype (list-in-cell is banned) or lacks
            an explicit trailing sample axis (``ndim < 2``).
    """
    if not isinstance(values, np.ndarray):
        found = type(values).__name__
        raise TypeError(f"values must be a numpy array, got {found}")

    dtype = values.dtype
    # Object dtype is tested before the generic float32 test so that it is
    # reported as a ValueError with its own message, not as a TypeError.
    if dtype == np.dtype(object):
        raise ValueError(
            "values must not be object dtype — list-in-cell is banned (README §7)"
        )
    if dtype != np.float32:
        raise TypeError(f"values must be float32, got {dtype}")

    rank = values.ndim
    if rank >= 2:
        return
    raise ValueError(
        "values must have an explicit trailing sample axis (ndim >= 2), "
        f"got ndim={rank}"
    )
|
|
views_frames/conformance/__init__.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""The published conformance suite (ADR-016).
|
|
2
|
+
|
|
3
|
+
A consumer re-runs these contract checks in CI against **its own** frame factories,
|
|
4
|
+
at a single governed **conformance-floor** version, so every consumer tests the same
|
|
5
|
+
contract (closes the cross-repo gap; register C-10). The checks are plain assertion
|
|
6
|
+
functions (no pytest dependency) so they run anywhere.
|
|
7
|
+
|
|
8
|
+
Usage in a consumer's test::
|
|
9
|
+
|
|
10
|
+
from views_frames.conformance import assert_frame_contract
|
|
11
|
+
assert_frame_contract(my_adapter_output())
|
|
12
|
+
|
|
13
|
+
The floor is governed in ``GOVERNANCE.md``; ``CONFORMANCE_FLOOR`` records the version
|
|
14
|
+
this suite belongs to.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import tempfile
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
import numpy as np
|
|
23
|
+
|
|
24
|
+
# The version this conformance suite belongs to (the governed floor is
# described in GOVERNANCE.md, per the module docstring).
CONFORMANCE_FLOOR = "1.0.0"
|
|
25
|
+
|
|
26
|
+
__all__ = [
|
|
27
|
+
"CONFORMANCE_FLOOR",
|
|
28
|
+
"assert_cross_level_alignment_law",
|
|
29
|
+
"assert_frame_contract",
|
|
30
|
+
"assert_index_alignment_laws",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def assert_frame_contract(frame: Any) -> None:
    """Assert ``frame`` satisfies the views-frames data contract.

    Checks the structural invariants (values are a numpy array, not object
    dtype, ``float32``, with an explicit trailing axis and ``n_rows`` rows; the
    ``time`` and ``unit`` identifiers are integer numpy arrays of shape
    ``(n_rows,)``) and then the save/load round-trip. (Sample-axis reduction is
    the ``views_frames_summarize`` package's concern, not the contract's —
    ADR-017.)

    Args:
        frame: Any object exposing ``values``, ``n_rows``, ``identifiers``,
            ``save(directory)`` and a ``load(directory)`` constructor on its type.

    Raises:
        AssertionError: any part of the contract is violated.
    """
    values = frame.values
    assert isinstance(values, np.ndarray), "values must be a numpy array"
    # Object dtype is checked *before* float32: every object array also fails
    # the float32 check, so in the opposite order this more specific
    # list-in-cell diagnostic could never be reported.
    assert values.dtype != np.dtype(object), "object dtype is banned (list-in-cell)"
    assert values.dtype == np.float32, f"values must be float32, got {values.dtype}"
    assert values.ndim >= 2, "values must have an explicit trailing sample axis"
    assert values.shape[0] == frame.n_rows, "values rows must equal n_rows"

    ids = frame.identifiers
    for key in ("time", "unit"):
        assert key in ids, f"missing required identifier '{key}'"
        arr = ids[key]
        # Without this guard a list/tuple identifier would escape as an
        # AttributeError on ``.dtype`` instead of the documented AssertionError.
        assert isinstance(arr, np.ndarray), f"'{key}' must be a numpy array"
        assert np.issubdtype(arr.dtype, np.integer), f"'{key}' must be integer"
        assert arr.shape == (frame.n_rows,), f"'{key}' must be length n_rows"

    _assert_roundtrip(frame)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _assert_roundtrip(frame: Any) -> None:
    """Assert that saving ``frame`` and loading it back preserves its data.

    The frame is saved into a fresh temporary directory, reloaded through
    ``type(frame).load`` and compared with the original: the value arrays and
    every identifier array must be equal.

    Args:
        frame: An object with ``values``, ``identifiers``, ``save(directory)``
            and a ``load(directory)`` constructor on its type.

    Raises:
        AssertionError: the reloaded values differ, or an identifier is
            missing from or differs in the reloaded frame.
    """
    with tempfile.TemporaryDirectory() as directory:
        frame.save(directory)
        loaded = type(frame).load(directory)
        # equal_nan=True: under the default comparison NaN != NaN, so a frame
        # holding NaN values would be reported as changed even when the
        # round-trip reproduced it exactly.
        assert np.array_equal(loaded.values, frame.values, equal_nan=True), (
            "save/load changed values"
        )
        loaded_ids = loaded.identifiers
        for key, arr in frame.identifiers.items():
            # A dropped identifier must surface as a contract failure, not as
            # a KeyError from the lookup below.
            assert key in loaded_ids, f"save/load dropped identifier '{key}'"
            assert np.array_equal(loaded_ids[key], arr), (
                f"save/load changed identifier '{key}'"
            )
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def assert_index_alignment_laws(index_a: Any, index_b: Any) -> None:
    """Assert the same-level alignment laws for two indices at one level.

    Three laws are checked, in this order:

    - ``intersect`` gives equal results in both argument orders (commutative);
    - ``index_a.is_superset_of(index_a)`` is exactly ``True`` (reflexive);
    - the positions from ``index_a.searchsorted(index_a)`` select ``time`` and
      ``unit`` arrays equal to the originals (identity round-trip).

    Args:
        index_a: The index on which all three laws are exercised.
        index_b: The second operand of the commutativity check only.

    Raises:
        AssertionError: a law is violated.
    """
    forward = index_a.intersect(index_b)
    backward = index_b.intersect(index_a)
    assert forward == backward, "intersect must be commutative"

    reflexive = index_a.is_superset_of(index_a)
    # Identity comparison on purpose: only the real ``True`` singleton passes.
    assert reflexive is True, "is_superset_of must be reflexive"

    positions = index_a.searchsorted(index_a)
    times = index_a.time
    assert np.array_equal(times[positions], times), "searchsorted self-identity"
    units = index_a.unit
    assert np.array_equal(units[positions], units), "searchsorted self-identity"
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def assert_cross_level_alignment_law(
|
|
93
|
+
index: Any, mapping: Any, target_level: Any
|
|
94
|
+
) -> None:
|
|
95
|
+
"""Assert ``cross_level_align`` honours the **time-varying** injected mapping.
|
|
96
|
+
|
|
97
|
+
The mapping is keyed by ``(time, unit)`` (register C-20), so the same unit may
|
|
98
|
+
map to different target units in different time steps. The law:
|
|
99
|
+
|
|
100
|
+
- every row's target unit equals ``mapping[(time, unit)]`` (time-varying remap);
|
|
101
|
+
- ``time`` is preserved row-for-row;
|
|
102
|
+
- the produced index carries ``target_level``.
|
|
103
|
+
|
|
104
|
+
Args:
|
|
105
|
+
index: a ``SpatioTemporalIndex`` to remap.
|
|
106
|
+
mapping: a ``{(time, unit): target_unit}`` mapping covering every row.
|
|
107
|
+
target_level: the ``SpatialLevel`` to remap to.
|
|
108
|
+
|
|
109
|
+
Raises:
|
|
110
|
+
AssertionError: the remap disagrees with the mapping, drops time, or
|
|
111
|
+
produces the wrong level.
|
|
112
|
+
"""
|
|
113
|
+
aligned = index.cross_level_align(mapping, target_level)
|
|
114
|
+
assert aligned.level is target_level, "cross_level_align must carry target_level"
|
|
115
|
+
assert np.array_equal(aligned.time, index.time), "cross_level_align must keep time"
|
|
116
|
+
pairs = zip(index.time, index.unit, strict=True)
|
|
117
|
+
expected = np.array(
|
|
118
|
+
[mapping[(int(t), int(u))] for t, u in pairs],
|
|
119
|
+
dtype=aligned.unit.dtype,
|
|
120
|
+
)
|
|
121
|
+
assert np.array_equal(aligned.unit, expected), (
|
|
122
|
+
"cross_level_align must honour the (time, unit)-keyed mapping per row"
|
|
123
|
+
)
|
|
views_frames/feature_frame.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""`FeatureFrame` — model inputs (X): ``y_features (N, F, S)`` float32.
|
|
2
|
+
|
|
3
|
+
A sibling frame (no shared base; ADR-011 Option C). Relocated from
|
|
4
|
+
views-datafactory. The sample axis is **always explicit** (ADR-012): legacy 2D
|
|
5
|
+
``(N, F)`` arrays are lifted to ``(N, F, 1)`` only through the explicit
|
|
6
|
+
:meth:`from_2d` shim. Carries ``feature_names`` and a typed metadata header
|
|
7
|
+
(ADR-013). ``from_grid`` is **not** here — it stays in views-datafactory.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
from numpy.typing import NDArray
|
|
16
|
+
|
|
17
|
+
from views_frames._typing import IntArray
|
|
18
|
+
from views_frames._validation import coerce_values, validate_values
|
|
19
|
+
from views_frames.index import SpatioTemporalIndex
|
|
20
|
+
from views_frames.io import npz
|
|
21
|
+
from views_frames.metadata import FrameMetadata
|
|
22
|
+
from views_frames.spatial_level import SpatialLevel
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class FeatureFrame:
    """Immutable model-input (X) frame.

    Bundles an ``(N, F, S)`` float32 value array with its spatiotemporal row
    index, the ``F`` feature names and a typed metadata header.
    """

    def __init__(
        self,
        y_features: object,
        index: SpatioTemporalIndex,
        feature_names: list[str],
        metadata: FrameMetadata | None = None,
    ) -> None:
        """Coerce, validate and store the frame's components.

        Args:
            y_features: Array-like input; coerced to ``float32`` and required
                to be 3D ``(N, F, S)``.
            index: Row index; its ``n_rows`` must equal ``N``.
            feature_names: One name per feature; the list is copied.
            metadata: Provenance header; a default ``FrameMetadata()`` is used
                when omitted.

        Raises:
            ValueError: the array is not 3D, its row count differs from the
                index, or the number of names differs from the feature axis.
        """
        arr = coerce_values(y_features)
        validate_values(arr)

        rank = arr.ndim
        if rank != 3:
            raise ValueError(
                "FeatureFrame y_features must be 3D (N, F, S) with an explicit "
                f"trailing sample axis (ADR-012), got ndim={rank}. "
                "Use FeatureFrame.from_2d to lift a legacy (N, F) array."
            )

        n_rows, n_features = arr.shape[0], arr.shape[1]
        if n_rows != index.n_rows:
            raise ValueError(
                f"y_features has {n_rows} rows but index has {index.n_rows}"
            )

        n_names = len(feature_names)
        if n_names != n_features:
            raise ValueError(
                f"feature_names length ({n_names}) must match the "
                f"feature axis ({n_features})"
            )

        self._values = arr
        self._index = index
        # Defensive copy: later mutation of the caller's list cannot leak in.
        self._feature_names = list(feature_names)
        if metadata is None:
            metadata = FrameMetadata()
        self._metadata = metadata

    @classmethod
    def from_2d(
        cls,
        y_features_2d: object,
        index: SpatioTemporalIndex,
        feature_names: list[str],
        metadata: FrameMetadata | None = None,
    ) -> FeatureFrame:
        """Build a frame from a legacy 2D ``(N, F)`` array (deprecated shim).

        A trailing axis of size one is appended, giving ``(N, F, 1)``.

        Raises:
            ValueError: the coerced input is not 2D.
        """
        flat = coerce_values(y_features_2d)
        rank = flat.ndim
        if rank != 2:
            raise ValueError(f"from_2d expects a 2D (N, F) array, got ndim={rank}")
        lifted = flat[:, :, np.newaxis]
        return cls(lifted, index, feature_names, metadata)

    # ---- core surface -------------------------------------------------------

    @property
    def values(self) -> NDArray[np.float32]:
        """The stored ``(N, F, S)`` float32 array (not a copy)."""
        return self._values

    @property
    def index(self) -> SpatioTemporalIndex:
        """The spatiotemporal index describing the rows."""
        return self._index

    @property
    def feature_names(self) -> list[str]:
        """A fresh copy of the ``F`` feature/channel names."""
        return list(self._feature_names)

    @property
    def metadata(self) -> FrameMetadata:
        """The typed provenance header attached to the frame."""
        return self._metadata

    @property
    def n_rows(self) -> int:
        """Row count ``N`` (size of the first axis)."""
        shape = self._values.shape
        return int(shape[0])

    @property
    def n_features(self) -> int:
        """Feature count ``F`` (size of the second axis)."""
        shape = self._values.shape
        return int(shape[1])

    @property
    def identifiers(self) -> dict[str, IntArray]:
        """The identifier arrays, as exposed by the index."""
        return self._index.identifiers

    @property
    def sample_count(self) -> int:
        """Sample count ``S`` (size of the last axis)."""
        shape = self._values.shape
        return int(shape[-1])

    @property
    def is_sample(self) -> bool:
        """Whether the frame holds more than one sample per cell."""
        return self.sample_count > 1

    # ---- operations ---------------------------------------------------------

    def with_metadata(self, metadata: FrameMetadata) -> FeatureFrame:
        """Return a new frame carrying ``metadata`` in place of the current one.

        The values buffer, the index and the feature-name list are **shared**
        with this frame; nothing is copied or re-validated.
        """
        # __init__ is bypassed on purpose: the components are already valid.
        clone = FeatureFrame.__new__(FeatureFrame)
        clone._values = self._values
        clone._index = self._index
        clone._feature_names = self._feature_names
        clone._metadata = metadata
        return clone

    def select(self, indexer: IntArray | NDArray[np.bool_]) -> FeatureFrame:
        """Return a new frame holding the rows picked by ``indexer``.

        ``indexer`` is applied to the value array by numpy indexing and passed
        to the index's ``select``: an integer array picks (and may reorder or
        repeat) positions, a boolean mask filters. Feature names and metadata
        carry over unchanged; the selection **copies**. An empty selection
        yields an empty frame.
        """
        rows = self._values[indexer]
        sub_index = self._index.select(indexer)
        return FeatureFrame(rows, sub_index, self._feature_names, self._metadata)

    def reindex(self, other: SpatioTemporalIndex) -> FeatureFrame:
        """Return a new frame whose rows follow ``other``.

        The frame-level companion to the index's ``reindex``/``searchsorted``:
        row positions come from ``searchsorted`` and are handed to
        :meth:`select`.

        Raises:
            ValueError: this frame's index is not a **superset** of ``other``.
        """
        own = self._index
        if own.is_superset_of(other):
            return self.select(own.searchsorted(other))
        raise ValueError(
            "reindex requires this frame's index to be a superset of `other`; "
            "some target rows are absent"
        )

    # ---- persistence --------------------------------------------------------

    def save(self, directory: Path | str) -> None:
        """Write the frame to ``directory`` through the ``npz`` io module.

        Persists the values, the index's ``time``/``unit`` arrays and level
        value, the metadata as a dict, and the feature names.
        """
        idx = self._index
        npz.save(
            directory,
            values=self._values,
            time=idx.time,
            unit=idx.unit,
            level=idx.level.value,
            metadata=self._metadata.to_dict(),
            feature_names=self._feature_names,
        )

    @classmethod
    def load(cls, directory: Path | str, mmap: bool = False) -> FeatureFrame:
        """Rebuild a frame from ``directory``; ``mmap`` is forwarded to ``npz.load``.

        Raises:
            ValueError: the saved state has no ``feature_names``.
        """
        state = npz.load(directory, mmap=mmap)
        index = SpatioTemporalIndex(
            time=state["time"],
            unit=state["unit"],
            level=SpatialLevel(state["level"]),
        )
        names = state["feature_names"]
        if names is None:
            raise ValueError("saved FeatureFrame is missing feature_names")
        payload = state["values"]
        header = FrameMetadata.from_dict(state["metadata"])
        return cls(payload, index, names, header)
|