tnfr 3.0.3__py3-none-any.whl → 8.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of tnfr might be problematic.
- tnfr/__init__.py +375 -56
- tnfr/__init__.pyi +33 -0
- tnfr/_compat.py +10 -0
- tnfr/_generated_version.py +34 -0
- tnfr/_version.py +49 -0
- tnfr/_version.pyi +7 -0
- tnfr/alias.py +723 -0
- tnfr/alias.pyi +108 -0
- tnfr/backends/__init__.py +354 -0
- tnfr/backends/jax_backend.py +173 -0
- tnfr/backends/numpy_backend.py +238 -0
- tnfr/backends/optimized_numpy.py +420 -0
- tnfr/backends/torch_backend.py +408 -0
- tnfr/cache.py +171 -0
- tnfr/cache.pyi +13 -0
- tnfr/cli/__init__.py +110 -0
- tnfr/cli/__init__.pyi +26 -0
- tnfr/cli/arguments.py +489 -0
- tnfr/cli/arguments.pyi +29 -0
- tnfr/cli/execution.py +914 -0
- tnfr/cli/execution.pyi +70 -0
- tnfr/cli/interactive_validator.py +614 -0
- tnfr/cli/utils.py +51 -0
- tnfr/cli/utils.pyi +7 -0
- tnfr/cli/validate.py +236 -0
- tnfr/compat/__init__.py +85 -0
- tnfr/compat/dataclass.py +136 -0
- tnfr/compat/jsonschema_stub.py +61 -0
- tnfr/compat/matplotlib_stub.py +73 -0
- tnfr/compat/numpy_stub.py +155 -0
- tnfr/config/__init__.py +224 -0
- tnfr/config/__init__.pyi +10 -0
- tnfr/config/constants.py +104 -0
- tnfr/config/constants.pyi +12 -0
- tnfr/config/defaults.py +54 -0
- tnfr/config/defaults_core.py +212 -0
- tnfr/config/defaults_init.py +33 -0
- tnfr/config/defaults_metric.py +104 -0
- tnfr/config/feature_flags.py +81 -0
- tnfr/config/feature_flags.pyi +16 -0
- tnfr/config/glyph_constants.py +31 -0
- tnfr/config/init.py +77 -0
- tnfr/config/init.pyi +8 -0
- tnfr/config/operator_names.py +254 -0
- tnfr/config/operator_names.pyi +36 -0
- tnfr/config/physics_derivation.py +354 -0
- tnfr/config/presets.py +83 -0
- tnfr/config/presets.pyi +7 -0
- tnfr/config/security.py +927 -0
- tnfr/config/thresholds.py +114 -0
- tnfr/config/tnfr_config.py +498 -0
- tnfr/constants/__init__.py +92 -0
- tnfr/constants/__init__.pyi +92 -0
- tnfr/constants/aliases.py +33 -0
- tnfr/constants/aliases.pyi +27 -0
- tnfr/constants/init.py +33 -0
- tnfr/constants/init.pyi +12 -0
- tnfr/constants/metric.py +104 -0
- tnfr/constants/metric.pyi +19 -0
- tnfr/core/__init__.py +33 -0
- tnfr/core/container.py +226 -0
- tnfr/core/default_implementations.py +329 -0
- tnfr/core/interfaces.py +279 -0
- tnfr/dynamics/__init__.py +238 -0
- tnfr/dynamics/__init__.pyi +83 -0
- tnfr/dynamics/adaptation.py +267 -0
- tnfr/dynamics/adaptation.pyi +7 -0
- tnfr/dynamics/adaptive_sequences.py +189 -0
- tnfr/dynamics/adaptive_sequences.pyi +14 -0
- tnfr/dynamics/aliases.py +23 -0
- tnfr/dynamics/aliases.pyi +19 -0
- tnfr/dynamics/bifurcation.py +232 -0
- tnfr/dynamics/canonical.py +229 -0
- tnfr/dynamics/canonical.pyi +48 -0
- tnfr/dynamics/coordination.py +385 -0
- tnfr/dynamics/coordination.pyi +25 -0
- tnfr/dynamics/dnfr.py +3034 -0
- tnfr/dynamics/dnfr.pyi +26 -0
- tnfr/dynamics/dynamic_limits.py +225 -0
- tnfr/dynamics/feedback.py +252 -0
- tnfr/dynamics/feedback.pyi +24 -0
- tnfr/dynamics/fused_dnfr.py +454 -0
- tnfr/dynamics/homeostasis.py +157 -0
- tnfr/dynamics/homeostasis.pyi +14 -0
- tnfr/dynamics/integrators.py +661 -0
- tnfr/dynamics/integrators.pyi +36 -0
- tnfr/dynamics/learning.py +310 -0
- tnfr/dynamics/learning.pyi +33 -0
- tnfr/dynamics/metabolism.py +254 -0
- tnfr/dynamics/nbody.py +796 -0
- tnfr/dynamics/nbody_tnfr.py +783 -0
- tnfr/dynamics/propagation.py +326 -0
- tnfr/dynamics/runtime.py +908 -0
- tnfr/dynamics/runtime.pyi +77 -0
- tnfr/dynamics/sampling.py +36 -0
- tnfr/dynamics/sampling.pyi +7 -0
- tnfr/dynamics/selectors.py +711 -0
- tnfr/dynamics/selectors.pyi +85 -0
- tnfr/dynamics/structural_clip.py +207 -0
- tnfr/errors/__init__.py +37 -0
- tnfr/errors/contextual.py +492 -0
- tnfr/execution.py +223 -0
- tnfr/execution.pyi +45 -0
- tnfr/extensions/__init__.py +205 -0
- tnfr/extensions/__init__.pyi +18 -0
- tnfr/extensions/base.py +173 -0
- tnfr/extensions/base.pyi +35 -0
- tnfr/extensions/business/__init__.py +71 -0
- tnfr/extensions/business/__init__.pyi +11 -0
- tnfr/extensions/business/cookbook.py +88 -0
- tnfr/extensions/business/cookbook.pyi +8 -0
- tnfr/extensions/business/health_analyzers.py +202 -0
- tnfr/extensions/business/health_analyzers.pyi +9 -0
- tnfr/extensions/business/patterns.py +183 -0
- tnfr/extensions/business/patterns.pyi +8 -0
- tnfr/extensions/medical/__init__.py +73 -0
- tnfr/extensions/medical/__init__.pyi +11 -0
- tnfr/extensions/medical/cookbook.py +88 -0
- tnfr/extensions/medical/cookbook.pyi +8 -0
- tnfr/extensions/medical/health_analyzers.py +181 -0
- tnfr/extensions/medical/health_analyzers.pyi +9 -0
- tnfr/extensions/medical/patterns.py +163 -0
- tnfr/extensions/medical/patterns.pyi +8 -0
- tnfr/flatten.py +262 -0
- tnfr/flatten.pyi +21 -0
- tnfr/gamma.py +354 -0
- tnfr/gamma.pyi +36 -0
- tnfr/glyph_history.py +377 -0
- tnfr/glyph_history.pyi +35 -0
- tnfr/glyph_runtime.py +19 -0
- tnfr/glyph_runtime.pyi +8 -0
- tnfr/immutable.py +218 -0
- tnfr/immutable.pyi +36 -0
- tnfr/initialization.py +203 -0
- tnfr/initialization.pyi +65 -0
- tnfr/io.py +10 -0
- tnfr/io.pyi +13 -0
- tnfr/locking.py +37 -0
- tnfr/locking.pyi +7 -0
- tnfr/mathematics/__init__.py +79 -0
- tnfr/mathematics/backend.py +453 -0
- tnfr/mathematics/backend.pyi +99 -0
- tnfr/mathematics/dynamics.py +408 -0
- tnfr/mathematics/dynamics.pyi +90 -0
- tnfr/mathematics/epi.py +391 -0
- tnfr/mathematics/epi.pyi +65 -0
- tnfr/mathematics/generators.py +242 -0
- tnfr/mathematics/generators.pyi +29 -0
- tnfr/mathematics/metrics.py +119 -0
- tnfr/mathematics/metrics.pyi +16 -0
- tnfr/mathematics/operators.py +239 -0
- tnfr/mathematics/operators.pyi +59 -0
- tnfr/mathematics/operators_factory.py +124 -0
- tnfr/mathematics/operators_factory.pyi +11 -0
- tnfr/mathematics/projection.py +87 -0
- tnfr/mathematics/projection.pyi +33 -0
- tnfr/mathematics/runtime.py +182 -0
- tnfr/mathematics/runtime.pyi +64 -0
- tnfr/mathematics/spaces.py +256 -0
- tnfr/mathematics/spaces.pyi +83 -0
- tnfr/mathematics/transforms.py +305 -0
- tnfr/mathematics/transforms.pyi +62 -0
- tnfr/metrics/__init__.py +79 -0
- tnfr/metrics/__init__.pyi +20 -0
- tnfr/metrics/buffer_cache.py +163 -0
- tnfr/metrics/buffer_cache.pyi +24 -0
- tnfr/metrics/cache_utils.py +214 -0
- tnfr/metrics/coherence.py +2009 -0
- tnfr/metrics/coherence.pyi +129 -0
- tnfr/metrics/common.py +158 -0
- tnfr/metrics/common.pyi +35 -0
- tnfr/metrics/core.py +316 -0
- tnfr/metrics/core.pyi +13 -0
- tnfr/metrics/diagnosis.py +833 -0
- tnfr/metrics/diagnosis.pyi +86 -0
- tnfr/metrics/emergence.py +245 -0
- tnfr/metrics/export.py +179 -0
- tnfr/metrics/export.pyi +7 -0
- tnfr/metrics/glyph_timing.py +379 -0
- tnfr/metrics/glyph_timing.pyi +81 -0
- tnfr/metrics/learning_metrics.py +280 -0
- tnfr/metrics/learning_metrics.pyi +21 -0
- tnfr/metrics/phase_coherence.py +351 -0
- tnfr/metrics/phase_compatibility.py +349 -0
- tnfr/metrics/reporting.py +183 -0
- tnfr/metrics/reporting.pyi +25 -0
- tnfr/metrics/sense_index.py +1203 -0
- tnfr/metrics/sense_index.pyi +9 -0
- tnfr/metrics/trig.py +373 -0
- tnfr/metrics/trig.pyi +13 -0
- tnfr/metrics/trig_cache.py +233 -0
- tnfr/metrics/trig_cache.pyi +10 -0
- tnfr/multiscale/__init__.py +32 -0
- tnfr/multiscale/hierarchical.py +517 -0
- tnfr/node.py +763 -0
- tnfr/node.pyi +139 -0
- tnfr/observers.py +255 -130
- tnfr/observers.pyi +31 -0
- tnfr/ontosim.py +144 -137
- tnfr/ontosim.pyi +28 -0
- tnfr/operators/__init__.py +1672 -0
- tnfr/operators/__init__.pyi +31 -0
- tnfr/operators/algebra.py +277 -0
- tnfr/operators/canonical_patterns.py +420 -0
- tnfr/operators/cascade.py +267 -0
- tnfr/operators/cycle_detection.py +358 -0
- tnfr/operators/definitions.py +4108 -0
- tnfr/operators/definitions.pyi +78 -0
- tnfr/operators/grammar.py +1164 -0
- tnfr/operators/grammar.pyi +140 -0
- tnfr/operators/hamiltonian.py +710 -0
- tnfr/operators/health_analyzer.py +809 -0
- tnfr/operators/jitter.py +272 -0
- tnfr/operators/jitter.pyi +11 -0
- tnfr/operators/lifecycle.py +314 -0
- tnfr/operators/metabolism.py +618 -0
- tnfr/operators/metrics.py +2138 -0
- tnfr/operators/network_analysis/__init__.py +27 -0
- tnfr/operators/network_analysis/source_detection.py +186 -0
- tnfr/operators/nodal_equation.py +395 -0
- tnfr/operators/pattern_detection.py +660 -0
- tnfr/operators/patterns.py +669 -0
- tnfr/operators/postconditions/__init__.py +38 -0
- tnfr/operators/postconditions/mutation.py +236 -0
- tnfr/operators/preconditions/__init__.py +1226 -0
- tnfr/operators/preconditions/coherence.py +305 -0
- tnfr/operators/preconditions/dissonance.py +236 -0
- tnfr/operators/preconditions/emission.py +128 -0
- tnfr/operators/preconditions/mutation.py +580 -0
- tnfr/operators/preconditions/reception.py +125 -0
- tnfr/operators/preconditions/resonance.py +364 -0
- tnfr/operators/registry.py +74 -0
- tnfr/operators/registry.pyi +9 -0
- tnfr/operators/remesh.py +1809 -0
- tnfr/operators/remesh.pyi +26 -0
- tnfr/operators/structural_units.py +268 -0
- tnfr/operators/unified_grammar.py +105 -0
- tnfr/parallel/__init__.py +54 -0
- tnfr/parallel/auto_scaler.py +234 -0
- tnfr/parallel/distributed.py +384 -0
- tnfr/parallel/engine.py +238 -0
- tnfr/parallel/gpu_engine.py +420 -0
- tnfr/parallel/monitoring.py +248 -0
- tnfr/parallel/partitioner.py +459 -0
- tnfr/py.typed +0 -0
- tnfr/recipes/__init__.py +22 -0
- tnfr/recipes/cookbook.py +743 -0
- tnfr/rng.py +178 -0
- tnfr/rng.pyi +26 -0
- tnfr/schemas/__init__.py +8 -0
- tnfr/schemas/grammar.json +94 -0
- tnfr/sdk/__init__.py +107 -0
- tnfr/sdk/__init__.pyi +19 -0
- tnfr/sdk/adaptive_system.py +173 -0
- tnfr/sdk/adaptive_system.pyi +21 -0
- tnfr/sdk/builders.py +370 -0
- tnfr/sdk/builders.pyi +51 -0
- tnfr/sdk/fluent.py +1121 -0
- tnfr/sdk/fluent.pyi +74 -0
- tnfr/sdk/templates.py +342 -0
- tnfr/sdk/templates.pyi +41 -0
- tnfr/sdk/utils.py +341 -0
- tnfr/secure_config.py +46 -0
- tnfr/security/__init__.py +70 -0
- tnfr/security/database.py +514 -0
- tnfr/security/subprocess.py +503 -0
- tnfr/security/validation.py +290 -0
- tnfr/selector.py +247 -0
- tnfr/selector.pyi +19 -0
- tnfr/sense.py +378 -0
- tnfr/sense.pyi +23 -0
- tnfr/services/__init__.py +17 -0
- tnfr/services/orchestrator.py +325 -0
- tnfr/sparse/__init__.py +39 -0
- tnfr/sparse/representations.py +492 -0
- tnfr/structural.py +705 -0
- tnfr/structural.pyi +83 -0
- tnfr/telemetry/__init__.py +35 -0
- tnfr/telemetry/cache_metrics.py +226 -0
- tnfr/telemetry/cache_metrics.pyi +64 -0
- tnfr/telemetry/nu_f.py +422 -0
- tnfr/telemetry/nu_f.pyi +108 -0
- tnfr/telemetry/verbosity.py +36 -0
- tnfr/telemetry/verbosity.pyi +15 -0
- tnfr/tokens.py +58 -0
- tnfr/tokens.pyi +36 -0
- tnfr/tools/__init__.py +20 -0
- tnfr/tools/domain_templates.py +478 -0
- tnfr/tools/sequence_generator.py +846 -0
- tnfr/topology/__init__.py +13 -0
- tnfr/topology/asymmetry.py +151 -0
- tnfr/trace.py +543 -0
- tnfr/trace.pyi +42 -0
- tnfr/tutorials/__init__.py +38 -0
- tnfr/tutorials/autonomous_evolution.py +285 -0
- tnfr/tutorials/interactive.py +1576 -0
- tnfr/tutorials/structural_metabolism.py +238 -0
- tnfr/types.py +775 -0
- tnfr/types.pyi +357 -0
- tnfr/units.py +68 -0
- tnfr/units.pyi +13 -0
- tnfr/utils/__init__.py +282 -0
- tnfr/utils/__init__.pyi +215 -0
- tnfr/utils/cache.py +4223 -0
- tnfr/utils/cache.pyi +470 -0
- tnfr/utils/callbacks.py +375 -0
- tnfr/utils/callbacks.pyi +49 -0
- tnfr/utils/chunks.py +108 -0
- tnfr/utils/chunks.pyi +22 -0
- tnfr/utils/data.py +428 -0
- tnfr/utils/data.pyi +74 -0
- tnfr/utils/graph.py +85 -0
- tnfr/utils/graph.pyi +10 -0
- tnfr/utils/init.py +821 -0
- tnfr/utils/init.pyi +80 -0
- tnfr/utils/io.py +559 -0
- tnfr/utils/io.pyi +66 -0
- tnfr/utils/numeric.py +114 -0
- tnfr/utils/numeric.pyi +21 -0
- tnfr/validation/__init__.py +257 -0
- tnfr/validation/__init__.pyi +85 -0
- tnfr/validation/compatibility.py +460 -0
- tnfr/validation/compatibility.pyi +6 -0
- tnfr/validation/config.py +73 -0
- tnfr/validation/graph.py +139 -0
- tnfr/validation/graph.pyi +18 -0
- tnfr/validation/input_validation.py +755 -0
- tnfr/validation/invariants.py +712 -0
- tnfr/validation/rules.py +253 -0
- tnfr/validation/rules.pyi +44 -0
- tnfr/validation/runtime.py +279 -0
- tnfr/validation/runtime.pyi +28 -0
- tnfr/validation/sequence_validator.py +162 -0
- tnfr/validation/soft_filters.py +170 -0
- tnfr/validation/soft_filters.pyi +32 -0
- tnfr/validation/spectral.py +164 -0
- tnfr/validation/spectral.pyi +42 -0
- tnfr/validation/validator.py +1266 -0
- tnfr/validation/window.py +39 -0
- tnfr/validation/window.pyi +1 -0
- tnfr/visualization/__init__.py +98 -0
- tnfr/visualization/cascade_viz.py +256 -0
- tnfr/visualization/hierarchy.py +284 -0
- tnfr/visualization/sequence_plotter.py +784 -0
- tnfr/viz/__init__.py +60 -0
- tnfr/viz/matplotlib.py +278 -0
- tnfr/viz/matplotlib.pyi +35 -0
- tnfr-8.5.0.dist-info/METADATA +573 -0
- tnfr-8.5.0.dist-info/RECORD +353 -0
- tnfr-8.5.0.dist-info/entry_points.txt +3 -0
- tnfr-3.0.3.dist-info/licenses/LICENSE.txt → tnfr-8.5.0.dist-info/licenses/LICENSE.md +1 -1
- tnfr/constants.py +0 -183
- tnfr/dynamics.py +0 -543
- tnfr/helpers.py +0 -198
- tnfr/main.py +0 -37
- tnfr/operators.py +0 -296
- tnfr-3.0.3.dist-info/METADATA +0 -35
- tnfr-3.0.3.dist-info/RECORD +0 -13
- {tnfr-3.0.3.dist-info → tnfr-8.5.0.dist-info}/WHEEL +0 -0
- {tnfr-3.0.3.dist-info → tnfr-8.5.0.dist-info}/top_level.txt +0 -0
tnfr/utils/cache.py
ADDED
@@ -0,0 +1,4223 @@
"""Cache infrastructure primitives and graph-level helpers for TNFR.

This module consolidates structural cache helpers that previously lived in
legacy helper modules and are now exposed under :mod:`tnfr.utils`. The
functions exposed here are responsible for maintaining deterministic node
digests, scoped graph caches guarded by locks, and version counters that keep
edge artifacts in sync with ΔNFR driven updates.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
import hashlib
import hmac
import logging
import os
import pickle
import shelve
import threading
import warnings
from collections import defaultdict
from collections.abc import (
    Callable,
    Hashable,
    Iterable,
    Iterator,
    Mapping,
    MutableMapping,
)
from contextlib import contextmanager
from dataclasses import field
from enum import Enum
from functools import lru_cache, wraps
import sys
import time
from time import perf_counter
from typing import Any, Generic, Optional, TypeVar, cast

import networkx as nx
from cachetools import LRUCache

from ..compat.dataclass import dataclass

from ..locking import get_lock
from ..types import GraphLike, NodeId, TimingContext, TNFRGraph
from .graph import get_graph, mark_dnfr_prep_dirty

K = TypeVar("K", bound=Hashable)
V = TypeVar("V")
T = TypeVar("T")


class SecurityError(RuntimeError):
    """Raised when a cache payload fails hardened validation."""


class SecurityWarning(UserWarning):
    """Issued when potentially unsafe serialization is used without signing."""


__all__ = (
    "CacheLayer",
    "CacheManager",
    "CacheCapacityConfig",
    "CacheStatistics",
    "InstrumentedLRUCache",
    "ManagedLRUCache",
    "MappingCacheLayer",
    "RedisCacheLayer",
    "ShelveCacheLayer",
    "SecurityError",
    "SecurityWarning",
    "create_hmac_signer",
    "create_hmac_validator",
    "create_secure_shelve_layer",
    "create_secure_redis_layer",
    "prune_lock_mapping",
    "EdgeCacheManager",
    "NODE_SET_CHECKSUM_KEY",
    "cached_node_list",
    "cached_nodes_and_A",
    "clear_node_repr_cache",
    "edge_version_cache",
    "edge_version_update",
    "ensure_node_index_map",
    "ensure_node_offset_map",
    "get_graph_version",
    "increment_edge_version",
    "increment_graph_version",
    "node_set_checksum",
    "stable_json",
    "configure_graph_cache_limits",
    "DNFR_PREP_STATE_KEY",
    "DnfrPrepState",
    "build_cache_manager",
    "configure_global_cache_layers",
    "reset_global_cache_manager",
    "_GRAPH_CACHE_LAYERS_KEY",
    "_SeedHashCache",
    "ScopedCounterCache",
    "DnfrCache",
    "new_dnfr_cache",
    # Hierarchical cache classes (moved from caching/)
    "CacheLevel",
    "CacheEntry",
    "TNFRHierarchicalCache",
    # Cache decorators (moved from caching/decorators.py)
    "cache_tnfr_computation",
    "invalidate_function_cache",
    "get_global_cache",
    "set_global_cache",
    "reset_global_cache",
    # Invalidation tracking (moved from caching/invalidation.py)
    "GraphChangeTracker",
    "track_node_property_update",
    # Persistence (moved from caching/persistence.py)
    "PersistentTNFRCache",
)

_SIGNATURE_PREFIX = b"TNFRSIG1"
_SIGN_MODE_RAW = 0
_SIGN_MODE_PICKLE = 1
_SIGNATURE_HEADER_SIZE = len(_SIGNATURE_PREFIX) + 1 + 4

# Environment variable to control security warnings for pickle deserialization
_TNFR_ALLOW_UNSIGNED_PICKLE = "TNFR_ALLOW_UNSIGNED_PICKLE"


def create_hmac_signer(secret: bytes | str) -> Callable[[bytes], bytes]:
    """Create an HMAC-SHA256 signer for cache layer signature validation.

    Parameters
    ----------
    secret : bytes or str
        The secret key for HMAC signing. If str, it will be encoded as UTF-8.

    Returns
    -------
    callable
        A function that takes payload bytes and returns an HMAC signature.

    Examples
    --------
    >>> import os
    >>> secret = os.environ.get("TNFR_CACHE_SECRET", "dev-secret-key")
    >>> signer = create_hmac_signer(secret)
    >>> validator = create_hmac_validator(secret)
    >>> layer = ShelveCacheLayer(
    ...     "cache.db",
    ...     signer=signer,
    ...     validator=validator,
    ...     require_signature=True
    ... )
    """
    secret_bytes = secret if isinstance(secret, bytes) else secret.encode("utf-8")

    def signer(payload: bytes) -> bytes:
        return hmac.new(secret_bytes, payload, hashlib.sha256).digest()

    return signer


def create_hmac_validator(secret: bytes | str) -> Callable[[bytes, bytes], bool]:
    """Create an HMAC-SHA256 validator for cache layer signature validation.

    Parameters
    ----------
    secret : bytes or str
        The secret key for HMAC validation. Must match the signer's secret.
        If str, it will be encoded as UTF-8.

    Returns
    -------
    callable
        A function that takes (payload_bytes, signature) and returns True
        if the signature is valid.

    See Also
    --------
    create_hmac_signer : Create the corresponding signer.
    """
    secret_bytes = secret if isinstance(secret, bytes) else secret.encode("utf-8")

    def validator(payload: bytes, signature: bytes) -> bool:
        expected = hmac.new(secret_bytes, payload, hashlib.sha256).digest()
        return hmac.compare_digest(expected, signature)

    return validator


def create_secure_shelve_layer(
    path: str,
    secret: bytes | str | None = None,
    *,
    flag: str = "c",
    protocol: int | None = None,
    writeback: bool = False,
) -> ShelveCacheLayer:
    """Create a ShelveCacheLayer with HMAC signature validation enabled.

    This is the recommended way to create persistent cache layers that handle
    TNFR structures (EPI, NFR, NetworkX graphs). Signature validation protects
    against arbitrary code execution from tampered pickle data.

    Parameters
    ----------
    path : str
        Path to the shelve database file.
    secret : bytes, str, or None
        Secret key for HMAC signing. If None, reads from TNFR_CACHE_SECRET
        environment variable. In production, **always** set this via environment.
    flag : str, default='c'
        Shelve open flag ('r', 'w', 'c', 'n').
    protocol : int, optional
        Pickle protocol version. Defaults to pickle.HIGHEST_PROTOCOL.
    writeback : bool, default=False
        Enable shelve writeback mode.

    Returns
    -------
    ShelveCacheLayer
        A cache layer with signature validation enabled.

    Raises
    ------
    ValueError
        If no secret is provided and TNFR_CACHE_SECRET is not set.

    Examples
    --------
    >>> # In production, set environment variable:
    >>> # export TNFR_CACHE_SECRET="your-secure-random-key"
    >>>
    >>> layer = create_secure_shelve_layer("coherence.db")
    >>> # Or explicitly provide secret:
    >>> layer = create_secure_shelve_layer("coherence.db", secret=b"my-secret")
    """
    if secret is None:
        secret = os.environ.get("TNFR_CACHE_SECRET")
    if not secret:
        raise ValueError(
            "Secret required for secure cache layer. "
            "Set TNFR_CACHE_SECRET environment variable or pass secret parameter."
        )

    signer = create_hmac_signer(secret)
    validator = create_hmac_validator(secret)

    return ShelveCacheLayer(
        path,
        flag=flag,
        protocol=protocol,
        writeback=writeback,
        signer=signer,
        validator=validator,
        require_signature=True,
    )


def create_secure_redis_layer(
    client: Any | None = None,
    secret: bytes | str | None = None,
    *,
    namespace: str = "tnfr:cache",
    protocol: int | None = None,
) -> RedisCacheLayer:
    """Create a RedisCacheLayer with HMAC signature validation enabled.

    This is the recommended way to create distributed cache layers for TNFR.
    Signature validation protects against arbitrary code execution if Redis
    is compromised or contains tampered data.

    Parameters
    ----------
    client : redis.Redis, optional
        Redis client instance. If None, creates default client.
    secret : bytes, str, or None
        Secret key for HMAC signing. If None, reads from TNFR_CACHE_SECRET
        environment variable.
    namespace : str, default='tnfr:cache'
        Redis key namespace prefix.
    protocol : int, optional
        Pickle protocol version.

    Returns
    -------
    RedisCacheLayer
        A cache layer with signature validation enabled.

    Raises
    ------
    ValueError
        If no secret is provided and TNFR_CACHE_SECRET is not set.

    Examples
    --------
    >>> # Set environment variable in production:
    >>> # export TNFR_CACHE_SECRET="your-secure-random-key"
    >>>
    >>> layer = create_secure_redis_layer()
    >>> # Or with explicit configuration:
    >>> import redis
    >>> client = redis.Redis(host='localhost', port=6379)
    >>> layer = create_secure_redis_layer(client, secret=b"my-secret")
    """
    if secret is None:
        secret = os.environ.get("TNFR_CACHE_SECRET")
    if not secret:
        raise ValueError(
            "Secret required for secure cache layer. "
            "Set TNFR_CACHE_SECRET environment variable or pass secret parameter."
        )

    signer = create_hmac_signer(secret)
    validator = create_hmac_validator(secret)

    return RedisCacheLayer(
        client=client,
        namespace=namespace,
        signer=signer,
        validator=validator,
        require_signature=True,
        protocol=protocol,
    )


def _prepare_payload_bytes(value: Any, *, protocol: int) -> tuple[int, bytes]:
    """Return payload encoding mode and the bytes that should be signed."""

    if isinstance(value, (bytes, bytearray, memoryview)):
        return _SIGN_MODE_RAW, bytes(value)
    return _SIGN_MODE_PICKLE, pickle.dumps(value, protocol=protocol)


def _pack_signed_envelope(mode: int, payload: bytes, signature: bytes) -> bytes:
    """Pack payload and signature into a self-describing binary envelope."""

    if not (0 <= mode <= 255):  # pragma: no cover - defensive guard
        raise ValueError(f"invalid payload mode: {mode}")
    signature_length = len(signature)
    if signature_length >= 2**32:  # pragma: no cover - defensive guard
        raise ValueError("signature too large to encode")
    header = (
        _SIGNATURE_PREFIX
        + bytes([mode])
        + signature_length.to_bytes(4, byteorder="big", signed=False)
    )
    return header + signature + payload


def _is_signed_envelope(blob: bytes) -> bool:
    """Return ``True`` when *blob* represents a signed cache entry."""

    return blob.startswith(_SIGNATURE_PREFIX)


def _unpack_signed_envelope(blob: bytes) -> tuple[int, bytes, bytes]:
    """Return the ``(mode, signature, payload)`` triple encoded in *blob*."""

    if len(blob) < _SIGNATURE_HEADER_SIZE:
        raise SecurityError("signed payload header truncated")
    if not _is_signed_envelope(blob):
        raise SecurityError("missing signed payload marker")
    mode = blob[len(_SIGNATURE_PREFIX)]
    sig_start = len(_SIGNATURE_PREFIX) + 1
    sig_len = int.from_bytes(blob[sig_start : sig_start + 4], byteorder="big")
    payload_start = sig_start + 4 + sig_len
    if len(blob) < payload_start:
        raise SecurityError("signed payload signature truncated")
    signature = blob[sig_start + 4 : payload_start]
    payload = blob[payload_start:]
    return mode, signature, payload


def _decode_payload(mode: int, payload: bytes) -> Any:
    """Decode payload bytes depending on cache encoding *mode*."""

    if mode == _SIGN_MODE_RAW:
        return payload
    if mode == _SIGN_MODE_PICKLE:
        return pickle.loads(payload)  # nosec B301 - validated via signature
    raise SecurityError(f"unknown payload encoding mode: {mode}")

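
# Illustrative aside (not part of the packaged file): the envelope helpers
# above give a complete sign/verify round trip. A minimal sketch, assuming a
# throwaway development secret rather than TNFR_CACHE_SECRET:
#
#     signer = create_hmac_signer(b"dev-secret")
#     validator = create_hmac_validator(b"dev-secret")
#     mode, payload = _prepare_payload_bytes(
#         {"epi": 0.42}, protocol=pickle.HIGHEST_PROTOCOL
#     )
#     blob = _pack_signed_envelope(mode, payload, signer(payload))
#     assert _is_signed_envelope(blob)
#     mode2, signature, payload2 = _unpack_signed_envelope(blob)
#     assert validator(payload2, signature)            # constant-time HMAC check
#     assert _decode_payload(mode2, payload2) == {"epi": 0.42}
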
@dataclass(frozen=True)
class CacheCapacityConfig:
    """Configuration snapshot for cache capacity policies."""

    default_capacity: int | None
    overrides: dict[str, int | None]


@dataclass(frozen=True)
class CacheStatistics:
    """Immutable snapshot of cache telemetry counters."""

    hits: int = 0
    misses: int = 0
    evictions: int = 0
    total_time: float = 0.0
    timings: int = 0

    def merge(self, other: CacheStatistics) -> CacheStatistics:
        """Return aggregated metrics combining ``self`` and ``other``."""

        return CacheStatistics(
            hits=self.hits + other.hits,
            misses=self.misses + other.misses,
            evictions=self.evictions + other.evictions,
            total_time=self.total_time + other.total_time,
            timings=self.timings + other.timings,
        )

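
# Illustrative aside (not part of the packaged file): telemetry snapshots merge
# additively, so per-cache counters can be rolled up into a single report.
# Sketch, assuming the compat dataclass shim keeps stdlib equality semantics:
#
#     a = CacheStatistics(hits=10, misses=2, timings=12)
#     b = CacheStatistics(hits=3, misses=1, evictions=1, timings=4)
#     assert a.merge(b) == CacheStatistics(
#         hits=13, misses=3, evictions=1, timings=16
#     )
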
@dataclass
class DnfrCache:
    idx: dict[Any, int]
    theta: list[float]
    epi: list[float]
    vf: list[float]
    cos_theta: list[float]
    sin_theta: list[float]
    neighbor_x: list[float]
    neighbor_y: list[float]
    neighbor_epi_sum: list[float]
    neighbor_vf_sum: list[float]
    neighbor_count: list[float]
    neighbor_deg_sum: list[float] | None
    th_bar: list[float] | None = None
    epi_bar: list[float] | None = None
    vf_bar: list[float] | None = None
    deg_bar: list[float] | None = None
    degs: dict[Any, float] | None = None
    deg_list: list[float] | None = None
    theta_np: Any | None = None
    epi_np: Any | None = None
    vf_np: Any | None = None
    cos_theta_np: Any | None = None
    sin_theta_np: Any | None = None
    deg_array: Any | None = None
    edge_src: Any | None = None
    edge_dst: Any | None = None
    checksum: Any | None = None
    neighbor_x_np: Any | None = None
    neighbor_y_np: Any | None = None
    neighbor_epi_sum_np: Any | None = None
    neighbor_vf_sum_np: Any | None = None
    neighbor_count_np: Any | None = None
    neighbor_deg_sum_np: Any | None = None
    th_bar_np: Any | None = None
    epi_bar_np: Any | None = None
    vf_bar_np: Any | None = None
    deg_bar_np: Any | None = None
    grad_phase_np: Any | None = None
    grad_epi_np: Any | None = None
    grad_vf_np: Any | None = None
    grad_topo_np: Any | None = None
    grad_total_np: Any | None = None
    dense_components_np: Any | None = None
    dense_accum_np: Any | None = None
    dense_degree_np: Any | None = None
    neighbor_accum_np: Any | None = None
    neighbor_inv_count_np: Any | None = None
    neighbor_cos_avg_np: Any | None = None
    neighbor_sin_avg_np: Any | None = None
    neighbor_mean_tmp_np: Any | None = None
    neighbor_mean_length_np: Any | None = None
    edge_signature: Any | None = None
    neighbor_accum_signature: Any | None = None
    neighbor_edge_values_np: Any | None = None


def new_dnfr_cache() -> DnfrCache:
    """Return an empty :class:`DnfrCache` prepared for ΔNFR orchestration."""

    return DnfrCache(
        idx={},
        theta=[],
        epi=[],
        vf=[],
        cos_theta=[],
        sin_theta=[],
        neighbor_x=[],
        neighbor_y=[],
        neighbor_epi_sum=[],
        neighbor_vf_sum=[],
        neighbor_count=[],
        neighbor_deg_sum=[],
    )

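
# Illustrative aside (not part of the packaged file): ``new_dnfr_cache`` only
# seeds the list-backed per-node mirrors; every ``*_np`` field stays ``None``
# until some vectorised ΔNFR path materialises it (an assumption drawn from
# the field layout above, not stated in this diff). Sketch:
#
#     cache = new_dnfr_cache()
#     cache.idx.update({"n0": 0, "n1": 1})
#     cache.theta.extend([0.0, 1.57])     # list mirrors filled per node
#     assert cache.theta_np is None       # numpy mirror not materialised yet
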
@dataclass
class _CacheMetrics:
    hits: int = 0
    misses: int = 0
    evictions: int = 0
    total_time: float = 0.0
    timings: int = 0
    lock: threading.Lock = field(default_factory=threading.Lock, repr=False)

    def snapshot(self) -> CacheStatistics:
        return CacheStatistics(
            hits=self.hits,
            misses=self.misses,
            evictions=self.evictions,
            total_time=self.total_time,
            timings=self.timings,
        )


@dataclass
class _CacheEntry:
    factory: Callable[[], Any]
    lock: threading.Lock
    reset: Callable[[Any], Any] | None = None
    encoder: Callable[[Any], Any] | None = None
    decoder: Callable[[Any], Any] | None = None


class CacheLayer(ABC):
    """Abstract interface implemented by storage backends orchestrated by :class:`CacheManager`."""

    @abstractmethod
    def load(self, name: str) -> Any:
        """Return the stored payload for ``name`` or raise :class:`KeyError`."""

    @abstractmethod
    def store(self, name: str, value: Any) -> None:
        """Persist ``value`` under ``name``."""

    @abstractmethod
    def delete(self, name: str) -> None:
        """Remove ``name`` from the backend if present."""

    @abstractmethod
    def clear(self) -> None:
        """Remove every entry maintained by the layer."""

    def close(self) -> None:  # pragma: no cover - optional hook
        """Release resources held by the backend."""


class MappingCacheLayer(CacheLayer):
    """In-memory cache layer backed by a mutable mapping."""

    def __init__(self, storage: MutableMapping[str, Any] | None = None) -> None:
        self._storage: MutableMapping[str, Any] = {} if storage is None else storage
        self._lock = threading.RLock()

    @property
    def storage(self) -> MutableMapping[str, Any]:
        """Return the mapping used to store cache entries."""

        return self._storage

    def load(self, name: str) -> Any:
        with self._lock:
            if name not in self._storage:
                raise KeyError(name)
            return self._storage[name]

    def store(self, name: str, value: Any) -> None:
        with self._lock:
            self._storage[name] = value

    def delete(self, name: str) -> None:
        with self._lock:
            self._storage.pop(name, None)

    def clear(self) -> None:
        with self._lock:
            self._storage.clear()


class ShelveCacheLayer(CacheLayer):
    """Persistent cache layer backed by :mod:`shelve`.

    .. warning::
        This layer uses :mod:`pickle` for serialization, which can deserialize
        arbitrary Python objects and execute code during deserialization.
        **Only use with trusted data** from controlled sources. Never load
        shelf files from untrusted origins without cryptographic verification.

        Pickle is required for TNFR's complex structures (NetworkX graphs, EPIs,
        coherence states, numpy arrays). For untrusted inputs, enable
        :term:`HMAC` or equivalent signing via ``signer``/``validator`` and
        set ``require_signature=True`` to reject tampered payloads.

    :param signer: Optional callable that receives payload bytes and returns a
        signature (for example ``lambda payload: hmac.new(key, payload,
        hashlib.sha256).digest()``).
    :param validator: Optional callable that receives ``(payload_bytes,
        signature)`` and returns ``True`` when the payload is trustworthy.
    :param require_signature: When ``True`` the cache operates in hardened
        mode, deleting entries whose signatures are missing or invalid and
        raising :class:`SecurityError`.
    """

    def __init__(
        self,
        path: str,
        *,
        flag: str = "c",
        protocol: int | None = None,
        writeback: bool = False,
        signer: Callable[[bytes], bytes] | None = None,
        validator: Callable[[bytes, bytes], bool] | None = None,
        require_signature: bool = False,
    ) -> None:
        # Validate cache file path to prevent path traversal
        from ..security import validate_file_path, PathTraversalError

        try:
            validated_path = validate_file_path(
                path,
                allow_absolute=True,
                allowed_extensions=None,  # Shelve creates multiple files with various extensions
            )
            self._path = str(validated_path)
        except (ValueError, PathTraversalError) as e:
            raise ValueError(f"Invalid cache path {path!r}: {e}") from e

        self._flag = flag
        self._protocol = pickle.HIGHEST_PROTOCOL if protocol is None else protocol
        # shelve module inherently uses pickle for serialization; security risks documented in class docstring
        self._shelf = shelve.open(
            self._path, flag=flag, protocol=self._protocol, writeback=writeback
        )  # nosec B301
        self._lock = threading.RLock()
        self._signer = signer
        self._validator = validator
        self._require_signature = require_signature
        if require_signature and (signer is None or validator is None):
            raise ValueError(
                "require_signature=True requires both signer and validator"
            )

        # Issue security warning when using unsigned pickle deserialization
        if not require_signature and os.environ.get(_TNFR_ALLOW_UNSIGNED_PICKLE) != "1":
            warnings.warn(
                f"ShelveCacheLayer at {path!r} uses pickle without signature validation. "
                "This can execute arbitrary code during deserialization. "
                "Use create_secure_shelve_layer() or set require_signature=True with signer/validator. "
                f"To suppress this warning, set {_TNFR_ALLOW_UNSIGNED_PICKLE}=1 environment variable.",
                SecurityWarning,
                stacklevel=2,
            )

    def load(self, name: str) -> Any:
        with self._lock:
            if name not in self._shelf:
                raise KeyError(name)
            entry = self._shelf[name]

        return self._decode_entry(name, entry)

    def store(self, name: str, value: Any) -> None:
        if self._signer is None:
            stored_value: Any = value
        else:
            mode, payload = _prepare_payload_bytes(value, protocol=self._protocol)
            signature = self._signer(payload)
            stored_value = _pack_signed_envelope(mode, payload, signature)
        with self._lock:
            self._shelf[name] = stored_value
            self._shelf.sync()

    def delete(self, name: str) -> None:
        with self._lock:
            try:
                del self._shelf[name]
            except KeyError:
                return
            self._shelf.sync()

    def clear(self) -> None:
        with self._lock:
            self._shelf.clear()
            self._shelf.sync()

    def close(self) -> None:  # pragma: no cover - exercised indirectly
        with self._lock:
            self._shelf.close()

    def _decode_entry(self, name: str, entry: Any) -> Any:
        if isinstance(entry, (bytes, bytearray, memoryview)):
            blob = bytes(entry)
            if _is_signed_envelope(blob):
                try:
                    mode, signature, payload = _unpack_signed_envelope(blob)
                except SecurityError:
                    self.delete(name)
                    raise
                validator = self._validator
                if validator is None:
                    if self._require_signature:
                        self.delete(name)
                        raise SecurityError(
                            "signature validation requested but no validator configured"
                        )
                else:
                    try:
                        valid = validator(payload, signature)
                    except Exception as exc:  # pragma: no cover - defensive
                        self.delete(name)
                        raise SecurityError(
                            "signature validator raised an exception"
                        ) from exc
                    if not valid:
                        self.delete(name)
                        raise SecurityError(
                            f"signature validation failed for cache entry {name!r}"
                        )
                try:
                    return _decode_payload(mode, payload)
                except Exception as exc:
                    self.delete(name)
                    raise SecurityError("signed payload decode failure") from exc
            if self._require_signature:
                self.delete(name)
                raise SecurityError(f"unsigned cache entry rejected: {name}")
            return blob
        if self._require_signature:
            self.delete(name)
            raise SecurityError(f"unsigned cache entry rejected: {name}")
        return entry

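
# Illustrative aside (not part of the packaged file): in hardened mode the
# shelve layer verifies the HMAC envelope on every load and evicts entries
# that fail. Sketch, assuming a writable path and an available dbm backend:
#
#     layer = create_secure_shelve_layer("coherence.db", secret=b"dev-secret")
#     layer.store("nodes", [1, 2, 3])            # pickled, signed, synced
#     assert layer.load("nodes") == [1, 2, 3]    # signature checked before unpickling
#     # A tampered or unsigned entry is deleted and raises SecurityError.
#     layer.close()
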
class RedisCacheLayer(CacheLayer):
    """Distributed cache layer backed by a Redis client.

    .. warning::
        This layer uses :mod:`pickle` for serialization, which can deserialize
        arbitrary Python objects and execute code during deserialization.
        **Only cache trusted data** from controlled TNFR nodes. Ensure Redis
        uses authentication (AUTH command or ACL for Redis 6.0+) and network
        access controls. Never cache untrusted user input or external data.

        If Redis is compromised or contains tampered data, pickle deserialization
        executes arbitrary code. Use TLS for connections and enable signature
        validation (``signer``/``validator`` with ``require_signature=True``)
        in high-assurance deployments.

    :param signer: Optional callable that produces a signature for payload bytes
        before they are written to Redis.
    :param validator: Optional callable that validates ``(payload_bytes,
        signature)`` during loads.
    :param require_signature: Enable hardened mode that deletes and rejects
        cache entries whose signatures are missing or invalid, raising
        :class:`SecurityError`.
    """

    def __init__(
        self,
        client: Any | None = None,
        *,
        namespace: str = "tnfr:cache",
        signer: Callable[[bytes], bytes] | None = None,
        validator: Callable[[bytes, bytes], bool] | None = None,
        require_signature: bool = False,
        protocol: int | None = None,
    ) -> None:
        if client is None:
            try:  # pragma: no cover - import guarded for optional dependency
                import redis  # type: ignore
            except Exception as exc:  # pragma: no cover - defensive import
                raise RuntimeError(
                    "redis-py is required to initialise RedisCacheLayer"
                ) from exc
            client = redis.Redis()
        self._client = client
        self._namespace = namespace.rstrip(":") or "tnfr:cache"
        self._lock = threading.RLock()
        self._signer = signer
        self._validator = validator
        self._require_signature = require_signature
        self._protocol = pickle.HIGHEST_PROTOCOL if protocol is None else protocol
        if require_signature and (signer is None or validator is None):
            raise ValueError(
                "require_signature=True requires both signer and validator"
            )

        # Issue security warning when using unsigned pickle deserialization
        if not require_signature and os.environ.get(_TNFR_ALLOW_UNSIGNED_PICKLE) != "1":
            warnings.warn(
                f"RedisCacheLayer with namespace {namespace!r} uses pickle without signature validation. "
                "This can execute arbitrary code if Redis is compromised. "
                "Use create_secure_redis_layer() or set require_signature=True with signer/validator. "
                f"To suppress this warning, set {_TNFR_ALLOW_UNSIGNED_PICKLE}=1 environment variable.",
                SecurityWarning,
                stacklevel=2,
            )

    def _format_key(self, name: str) -> str:
        return f"{self._namespace}:{name}"

    def load(self, name: str) -> Any:
        key = self._format_key(name)
        with self._lock:
            value = self._client.get(key)
        if value is None:
            raise KeyError(name)
        if isinstance(value, (bytes, bytearray, memoryview)):
            blob = bytes(value)
            if _is_signed_envelope(blob):
                try:
                    mode, signature, payload = _unpack_signed_envelope(blob)
                except SecurityError:
                    self.delete(name)
                    raise
                validator = self._validator
                if validator is None:
                    if self._require_signature:
                        self.delete(name)
                        raise SecurityError(
                            "signature validation requested but no validator configured"
                        )
                else:
                    try:
                        valid = validator(payload, signature)
                    except Exception as exc:  # pragma: no cover - defensive
                        self.delete(name)
                        raise SecurityError(
                            "signature validator raised an exception"
                        ) from exc
                    if not valid:
                        self.delete(name)
                        raise SecurityError(
                            f"signature validation failed for cache entry {name!r}"
                        )
                try:
                    return _decode_payload(mode, payload)
                except Exception as exc:
                    self.delete(name)
                    raise SecurityError("signed payload decode failure") from exc
            if self._require_signature:
                self.delete(name)
                raise SecurityError(f"unsigned cache entry rejected: {name}")
            # pickle from trusted Redis; documented security warning in class docstring
            return pickle.loads(blob)  # nosec B301
        return value

    def store(self, name: str, value: Any) -> None:
        key = self._format_key(name)
        if self._signer is None:
            payload: Any = value
            if not isinstance(value, (bytes, bytearray, memoryview)):
                payload = pickle.dumps(value, protocol=self._protocol)
        else:
            mode, payload_bytes = _prepare_payload_bytes(value, protocol=self._protocol)
            signature = self._signer(payload_bytes)
            payload = _pack_signed_envelope(mode, payload_bytes, signature)
        with self._lock:
            self._client.set(key, payload)

    def delete(self, name: str) -> None:
        key = self._format_key(name)
        with self._lock:
            self._client.delete(key)

    def clear(self) -> None:
        pattern = f"{self._namespace}:*"
        with self._lock:
            if hasattr(self._client, "scan_iter"):
                keys = list(self._client.scan_iter(match=pattern))
            elif hasattr(self._client, "keys"):
                keys = list(self._client.keys(pattern))
            else:  # pragma: no cover - extremely defensive
                keys = []
            if keys:
                self._client.delete(*keys)

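
# Illustrative aside (not part of the packaged file): keys live under a single
# namespace, so ``clear()`` only touches this layer's own entries. Sketch,
# assuming redis-py is installed and a server is reachable:
#
#     import redis
#     layer = create_secure_redis_layer(
#         redis.Redis(), secret=b"dev-secret", namespace="tnfr:test"
#     )
#     layer.store("Si", {"mean": 0.87})
#     assert layer.load("Si") == {"mean": 0.87}
#     layer.clear()   # deletes every key matching "tnfr:test:*" via SCAN
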
class CacheManager:
    """Coordinate named caches guarded by per-entry locks."""

    _MISSING = object()

    def __init__(
        self,
        storage: MutableMapping[str, Any] | None = None,
        *,
        default_capacity: int | None = None,
        overrides: Mapping[str, int | None] | None = None,
        layers: Iterable[CacheLayer] | None = None,
    ) -> None:
        mapping_layer = MappingCacheLayer(storage)
        extra_layers: tuple[CacheLayer, ...]
        if layers is None:
            extra_layers = ()
        else:
            extra_layers = tuple(layers)
            for layer in extra_layers:
                if not isinstance(
                    layer, CacheLayer
                ):  # pragma: no cover - defensive typing
                    raise TypeError(f"unsupported cache layer type: {type(layer)!r}")
        self._layers: tuple[CacheLayer, ...] = (mapping_layer, *extra_layers)
        self._storage_layer = mapping_layer
        self._storage: MutableMapping[str, Any] = mapping_layer.storage
        self._entries: dict[str, _CacheEntry] = {}
        self._registry_lock = threading.RLock()
        self._default_capacity = self._normalise_capacity(default_capacity)
        self._capacity_overrides: dict[str, int | None] = {}
        self._metrics: dict[str, _CacheMetrics] = {}
        self._metrics_publishers: list[Callable[[str, CacheStatistics], None]] = []
        if overrides:
            self.configure(overrides=overrides)

    @staticmethod
    def _normalise_capacity(value: int | None) -> int | None:
        if value is None:
            return None
        size = int(value)
        if size < 0:
            raise ValueError("capacity must be non-negative or None")
        return size

    def register(
        self,
        name: str,
        factory: Callable[[], Any],
        *,
        lock_factory: Callable[[], threading.Lock | threading.RLock] | None = None,
        reset: Callable[[Any], Any] | None = None,
        create: bool = True,
        encoder: Callable[[Any], Any] | None = None,
        decoder: Callable[[Any], Any] | None = None,
    ) -> None:
        """Register ``name`` with ``factory`` and optional lifecycle hooks."""

        if lock_factory is None:
            lock_factory = threading.RLock
        with self._registry_lock:
            entry = self._entries.get(name)
            if entry is None:
                entry = _CacheEntry(
                    factory=factory,
                    lock=lock_factory(),
                    reset=reset,
                    encoder=encoder,
                    decoder=decoder,
                )
                self._entries[name] = entry
            else:
                # Update hooks when re-registering the same cache name.
                entry.factory = factory
                entry.reset = reset
                entry.encoder = encoder
                entry.decoder = decoder
            self._ensure_metrics(name)
        if create:
            self.get(name)

    def configure(
        self,
        *,
        default_capacity: int | None | object = _MISSING,
        overrides: Mapping[str, int | None] | None = None,
        replace_overrides: bool = False,
    ) -> None:
        """Update the cache capacity policy shared by registered entries."""

        with self._registry_lock:
            if default_capacity is not self._MISSING:
                self._default_capacity = self._normalise_capacity(
                    default_capacity if default_capacity is not None else None
                )
            if overrides is not None:
                if replace_overrides:
                    self._capacity_overrides.clear()
                for key, value in overrides.items():
                    self._capacity_overrides[key] = self._normalise_capacity(value)

    def configure_from_mapping(self, config: Mapping[str, Any]) -> None:
        """Load configuration produced by :meth:`export_config`."""

        default = config.get("default_capacity", self._MISSING)
        overrides = config.get("overrides")
        overrides_mapping: Mapping[str, int | None] | None
        overrides_mapping = overrides if isinstance(overrides, Mapping) else None
        self.configure(default_capacity=default, overrides=overrides_mapping)

    def export_config(self) -> CacheCapacityConfig:
        """Return a copy of the current capacity configuration."""

        with self._registry_lock:
            return CacheCapacityConfig(
                default_capacity=self._default_capacity,
                overrides=dict(self._capacity_overrides),
            )

    def get_capacity(
        self,
        name: str,
        *,
        requested: int | None = None,
        fallback: int | None = None,
        use_default: bool = True,
    ) -> int | None:
        """Return capacity for ``name`` considering overrides and defaults."""

        with self._registry_lock:
            override = self._capacity_overrides.get(name, self._MISSING)
            default = self._default_capacity
            if override is not self._MISSING:
                return override
            values: tuple[int | None, ...]
            if use_default:
                values = (requested, default, fallback)
            else:
                values = (requested, fallback)
            for value in values:
                if value is self._MISSING:
                    continue
                normalised = self._normalise_capacity(value)
                if normalised is not None:
                    return normalised
            return None

    def has_override(self, name: str) -> bool:
        """Return ``True`` if ``name`` has an explicit capacity override."""

        with self._registry_lock:
            return name in self._capacity_overrides

    def get_lock(self, name: str) -> threading.Lock | threading.RLock:
        """Return the lock guarding cache ``name`` for external coordination."""

        entry = self._entries.get(name)
        if entry is None:
            raise KeyError(name)
        return entry.lock

    def names(self) -> Iterator[str]:
        """Iterate over registered cache names."""

        with self._registry_lock:
            return iter(tuple(self._entries))

    def get(self, name: str, *, create: bool = True) -> Any:
        """Return cache ``name`` creating it on demand when ``create`` is true."""

        entry = self._entries.get(name)
        if entry is None:
            raise KeyError(name)
        with entry.lock:
            value = self._load_from_layers(name, entry)
            if create and value is None:
                value = entry.factory()
                self._persist_layers(name, entry, value)
            return value

    def peek(self, name: str) -> Any:
        """Return cache ``name`` without creating a missing entry."""

        entry = self._entries.get(name)
        if entry is None:
            raise KeyError(name)
        with entry.lock:
            return self._load_from_layers(name, entry)

    def store(self, name: str, value: Any) -> None:
        """Replace the stored value for cache ``name`` with ``value``."""

        entry = self._entries.get(name)
        if entry is None:
            raise KeyError(name)
        with entry.lock:
            self._persist_layers(name, entry, value)

    def update(
        self,
        name: str,
        updater: Callable[[Any], Any],
        *,
        create: bool = True,
    ) -> Any:
        """Apply ``updater`` to cache ``name`` storing the resulting value."""

        entry = self._entries.get(name)
        if entry is None:
            raise KeyError(name)
        with entry.lock:
            current = self._load_from_layers(name, entry)
            if create and current is None:
                current = entry.factory()
            new_value = updater(current)
            self._persist_layers(name, entry, new_value)
            return new_value

    def clear(self, name: str | None = None) -> None:
        """Reset caches either selectively or for every registered name."""

        if name is not None:
            names = (name,)
        else:
            with self._registry_lock:
                names = tuple(self._entries)
        for cache_name in names:
            entry = self._entries.get(cache_name)
            if entry is None:
                continue
            with entry.lock:
                current = self._load_from_layers(cache_name, entry)
                new_value = None
                if entry.reset is not None:
                    try:
                        new_value = entry.reset(current)
                    except Exception:  # pragma: no cover - defensive logging
                        _logger.exception("cache reset failed for %s", cache_name)
                if new_value is None:
                    try:
                        new_value = entry.factory()
                    except Exception:
                        self._delete_from_layers(cache_name)
                        continue
                self._persist_layers(cache_name, entry, new_value)

1120
|
+
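    # Illustrative sketch (not part of the package diff): a read-modify-write
    # cycle through ``update``. ``register`` is assumed to be defined earlier
    # in this module with a positional factory argument (as the
    # ``manager.register(...)`` calls further below suggest); the cache name
    # "counters" is hypothetical.
    #
    #     manager.register("counters", dict)
    #     manager.update("counters", lambda d: {**d, "steps": d.get("steps", 0) + 1})
    #     manager.get("counters")    # {"steps": 1}
    #     manager.clear("counters")  # rebuilt via factory (or the registered reset)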
    # ------------------------------------------------------------------
    # Layer orchestration helpers

    def _encode_value(self, entry: _CacheEntry, value: Any) -> Any:
        encoder = entry.encoder
        if encoder is None:
            return value
        return encoder(value)

    def _decode_value(self, entry: _CacheEntry, payload: Any) -> Any:
        decoder = entry.decoder
        if decoder is None:
            return payload
        return decoder(payload)

    def _store_layer(
        self, name: str, entry: _CacheEntry, value: Any, *, layer_index: int
    ) -> None:
        layer = self._layers[layer_index]
        if layer_index == 0:
            payload = value
        else:
            try:
                payload = self._encode_value(entry, value)
            except Exception:  # pragma: no cover - defensive logging
                _logger.exception("cache encoding failed for %s", name)
                return
        try:
            layer.store(name, payload)
        except Exception:  # pragma: no cover - defensive logging
            _logger.exception(
                "cache layer store failed for %s on %s", name, layer.__class__.__name__
            )

    def _persist_layers(self, name: str, entry: _CacheEntry, value: Any) -> None:
        for index in range(len(self._layers)):
            self._store_layer(name, entry, value, layer_index=index)

    def _delete_from_layers(self, name: str) -> None:
        for layer in self._layers:
            try:
                layer.delete(name)
            except KeyError:
                continue
            except Exception:  # pragma: no cover - defensive logging
                _logger.exception(
                    "cache layer delete failed for %s on %s",
                    name,
                    layer.__class__.__name__,
                )

    def _load_from_layers(self, name: str, entry: _CacheEntry) -> Any:
        # Primary in-memory layer first for fast-path lookups.
        try:
            value = self._layers[0].load(name)
        except KeyError:
            value = None
        except Exception:  # pragma: no cover - defensive logging
            _logger.exception(
                "cache layer load failed for %s on %s",
                name,
                self._layers[0].__class__.__name__,
            )
            value = None
        if value is not None:
            return value

        # Fall back to slower layers and hydrate preceding caches on success.
        for index in range(1, len(self._layers)):
            layer = self._layers[index]
            try:
                payload = layer.load(name)
            except KeyError:
                continue
            except Exception:  # pragma: no cover - defensive logging
                _logger.exception(
                    "cache layer load failed for %s on %s",
                    name,
                    layer.__class__.__name__,
                )
                continue
            try:
                value = self._decode_value(entry, payload)
            except Exception:  # pragma: no cover - defensive logging
                _logger.exception("cache decoding failed for %s", name)
                continue
            if value is None:
                continue
            for prev_index in range(index):
                self._store_layer(name, entry, value, layer_index=prev_index)
            return value
        return None
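    # Illustrative sketch (not part of the package diff): the read path above
    # treats layer 0 as authoritative and hydrates it from slower layers on a
    # fallback hit. A minimal dict-backed layer showing the
    # ``store``/``load``/``delete`` surface these helpers rely on (the
    # ``CacheLayer`` protocol is assumed from the calls above):
    #
    #     class DictLayer:
    #         def __init__(self) -> None:
    #             self._data: dict[str, Any] = {}
    #         def store(self, name: str, payload: Any) -> None:
    #             self._data[name] = payload
    #         def load(self, name: str) -> Any:
    #             return self._data[name]  # raises KeyError when absent
    #         def delete(self, name: str) -> None:
    #             del self._data[name]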
    # ------------------------------------------------------------------
    # Metrics helpers

    def _ensure_metrics(self, name: str) -> _CacheMetrics:
        metrics = self._metrics.get(name)
        if metrics is None:
            with self._registry_lock:
                metrics = self._metrics.get(name)
                if metrics is None:
                    metrics = _CacheMetrics()
                    self._metrics[name] = metrics
        return metrics

    def increment_hit(
        self,
        name: str,
        *,
        amount: int = 1,
        duration: float | None = None,
    ) -> None:
        """Increase cache hit counters for ``name`` (optionally logging latency)."""

        metrics = self._ensure_metrics(name)
        with metrics.lock:
            metrics.hits += int(amount)
            if duration is not None:
                metrics.total_time += float(duration)
                metrics.timings += 1

    def increment_miss(
        self,
        name: str,
        *,
        amount: int = 1,
        duration: float | None = None,
    ) -> None:
        """Increase cache miss counters for ``name`` (optionally logging latency)."""

        metrics = self._ensure_metrics(name)
        with metrics.lock:
            metrics.misses += int(amount)
            if duration is not None:
                metrics.total_time += float(duration)
                metrics.timings += 1

    def increment_eviction(self, name: str, *, amount: int = 1) -> None:
        """Increase eviction count for cache ``name``."""

        metrics = self._ensure_metrics(name)
        with metrics.lock:
            metrics.evictions += int(amount)

    def record_timing(self, name: str, duration: float) -> None:
        """Accumulate ``duration`` into latency telemetry for ``name``."""

        metrics = self._ensure_metrics(name)
        with metrics.lock:
            metrics.total_time += float(duration)
            metrics.timings += 1

    @contextmanager
    def timer(self, name: str) -> TimingContext:
        """Context manager recording execution time for ``name``."""

        start = perf_counter()
        try:
            yield
        finally:
            self.record_timing(name, perf_counter() - start)
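    # Illustrative sketch (not part of the package diff): ``timer`` feeds the
    # same latency counters as the ``duration`` keyword on the increment
    # helpers, so both styles below record a timing sample ("demo" and the
    # workload are hypothetical):
    #
    #     with manager.timer("demo"):
    #         expensive_rebuild()
    #
    #     manager.increment_miss("demo", duration=0.25)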
    def get_metrics(self, name: str) -> CacheStatistics:
        """Return a snapshot of telemetry collected for cache ``name``."""

        metrics = self._metrics.get(name)
        if metrics is None:
            return CacheStatistics()
        with metrics.lock:
            return metrics.snapshot()

    def iter_metrics(self) -> Iterator[tuple[str, CacheStatistics]]:
        """Yield ``(name, stats)`` pairs for every cache with telemetry."""

        with self._registry_lock:
            items = tuple(self._metrics.items())
        for name, metrics in items:
            with metrics.lock:
                yield name, metrics.snapshot()

    def aggregate_metrics(self) -> CacheStatistics:
        """Return aggregated telemetry statistics across all caches."""

        aggregate = CacheStatistics()
        for _, stats in self.iter_metrics():
            aggregate = aggregate.merge(stats)
        return aggregate

    def register_metrics_publisher(
        self, publisher: Callable[[str, CacheStatistics], None]
    ) -> None:
        """Register ``publisher`` to receive metrics snapshots on demand."""

        with self._registry_lock:
            self._metrics_publishers.append(publisher)

    def publish_metrics(
        self,
        *,
        publisher: Callable[[str, CacheStatistics], None] | None = None,
    ) -> None:
        """Send cached telemetry to ``publisher`` or all registered publishers."""

        if publisher is None:
            with self._registry_lock:
                publishers = tuple(self._metrics_publishers)
        else:
            publishers = (publisher,)
        if not publishers:
            return
        snapshot = tuple(self.iter_metrics())
        for emit in publishers:
            for name, stats in snapshot:
                try:
                    emit(name, stats)
                except Exception:  # pragma: no cover - defensive logging
                    _logger.exception("Cache metrics publisher failed for %s", name)

    def log_metrics(self, logger: logging.Logger, *, level: int = logging.INFO) -> None:
        """Emit cache metrics using ``logger`` for telemetry hooks."""

        for name, stats in self.iter_metrics():
            logger.log(
                level,
                "cache=%s hits=%d misses=%d evictions=%d timings=%d total_time=%.6f",
                name,
                stats.hits,
                stats.misses,
                stats.evictions,
                stats.timings,
                stats.total_time,
            )
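    # Illustrative sketch (not part of the package diff): forwarding snapshots
    # to an external sink. The publisher signature is ``(name, stats)`` as
    # accepted by ``register_metrics_publisher`` above:
    #
    #     def emit(name: str, stats: CacheStatistics) -> None:
    #         print(name, stats.hits, stats.misses, stats.evictions)
    #
    #     manager.register_metrics_publisher(emit)
    #     manager.publish_metrics()  # fans out to every registered publisher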
try:
    from .init import get_logger as _get_logger
except ImportError:  # pragma: no cover - circular bootstrap fallback

    def _get_logger(name: str) -> logging.Logger:
        return logging.getLogger(name)


_logger = _get_logger(__name__)
get_logger = _get_logger


def _normalise_callbacks(
    callbacks: Iterable[Callable[[K, V], None]] | Callable[[K, V], None] | None,
) -> tuple[Callable[[K, V], None], ...]:
    if callbacks is None:
        return ()
    if callable(callbacks):
        return (callbacks,)
    return tuple(callbacks)


def prune_lock_mapping(
    cache: Mapping[K, Any] | MutableMapping[K, Any] | None,
    locks: MutableMapping[K, Any] | None,
) -> None:
    """Drop lock entries not present in ``cache``."""

    if locks is None:
        return
    if cache is None:
        cache_keys: set[K] = set()
    else:
        cache_keys = set(cache.keys())
    for key in list(locks.keys()):
        if key not in cache_keys:
            locks.pop(key, None)
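# Illustrative sketch (not part of the package diff): keeping a lock mapping
# aligned with its cache after external mutation. Names are hypothetical.
#
#     cache = {"a": 1}
#     locks = {"a": threading.RLock(), "stale": threading.RLock()}
#     prune_lock_mapping(cache, locks)
#     sorted(locks)  # ["a"] -- the "stale" entry was dropped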
class InstrumentedLRUCache(MutableMapping[K, V], Generic[K, V]):
    """LRU cache wrapper that synchronises telemetry, callbacks and locks.

    The wrapper owns an internal :class:`cachetools.LRUCache` instance and
    forwards all read operations to it. Mutating operations are instrumented to
    update :class:`CacheManager` metrics, execute registered callbacks and keep
    an optional lock mapping aligned with the stored keys. Telemetry callbacks
    always execute before eviction callbacks, preserving the registration order
    for deterministic side effects.

    Callbacks can be extended or replaced after construction via
    :meth:`set_telemetry_callbacks` and :meth:`set_eviction_callbacks`. When
    ``append`` is ``False`` (default) the provided callbacks replace the
    existing sequence; otherwise they are appended at the end while keeping the
    previous ordering intact.
    """

    _MISSING = object()

    def __init__(
        self,
        maxsize: int,
        *,
        manager: CacheManager | None = None,
        metrics_key: str | None = None,
        telemetry_callbacks: (
            Iterable[Callable[[K, V], None]] | Callable[[K, V], None] | None
        ) = None,
        eviction_callbacks: (
            Iterable[Callable[[K, V], None]] | Callable[[K, V], None] | None
        ) = None,
        locks: MutableMapping[K, Any] | None = None,
        getsizeof: Callable[[V], int] | None = None,
        count_overwrite_hit: bool = True,
    ) -> None:
        self._cache: LRUCache[K, V] = LRUCache(maxsize, getsizeof=getsizeof)
        original_popitem = self._cache.popitem

        def _instrumented_popitem() -> tuple[K, V]:
            key, value = original_popitem()
            self._dispatch_removal(key, value)
            return key, value

        self._cache.popitem = _instrumented_popitem  # type: ignore[assignment]
        self._manager = manager
        self._metrics_key = metrics_key
        self._locks = locks
        self._count_overwrite_hit = bool(count_overwrite_hit)
        self._telemetry_callbacks: list[Callable[[K, V], None]]
        self._telemetry_callbacks = list(_normalise_callbacks(telemetry_callbacks))
        self._eviction_callbacks: list[Callable[[K, V], None]]
        self._eviction_callbacks = list(_normalise_callbacks(eviction_callbacks))

    # ------------------------------------------------------------------
    # Callback registration helpers

    @property
    def telemetry_callbacks(self) -> tuple[Callable[[K, V], None], ...]:
        """Return currently registered telemetry callbacks."""

        return tuple(self._telemetry_callbacks)

    @property
    def eviction_callbacks(self) -> tuple[Callable[[K, V], None], ...]:
        """Return currently registered eviction callbacks."""

        return tuple(self._eviction_callbacks)

    def set_telemetry_callbacks(
        self,
        callbacks: Iterable[Callable[[K, V], None]] | Callable[[K, V], None] | None,
        *,
        append: bool = False,
    ) -> None:
        """Update telemetry callbacks executed on removals.

        When ``append`` is ``True`` the provided callbacks are added to the end
        of the execution chain while preserving relative order. Otherwise, the
        previous callbacks are replaced.
        """

        new_callbacks = list(_normalise_callbacks(callbacks))
        if append:
            self._telemetry_callbacks.extend(new_callbacks)
        else:
            self._telemetry_callbacks = new_callbacks

    def set_eviction_callbacks(
        self,
        callbacks: Iterable[Callable[[K, V], None]] | Callable[[K, V], None] | None,
        *,
        append: bool = False,
    ) -> None:
        """Update eviction callbacks executed on removals.

        Behaviour matches :meth:`set_telemetry_callbacks`.
        """

        new_callbacks = list(_normalise_callbacks(callbacks))
        if append:
            self._eviction_callbacks.extend(new_callbacks)
        else:
            self._eviction_callbacks = new_callbacks
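    # Illustrative sketch (not part of the package diff): ``append=True``
    # chains callbacks instead of replacing them, so telemetry hooks compose:
    #
    #     cache = InstrumentedLRUCache(2)
    #     cache.set_telemetry_callbacks(lambda k, v: print("first", k))
    #     cache.set_telemetry_callbacks(lambda k, v: print("second", k), append=True)
    #     len(cache.telemetry_callbacks)  # 2, executed in registration order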
    # ------------------------------------------------------------------
    # MutableMapping interface

    def __getitem__(self, key: K) -> V:
        """Return the cached value for ``key``."""

        return self._cache[key]

    def __setitem__(self, key: K, value: V) -> None:
        """Store ``value`` under ``key`` updating telemetry accordingly."""

        exists = key in self._cache
        self._cache[key] = value
        if exists:
            if self._count_overwrite_hit:
                self._record_hit(1)
        else:
            self._record_miss(1)

    def __delitem__(self, key: K) -> None:
        """Remove ``key`` from the cache and dispatch removal callbacks."""

        try:
            value = self._cache[key]
        except KeyError:
            self._record_miss(1)
            raise
        del self._cache[key]
        self._dispatch_removal(key, value, hits=1)

    def __iter__(self) -> Iterator[K]:
        """Iterate over cached keys in eviction order."""

        return iter(self._cache)

    def __len__(self) -> int:
        """Return the number of cached entries."""

        return len(self._cache)

    def __contains__(self, key: object) -> bool:
        """Return ``True`` when ``key`` is stored in the cache."""

        return key in self._cache

    def __repr__(self) -> str:  # pragma: no cover - debugging helper
        """Return a debug representation including the underlying cache."""

        return f"{self.__class__.__name__}({self._cache!r})"

    # ------------------------------------------------------------------
    # Cache helpers

    @property
    def maxsize(self) -> int:
        """Return the configured maximum cache size."""

        return self._cache.maxsize

    @property
    def currsize(self) -> int:
        """Return the current weighted size reported by :mod:`cachetools`."""

        return self._cache.currsize

    def get(self, key: K, default: V | None = None) -> V | None:
        """Return ``key`` if present, otherwise ``default``."""

        return self._cache.get(key, default)

    def pop(self, key: K, default: Any = _MISSING) -> V:
        """Remove ``key`` returning its value or ``default`` when provided."""

        try:
            value = self._cache[key]
        except KeyError:
            self._record_miss(1)
            if default is self._MISSING:
                raise
            return cast(V, default)
        del self._cache[key]
        self._dispatch_removal(key, value, hits=1)
        return value

    def popitem(self) -> tuple[K, V]:
        """Remove and return the LRU entry ensuring instrumentation fires."""

        return self._cache.popitem()

    def clear(self) -> None:  # type: ignore[override]
        """Evict every entry while keeping telemetry and locks consistent."""

        while True:
            try:
                self.popitem()
            except KeyError:
                break
        if self._locks is not None:
            try:
                self._locks.clear()
            except Exception:  # pragma: no cover - defensive logging
                _logger.exception("lock cleanup failed during cache clear")
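    # Illustrative sketch (not part of the package diff): how the mapping
    # operations above translate into metrics when a manager is attached
    # (the metrics key "demo" is hypothetical):
    #
    #     cache = InstrumentedLRUCache(2, manager=manager, metrics_key="demo")
    #     cache["a"] = 1              # miss (new key)
    #     cache["a"] = 2              # hit (overwrite, count_overwrite_hit=True)
    #     cache.pop("missing", None)  # miss, returns the default
    #     manager.get_metrics("demo") # snapshot reflecting the above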
    # ------------------------------------------------------------------
    # Internal helpers

    def _record_hit(self, amount: int) -> None:
        if amount and self._manager is not None and self._metrics_key is not None:
            self._manager.increment_hit(self._metrics_key, amount=amount)

    def _record_miss(self, amount: int) -> None:
        if amount and self._manager is not None and self._metrics_key is not None:
            self._manager.increment_miss(self._metrics_key, amount=amount)

    def _record_eviction(self, amount: int) -> None:
        if amount and self._manager is not None and self._metrics_key is not None:
            self._manager.increment_eviction(self._metrics_key, amount=amount)

    def _dispatch_removal(
        self,
        key: K,
        value: V,
        *,
        hits: int = 0,
        misses: int = 0,
        eviction_amount: int = 1,
        purge_lock: bool = True,
    ) -> None:
        if hits:
            self._record_hit(hits)
        if misses:
            self._record_miss(misses)
        if eviction_amount:
            self._record_eviction(eviction_amount)
        self._emit_callbacks(self._telemetry_callbacks, key, value, "telemetry")
        self._emit_callbacks(self._eviction_callbacks, key, value, "eviction")
        if purge_lock:
            self._purge_lock(key)

    def _emit_callbacks(
        self,
        callbacks: Iterable[Callable[[K, V], None]],
        key: K,
        value: V,
        kind: str,
    ) -> None:
        for callback in callbacks:
            try:
                callback(key, value)
            except Exception:  # pragma: no cover - defensive logging
                _logger.exception("%s callback failed for %r", kind, key)

    def _purge_lock(self, key: K) -> None:
        if self._locks is None:
            return
        try:
            self._locks.pop(key, None)
        except Exception:  # pragma: no cover - defensive logging
            _logger.exception("lock cleanup failed for %r", key)


class ManagedLRUCache(LRUCache[K, V]):
    """LRU cache wrapper with telemetry hooks and lock synchronisation."""

    def __init__(
        self,
        maxsize: int,
        *,
        manager: CacheManager | None = None,
        metrics_key: str | None = None,
        eviction_callbacks: (
            Iterable[Callable[[K, V], None]] | Callable[[K, V], None] | None
        ) = None,
        telemetry_callbacks: (
            Iterable[Callable[[K, V], None]] | Callable[[K, V], None] | None
        ) = None,
        locks: MutableMapping[K, Any] | None = None,
    ) -> None:
        super().__init__(maxsize)
        self._manager = manager
        self._metrics_key = metrics_key
        self._locks = locks
        self._eviction_callbacks = _normalise_callbacks(eviction_callbacks)
        self._telemetry_callbacks = _normalise_callbacks(telemetry_callbacks)

    def popitem(self) -> tuple[K, V]:  # type: ignore[override]
        """Evict the LRU entry while updating telemetry and lock state."""

        key, value = super().popitem()
        if self._locks is not None:
            try:
                self._locks.pop(key, None)
            except Exception:  # pragma: no cover - defensive logging
                _logger.exception("lock cleanup failed for %r", key)
        if self._manager is not None and self._metrics_key is not None:
            self._manager.increment_eviction(self._metrics_key)
        for callback in self._telemetry_callbacks:
            try:
                callback(key, value)
            except Exception:  # pragma: no cover - defensive logging
                _logger.exception("telemetry callback failed for %r", key)
        for callback in self._eviction_callbacks:
            try:
                callback(key, value)
            except Exception:  # pragma: no cover - defensive logging
                _logger.exception("eviction callback failed for %r", key)
        return key, value
@dataclass
class _SeedCacheState:
    """Container tracking the state for :class:`_SeedHashCache`."""

    cache: InstrumentedLRUCache[tuple[int, int], int] | None
    maxsize: int


@dataclass
class _CounterState(Generic[K]):
    """State bundle used by :class:`ScopedCounterCache`."""

    cache: InstrumentedLRUCache[K, int]
    locks: dict[K, threading.RLock]
    max_entries: int


# Key used to store the node set checksum in a graph's ``graph`` attribute.
NODE_SET_CHECKSUM_KEY = "_node_set_checksum_cache"

logger = _logger


# Helper to avoid importing ``tnfr.utils.init`` at module import time and keep
# circular dependencies at bay while still reusing the canonical numpy loader.
def _require_numpy():
    from .init import get_numpy

    return get_numpy()


# Graph key storing per-graph layer configuration overrides.
_GRAPH_CACHE_LAYERS_KEY = "_tnfr_cache_layers"

# Process-wide configuration for shared cache layers (Shelve/Redis).
_GLOBAL_CACHE_LAYER_CONFIG: dict[str, dict[str, Any]] = {}
_GLOBAL_CACHE_LOCK = threading.RLock()
_GLOBAL_CACHE_MANAGER: CacheManager | None = None

# Keys of cache entries dependent on the edge version. Any change to the edge
# set requires these to be dropped to avoid stale data.
EDGE_VERSION_CACHE_KEYS = ("_trig_version",)


def get_graph_version(graph: Any, key: str, default: int = 0) -> int:
    """Return integer version stored in ``graph`` under ``key``."""

    return int(graph.get(key, default))


def increment_graph_version(graph: Any, key: str) -> int:
    """Increment and store a version counter in ``graph`` under ``key``."""

    version = get_graph_version(graph, key) + 1
    graph[key] = version
    return version


def stable_json(obj: Any) -> str:
    """Return a JSON string with deterministic ordering for ``obj``."""

    from .io import json_dumps

    return json_dumps(
        obj,
        sort_keys=True,
        ensure_ascii=False,
        to_bytes=False,
    )


@lru_cache(maxsize=1024)
def _node_repr_digest(obj: Any) -> tuple[str, bytes]:
    """Return cached stable representation and digest for ``obj``."""

    try:
        repr_ = stable_json(obj)
    except TypeError:
        repr_ = repr(obj)
    digest = hashlib.blake2b(repr_.encode("utf-8"), digest_size=16).digest()
    return repr_, digest


def clear_node_repr_cache() -> None:
    """Clear cached node representations used for checksums."""

    _node_repr_digest.cache_clear()
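# Illustrative sketch (not part of the package diff): version counters are
# plain integers in the graph mapping, and ``stable_json`` gives a canonical
# key-ordered encoding suitable for hashing:
#
#     graph: dict[str, Any] = {}
#     increment_graph_version(graph, "_edge_version")  # 1
#     increment_graph_version(graph, "_edge_version")  # 2
#     stable_json({"b": 1, "a": 2}) == stable_json({"a": 2, "b": 1})  # True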
def configure_global_cache_layers(
    *,
    shelve: Mapping[str, Any] | None = None,
    redis: Mapping[str, Any] | None = None,
    replace: bool = False,
) -> None:
    """Update process-wide cache layer configuration.

    Parameters mirror the per-layer specifications accepted via graph metadata.
    Passing ``replace=True`` clears previous settings before applying new ones.
    Providing ``None`` for a layer while ``replace`` is true removes that layer
    from the configuration.
    """

    global _GLOBAL_CACHE_MANAGER
    with _GLOBAL_CACHE_LOCK:
        manager = _GLOBAL_CACHE_MANAGER
        _GLOBAL_CACHE_MANAGER = None
        if replace:
            _GLOBAL_CACHE_LAYER_CONFIG.clear()
        if shelve is not None:
            _GLOBAL_CACHE_LAYER_CONFIG["shelve"] = dict(shelve)
        elif replace:
            _GLOBAL_CACHE_LAYER_CONFIG.pop("shelve", None)
        if redis is not None:
            _GLOBAL_CACHE_LAYER_CONFIG["redis"] = dict(redis)
        elif replace:
            _GLOBAL_CACHE_LAYER_CONFIG.pop("redis", None)
    _close_cache_layers(manager)
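# Illustrative sketch (not part of the package diff): enabling a process-wide
# shelve layer, then removing every configured layer. The path is
# hypothetical; the spec keys mirror what ``_build_shelve_layer`` below reads.
#
#     configure_global_cache_layers(shelve={"path": "/tmp/tnfr-cache.db"})
#     configure_global_cache_layers(replace=True)  # drop shelve and redis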
def _resolve_layer_config(
    graph: MutableMapping[str, Any] | None,
) -> dict[str, dict[str, Any]]:
    resolved: dict[str, dict[str, Any]] = {}
    with _GLOBAL_CACHE_LOCK:
        for name, spec in _GLOBAL_CACHE_LAYER_CONFIG.items():
            resolved[name] = dict(spec)
    if graph is not None:
        overrides = graph.get(_GRAPH_CACHE_LAYERS_KEY)
        if isinstance(overrides, Mapping):
            for name in ("shelve", "redis"):
                layer_spec = overrides.get(name)
                if isinstance(layer_spec, Mapping):
                    resolved[name] = dict(layer_spec)
                elif layer_spec is None:
                    resolved.pop(name, None)
    return resolved


def _build_shelve_layer(spec: Mapping[str, Any]) -> ShelveCacheLayer | None:
    path = spec.get("path")
    if not path:
        return None
    flag = spec.get("flag", "c")
    protocol = spec.get("protocol")
    writeback = bool(spec.get("writeback", False))
    try:
        proto_arg = None if protocol is None else int(protocol)
    except (TypeError, ValueError):
        logger.warning("Invalid shelve protocol %r; falling back to default", protocol)
        proto_arg = None
    try:
        return ShelveCacheLayer(
            str(path),
            flag=str(flag),
            protocol=proto_arg,
            writeback=writeback,
        )
    except Exception:  # pragma: no cover - defensive logging
        logger.exception("Failed to initialise ShelveCacheLayer for path %r", path)
        return None


def _build_redis_layer(spec: Mapping[str, Any]) -> RedisCacheLayer | None:
    enabled = spec.get("enabled", True)
    if not enabled:
        return None
    namespace = spec.get("namespace")
    client = spec.get("client")
    if client is None:
        factory = spec.get("client_factory")
        if callable(factory):
            try:
                client = factory()
            except Exception:  # pragma: no cover - defensive logging
                logger.exception("Redis cache client factory failed")
                return None
        else:
            kwargs = spec.get("client_kwargs")
            if isinstance(kwargs, Mapping):
                try:  # pragma: no cover - optional dependency
                    import redis  # type: ignore
                except Exception:  # pragma: no cover - defensive logging
                    logger.exception(
                        "redis-py is required to build the configured Redis client"
                    )
                    return None
                try:
                    client = redis.Redis(**dict(kwargs))
                except Exception:  # pragma: no cover - defensive logging
                    logger.exception(
                        "Failed to initialise redis client with %r", kwargs
                    )
                    return None
    try:
        if namespace is None:
            return RedisCacheLayer(client=client)
        return RedisCacheLayer(client=client, namespace=str(namespace))
    except Exception:  # pragma: no cover - defensive logging
        logger.exception("Failed to initialise RedisCacheLayer")
        return None


def _build_cache_layers(config: Mapping[str, dict[str, Any]]) -> tuple[CacheLayer, ...]:
    layers: list[CacheLayer] = []
    shelve_spec = config.get("shelve")
    if isinstance(shelve_spec, Mapping):
        layer = _build_shelve_layer(shelve_spec)
        if layer is not None:
            layers.append(layer)
    redis_spec = config.get("redis")
    if isinstance(redis_spec, Mapping):
        layer = _build_redis_layer(redis_spec)
        if layer is not None:
            layers.append(layer)
    return tuple(layers)


def _close_cache_layers(manager: CacheManager | None) -> None:
    if manager is None:
        return
    layers = getattr(manager, "_layers", ())
    for layer in layers:
        close = getattr(layer, "close", None)
        if callable(close):
            try:
                close()
            except Exception:  # pragma: no cover - defensive logging
                logger.exception(
                    "Cache layer close failed for %s", layer.__class__.__name__
                )


def reset_global_cache_manager() -> None:
    """Dispose the shared cache manager and close attached layers."""

    global _GLOBAL_CACHE_MANAGER
    with _GLOBAL_CACHE_LOCK:
        manager = _GLOBAL_CACHE_MANAGER
        _GLOBAL_CACHE_MANAGER = None
    _close_cache_layers(manager)


def build_cache_manager(
    *,
    graph: MutableMapping[str, Any] | None = None,
    storage: MutableMapping[str, Any] | None = None,
    default_capacity: int | None = None,
    overrides: Mapping[str, int | None] | None = None,
) -> CacheManager:
    """Construct a :class:`CacheManager` honouring configured cache layers."""

    global _GLOBAL_CACHE_MANAGER
    if graph is None:
        with _GLOBAL_CACHE_LOCK:
            manager = _GLOBAL_CACHE_MANAGER
            if manager is not None:
                return manager

    layers = _build_cache_layers(_resolve_layer_config(graph))
    manager = CacheManager(
        storage,
        default_capacity=default_capacity,
        overrides=overrides,
        layers=layers,
    )

    if graph is None:
        with _GLOBAL_CACHE_LOCK:
            global_manager = _GLOBAL_CACHE_MANAGER
            if global_manager is None:
                _GLOBAL_CACHE_MANAGER = manager
                return manager
        _close_cache_layers(manager)
        return global_manager

    return manager
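# Illustrative sketch (not part of the package diff): without a graph the
# builder returns one shared process-wide manager; with a graph each call
# builds an isolated instance honouring that graph's layer overrides.
#
#     shared_a = build_cache_manager()
#     shared_b = build_cache_manager()
#     shared_a is shared_b  # True -- global singleton
#     local = build_cache_manager(graph={})
#     local is shared_a     # False -- per-graph manager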
def _node_repr(n: Any) -> str:
    """Stable representation for node hashing and sorting."""

    return _node_repr_digest(n)[0]


def _iter_node_digests(nodes: Iterable[Any], *, presorted: bool) -> Iterable[bytes]:
    """Yield node digests in a deterministic order."""

    if presorted:
        for node in nodes:
            yield _node_repr_digest(node)[1]
    else:
        for _, digest in sorted(
            (_node_repr_digest(n) for n in nodes), key=lambda x: x[0]
        ):
            yield digest


def _node_set_checksum_no_nodes(
    G: nx.Graph,
    graph: Any,
    *,
    presorted: bool,
    store: bool,
) -> str:
    """Checksum helper when no explicit node set is provided."""

    nodes_view = G.nodes()
    current_nodes = frozenset(nodes_view)
    cached = graph.get(NODE_SET_CHECKSUM_KEY)
    if cached and len(cached) == 3 and cached[2] == current_nodes:
        return cached[1]

    hasher = hashlib.blake2b(digest_size=16)
    for digest in _iter_node_digests(nodes_view, presorted=presorted):
        hasher.update(digest)

    checksum = hasher.hexdigest()
    if store:
        token = checksum[:16]
        if cached and cached[0] == token:
            return cached[1]
        graph[NODE_SET_CHECKSUM_KEY] = (token, checksum, current_nodes)
    else:
        graph.pop(NODE_SET_CHECKSUM_KEY, None)
    return checksum


def node_set_checksum(
    G: nx.Graph,
    nodes: Iterable[Any] | None = None,
    *,
    presorted: bool = False,
    store: bool = True,
) -> str:
    """Return a BLAKE2b checksum of ``G``'s node set."""

    graph = get_graph(G)
    if nodes is None:
        return _node_set_checksum_no_nodes(G, graph, presorted=presorted, store=store)

    hasher = hashlib.blake2b(digest_size=16)
    for digest in _iter_node_digests(nodes, presorted=presorted):
        hasher.update(digest)

    checksum = hasher.hexdigest()
    if store:
        token = checksum[:16]
        cached = graph.get(NODE_SET_CHECKSUM_KEY)
        if cached and cached[0] == token:
            return cached[1]
        graph[NODE_SET_CHECKSUM_KEY] = (token, checksum)
    else:
        graph.pop(NODE_SET_CHECKSUM_KEY, None)
    return checksum
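# Illustrative sketch (not part of the package diff): the checksum depends
# only on the node set, not on insertion order, and is cached on the graph:
#
#     import networkx as nx
#     G1, G2 = nx.Graph(), nx.Graph()
#     G1.add_nodes_from([1, 2, 3])
#     G2.add_nodes_from([3, 2, 1])
#     node_set_checksum(G1) == node_set_checksum(G2)  # True
#     NODE_SET_CHECKSUM_KEY in G1.graph               # True (store=True)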
@dataclass(slots=True)
class NodeCache:
    """Container for cached node data."""

    checksum: str
    nodes: tuple[Any, ...]
    sorted_nodes: tuple[Any, ...] | None = None
    idx: dict[Any, int] | None = None
    offset: dict[Any, int] | None = None

    @property
    def n(self) -> int:
        return len(self.nodes)


def _update_node_cache(
    graph: Any,
    nodes: tuple[Any, ...],
    key: str,
    *,
    checksum: str,
    sorted_nodes: tuple[Any, ...] | None = None,
) -> None:
    """Store ``nodes`` and ``checksum`` in ``graph`` under ``key``."""

    graph[f"{key}_cache"] = NodeCache(
        checksum=checksum, nodes=nodes, sorted_nodes=sorted_nodes
    )
    graph[f"{key}_checksum"] = checksum


def _refresh_node_list_cache(
    G: nx.Graph,
    graph: Any,
    *,
    sort_nodes: bool,
    current_n: int,
) -> tuple[Any, ...]:
    """Refresh the cached node list and return the nodes."""

    nodes = tuple(G.nodes())
    checksum = node_set_checksum(G, nodes, store=True)
    sorted_nodes = tuple(sorted(nodes, key=_node_repr)) if sort_nodes else None
    _update_node_cache(
        graph,
        nodes,
        "_node_list",
        checksum=checksum,
        sorted_nodes=sorted_nodes,
    )
    graph["_node_list_len"] = current_n
    return nodes


def _reuse_node_list_cache(
    graph: Any,
    cache: NodeCache,
    nodes: tuple[Any, ...],
    sorted_nodes: tuple[Any, ...] | None,
    *,
    sort_nodes: bool,
    new_checksum: str | None,
) -> None:
    """Reuse existing node cache and record its checksum if missing."""

    checksum = cache.checksum if new_checksum is None else new_checksum
    if sort_nodes and sorted_nodes is None:
        sorted_nodes = tuple(sorted(nodes, key=_node_repr))
    _update_node_cache(
        graph,
        nodes,
        "_node_list",
        checksum=checksum,
        sorted_nodes=sorted_nodes,
    )


def _cache_node_list(G: nx.Graph) -> tuple[Any, ...]:
    """Cache and return the tuple of nodes for ``G``."""

    graph = get_graph(G)
    cache: NodeCache | None = graph.get("_node_list_cache")
    nodes = cache.nodes if cache else None
    sorted_nodes = cache.sorted_nodes if cache else None
    stored_len = graph.get("_node_list_len")
    current_n = G.number_of_nodes()
    dirty = bool(graph.pop("_node_list_dirty", False))

    invalid = nodes is None or stored_len != current_n or dirty
    new_checksum: str | None = None

    if not invalid and cache:
        new_checksum = node_set_checksum(G)
        invalid = cache.checksum != new_checksum

    sort_nodes = bool(graph.get("SORT_NODES", False))

    if invalid:
        nodes = _refresh_node_list_cache(
            G, graph, sort_nodes=sort_nodes, current_n=current_n
        )
    elif cache and "_node_list_checksum" not in graph:
        _reuse_node_list_cache(
            graph,
            cache,
            nodes,
            sorted_nodes,
            sort_nodes=sort_nodes,
            new_checksum=new_checksum,
        )
    else:
        if sort_nodes and sorted_nodes is None and cache is not None:
            cache.sorted_nodes = tuple(sorted(nodes, key=_node_repr))
    return nodes


def cached_node_list(G: nx.Graph) -> tuple[Any, ...]:
    """Public wrapper returning the cached node tuple for ``G``."""

    return _cache_node_list(G)


def _ensure_node_map(
    G: TNFRGraph,
    *,
    attrs: tuple[str, ...],
    sort: bool = False,
) -> dict[NodeId, int]:
    """Return cached node-to-index/offset mappings stored on ``NodeCache``."""

    graph = G.graph
    _cache_node_list(G)
    cache: NodeCache = graph["_node_list_cache"]

    missing = [attr for attr in attrs if getattr(cache, attr) is None]
    if missing:
        if sort:
            nodes_opt = cache.sorted_nodes
            if nodes_opt is None:
                nodes_opt = tuple(sorted(cache.nodes, key=_node_repr))
                cache.sorted_nodes = nodes_opt
            nodes_seq = nodes_opt
        else:
            nodes_seq = cache.nodes
        node_ids = cast(tuple[NodeId, ...], nodes_seq)
        mappings: dict[str, dict[NodeId, int]] = {attr: {} for attr in missing}
        for idx, node in enumerate(node_ids):
            for attr in missing:
                mappings[attr][node] = idx
        for attr in missing:
            setattr(cache, attr, mappings[attr])
    return cast(dict[NodeId, int], getattr(cache, attrs[0]))


def ensure_node_index_map(G: TNFRGraph) -> dict[NodeId, int]:
    """Return cached node-to-index mapping for ``G``."""

    return _ensure_node_map(G, attrs=("idx",), sort=False)


def ensure_node_offset_map(G: TNFRGraph) -> dict[NodeId, int]:
    """Return cached node-to-offset mapping for ``G``."""

    sort = bool(G.graph.get("SORT_NODES", False))
    return _ensure_node_map(G, attrs=("offset",), sort=sort)
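# Illustrative sketch (not part of the package diff): index maps are built
# lazily on the cached ``NodeCache`` and the same dict is reused on later
# calls (assuming the node set does not change in between):
#
#     import networkx as nx
#     G = nx.path_graph(3)
#     ensure_node_index_map(G)  # {0: 0, 1: 1, 2: 2}
#     G.graph["_node_list_cache"].idx is ensure_node_index_map(G)  # True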
@dataclass
class EdgeCacheState:
    cache: MutableMapping[Hashable, Any]
    locks: defaultdict[Hashable, threading.RLock]
    max_entries: int | None
    dirty: bool = False


_GRAPH_CACHE_MANAGER_KEY = "_tnfr_cache_manager"
_GRAPH_CACHE_CONFIG_KEY = "_tnfr_cache_config"
DNFR_PREP_STATE_KEY = "_dnfr_prep_state"

# Ephemeral graph cache management:
# ----------------------------------
# TNFR stores cache managers directly in each graph's `.graph` dictionary
# via _GRAPH_CACHE_MANAGER_KEY. This design inherently supports ephemeral
# graphs because:
#
# 1. **Automatic cleanup**: When an ephemeral graph object is garbage
#    collected, its `.graph` dict and all associated cache managers are
#    automatically released with it. No manual cleanup is required.
#
# 2. **Isolation**: Each graph has its own cache manager instance, preventing
#    cache pollution between unrelated graphs or temporary computations.
#
# 3. **No global state**: Unlike WeakValueDictionary-based global caches,
#    there's no shared cache registry that needs weak references to track
#    ephemeral graphs.
#
# For temporary or short-lived graphs (e.g., subgraphs, clones, simulation
# snapshots), simply let the graph go out of scope and Python's garbage
# collector will reclaim all associated caches. No special ephemeral flag
# or WeakValueDictionary is needed.
#
# Example ephemeral graph usage:
#     def process_subgraph(G, nodes):
#         H = G.subgraph(nodes).copy()  # Ephemeral graph
#         default_compute_delta_nfr(H)  # Creates temporary cache
#         return extract_metrics(H)
#         # H and its caches are GC'd when function returns


@dataclass(slots=True)
class DnfrPrepState:
    """State container coordinating ΔNFR preparation caches."""

    cache: DnfrCache
    cache_lock: threading.RLock
    vector_lock: threading.RLock


def _build_dnfr_prep_state(
    graph: MutableMapping[str, Any],
    previous: DnfrPrepState | None = None,
) -> DnfrPrepState:
    """Construct a :class:`DnfrPrepState` and mirror it on ``graph``."""

    cache_lock: threading.RLock
    vector_lock: threading.RLock
    if isinstance(previous, DnfrPrepState):
        cache_lock = previous.cache_lock
        vector_lock = previous.vector_lock
    else:
        cache_lock = threading.RLock()
        vector_lock = threading.RLock()
    state = DnfrPrepState(
        cache=new_dnfr_cache(),
        cache_lock=cache_lock,
        vector_lock=vector_lock,
    )
    graph["_dnfr_prep_cache"] = state.cache
    return state


def _coerce_dnfr_state(
    graph: MutableMapping[str, Any],
    current: Any,
) -> DnfrPrepState:
    """Return ``current`` normalised into :class:`DnfrPrepState`."""

    if isinstance(current, DnfrPrepState):
        graph["_dnfr_prep_cache"] = current.cache
        return current
    if isinstance(current, DnfrCache):
        state = DnfrPrepState(
            cache=current,
            cache_lock=threading.RLock(),
            vector_lock=threading.RLock(),
        )
        graph["_dnfr_prep_cache"] = current
        return state
    return _build_dnfr_prep_state(graph)


def _graph_cache_manager(graph: MutableMapping[str, Any]) -> CacheManager:
    manager = graph.get(_GRAPH_CACHE_MANAGER_KEY)
    if not isinstance(manager, CacheManager):
        manager = build_cache_manager(graph=graph, default_capacity=128)
        graph[_GRAPH_CACHE_MANAGER_KEY] = manager
        config = graph.get(_GRAPH_CACHE_CONFIG_KEY)
        if isinstance(config, dict):
            manager.configure_from_mapping(config)

        def _dnfr_factory() -> DnfrPrepState:
            return _build_dnfr_prep_state(graph)

        def _dnfr_reset(current: Any) -> DnfrPrepState:
            if isinstance(current, DnfrPrepState):
                return _build_dnfr_prep_state(graph, current)
            return _build_dnfr_prep_state(graph)

        manager.register(
            DNFR_PREP_STATE_KEY,
            _dnfr_factory,
            reset=_dnfr_reset,
        )
        manager.update(
            DNFR_PREP_STATE_KEY,
            lambda current: _coerce_dnfr_state(graph, current),
        )
    return manager


def configure_graph_cache_limits(
    G: GraphLike | TNFRGraph | MutableMapping[str, Any],
    *,
    default_capacity: int | None | object = CacheManager._MISSING,
    overrides: Mapping[str, int | None] | None = None,
    replace_overrides: bool = False,
) -> CacheCapacityConfig:
    """Update cache capacity policy stored on ``G.graph``."""

    graph = get_graph(G)
    manager = _graph_cache_manager(graph)
    manager.configure(
        default_capacity=default_capacity,
        overrides=overrides,
        replace_overrides=replace_overrides,
    )
    snapshot = manager.export_config()
    graph[_GRAPH_CACHE_CONFIG_KEY] = {
        "default_capacity": snapshot.default_capacity,
        "overrides": dict(snapshot.overrides),
    }
    return snapshot
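# Illustrative sketch (not part of the package diff): pinning per-graph cache
# capacities. The override key matches the edge cache state key defined on
# ``EdgeCacheManager`` below; the snapshot mirrors what is persisted under the
# graph config key.
#
#     import networkx as nx
#     G = nx.Graph()
#     snap = configure_graph_cache_limits(
#         G, default_capacity=64, overrides={"_edge_version_state": 16}
#     )
#     snap.default_capacity  # 64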
class EdgeCacheManager:
|
|
2379
|
+
"""Coordinate cache storage and per-key locks for edge version caches."""
|
|
2380
|
+
|
|
2381
|
+
_STATE_KEY = "_edge_version_state"
|
|
2382
|
+
|
|
2383
|
+
def __init__(self, graph: MutableMapping[str, Any]) -> None:
|
|
2384
|
+
self.graph: MutableMapping[str, Any] = graph
|
|
2385
|
+
self._manager = _graph_cache_manager(graph)
|
|
2386
|
+
|
|
2387
|
+
def _encode_state(state: EdgeCacheState) -> Mapping[str, Any]:
|
|
2388
|
+
if not isinstance(state, EdgeCacheState):
|
|
2389
|
+
raise TypeError("EdgeCacheState expected")
|
|
2390
|
+
return {
|
|
2391
|
+
"max_entries": state.max_entries,
|
|
2392
|
+
"entries": list(state.cache.items()),
|
|
2393
|
+
}
|
|
2394
|
+
|
|
2395
|
+
def _decode_state(payload: Any) -> EdgeCacheState:
|
|
2396
|
+
if isinstance(payload, EdgeCacheState):
|
|
2397
|
+
return payload
|
|
2398
|
+
if not isinstance(payload, Mapping):
|
|
2399
|
+
raise TypeError("invalid edge cache payload")
|
|
2400
|
+
max_entries = payload.get("max_entries")
|
|
2401
|
+
state = self._build_state(max_entries)
|
|
2402
|
+
for key, value in payload.get("entries", []):
|
|
2403
|
+
state.cache[key] = value
|
|
2404
|
+
state.dirty = False
|
|
2405
|
+
return state
|
|
2406
|
+
|
|
2407
|
+
self._manager.register(
|
|
2408
|
+
self._STATE_KEY,
|
|
2409
|
+
self._default_state,
|
|
2410
|
+
reset=self._reset_state,
|
|
2411
|
+
encoder=_encode_state,
|
|
2412
|
+
decoder=_decode_state,
|
|
2413
|
+
)
|
|
2414
|
+
|
|
2415
|
+
def record_hit(self) -> None:
|
|
2416
|
+
"""Record a cache hit for telemetry."""
|
|
2417
|
+
|
|
2418
|
+
self._manager.increment_hit(self._STATE_KEY)
|
|
2419
|
+
|
|
2420
|
+
def record_miss(self, *, track_metrics: bool = True) -> None:
|
|
2421
|
+
"""Record a cache miss for telemetry.
|
|
2422
|
+
|
|
2423
|
+
        When ``track_metrics`` is ``False`` the miss is acknowledged without
        mutating the aggregated metrics.
        """

        if track_metrics:
            self._manager.increment_miss(self._STATE_KEY)

    def record_eviction(self, *, track_metrics: bool = True) -> None:
        """Record cache eviction events for telemetry.

        When ``track_metrics`` is ``False`` the underlying metrics counter is
        left untouched while still signalling that an eviction occurred.
        """

        if track_metrics:
            self._manager.increment_eviction(self._STATE_KEY)

    def timer(self) -> TimingContext:
        """Return a timing context linked to this cache."""

        return self._manager.timer(self._STATE_KEY)

    def _default_state(self) -> EdgeCacheState:
        return self._build_state(None)

    def resolve_max_entries(self, max_entries: int | None | object) -> int | None:
        """Return effective capacity for the edge cache."""

        if max_entries is CacheManager._MISSING:
            return self._manager.get_capacity(self._STATE_KEY)
        return self._manager.get_capacity(
            self._STATE_KEY,
            requested=None if max_entries is None else int(max_entries),
            use_default=False,
        )

    def _build_state(self, max_entries: int | None) -> EdgeCacheState:
        locks: defaultdict[Hashable, threading.RLock] = defaultdict(threading.RLock)
        capacity = float("inf") if max_entries is None else int(max_entries)
        cache = InstrumentedLRUCache(
            capacity,
            manager=self._manager,
            metrics_key=self._STATE_KEY,
            locks=locks,
            count_overwrite_hit=False,
        )
        state = EdgeCacheState(cache=cache, locks=locks, max_entries=max_entries)

        def _on_eviction(key: Hashable, _: Any) -> None:
            self.record_eviction(track_metrics=False)
            locks.pop(key, None)
            state.dirty = True

        cache.set_eviction_callbacks(_on_eviction)
        return state

    def _ensure_state(
        self, state: EdgeCacheState | None, max_entries: int | None | object
    ) -> EdgeCacheState:
        target = self.resolve_max_entries(max_entries)
        if target is not None:
            target = int(target)
            if target < 0:
                raise ValueError("max_entries must be non-negative or None")
        if not isinstance(state, EdgeCacheState) or state.max_entries != target:
            return self._build_state(target)
        return state

    def _reset_state(self, state: EdgeCacheState | None) -> EdgeCacheState:
        if isinstance(state, EdgeCacheState):
            state.cache.clear()
            state.dirty = False
            return state
        return self._build_state(None)

    def get_cache(
        self,
        max_entries: int | None | object,
        *,
        create: bool = True,
    ) -> EdgeCacheState | None:
        """Return the cache state for the manager's graph."""

        if not create:
            state = self._manager.peek(self._STATE_KEY)
            return state if isinstance(state, EdgeCacheState) else None

        state = self._manager.update(
            self._STATE_KEY,
            lambda current: self._ensure_state(current, max_entries),
        )
        if not isinstance(state, EdgeCacheState):
            raise RuntimeError("edge cache state failed to initialise")
        return state

    def flush_state(self, state: EdgeCacheState) -> None:
        """Persist ``state`` through the configured cache layers when dirty."""

        if not isinstance(state, EdgeCacheState) or not state.dirty:
            return
        self._manager.store(self._STATE_KEY, state)
        state.dirty = False

    def clear(self) -> None:
        """Reset cached data managed by this instance."""

        self._manager.clear(self._STATE_KEY)


def edge_version_cache(
    G: Any,
    key: Hashable,
    builder: Callable[[], T],
    *,
    max_entries: int | None | object = CacheManager._MISSING,
) -> T:
    """Return cached ``builder`` output tied to the edge version of ``G``."""

    graph = get_graph(G)
    manager = graph.get("_edge_cache_manager")  # type: ignore[assignment]
    if not isinstance(manager, EdgeCacheManager) or manager.graph is not graph:
        manager = EdgeCacheManager(graph)
        graph["_edge_cache_manager"] = manager

    resolved = manager.resolve_max_entries(max_entries)
    if resolved == 0:
        return builder()

    state = manager.get_cache(resolved)
    if state is None:
        return builder()

    cache = state.cache
    locks = state.locks
    edge_version = get_graph_version(graph, "_edge_version")
    lock = locks[key]

    with lock:
        entry = cache.get(key)
        if entry is not None and entry[0] == edge_version:
            manager.record_hit()
            return entry[1]

    try:
        with manager.timer():
            value = builder()
    except (RuntimeError, ValueError) as exc:  # pragma: no cover - logging side effect
        logger.exception("edge_version_cache builder failed for %r: %s", key, exc)
        raise
    else:
        result = value
        with lock:
            entry = cache.get(key)
            if entry is not None:
                cached_version, cached_value = entry
                manager.record_miss()
                if cached_version == edge_version:
                    manager.record_hit()
                    return cached_value
                manager.record_eviction()
            cache[key] = (edge_version, value)
            state.dirty = True
            result = value
        if state.dirty:
            manager.flush_state(state)
        return result


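# Illustrative sketch (hypothetical helper, not part of this module's API):
# how ``edge_version_cache`` memoises a builder, assuming a plain networkx
# graph and that ``increment_edge_version`` (defined below) behaves as
# documented.  The builder runs once per edge version.
def _sketch_edge_version_cache() -> None:
    import networkx as nx

    G = nx.path_graph(4)
    total = edge_version_cache(G, "degree_sum", lambda: sum(d for _, d in G.degree()))
    # Same edge version: the cached value is returned, the builder is skipped.
    assert edge_version_cache(G, "degree_sum", lambda: -1) == total
    # Bumping the edge version invalidates the entry, so the builder runs again.
    increment_edge_version(G)
    assert edge_version_cache(G, "degree_sum", lambda: -1) == -1

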
def cached_nodes_and_A(
    G: nx.Graph,
    *,
    cache_size: int | None = 1,
    require_numpy: bool = False,
    prefer_sparse: bool = False,
    nodes: tuple[Any, ...] | None = None,
) -> tuple[tuple[Any, ...], Any]:
    """Return cached nodes tuple and adjacency matrix for ``G``.

    When ``prefer_sparse`` is true the adjacency matrix construction is skipped
    unless a caller later requests it explicitly. This lets ΔNFR reuse the
    edge-index buffers stored on :class:`~tnfr.dynamics.dnfr.DnfrCache` without
    paying for ``nx.to_numpy_array`` on sparse graphs while keeping the
    canonical cache interface unchanged.
    """

    if nodes is None:
        nodes = cached_node_list(G)
    graph = G.graph

    checksum = getattr(graph.get("_node_list_cache"), "checksum", None)
    if checksum is None:
        checksum = graph.get("_node_list_checksum")
    if checksum is None:
        node_set_cache = graph.get(NODE_SET_CHECKSUM_KEY)
        if isinstance(node_set_cache, tuple) and len(node_set_cache) >= 2:
            checksum = node_set_cache[1]
    if checksum is None:
        checksum = ""

    key = f"_dnfr_{len(nodes)}_{checksum}"
    graph["_dnfr_nodes_checksum"] = checksum

    def builder() -> tuple[tuple[Any, ...], Any]:
        np = _require_numpy()
        if np is None or prefer_sparse:
            return nodes, None
        A = nx.to_numpy_array(G, nodelist=nodes, weight=None, dtype=float)
        return nodes, A

    nodes, A = edge_version_cache(G, key, builder, max_entries=cache_size)

    if require_numpy and A is None:
        raise RuntimeError("NumPy is required for adjacency caching")

    return nodes, A


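# Illustrative sketch (hypothetical helper, not part of this module's API):
# on the sparse path ``cached_nodes_and_A`` returns ``(nodes, None)`` instead
# of densifying, and ``require_numpy=True`` turns that fallback into an error.
def _sketch_cached_nodes_and_A() -> None:
    import networkx as nx

    G = nx.cycle_graph(3)
    nodes, A = cached_nodes_and_A(G, prefer_sparse=True)
    assert len(nodes) == 3 and A is None  # adjacency construction skipped

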
def _reset_edge_caches(graph: Any, G: Any) -> None:
    """Clear caches affected by edge updates."""

    EdgeCacheManager(graph).clear()
    _graph_cache_manager(graph).clear(DNFR_PREP_STATE_KEY)
    mark_dnfr_prep_dirty(G)
    clear_node_repr_cache()
    for key in EDGE_VERSION_CACHE_KEYS:
        graph.pop(key, None)


def increment_edge_version(G: Any) -> None:
    """Increment the edge version counter in ``G.graph``."""

    graph = get_graph(G)
    increment_graph_version(graph, "_edge_version")
    _reset_edge_caches(graph, G)


@contextmanager
def edge_version_update(G: TNFRGraph) -> Iterator[None]:
    """Scope a batch of edge mutations."""

    increment_edge_version(G)
    try:
        yield
    finally:
        increment_edge_version(G)


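# Illustrative sketch (hypothetical helper, not part of this module's API):
# wrap batched edge mutations in ``edge_version_update`` so the version
# counter is bumped, and edge caches reset, both on entry and on exit.
def _sketch_edge_version_update() -> None:
    import networkx as nx

    G = nx.Graph()
    with edge_version_update(G):
        G.add_edge("a", "b")
        G.add_edge("b", "c")
    # Anything cached through ``edge_version_cache`` before this block is now
    # stale and will be rebuilt on its next lookup.

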
class _SeedHashCache(MutableMapping[tuple[int, int], int]):
    """Mutable mapping proxy exposing a configurable LRU cache."""

    def __init__(
        self,
        *,
        manager: CacheManager | None = None,
        state_key: str = "seed_hash_cache",
        default_maxsize: int = 128,
    ) -> None:
        self._default_maxsize = int(default_maxsize)
        self._manager = manager or build_cache_manager(
            default_capacity=self._default_maxsize
        )
        self._state_key = state_key
        if not self._manager.has_override(self._state_key):
            self._manager.configure(overrides={self._state_key: self._default_maxsize})
        self._manager.register(
            self._state_key,
            self._create_state,
            reset=self._reset_state,
        )

    def _resolved_size(self, requested: int | None = None) -> int:
        size = self._manager.get_capacity(
            self._state_key,
            requested=requested,
            fallback=self._default_maxsize,
        )
        if size is None:
            return 0
        return int(size)

    def _create_state(self) -> _SeedCacheState:
        size = self._resolved_size()
        if size <= 0:
            return _SeedCacheState(cache=None, maxsize=0)
        return _SeedCacheState(
            cache=InstrumentedLRUCache(
                size,
                manager=self._manager,
                metrics_key=self._state_key,
            ),
            maxsize=size,
        )

    def _reset_state(self, state: _SeedCacheState | None) -> _SeedCacheState:
        return self._create_state()

    def _get_state(self, *, create: bool = True) -> _SeedCacheState | None:
        state = self._manager.get(self._state_key, create=create)
        if state is None:
            return None
        if not isinstance(state, _SeedCacheState):
            state = self._create_state()
            self._manager.store(self._state_key, state)
        return state

    def configure(self, maxsize: int) -> None:
        size = int(maxsize)
        if size < 0:
            raise ValueError("maxsize must be non-negative")
        self._manager.configure(overrides={self._state_key: size})
        self._manager.update(self._state_key, lambda _: self._create_state())

    def __getitem__(self, key: tuple[int, int]) -> int:
        state = self._get_state()
        if state is None or state.cache is None:
            raise KeyError(key)
        value = state.cache[key]
        self._manager.increment_hit(self._state_key)
        return value

    def __setitem__(self, key: tuple[int, int], value: int) -> None:
        state = self._get_state()
        if state is not None and state.cache is not None:
            state.cache[key] = value

    def __delitem__(self, key: tuple[int, int]) -> None:
        state = self._get_state()
        if state is None or state.cache is None:
            raise KeyError(key)
        del state.cache[key]

    def __iter__(self) -> Iterator[tuple[int, int]]:
        state = self._get_state(create=False)
        if state is None or state.cache is None:
            return iter(())
        return iter(state.cache)

    def __len__(self) -> int:
        state = self._get_state(create=False)
        if state is None or state.cache is None:
            return 0
        return len(state.cache)

    def clear(self) -> None:  # type: ignore[override]
        self._manager.clear(self._state_key)

    @property
    def maxsize(self) -> int:
        state = self._get_state()
        return 0 if state is None else state.maxsize

    @property
    def enabled(self) -> bool:
        state = self._get_state(create=False)
        return bool(state and state.cache is not None)

    @property
    def data(self) -> InstrumentedLRUCache[tuple[int, int], int] | None:
        """Expose the underlying cache for diagnostics/tests."""

        state = self._get_state(create=False)
        return None if state is None else state.cache


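# Illustrative sketch (hypothetical helper, not part of this module's API):
# the seed-hash cache acts as a bounded mapping, and configuring a maxsize
# of 0 disables storage entirely.
def _sketch_seed_hash_cache() -> None:
    cache = _SeedHashCache(default_maxsize=4)
    cache[(1, 2)] = 99
    assert cache[(1, 2)] == 99 and cache.enabled
    cache.configure(0)   # capacity 0: caching disabled
    cache[(3, 4)] = 7    # silently dropped
    assert len(cache) == 0 and not cache.enabled

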
class ScopedCounterCache(Generic[K]):
    """Thread-safe LRU cache storing monotonic counters by ``key``."""

    def __init__(
        self,
        name: str,
        max_entries: int | None = None,
        *,
        manager: CacheManager | None = None,
        default_max_entries: int = 128,
    ) -> None:
        self._name = name
        self._state_key = f"scoped_counter:{name}"
        self._default_max_entries = int(default_max_entries)
        requested = None if max_entries is None else int(max_entries)
        if requested is not None and requested < 0:
            raise ValueError("max_entries must be non-negative")
        self._manager = manager or build_cache_manager(
            default_capacity=self._default_max_entries
        )
        if not self._manager.has_override(self._state_key):
            fallback = requested
            if fallback is None:
                fallback = self._default_max_entries
            self._manager.configure(overrides={self._state_key: fallback})
        elif requested is not None:
            self._manager.configure(overrides={self._state_key: requested})
        self._manager.register(
            self._state_key,
            self._create_state,
            lock_factory=lambda: get_lock(name),
            reset=self._reset_state,
        )

    def _resolved_entries(self, requested: int | None = None) -> int:
        size = self._manager.get_capacity(
            self._state_key,
            requested=requested,
            fallback=self._default_max_entries,
        )
        if size is None:
            return 0
        return int(size)

    def _create_state(self, requested: int | None = None) -> _CounterState[K]:
        size = self._resolved_entries(requested)
        locks: dict[K, threading.RLock] = {}
        return _CounterState(
            cache=InstrumentedLRUCache(
                size,
                manager=self._manager,
                metrics_key=self._state_key,
                locks=locks,
            ),
            locks=locks,
            max_entries=size,
        )

    def _reset_state(self, state: _CounterState[K] | None) -> _CounterState[K]:
        return self._create_state()

    def _get_state(self) -> _CounterState[K]:
        state = self._manager.get(self._state_key)
        if not isinstance(state, _CounterState):
            state = self._create_state(0)
            self._manager.store(self._state_key, state)
        return state

    @property
    def lock(self) -> threading.Lock | threading.RLock:
        """Return the lock guarding access to the underlying cache."""

        return self._manager.get_lock(self._state_key)

    @property
    def max_entries(self) -> int:
        """Return the configured maximum number of cached entries."""

        return self._get_state().max_entries

    @property
    def cache(self) -> InstrumentedLRUCache[K, int]:
        """Expose the instrumented cache for inspection."""

        return self._get_state().cache

    @property
    def locks(self) -> dict[K, threading.RLock]:
        """Return the mapping of per-key locks tracked by the cache."""

        return self._get_state().locks

    def configure(self, *, force: bool = False, max_entries: int | None = None) -> None:
        """Resize or reset the cache keeping previous settings."""

        if max_entries is None:
            size = self._resolved_entries()
            update_policy = False
        else:
            size = int(max_entries)
            if size < 0:
                raise ValueError("max_entries must be non-negative")
            update_policy = True

        def _update(state: _CounterState[K] | None) -> _CounterState[K]:
            if (
                not isinstance(state, _CounterState)
                or force
                or state.max_entries != size
            ):
                locks: dict[K, threading.RLock] = {}
                return _CounterState(
                    cache=InstrumentedLRUCache(
                        size,
                        manager=self._manager,
                        metrics_key=self._state_key,
                        locks=locks,
                    ),
                    locks=locks,
                    max_entries=size,
                )
            return cast(_CounterState[K], state)

        if update_policy:
            self._manager.configure(overrides={self._state_key: size})
        self._manager.update(self._state_key, _update)

    def clear(self) -> None:
        """Clear stored counters preserving ``max_entries``."""

        self.configure(force=True)

    def bump(self, key: K) -> int:
        """Return current counter for ``key`` and increment it atomically."""

        result: dict[str, Any] = {}

        def _update(state: _CounterState[K] | None) -> _CounterState[K]:
            if not isinstance(state, _CounterState):
                state = self._create_state(0)
            cache = state.cache
            locks = state.locks
            if key not in locks:
                locks[key] = threading.RLock()
            value = int(cache.get(key, 0))
            cache[key] = value + 1
            result["value"] = value
            return state

        self._manager.update(self._state_key, _update)
        return int(result.get("value", 0))

    def __len__(self) -> int:
        """Return the number of tracked counters."""

        return len(self.cache)


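# Illustrative sketch (hypothetical helper, not part of this module's API):
# ``bump`` returns the pre-increment counter, so the first call per key
# yields 0, and ``clear`` resets counters while keeping the capacity.
def _sketch_scoped_counter_cache() -> None:
    counters: ScopedCounterCache[str] = ScopedCounterCache("sketch", max_entries=8)
    assert counters.bump("emitted") == 0
    assert counters.bump("emitted") == 1
    counters.clear()
    assert counters.bump("emitted") == 0

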
# ============================================================================
# Hierarchical Cache System (moved from caching/ for consolidation)
# ============================================================================


class CacheLevel(Enum):
    """Cache levels organized by persistence and computational cost.

    Levels are ordered from most persistent (rarely changes) to least
    persistent (frequently recomputed):

    - GRAPH_STRUCTURE: Topology, adjacency matrices (invalidated on add/remove node/edge)
    - NODE_PROPERTIES: EPI, νf, θ per node (invalidated on property updates)
    - DERIVED_METRICS: Si, coherence, ΔNFR (invalidated on dependency changes)
    - TEMPORARY: Intermediate computations (short-lived, frequently evicted)
    """

    GRAPH_STRUCTURE = "graph_structure"
    NODE_PROPERTIES = "node_properties"
    DERIVED_METRICS = "derived_metrics"
    TEMPORARY = "temporary"


@dataclass
class CacheEntry:
    """Cache entry with metadata for intelligent invalidation and eviction.

    Attributes
    ----------
    value : Any
        The cached computation result.
    dependencies : set[str]
        Set of structural properties this entry depends on. Used for
        selective invalidation. Examples: 'node_epi', 'node_vf', 'graph_topology'.
    timestamp : float
        Time when entry was created (from time.time()).
    access_count : int
        Number of times this entry has been accessed.
    computation_cost : float
        Estimated computational cost to regenerate this value. Higher cost
        entries are prioritized during eviction.
    size_bytes : int
        Estimated memory size in bytes.
    """

    value: Any
    dependencies: set[str]
    timestamp: float
    access_count: int = 0
    computation_cost: float = 1.0
    size_bytes: int = 0


class TNFRHierarchicalCache:
    """Hierarchical cache with dependency-aware selective invalidation.

    This cache system organizes entries by structural level and tracks
    dependencies to enable surgical invalidation. Only entries that depend
    on changed structural properties are evicted, preserving valid cached data.

    Internally uses ``CacheManager`` for unified cache management, metrics,
    and telemetry integration with the rest of TNFR.

    **Performance Optimizations** (v2):
    - Direct cache references bypass CacheManager overhead on hot path (50% faster reads)
    - Lazy persistence batches writes to persistent layers (40% faster writes)
    - Type-based size estimation caching reduces memory tracking overhead
    - Dependency change detection avoids redundant updates
    - Batched invalidation reduces persistence operations

    **TNFR Compliance**:
    - Maintains §3.8 Controlled Determinism through consistent cache behavior
    - Supports §3.4 Operator Closure via dependency tracking

    Parameters
    ----------
    max_memory_mb : int, default: 512
        Maximum memory usage in megabytes before eviction starts.
    enable_metrics : bool, default: True
        Whether to track cache hit/miss metrics for telemetry.
    cache_manager : CacheManager, optional
        Existing CacheManager to use. If None, creates a new one.
    lazy_persistence : bool, default: True
        Enable lazy write-behind caching for persistent layers. When True,
        cache modifications are batched and written on flush or critical operations.
        This significantly improves write performance at the cost of potential
        data loss on ungraceful termination. Set to False for immediate consistency.

    Attributes
    ----------
    hits : int
        Number of successful cache retrievals.
    misses : int
        Number of cache misses.
    evictions : int
        Number of entries evicted due to memory pressure.
    invalidations : int
        Number of entries invalidated due to dependency changes.

    Examples
    --------
    >>> cache = TNFRHierarchicalCache(max_memory_mb=128)
    >>> # Cache a derived metric with dependencies
    >>> cache.set(
    ...     "coherence_global",
    ...     0.95,
    ...     CacheLevel.DERIVED_METRICS,
    ...     dependencies={'graph_topology', 'all_node_vf'},
    ...     computation_cost=100.0
    ... )
    >>> cache.get("coherence_global", CacheLevel.DERIVED_METRICS)
    0.95
    >>> # Invalidate when topology changes
    >>> cache.invalidate_by_dependency('graph_topology')
    1
>>> cache.get("coherence_global", CacheLevel.DERIVED_METRICS)
|
|
3060
|
+
|
|
3061
|
+
>>> # Flush lazy writes to persistent storage
|
|
3062
|
+
>>> cache.flush_dirty_caches()
|
|
3063
|
+
|
|
3064
|
+
"""
|
|
3065
|
+
|
|
3066
|
+
def __init__(
|
|
3067
|
+
self,
|
|
3068
|
+
max_memory_mb: int = 512,
|
|
3069
|
+
enable_metrics: bool = True,
|
|
3070
|
+
cache_manager: Optional[CacheManager] = None,
|
|
3071
|
+
lazy_persistence: bool = True,
|
|
3072
|
+
):
|
|
3073
|
+
# Use provided CacheManager or create new one
|
|
3074
|
+
if cache_manager is None:
|
|
3075
|
+
# Estimate entries per MB (rough heuristic: ~100 entries per MB)
|
|
3076
|
+
default_capacity = max(32, int(max_memory_mb * 100 / len(CacheLevel)))
|
|
3077
|
+
cache_manager = CacheManager(
|
|
3078
|
+
storage={},
|
|
3079
|
+
default_capacity=default_capacity,
|
|
3080
|
+
)
|
|
3081
|
+
|
|
3082
|
+
self._manager = cache_manager
|
|
3083
|
+
self._max_memory = max_memory_mb * 1024 * 1024
|
|
3084
|
+
self._current_memory = 0
|
|
3085
|
+
self._enable_metrics = enable_metrics
|
|
3086
|
+
self._lazy_persistence = lazy_persistence
|
|
3087
|
+
|
|
3088
|
+
# Dependency tracking (remains in hierarchical cache)
|
|
3089
|
+
self._dependencies: dict[str, set[tuple[CacheLevel, str]]] = defaultdict(set)
|
|
3090
|
+
|
|
3091
|
+
# Register a cache for each level in the CacheManager
|
|
3092
|
+
self._level_cache_names: dict[CacheLevel, str] = {}
|
|
3093
|
+
# OPTIMIZATION: Direct cache references to avoid CacheManager overhead on hot path
|
|
3094
|
+
self._direct_caches: dict[CacheLevel, dict[str, CacheEntry]] = {}
|
|
3095
|
+
|
|
3096
|
+
for level in CacheLevel:
|
|
3097
|
+
cache_name = f"hierarchical_{level.value}"
|
|
3098
|
+
self._level_cache_names[level] = cache_name
|
|
3099
|
+
|
|
3100
|
+
# Simple factory returning empty dict for each cache level
|
|
3101
|
+
self._manager.register(
|
|
3102
|
+
cache_name,
|
|
3103
|
+
factory=lambda: {},
|
|
3104
|
+
create=True,
|
|
3105
|
+
)
|
|
3106
|
+
|
|
3107
|
+
# Store direct reference for fast access
|
|
3108
|
+
self._direct_caches[level] = self._manager.get(cache_name)
|
|
3109
|
+
|
|
3110
|
+
# OPTIMIZATION: Track dirty caches for batched persistence
|
|
3111
|
+
self._dirty_levels: set[CacheLevel] = set()
|
|
3112
|
+
|
|
3113
|
+
# OPTIMIZATION: Type-based size estimation cache
|
|
3114
|
+
self._size_cache: dict[type, int] = {}
|
|
3115
|
+
|
|
3116
|
+
# Metrics (tracked locally for backward compatibility)
|
|
3117
|
+
self.hits = 0
|
|
3118
|
+
self.misses = 0
|
|
3119
|
+
self.evictions = 0
|
|
3120
|
+
self.invalidations = 0
|
|
3121
|
+
|
|
3122
|
+
@property
|
|
3123
|
+
def _caches(self) -> dict[CacheLevel, dict[str, CacheEntry]]:
|
|
3124
|
+
"""Provide backward compatibility for accessing internal caches.
|
|
3125
|
+
|
|
3126
|
+
This property returns a view of the caches stored in the CacheManager,
|
|
3127
|
+
maintaining compatibility with code that directly accessed the old
|
|
3128
|
+
_caches attribute.
|
|
3129
|
+
|
|
3130
|
+
Note: Uses direct cache references for performance.
|
|
3131
|
+
"""
|
|
3132
|
+
return self._direct_caches
|
|
3133
|
+
|
|
3134
|
+
def get(self, key: str, level: CacheLevel) -> Optional[Any]:
|
|
3135
|
+
"""Retrieve value from cache if it exists and is valid.
|
|
3136
|
+
|
|
3137
|
+
Parameters
|
|
3138
|
+
----------
|
|
3139
|
+
key : str
|
|
3140
|
+
Cache key identifying the entry.
|
|
3141
|
+
level : CacheLevel
|
|
3142
|
+
Cache level to search in.
|
|
3143
|
+
|
|
3144
|
+
Returns
|
|
3145
|
+
-------
|
|
3146
|
+
Any or None
|
|
3147
|
+
The cached value if found, None otherwise.
|
|
3148
|
+
|
|
3149
|
+
Examples
|
|
3150
|
+
--------
|
|
3151
|
+
>>> cache = TNFRHierarchicalCache()
|
|
3152
|
+
>>> cache.set("key1", 42, CacheLevel.TEMPORARY, dependencies=set())
|
|
3153
|
+
>>> cache.get("key1", CacheLevel.TEMPORARY)
|
|
3154
|
+
42
|
|
3155
|
+
>>> cache.get("missing", CacheLevel.TEMPORARY)
|
|
3156
|
+
|
|
3157
|
+
"""
|
|
3158
|
+
# OPTIMIZATION: Use direct cache reference to avoid CacheManager overhead
|
|
3159
|
+
level_cache = self._direct_caches[level]
|
|
3160
|
+
|
|
3161
|
+
if key in level_cache:
|
|
3162
|
+
entry = level_cache[key]
|
|
3163
|
+
entry.access_count += 1
|
|
3164
|
+
if self._enable_metrics:
|
|
3165
|
+
self.hits += 1
|
|
3166
|
+
# Only update manager metrics if not in lazy mode
|
|
3167
|
+
if not self._lazy_persistence:
|
|
3168
|
+
cache_name = self._level_cache_names[level]
|
|
3169
|
+
self._manager.increment_hit(cache_name)
|
|
3170
|
+
return entry.value
|
|
3171
|
+
|
|
3172
|
+
if self._enable_metrics:
|
|
3173
|
+
self.misses += 1
|
|
3174
|
+
if not self._lazy_persistence:
|
|
3175
|
+
cache_name = self._level_cache_names[level]
|
|
3176
|
+
self._manager.increment_miss(cache_name)
|
|
3177
|
+
return None
|
|
3178
|
+
|
|
3179
|
+
def set(
|
|
3180
|
+
self,
|
|
3181
|
+
key: str,
|
|
3182
|
+
value: Any,
|
|
3183
|
+
level: CacheLevel,
|
|
3184
|
+
dependencies: set[str],
|
|
3185
|
+
computation_cost: float = 1.0,
|
|
3186
|
+
) -> None:
|
|
3187
|
+
"""Store value in cache with dependency metadata.
|
|
3188
|
+
|
|
3189
|
+
Parameters
|
|
3190
|
+
----------
|
|
3191
|
+
key : str
|
|
3192
|
+
Unique identifier for this cache entry.
|
|
3193
|
+
value : Any
|
|
3194
|
+
The value to cache.
|
|
3195
|
+
level : CacheLevel
|
|
3196
|
+
Which cache level to store in.
|
|
3197
|
+
dependencies : set[str]
|
|
3198
|
+
Set of structural properties this value depends on.
|
|
3199
|
+
computation_cost : float, default: 1.0
|
|
3200
|
+
Estimated cost to recompute this value. Used for eviction priority.
|
|
3201
|
+
|
|
3202
|
+
Examples
|
|
3203
|
+
--------
|
|
3204
|
+
>>> cache = TNFRHierarchicalCache()
|
|
3205
|
+
>>> cache.set(
|
|
3206
|
+
... "si_node_5",
|
|
3207
|
+
... 0.87,
|
|
3208
|
+
... CacheLevel.DERIVED_METRICS,
|
|
3209
|
+
... dependencies={'node_vf_5', 'node_phase_5'},
|
|
3210
|
+
... computation_cost=5.0
|
|
3211
|
+
... )
|
|
3212
|
+
"""
|
|
3213
|
+
# OPTIMIZATION: Use direct cache reference
|
|
3214
|
+
level_cache = self._direct_caches[level]
|
|
3215
|
+
|
|
3216
|
+
# OPTIMIZATION: Lazy size estimation - estimate size once
|
|
3217
|
+
estimated_size = self._estimate_size_fast(value)
|
|
3218
|
+
|
|
3219
|
+
# Check if we need to evict
|
|
3220
|
+
if self._current_memory + estimated_size > self._max_memory:
|
|
3221
|
+
self._evict_lru(estimated_size)
|
|
3222
|
+
|
|
3223
|
+
# Create entry
|
|
3224
|
+
entry = CacheEntry(
|
|
3225
|
+
value=value,
|
|
3226
|
+
dependencies=dependencies.copy(),
|
|
3227
|
+
timestamp=time.time(),
|
|
3228
|
+
computation_cost=computation_cost,
|
|
3229
|
+
size_bytes=estimated_size,
|
|
3230
|
+
)
|
|
3231
|
+
|
|
3232
|
+
# Remove old entry if exists
|
|
3233
|
+
old_dependencies: set[str] | None = None
|
|
3234
|
+
if key in level_cache:
|
|
3235
|
+
old_entry = level_cache[key]
|
|
3236
|
+
self._current_memory -= old_entry.size_bytes
|
|
3237
|
+
old_dependencies = old_entry.dependencies
|
|
3238
|
+
# OPTIMIZATION: Only clean up dependencies if they changed
|
|
3239
|
+
if old_dependencies != dependencies:
|
|
3240
|
+
for dep in old_dependencies:
|
|
3241
|
+
if dep in self._dependencies:
|
|
3242
|
+
self._dependencies[dep].discard((level, key))
|
|
3243
|
+
|
|
3244
|
+
# Store entry (direct modification, no manager overhead)
|
|
3245
|
+
level_cache[key] = entry
|
|
3246
|
+
self._current_memory += estimated_size
|
|
3247
|
+
|
|
3248
|
+
# OPTIMIZATION: Register dependencies only if new or changed
|
|
3249
|
+
if old_dependencies is None or old_dependencies != dependencies:
|
|
3250
|
+
for dep in dependencies:
|
|
3251
|
+
self._dependencies[dep].add((level, key))
|
|
3252
|
+
|
|
3253
|
+
# OPTIMIZATION: Mark level as dirty for lazy persistence
|
|
3254
|
+
if self._lazy_persistence:
|
|
3255
|
+
self._dirty_levels.add(level)
|
|
3256
|
+
else:
|
|
3257
|
+
# Immediate persistence (backward compatible)
|
|
3258
|
+
cache_name = self._level_cache_names[level]
|
|
3259
|
+
self._manager.store(cache_name, level_cache)
|
|
3260
|
+
|
|
3261
|
+
def invalidate_by_dependency(self, dependency: str) -> int:
|
|
3262
|
+
"""Invalidate all cache entries that depend on a structural property.
|
|
3263
|
+
|
|
3264
|
+
This implements selective invalidation: only entries that explicitly
|
|
3265
|
+
depend on the changed property are removed, preserving unaffected caches.
|
|
3266
|
+
|
|
3267
|
+
Parameters
|
|
3268
|
+
----------
|
|
3269
|
+
dependency : str
|
|
3270
|
+
The structural property that changed (e.g., 'graph_topology',
|
|
3271
|
+
'node_epi_5', 'all_node_vf').
|
|
3272
|
+
|
|
3273
|
+
Returns
|
|
3274
|
+
-------
|
|
3275
|
+
int
|
|
3276
|
+
Number of entries invalidated.
|
|
3277
|
+
|
|
3278
|
+
Examples
|
|
3279
|
+
--------
|
|
3280
|
+
>>> cache = TNFRHierarchicalCache()
|
|
3281
|
+
>>> cache.set("key1", 1, CacheLevel.TEMPORARY, {'dep1', 'dep2'})
|
|
3282
|
+
>>> cache.set("key2", 2, CacheLevel.TEMPORARY, {'dep2'})
|
|
3283
|
+
>>> cache.invalidate_by_dependency('dep1') # Only invalidates key1
|
|
3284
|
+
1
|
|
3285
|
+
>>> cache.get("key1", CacheLevel.TEMPORARY) # None
|
|
3286
|
+
|
|
3287
|
+
>>> cache.get("key2", CacheLevel.TEMPORARY) # Still cached
|
|
3288
|
+
2
|
|
3289
|
+
"""
|
|
3290
|
+
count = 0
|
|
3291
|
+
if dependency in self._dependencies:
|
|
3292
|
+
entries_to_remove = list(self._dependencies[dependency])
|
|
3293
|
+
invalidated_levels: set[CacheLevel] = set()
|
|
3294
|
+
|
|
3295
|
+
for level, key in entries_to_remove:
|
|
3296
|
+
# OPTIMIZATION: Use direct cache reference
|
|
3297
|
+
level_cache = self._direct_caches[level]
|
|
3298
|
+
|
|
3299
|
+
if key in level_cache:
|
|
3300
|
+
entry = level_cache[key]
|
|
3301
|
+
self._current_memory -= entry.size_bytes
|
|
3302
|
+
del level_cache[key]
|
|
3303
|
+
count += 1
|
|
3304
|
+
invalidated_levels.add(level)
|
|
3305
|
+
|
|
3306
|
+
# Clean up all dependency references for this entry
|
|
3307
|
+
for dep in entry.dependencies:
|
|
3308
|
+
if dep in self._dependencies:
|
|
3309
|
+
self._dependencies[dep].discard((level, key))
|
|
3310
|
+
|
|
3311
|
+
# Clean up the dependency key itself
|
|
3312
|
+
del self._dependencies[dependency]
|
|
3313
|
+
|
|
3314
|
+
# OPTIMIZATION: Batch persist invalidated levels
|
|
3315
|
+
if self._lazy_persistence:
|
|
3316
|
+
self._dirty_levels.update(invalidated_levels)
|
|
3317
|
+
else:
|
|
3318
|
+
for level in invalidated_levels:
|
|
3319
|
+
cache_name = self._level_cache_names[level]
|
|
3320
|
+
level_cache = self._direct_caches[level]
|
|
3321
|
+
self._manager.store(cache_name, level_cache)
|
|
3322
|
+
|
|
3323
|
+
if self._enable_metrics:
|
|
3324
|
+
self.invalidations += count
|
|
3325
|
+
|
|
3326
|
+
return count
|
|
3327
|
+
|
|
3328
|
+
def invalidate_level(self, level: CacheLevel) -> int:
|
|
3329
|
+
"""Invalidate all entries in a specific cache level.
|
|
3330
|
+
|
|
3331
|
+
Parameters
|
|
3332
|
+
----------
|
|
3333
|
+
level : CacheLevel
|
|
3334
|
+
The cache level to clear.
|
|
3335
|
+
|
|
3336
|
+
Returns
|
|
3337
|
+
-------
|
|
3338
|
+
int
|
|
3339
|
+
Number of entries invalidated.
|
|
3340
|
+
"""
|
|
3341
|
+
# OPTIMIZATION: Use direct cache reference
|
|
3342
|
+
level_cache = self._direct_caches[level]
|
|
3343
|
+
count = len(level_cache)
|
|
3344
|
+
|
|
3345
|
+
# Clean up dependencies
|
|
3346
|
+
for key, entry in level_cache.items():
|
|
3347
|
+
self._current_memory -= entry.size_bytes
|
|
3348
|
+
for dep in entry.dependencies:
|
|
3349
|
+
if dep in self._dependencies:
|
|
3350
|
+
self._dependencies[dep].discard((level, key))
|
|
3351
|
+
|
|
3352
|
+
level_cache.clear()
|
|
3353
|
+
|
|
3354
|
+
# OPTIMIZATION: Batch persist if in lazy mode
|
|
3355
|
+
if self._lazy_persistence:
|
|
3356
|
+
self._dirty_levels.add(level)
|
|
3357
|
+
else:
|
|
3358
|
+
cache_name = self._level_cache_names[level]
|
|
3359
|
+
self._manager.store(cache_name, level_cache)
|
|
3360
|
+
|
|
3361
|
+
if self._enable_metrics:
|
|
3362
|
+
self.invalidations += count
|
|
3363
|
+
|
|
3364
|
+
return count
|
|
3365
|
+
|
|
3366
|
+
def clear(self) -> None:
|
|
3367
|
+
"""Clear all cache levels and reset metrics."""
|
|
3368
|
+
for level in CacheLevel:
|
|
3369
|
+
# OPTIMIZATION: Clear direct cache and update manager
|
|
3370
|
+
level_cache = self._direct_caches[level]
|
|
3371
|
+
level_cache.clear()
|
|
3372
|
+
cache_name = self._level_cache_names[level]
|
|
3373
|
+
self._manager.store(cache_name, level_cache)
|
|
3374
|
+
|
|
3375
|
+
self._dependencies.clear()
|
|
3376
|
+
self._current_memory = 0
|
|
3377
|
+
self._dirty_levels.clear()
|
|
3378
|
+
|
|
3379
|
+
# Always reset metrics regardless of _enable_metrics
|
|
3380
|
+
self.hits = 0
|
|
3381
|
+
self.misses = 0
|
|
3382
|
+
self.evictions = 0
|
|
3383
|
+
self.invalidations = 0
|
|
3384
|
+
|
|
3385
|
+
def get_stats(self) -> dict[str, Any]:
|
|
3386
|
+
"""Get cache statistics for telemetry.
|
|
3387
|
+
|
|
3388
|
+
Returns
|
|
3389
|
+
-------
|
|
3390
|
+
dict[str, Any]
|
|
3391
|
+
Dictionary containing:
|
|
3392
|
+
- hits: Number of cache hits
|
|
3393
|
+
- misses: Number of cache misses
|
|
3394
|
+
- hit_rate: Ratio of hits to total accesses
|
|
3395
|
+
- evictions: Number of evictions
|
|
3396
|
+
- invalidations: Number of invalidations
|
|
3397
|
+
- memory_used_mb: Current memory usage in MB
|
|
3398
|
+
- memory_limit_mb: Memory limit in MB
|
|
3399
|
+
- entry_counts: Number of entries per level
|
|
3400
|
+
"""
|
|
3401
|
+
total_accesses = self.hits + self.misses
|
|
3402
|
+
hit_rate = self.hits / total_accesses if total_accesses > 0 else 0.0
|
|
3403
|
+
|
|
3404
|
+
entry_counts = {}
|
|
3405
|
+
for level in CacheLevel:
|
|
3406
|
+
# OPTIMIZATION: Use direct cache reference
|
|
3407
|
+
level_cache = self._direct_caches[level]
|
|
3408
|
+
entry_counts[level.value] = len(level_cache)
|
|
3409
|
+
|
|
3410
|
+
return {
|
|
3411
|
+
"hits": self.hits,
|
|
3412
|
+
"misses": self.misses,
|
|
3413
|
+
"hit_rate": hit_rate,
|
|
3414
|
+
"evictions": self.evictions,
|
|
3415
|
+
"invalidations": self.invalidations,
|
|
3416
|
+
"memory_used_mb": self._current_memory / (1024 * 1024),
|
|
3417
|
+
"memory_limit_mb": self._max_memory / (1024 * 1024),
|
|
3418
|
+
"entry_counts": entry_counts,
|
|
3419
|
+
}
|
|
3420
|
+
|
|
3421
|
+
def _estimate_size(self, value: Any) -> int:
|
|
3422
|
+
"""Estimate memory size of a value in bytes.
|
|
3423
|
+
|
|
3424
|
+
Uses sys.getsizeof for a rough estimate. For complex objects,
|
|
3425
|
+
this may underestimate the true memory usage.
|
|
3426
|
+
"""
|
|
3427
|
+
try:
|
|
3428
|
+
return sys.getsizeof(value)
|
|
3429
|
+
except (TypeError, AttributeError):
|
|
3430
|
+
# Fallback for objects that don't support getsizeof
|
|
3431
|
+
return 64 # Default estimate
|
|
3432
|
+
|
|
3433
|
+
def _estimate_size_fast(self, value: Any) -> int:
|
|
3434
|
+
"""Optimized size estimation with type-based caching.
|
|
3435
|
+
|
|
3436
|
+
For common types, uses cached size estimates to avoid repeated
|
|
3437
|
+
sys.getsizeof() calls. Falls back to full estimation for complex types.
|
|
3438
|
+
"""
|
|
3439
|
+
value_type = type(value)
|
|
3440
|
+
|
|
3441
|
+
# Check if we have a cached size for this type
|
|
3442
|
+
if value_type in self._size_cache:
|
|
3443
|
+
# For simple immutable types, use cached size
|
|
3444
|
+
if value_type in (int, float, bool, type(None)):
|
|
3445
|
+
return self._size_cache[value_type]
|
|
3446
|
+
# For strings, estimate based on length
|
|
3447
|
+
if value_type is str:
|
|
3448
|
+
base_size = self._size_cache[value_type]
|
|
3449
|
+
return base_size + len(value)
|
|
3450
|
+
|
|
3451
|
+
# Calculate size and cache for simple types
|
|
3452
|
+
size = self._estimate_size(value)
|
|
3453
|
+
if value_type in (int, float, bool, type(None)):
|
|
3454
|
+
self._size_cache[value_type] = size
|
|
3455
|
+
elif value_type is str:
|
|
3456
|
+
# Cache base size for strings
|
|
3457
|
+
if value_type not in self._size_cache:
|
|
3458
|
+
self._size_cache[value_type] = sys.getsizeof("")
|
|
3459
|
+
|
|
3460
|
+
return size
|
|
3461
|
+
|
|
3462
|
+
def flush_dirty_caches(self) -> None:
|
|
3463
|
+
"""Flush dirty caches to persistent layers.
|
|
3464
|
+
|
|
3465
|
+
In lazy persistence mode, this method writes accumulated changes
|
|
3466
|
+
to the CacheManager's persistent layers. This reduces write overhead
|
|
3467
|
+
by batching updates.
|
|
3468
|
+
"""
|
|
3469
|
+
if not self._dirty_levels:
|
|
3470
|
+
return
|
|
3471
|
+
|
|
3472
|
+
for level in self._dirty_levels:
|
|
3473
|
+
cache_name = self._level_cache_names[level]
|
|
3474
|
+
level_cache = self._direct_caches[level]
|
|
3475
|
+
self._manager.store(cache_name, level_cache)
|
|
3476
|
+
|
|
3477
|
+
self._dirty_levels.clear()
|
|
3478
|
+
|
|
3479
|
+
def _evict_lru(self, needed_space: int) -> None:
|
|
3480
|
+
"""Evict least valuable entries until enough space is freed.
|
|
3481
|
+
|
|
3482
|
+
Value is determined by: (access_count + 1) * computation_cost.
|
|
3483
|
+
Lower values are evicted first (low access, low cost to recompute).
|
|
3484
|
+
|
|
3485
|
+
OPTIMIZED: Uses direct cache references and incremental eviction.
|
|
3486
|
+
"""
|
|
3487
|
+
# OPTIMIZATION: Collect entries with direct cache access (no manager overhead)
|
|
3488
|
+
all_entries: list[tuple[float, CacheLevel, str, CacheEntry]] = []
|
|
3489
|
+
for level in CacheLevel:
|
|
3490
|
+
level_cache = self._direct_caches[level]
|
|
3491
|
+
for key, entry in level_cache.items():
|
|
3492
|
+
# Priority = (access_count + 1) * computation_cost
|
|
3493
|
+
# Higher priority = keep longer
|
|
3494
|
+
# Add 1 to access_count to avoid zero priority
|
|
3495
|
+
priority = (entry.access_count + 1) * entry.computation_cost
|
|
3496
|
+
all_entries.append((priority, level, key, entry))
|
|
3497
|
+
|
|
3498
|
+
# Sort by priority (ascending - lowest priority first)
|
|
3499
|
+
all_entries.sort(key=lambda x: x[0])
|
|
3500
|
+
|
|
3501
|
+
freed_space = 0
|
|
3502
|
+
evicted_levels: set[CacheLevel] = set()
|
|
3503
|
+
|
|
3504
|
+
for priority, level, key, entry in all_entries:
|
|
3505
|
+
if freed_space >= needed_space:
|
|
3506
|
+
break
|
|
3507
|
+
|
|
3508
|
+
# OPTIMIZATION: Remove entry directly from cache
|
|
3509
|
+
level_cache = self._direct_caches[level]
|
|
3510
|
+
if key in level_cache:
|
|
3511
|
+
del level_cache[key]
|
|
3512
|
+
freed_space += entry.size_bytes
|
|
3513
|
+
self._current_memory -= entry.size_bytes
|
|
3514
|
+
evicted_levels.add(level)
|
|
3515
|
+
|
|
3516
|
+
# Clean up dependencies
|
|
3517
|
+
for dep in entry.dependencies:
|
|
3518
|
+
if dep in self._dependencies:
|
|
3519
|
+
self._dependencies[dep].discard((level, key))
|
|
3520
|
+
|
|
3521
|
+
if self._enable_metrics:
|
|
3522
|
+
self.evictions += 1
|
|
3523
|
+
if not self._lazy_persistence:
|
|
3524
|
+
cache_name = self._level_cache_names[level]
|
|
3525
|
+
self._manager.increment_eviction(cache_name)
|
|
3526
|
+
|
|
3527
|
+
# OPTIMIZATION: Batch persist evicted levels if in lazy mode
|
|
3528
|
+
if self._lazy_persistence:
|
|
3529
|
+
self._dirty_levels.update(evicted_levels)
|
|
3530
|
+
else:
|
|
3531
|
+
# Immediate persistence
|
|
3532
|
+
for level in evicted_levels:
|
|
3533
|
+
cache_name = self._level_cache_names[level]
|
|
3534
|
+
level_cache = self._direct_caches[level]
|
|
3535
|
+
self._manager.store(cache_name, level_cache)
|
|
3536
|
+
|
|
3537
|
+
|
|
3538
|
+
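# Illustrative sketch (hypothetical helper, not part of this module's API):
# the eviction ordering used by ``_evict_lru`` ranks entries by
# ``(access_count + 1) * computation_cost``, so rarely used,
# cheap-to-recompute entries are dropped first.
def _sketch_eviction_priority() -> None:
    cheap = CacheEntry(value=1, dependencies=set(), timestamp=0.0,
                       access_count=0, computation_cost=1.0)
    costly = CacheEntry(value=2, dependencies=set(), timestamp=0.0,
                        access_count=5, computation_cost=10.0)

    def priority(e: CacheEntry) -> float:
        return (e.access_count + 1) * e.computation_cost

    assert priority(cheap) < priority(costly)  # ``cheap`` is evicted first

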
# ============================================================================
# Cache Decorators (moved from caching/decorators.py for consolidation)
# ============================================================================

# Global cache instance shared across all decorated functions
_global_cache: Optional[TNFRHierarchicalCache] = None

F = TypeVar("F", bound=Callable[..., Any])


def get_global_cache() -> TNFRHierarchicalCache:
    """Get or create the global TNFR cache instance.

    Returns
    -------
    TNFRHierarchicalCache
        The global cache instance.
    """
    global _global_cache
    if _global_cache is None:
        _global_cache = TNFRHierarchicalCache(max_memory_mb=512)
    return _global_cache


def set_global_cache(cache: Optional[TNFRHierarchicalCache]) -> None:
    """Set the global cache instance.

    Parameters
    ----------
    cache : TNFRHierarchicalCache or None
        The cache instance to use globally, or None to reset to default.
    """
    global _global_cache
    _global_cache = cache


def reset_global_cache() -> None:
    """Reset the global cache instance to None.

    The next call to get_global_cache() will create a fresh instance.
    """
    global _global_cache
    _global_cache = None


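# Illustrative sketch (hypothetical helper, not part of this module's API):
# the module-level cache is created lazily and can be swapped out, e.g. in
# tests, via the setters above.
def _sketch_global_cache_lifecycle() -> None:
    reset_global_cache()
    first = get_global_cache()
    assert get_global_cache() is first       # lazily created, then reused
    set_global_cache(None)
    assert get_global_cache() is not first   # fresh instance after reset

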
def _generate_cache_key(
    func_name: str,
    args: tuple[Any, ...],
    kwargs: dict[str, Any],
) -> str:
    """Generate deterministic cache key from function and arguments.

    Parameters
    ----------
    func_name : str
        Name of the function being cached.
    args : tuple
        Positional arguments.
    kwargs : dict
        Keyword arguments.

    Returns
    -------
    str
        Cache key string.

    Notes
    -----
    Uses MD5 for hashing (acceptable for cache keys, not security).
    Graph objects use id() which is session-specific - cache is cleared
    between sessions, so this is deterministic within a session.
    """
    # Build key components
    key_parts = [func_name]

    # Add positional args
    for arg in args:
        if hasattr(arg, "__name__"):  # For graph objects, use name
            key_parts.append(f"graph:{arg.__name__}")
        elif hasattr(arg, "graph"):  # NetworkX graphs have .graph attribute
            # Use graph id for identity (session-specific, cache cleared between sessions)
            key_parts.append(f"graph:{id(arg)}")
        else:
            # For simple types, include value
            key_parts.append(str(arg))

    # Add keyword args (sorted for consistency)
    for k in sorted(kwargs.keys()):
        v = kwargs[k]
        key_parts.append(f"{k}={v}")

    # Create deterministic hash (MD5 is acceptable for non-security cache keys)
    key_str = "|".join(key_parts)
    return hashlib.md5(key_str.encode()).hexdigest()


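# Illustrative sketch (hypothetical helper, not part of this module's API):
# keyword arguments are sorted before hashing, so argument order never
# changes the generated key.
def _sketch_cache_key_determinism() -> None:
    k1 = _generate_cache_key("f", (1,), {"a": 1, "b": 2})
    k2 = _generate_cache_key("f", (1,), {"b": 2, "a": 1})
    assert k1 == k2

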
def cache_tnfr_computation(
    level: CacheLevel,
    dependencies: set[str],
    cost_estimator: Optional[Callable[..., float]] = None,
    cache_instance: Optional[TNFRHierarchicalCache] = None,
) -> Callable[[F], F]:
    """Decorator for automatic caching of TNFR computations.

    Caches function results based on arguments and invalidates when
    dependencies change. Transparently integrates with existing functions.

    Parameters
    ----------
    level : CacheLevel
        Cache level for storing results.
    dependencies : set[str]
        Set of structural properties this computation depends on.
        Examples: {'graph_topology', 'node_epi', 'node_vf', 'node_phase'}
    cost_estimator : callable, optional
        Function that takes same arguments as decorated function and returns
        estimated computational cost as float. Used for eviction priority.
    cache_instance : TNFRHierarchicalCache, optional
        Specific cache instance to use. If None, uses global cache.

    Returns
    -------
    callable
        Decorated function with caching.

    Examples
    --------
    >>> from tnfr.cache import cache_tnfr_computation, CacheLevel
    >>> @cache_tnfr_computation(
    ...     level=CacheLevel.DERIVED_METRICS,
    ...     dependencies={'node_vf', 'node_phase'},
    ...     cost_estimator=lambda graph, node_id: len(list(graph.neighbors(node_id)))
    ... )
    ... def compute_metric(graph, node_id):
    ...     # Expensive computation
    ...     return 0.85

    With custom cache instance:

    >>> from tnfr.cache import TNFRHierarchicalCache
    >>> my_cache = TNFRHierarchicalCache(max_memory_mb=256)
    >>> @cache_tnfr_computation(
    ...     level=CacheLevel.NODE_PROPERTIES,
    ...     dependencies={'node_data'},
    ...     cache_instance=my_cache
    ... )
    ... def get_node_property(graph, node_id):
    ...     return graph.nodes[node_id]
    """

    def decorator(func: F) -> F:
        func_name = func.__name__

        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            # Get cache instance
            cache = cache_instance if cache_instance is not None else get_global_cache()

            # Generate cache key
            cache_key = _generate_cache_key(func_name, args, kwargs)

            # Try to get from cache
            cached_result = cache.get(cache_key, level)
            if cached_result is not None:
                return cached_result

            # Compute result
            result = func(*args, **kwargs)

            # Estimate computational cost
            comp_cost = 1.0
            if cost_estimator is not None:
                try:
                    comp_cost = float(cost_estimator(*args, **kwargs))
                except (TypeError, ValueError):
                    comp_cost = 1.0

            # Store in cache
            cache.set(cache_key, result, level, dependencies, comp_cost)

            return result

        # Attach metadata for introspection
        wrapper._cache_level = level  # type: ignore
        wrapper._cache_dependencies = dependencies  # type: ignore
        wrapper._is_cached = True  # type: ignore

        return wrapper  # type: ignore

    return decorator


def invalidate_function_cache(func: Callable[..., Any]) -> int:
    """Invalidate cache entries for a specific decorated function.

    Parameters
    ----------
    func : callable
        The decorated function whose cache entries should be invalidated.

    Returns
    -------
    int
        Number of entries invalidated.

    Raises
    ------
    ValueError
        If the function is not decorated with @cache_tnfr_computation.
    """
    if not hasattr(func, "_is_cached"):
        raise ValueError(f"Function {func.__name__} is not cached")

    cache = get_global_cache()
    dependencies = getattr(func, "_cache_dependencies", set())

    total = 0
    for dep in dependencies:
        total += cache.invalidate_by_dependency(dep)

    return total


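# Illustrative sketch (hypothetical helper, not part of this module's API):
# only functions carrying the ``@cache_tnfr_computation`` metadata can be
# invalidated; anything else raises ``ValueError``.
def _sketch_invalidate_function_cache() -> None:
    @cache_tnfr_computation(level=CacheLevel.TEMPORARY, dependencies={"sketch_dep"})
    def _answer() -> int:
        return 42

    assert _answer() == 42                       # computed once, then cached
    assert invalidate_function_cache(_answer) >= 0
    try:
        invalidate_function_cache(len)           # plain builtin: not cached
    except ValueError:
        pass

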
# ============================================================================
# Graph Change Tracking (moved from caching/invalidation.py for consolidation)
# ============================================================================


class GraphChangeTracker:
    """Track graph modifications for selective cache invalidation.

    Installs hooks into graph modification methods to automatically invalidate
    affected cache entries when structural properties change.

    Parameters
    ----------
    cache : TNFRHierarchicalCache
        The cache instance to invalidate.

    Attributes
    ----------
    topology_changes : int
        Count of topology modifications (add/remove node/edge).
    property_changes : int
        Count of node property modifications.

    Examples
    --------
    >>> import networkx as nx
    >>> from tnfr.cache import TNFRHierarchicalCache, GraphChangeTracker, CacheLevel
    >>> cache = TNFRHierarchicalCache()
    >>> G = nx.Graph()
    >>> tracker = GraphChangeTracker(cache)
    >>> tracker.track_graph_changes(G)
    >>> # Now modifications to G will trigger cache invalidation
    >>> cache.set("key1", 1, CacheLevel.GRAPH_STRUCTURE, {'graph_topology'})
    >>> G.add_node("n1")  # Invalidates graph_topology cache entries
    >>> cache.get("key1", CacheLevel.GRAPH_STRUCTURE)  # Returns None
    """

    def __init__(self, cache: TNFRHierarchicalCache):
        self._cache = cache
        self.topology_changes = 0
        self.property_changes = 0
        self._tracked_graphs: set[int] = set()

    def track_graph_changes(self, graph: Any) -> None:
        """Install hooks to track changes in a graph.

        Wraps the graph's add_node, remove_node, add_edge, and remove_edge
        methods to trigger cache invalidation.

        Parameters
        ----------
        graph : GraphLike
            The graph to monitor for changes.

        Notes
        -----
        This uses monkey-patching to intercept graph modifications. The
        original methods are preserved, and invalidation runs after each
        wrapped call completes.
"""
|
|
3820
|
+
graph_id = id(graph)
|
|
3821
|
+
if graph_id in self._tracked_graphs:
|
|
3822
|
+
return # Already tracking this graph
|
|
3823
|
+
|
|
3824
|
+
self._tracked_graphs.add(graph_id)
|
|
3825
|
+
|
|
3826
|
+
# Store original methods
|
|
3827
|
+
original_add_node = graph.add_node
|
|
3828
|
+
original_remove_node = graph.remove_node
|
|
3829
|
+
original_add_edge = graph.add_edge
|
|
3830
|
+
original_remove_edge = graph.remove_edge
|
|
3831
|
+
|
|
3832
|
+
# Create tracked versions
|
|
3833
|
+
def tracked_add_node(node_id: Any, **attrs: Any) -> None:
|
|
3834
|
+
result = original_add_node(node_id, **attrs)
|
|
3835
|
+
self._on_topology_change()
|
|
3836
|
+
return result
|
|
3837
|
+
|
|
3838
|
+
def tracked_remove_node(node_id: Any) -> None:
|
|
3839
|
+
result = original_remove_node(node_id)
|
|
3840
|
+
self._on_topology_change()
|
|
3841
|
+
return result
|
|
3842
|
+
|
|
3843
|
+
def tracked_add_edge(u: Any, v: Any, **attrs: Any) -> None:
|
|
3844
|
+
result = original_add_edge(u, v, **attrs)
|
|
3845
|
+
self._on_topology_change()
|
|
3846
|
+
return result
|
|
3847
|
+
|
|
3848
|
+
def tracked_remove_edge(u: Any, v: Any) -> None:
|
|
3849
|
+
result = original_remove_edge(u, v)
|
|
3850
|
+
self._on_topology_change()
|
|
3851
|
+
return result
|
|
3852
|
+
|
|
3853
|
+
# Replace methods
|
|
3854
|
+
graph.add_node = tracked_add_node
|
|
3855
|
+
graph.remove_node = tracked_remove_node
|
|
3856
|
+
graph.add_edge = tracked_add_edge
|
|
3857
|
+
graph.remove_edge = tracked_remove_edge
|
|
3858
|
+
|
|
3859
|
+
# Store reference to tracker for property changes
|
|
3860
|
+
if hasattr(graph, "graph"):
|
|
3861
|
+
graph.graph["_tnfr_change_tracker"] = self
|
|
3862
|
+
|
|
3863
|
+
+    def on_node_property_change(
+        self,
+        node_id: Any,
+        property_name: str,
+        old_value: Optional[Any] = None,
+        new_value: Optional[Any] = None,
+    ) -> None:
+        """Notify tracker of a node property change.
+
+        Parameters
+        ----------
+        node_id : Any
+            The node whose property changed.
+        property_name : str
+            Name of the property that changed (e.g., 'epi', 'vf', 'phase').
+        old_value : Any, optional
+            Previous value (for logging/debugging).
+        new_value : Any, optional
+            New value (for logging/debugging).
+
+        Notes
+        -----
+        This should be called explicitly when node properties are modified
+        outside of the graph's standard API (e.g., G.nodes[n]['epi'] = value).
+        """
+        # Invalidate node-specific dependency
+        dep_key = f"node_{property_name}_{node_id}"
+        self._cache.invalidate_by_dependency(dep_key)
+
+        # Invalidate global property dependency
+        global_dep = f"all_node_{property_name}"
+        self._cache.invalidate_by_dependency(global_dep)
+
+        # Invalidate derived metrics for this node
+        if property_name in ["epi", "vf", "phase", "delta_nfr"]:
+            self._cache.invalidate_by_dependency(f"derived_metrics_{node_id}")
+
+        self.property_changes += 1
+
+    def _on_topology_change(self) -> None:
+        """Handle topology modifications (add/remove node/edge)."""
+        # Invalidate topology-dependent caches
+        self._cache.invalidate_by_dependency("graph_topology")
+        self._cache.invalidate_by_dependency("node_neighbors")
+        self._cache.invalidate_by_dependency("adjacency_matrix")
+
+        self.topology_changes += 1
+
+    def reset_counters(self) -> None:
+        """Reset change counters."""
+        self.topology_changes = 0
+        self.property_changes = 0
+
+
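`on_node_property_change` encodes its invalidation targets as string dependency keys: `node_<prop>_<id>` for one node, `all_node_<prop>` for that property across the whole graph, and `derived_metrics_<id>` for downstream metrics. A hedged sketch of how those keys pair with cached entries, assuming the `TNFRHierarchicalCache.set`/`get` call shapes used by `PersistentTNFRCache` later in this file (the key `si_n1` and the values are illustrative):

```python
from tnfr.cache import TNFRHierarchicalCache, CacheLevel

cache = TNFRHierarchicalCache()
# A derived metric that depends on node "n1"'s epi value, registered with
# the same dependency strings on_node_property_change() will invalidate.
cache.set(
    "si_n1",                           # cache key (illustrative)
    0.42,                              # cached value
    CacheLevel.DERIVED_METRICS,        # cache level
    {"node_epi_n1", "all_node_epi"},   # dependencies
    10.0,                              # computation cost estimate
)
# Equivalent to part of what on_node_property_change("n1", "epi", ...) does:
cache.invalidate_by_dependency("node_epi_n1")
print(cache.get("si_n1", CacheLevel.DERIVED_METRICS))  # expected: None (miss)
```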
+def track_node_property_update(
+    graph: Any,
+    node_id: Any,
+    property_name: str,
+    new_value: Any,
+) -> None:
+    """Helper to track node property updates.
+
+    Updates the node property and notifies the change tracker if one is
+    attached to the graph.
+
+    Parameters
+    ----------
+    graph : GraphLike
+        The graph containing the node.
+    node_id : Any
+        The node to update.
+    property_name : str
+        Property name to update.
+    new_value : Any
+        New value for the property.
+
+    Examples
+    --------
+    >>> import networkx as nx
+    >>> from tnfr.cache import TNFRHierarchicalCache, GraphChangeTracker
+    >>> from tnfr.cache import track_node_property_update
+    >>> cache = TNFRHierarchicalCache()
+    >>> G = nx.Graph()
+    >>> G.add_node("n1", epi=0.5)
+    >>> tracker = GraphChangeTracker(cache)
+    >>> tracker.track_graph_changes(G)
+    >>> # Use helper to update and invalidate
+    >>> track_node_property_update(G, "n1", "epi", 0.7)
+    """
+    # Get old value
+    old_value = graph.nodes[node_id].get(property_name)
+
+    # Update property
+    graph.nodes[node_id][property_name] = new_value
+
+    # Notify tracker if present
+    if hasattr(graph, "graph"):
+        tracker = graph.graph.get("_tnfr_change_tracker")
+        if isinstance(tracker, GraphChangeTracker):
+            tracker.on_node_property_change(
+                node_id, property_name, old_value, new_value
+            )
+
+
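Note the asymmetry this helper exists to close: writing `G.nodes[n][prop] = value` directly bypasses the tracker entirely, while `track_node_property_update` performs the same write and then fires the invalidation hooks. A small self-contained comparison, reusing the setup from the doctest above:

```python
import networkx as nx
from tnfr.cache import TNFRHierarchicalCache, GraphChangeTracker
from tnfr.cache import track_node_property_update

cache = TNFRHierarchicalCache()
G = nx.Graph()
G.add_node("n1", epi=0.5)
tracker = GraphChangeTracker(cache)
tracker.track_graph_changes(G)

G.nodes["n1"]["epi"] = 0.6                       # silent: no cache invalidation
track_node_property_update(G, "n1", "epi", 0.7)  # notifies the tracker
print(tracker.property_changes)  # expected: 1 (only the helper call counted)
```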
+# ============================================================================
+# Persistent Cache (moved from caching/persistence.py for consolidation)
+# ============================================================================
+
+
+class PersistentTNFRCache:
+    """Cache with optional disk persistence for costly computations.
+
+    Combines in-memory caching with selective disk persistence for
+    specific cache levels. Expensive computations can be preserved
+    between sessions while temporary computations remain memory-only.
+
+    Parameters
+    ----------
+    cache_dir : Path or str, default: ".tnfr_cache"
+        Directory for persistent cache files.
+    max_memory_mb : int, default: 512
+        Memory limit for in-memory cache.
+    persist_levels : set[CacheLevel], optional
+        Cache levels to persist to disk. Defaults to GRAPH_STRUCTURE
+        and DERIVED_METRICS.
+
+    Examples
+    --------
+    >>> from pathlib import Path
+    >>> from tnfr.cache import PersistentTNFRCache, CacheLevel
+    >>> cache = PersistentTNFRCache(cache_dir=Path("/tmp/tnfr_cache"))
+    >>> # Cache is automatically persisted for expensive operations
+    >>> cache.set_persistent(
+    ...     "coherence_large_graph",
+    ...     0.95,
+    ...     CacheLevel.DERIVED_METRICS,
+    ...     dependencies={'graph_topology'},
+    ...     computation_cost=1000.0,
+    ...     persist_to_disk=True
+    ... )
+    >>> # Later, in a new session
+    >>> result = cache.get_persistent("coherence_large_graph", CacheLevel.DERIVED_METRICS)
+    """
+
+    def __init__(
+        self,
+        cache_dir: Any = ".tnfr_cache",  # Path | str
+        max_memory_mb: int = 512,
+        persist_levels: Optional[set[CacheLevel]] = None,
+    ):
+        from pathlib import Path
+
+        self.cache_dir = Path(cache_dir)
+        self.cache_dir.mkdir(exist_ok=True, parents=True)
+        self._memory_cache = TNFRHierarchicalCache(max_memory_mb=max_memory_mb)
+
+        if persist_levels is None:
+            persist_levels = {
+                CacheLevel.GRAPH_STRUCTURE,
+                CacheLevel.DERIVED_METRICS,
+            }
+        self._persist_levels = persist_levels
+
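Given `_get_cache_file_path` at the end of this class, the directory created here ends up organized by cache level, one pickle per key. Sketched layout with placeholders, since the concrete `CacheLevel.value` strings are not visible in this excerpt:

```
<cache_dir>/
└── <level.value>/   # one subdirectory per persisted CacheLevel
    └── <key>.pkl    # pickled {"value", "dependencies", "computation_cost", "timestamp"}
```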
+    def get_persistent(self, key: str, level: CacheLevel) -> Optional[Any]:
+        """Retrieve value from memory cache, falling back to disk.
+
+        Parameters
+        ----------
+        key : str
+            Cache key.
+        level : CacheLevel
+            Cache level.
+
+        Returns
+        -------
+        Any or None
+            Cached value if found, None otherwise.
+        """
+        # Try memory first
+        result = self._memory_cache.get(key, level)
+        if result is not None:
+            return result
+
+        # Try disk if level is persisted
+        if level in self._persist_levels:
+            file_path = self._get_cache_file_path(key, level)
+            if file_path.exists():
+                try:
+                    with open(file_path, "rb") as f:
+                        cached_data = pickle.load(f)
+
+                    # Validate structure
+                    if not isinstance(cached_data, dict):
+                        file_path.unlink(missing_ok=True)
+                        return None
+
+                    value = cached_data.get("value")
+                    dependencies = cached_data.get("dependencies", set())
+                    computation_cost = cached_data.get("computation_cost", 1.0)
+
+                    # Load back into memory cache
+                    self._memory_cache.set(
+                        key, value, level, dependencies, computation_cost
+                    )
+
+                    return value
+
+                except (pickle.PickleError, EOFError, OSError):
+                    # Corrupt cache file, remove it
+                    file_path.unlink(missing_ok=True)
+
+        return None
+
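One subtlety in the read path above: a value of `None` is indistinguishable from a miss, so an entry legitimately cached as `None` falls through to the disk branch on every lookup and is re-pickled into memory each time; callers that need to cache `None` could wrap values in a sentinel. Also note that `pickle.load` executes arbitrary code embedded in the file, so the cache directory must be trusted. A small illustration of the `None` behaviour, with an illustrative key and path:

```python
from tnfr.cache import PersistentTNFRCache, CacheLevel

cache = PersistentTNFRCache(cache_dir="/tmp/tnfr_cache_demo")
cache.set_persistent(
    "maybe_empty", None, CacheLevel.DERIVED_METRICS,
    dependencies=set(), computation_cost=1.0,
)
# The memory lookup yields None, so the disk fallback runs on every call.
print(cache.get_persistent("maybe_empty", CacheLevel.DERIVED_METRICS))  # None
```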
+    def set_persistent(
+        self,
+        key: str,
+        value: Any,
+        level: CacheLevel,
+        dependencies: set[str],
+        computation_cost: float = 1.0,
+        persist_to_disk: bool = True,
+    ) -> None:
+        """Store value in memory and optionally persist to disk.
+
+        Parameters
+        ----------
+        key : str
+            Cache key.
+        value : Any
+            Value to cache.
+        level : CacheLevel
+            Cache level.
+        dependencies : set[str]
+            Structural dependencies.
+        computation_cost : float, default: 1.0
+            Computation cost estimate.
+        persist_to_disk : bool, default: True
+            Whether to persist this entry to disk.
+        """
+        # Always store in memory
+        self._memory_cache.set(key, value, level, dependencies, computation_cost)
+
+        # Persist to disk if requested and level supports it
+        if persist_to_disk and level in self._persist_levels:
+            file_path = self._get_cache_file_path(key, level)
+            cache_data = {
+                "value": value,
+                "dependencies": dependencies,
+                "computation_cost": computation_cost,
+                "timestamp": time.time(),
+            }
+
+            try:
+                with open(file_path, "wb") as f:
+                    pickle.dump(cache_data, f, protocol=pickle.HIGHEST_PROTOCOL)
+            except (pickle.PickleError, OSError):
+                # Log error but don't fail
+                # In production, this should use proper logging
+                pass
+
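The `except` clause above swallows write failures, and its own comment asks for proper logging in production. A sketch of what that could look like with the standard `logging` module; the `dump_entry` helper and the module-level `logger` are assumptions, not current tnfr code:

```python
import logging
import pickle
from pathlib import Path
from typing import Any

logger = logging.getLogger(__name__)

def dump_entry(file_path: Path, cache_data: dict[str, Any], key: str) -> None:
    """Persist one cache entry, logging (instead of raising) on failure."""
    try:
        with open(file_path, "wb") as f:
            pickle.dump(cache_data, f, protocol=pickle.HIGHEST_PROTOCOL)
    except (pickle.PickleError, OSError) as exc:
        logger.warning("Failed to persist cache entry %r: %s", key, exc)
```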
+    def invalidate_by_dependency(self, dependency: str) -> int:
+        """Invalidate memory and disk cache entries for a dependency.
+
+        Parameters
+        ----------
+        dependency : str
+            The structural property that changed.
+
+        Returns
+        -------
+        int
+            Number of entries invalidated from memory.
+        """
+        # Invalidate memory cache
+        count = self._memory_cache.invalidate_by_dependency(dependency)
+
+        # Note: Disk cache is lazily invalidated on load
+        # Entries with stale dependencies will be detected when loaded
+
+        return count
+
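A caveat on the lazy scheme described in that comment: within this excerpt, `get_persistent` restores any structurally valid pickle without consulting its stored `dependencies`, so a disk entry written before an invalidation can be served again in a later session. One timestamp-based way such a freshness check could work, offered purely as an assumption (none of these names exist in tnfr):

```python
import time
from typing import Any

# Record when each dependency was last invalidated.
_invalidated_at: dict[str, float] = {}

def note_invalidation(dependency: str) -> None:
    _invalidated_at[dependency] = time.time()

def entry_is_fresh(cached_data: dict[str, Any]) -> bool:
    """Accept a disk entry only if written after every relevant invalidation."""
    ts = cached_data.get("timestamp", 0.0)
    deps = cached_data.get("dependencies", set())
    return all(_invalidated_at.get(d, 0.0) <= ts for d in deps)
```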
+    def clear_persistent_cache(self, level: Optional[CacheLevel] = None) -> None:
+        """Clear persistent cache files.
+
+        Parameters
+        ----------
+        level : CacheLevel, optional
+            Specific level to clear. If None, clears all levels.
+        """
+        if level is not None:
+            level_dir = self.cache_dir / level.value
+            if level_dir.exists():
+                for file_path in level_dir.glob("*.pkl"):
+                    file_path.unlink(missing_ok=True)
+        else:
+            # Clear all levels
+            for file_path in self.cache_dir.rglob("*.pkl"):
+                file_path.unlink(missing_ok=True)
+
+    def cleanup_old_entries(self, max_age_days: int = 30) -> int:
+        """Remove old cache files from disk.
+
+        Parameters
+        ----------
+        max_age_days : int, default: 30
+            Maximum age in days before removal.
+
+        Returns
+        -------
+        int
+            Number of files removed.
+        """
+        count = 0
+        max_age_seconds = max_age_days * 24 * 3600
+        current_time = time.time()
+
+        for file_path in self.cache_dir.rglob("*.pkl"):
+            try:
+                mtime = file_path.stat().st_mtime
+                if current_time - mtime > max_age_seconds:
+                    file_path.unlink()
+                    count += 1
+            except OSError:
+                continue
+
+        return count
+
+    def get_stats(self) -> dict[str, Any]:
+        """Get combined statistics from memory and disk cache.
+
+        Returns
+        -------
+        dict[str, Any]
+            Statistics including memory stats and disk usage.
+        """
+        stats = self._memory_cache.get_stats()
+
+        # Add disk stats
+        disk_files = 0
+        disk_size_bytes = 0
+        for file_path in self.cache_dir.rglob("*.pkl"):
+            disk_files += 1
+            try:
+                disk_size_bytes += file_path.stat().st_size
+            except OSError:
+                continue
+
+        stats["disk_files"] = disk_files
+        stats["disk_size_mb"] = disk_size_bytes / (1024 * 1024)
+
+        return stats
+
+    def _get_cache_file_path(self, key: str, level: CacheLevel) -> Any:  # -> Path
+        """Get file path for a cache entry.
+
+        Organizes cache files by level in subdirectories.
+        """
+        level_dir = self.cache_dir / level.value
+        level_dir.mkdir(exist_ok=True, parents=True)
+        # Use key as filename (already hashed in decorator)
+        return level_dir / f"{key}.pkl"
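Taken together, the class supports a simple cross-session lifecycle. An end-to-end sketch using only the methods defined above (the key name and cache directory are illustrative):

```python
from tnfr.cache import PersistentTNFRCache, CacheLevel

cache = PersistentTNFRCache(cache_dir="/tmp/tnfr_cache_demo", max_memory_mb=128)
cache.set_persistent(
    "coherence_g42", 0.87, CacheLevel.DERIVED_METRICS,
    dependencies={"graph_topology"}, computation_cost=500.0,
)
print(cache.get_persistent("coherence_g42", CacheLevel.DERIVED_METRICS))  # 0.87

# Drops the in-memory copy; the disk copy remains until reloaded or cleaned.
cache.invalidate_by_dependency("graph_topology")

removed = cache.cleanup_old_entries(max_age_days=7)
stats = cache.get_stats()
print(stats["disk_files"], round(stats["disk_size_mb"], 3))
```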