vitalroute 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 VitalRoute Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,321 @@
1
+ Metadata-Version: 2.4
2
+ Name: vitalroute
3
+ Version: 0.1.0
4
+ Summary: Task-aware training controller via layer vitality monitoring
5
+ License-Expression: MIT
6
+ Project-URL: Homepage, https://github.com/vitalroute/vitalroute
7
+ Project-URL: Repository, https://github.com/vitalroute/vitalroute
8
+ Project-URL: Bug Tracker, https://github.com/vitalroute/vitalroute/issues
9
+ Project-URL: Changelog, https://github.com/vitalroute/vitalroute/blob/main/CHANGELOG.md
10
+ Keywords: imbalanced learning,neural network,dead neurons,adaptive training,class sampling,transfer learning,pytorch,machine learning
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Classifier: Operating System :: OS Independent
21
+ Requires-Python: >=3.10
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: numpy>=1.24
25
+ Provides-Extra: demo
26
+ Requires-Dist: scikit-learn>=1.3; extra == "demo"
27
+ Provides-Extra: torch
28
+ Requires-Dist: torch>=2.0; extra == "torch"
29
+ Requires-Dist: torchvision>=0.15; extra == "torch"
30
+ Provides-Extra: dev
31
+ Requires-Dist: scikit-learn>=1.3; extra == "dev"
32
+ Requires-Dist: torch>=2.0; extra == "dev"
33
+ Requires-Dist: torchvision>=0.15; extra == "dev"
34
+ Requires-Dist: pytest>=7.0; extra == "dev"
35
+ Dynamic: license-file
36
+
37
+ # VitalRoute
38
+
39
+ [![PyPI version](https://img.shields.io/pypi/v/vitalroute.svg)](https://pypi.org/project/vitalroute/)
40
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
41
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
42
+ [![Tests](https://img.shields.io/badge/tests-pytest-brightgreen.svg)](tests/)
43
+
44
+ **Task-aware training controller** for feed-forward classifiers. It sits on top of
45
+ your normal optimizer (Adam, SGD, etc.) and decides *when* to apply training
46
+ tactics based only on **how your training set is shaped** — not by
47
+ hand-tuning flags for every dataset.
48
+
49
+ ## Background
50
+
51
+ VitalRoute grew out of a research line that treated neural networks like
52
+ organisms: hidden units can show **stasis** (non-responding), **weak
53
+ coupling**, or **saturation**, and pretrained models can be **inherited**
54
+ into a child task the way biological structure carries over. The original
55
+ work framed those ideas as pathology and inheritance on a cell hierarchy;
56
+ here they are distilled into a small, practical library — vitality probes,
57
+ label-free parent choice, and class-aware sampling — without tying you to
58
+ any particular legacy codebase or naming scheme.
59
+
60
+ ## Idea (plain language)
61
+
62
+ A classic biological metaphor inspired this work: treat the network like a
63
+ body you can **examine** while it learns.
64
+
65
+ | Signal | Meaning |
66
+ |---|---|
67
+ | **Stasis** | Hidden unit barely responds (dead ReLU, etc.) |
68
+ | **Weak weights** | Weight column has collapsed |
69
+ | **Weak input** | Incoming activations are tiny vs weights |
70
+ | **Saturation** | Unit stuck near a constant output |
71
+
72
+ From those readings, VitalRoute can:
73
+
74
+ 1. **Vitality sampler** — For **imbalanced** data, oversample classes with high
75
+ **composite stress** (all four signals, not only stasis).
76
+ 2. **Transfer pick** — For **scarce** data, choose the best pretrained parent by
77
+ lowest stasis on the new inputs (no labels needed), then warm-start weights.
78
+ 3. **Hard-sample sampler** — When class rebalancing is off, oversample individual
79
+ examples with high per-sample stress (stasis + weak coupling + low confidence).
80
+ 4. **LR scale** — Slow learning on layers with high stasis:
81
+    `lr_l = base_lr / (1 + α · stasis_l)` (helps on hard tasks at high learning rates).
82
+ 5. **Monitor** — Watch layer health; **reset** stuck units only when stasis is
83
+ high (skipped for CNN-style models with a separate head).
84
+
85
+ An **adaptive router** turns (1)–(4) on or off from class counts and dataset size.
86
+
87
+ ## Install
88
+
89
+ ```powershell
90
+ cd vitalroute
91
+ pip install -r requirements.txt
92
+ pip install -e .
93
+ ```
94
+
95
+ Requires Python 3.10+ and NumPy.
96
+
97
+ ## Quick use
98
+
99
+ ```python
100
+ import numpy as np
101
+ from vitalroute import adaptive_controller, profile_task, route_plan
102
+ from vitalroute.backbone import MLP, LayerSpec, Adam
103
+
104
+ # Your training arrays
105
+ X_train, y_train = ...
106
+ num_classes = 10
107
+
108
+ # Preview routing (no training)
109
+ prof = profile_task(y_train, num_classes)
110
+ plan = route_plan(prof, parent_pool_available=False)
111
+ print(plan.label) # e.g. "imbalance", "transfer", "transfer+imbalance", "monitor"
112
+
113
+ # Attach to your training loop
114
+ ctrl = adaptive_controller(y_train, num_classes, parent_pool=None, verbose=True)
115
+ opt = ctrl.make_optimizer("adam", lr=1e-3) # vitality-scaled when route includes lr_scale
116
+
117
+ sampler, _ = ctrl.bootstrap(model, X_train, y_train, num_classes=num_classes)
118
+
119
+ for epoch in range(epochs):
120
+ ctrl.on_epoch_start(model, X_train, opt, epoch) # refresh LR scales if enabled
121
+ if sampler is not None:
122
+ idx = sampler.sample_indices(epoch, model, X_train, y_train, len(y_train))
123
+ X_ep, y_ep = X_train[idx], y_train[idx]
124
+ else:
125
+ X_ep, y_ep = X_train, y_train
126
+ # ... your batches, loss, optimizer step ...
127
+ ctrl.after_epoch(model, X_train, rng=np.random.default_rng(epoch))
128
+ ```
129
+
130
+ See `examples/digits_imbalanced_demo.py` for a runnable sketch.
131
+
132
+ ## Router rules (defaults)
133
+
134
+ | Condition | Enabled |
135
+ |---|---|
136
+ | `min_class / max_class < 0.25` and minority ≥ 15 samples | Vitality sampler (composite stress) |
137
+ | `n ≤ 200` or `min_class ≤ 12`, and parent pool provided | Transfer pick |
138
+ | Scarce **balanced** data | Transfer + hard-sample sampler (no class sampler) |
139
+ | `n ≥ 80`, sampler off | LR scale |
140
+ | `n ≥ 40`, sampler off | Hard-sample sampler |
141
+ | Always (when training) | Monitor (+ conditional reset) |
142
+
143
+ ## What this project is / is not
144
+
145
+ **Is:**
146
+
147
+ - A small library (NumPy + optional PyTorch) extracted from a larger neural-network research codebase
148
+ - Evidence-backed on imbalanced digits, Fashion-MNIST long-tail, and scarce transfer tasks
149
+ - Compatible with any PyTorch `nn.Module` via `VitalityProbe` forward hooks
150
+ - Compatible with the custom NumPy backbone via `adaptive_controller`
151
+
152
+ **Is not:**
153
+
154
+ - A replacement for backprop or PyTorch
155
+ - A guarantee of SOTA accuracy on vision (use a real CNN framework for that)
156
+ - A claim of novelty vs all of ML — curriculum and transfer learning exist; the hook is **vitality-driven routing**
157
+
158
+ ## How it compares to inverse-frequency weighting
159
+
160
+ On a clean long-tail benchmark, VitalRoute ≈ inverse-frequency (inv_freq). They converge to the same answer because rare classes and broken-neuron classes heavily overlap — the network sees minority classes less, so their neurons die more.
161
+
162
+ **Where VitalRoute has a real edge over inv_freq:**
163
+
164
+ | Scenario | Why VitalRoute helps |
165
+ |---|---|
166
+ | Imbalanced but not uniformly scarce | A class with enough samples but high confusability (broken neurons) gets oversampled; inv_freq ignores it |
167
+ | Difficulty shifts mid-training | VitalRoute refreshes stress every N epochs; inv_freq is static |
168
+ | Label-free transfer selection | Picks the best pretrained parent by stasis on new inputs — no labels needed. inv_freq has no equivalent |
169
+ | Hard-sample curriculum | Per-sample stress (stasis + low confidence) for scarce balanced data; inv_freq only works at class level |
170
+
171
+ If your problem is purely long-tail with clean class boundaries, inv_freq is simpler and nearly as good. If classes overlap, difficulty shifts, or you need transfer selection without labels, VitalRoute adds real value.
172
+
173
+ ## Package layout
174
+
175
+ ```text
176
+ vitalroute/
177
+ README.md
178
+ PAPER.md # research paper style writeup
179
+ INTEGRATION.md
180
+ pyproject.toml
181
+ vitalroute/
182
+ vitality.py # layer stress probes + per-class/per-sample stress
183
+ imbalance.py # composite vitality class sampler (NumPy)
184
+ hard_samples.py # per-sample stress sampler (NumPy)
185
+ lr_scale.py # vitality-scaled Adam / SGD (NumPy)
186
+ transfer.py # label-free parent pick
187
+ router.py # task profile + adaptive controller (NumPy)
188
+ torch_probe.py # VitalityProbe — forward hooks for any nn.Module
189
+ torch_samplers.py # TorchVitalitySampler + TorchHardSampleSampler
190
+ torch_controller.py # TorchTrainingController + torch_adaptive_controller
191
+ backbone/ # optional reference MLP for demos
192
+ examples/
193
+ digits_imbalanced_demo.py # NumPy backbone quick demo
194
+ benchmark_baselines.py # NumPy baseline comparison (digits)
195
+ torch_probe_demo.py # VitalityProbe on a PyTorch MLP
196
+ torch_benchmark_fmnist.py # PyTorch baseline comparison (Fashion-MNIST)
197
+ cifar10_resnet_benchmark.py # ResNet18 / CIFAR-10 (GPU recommended)
198
+ tests/
199
+ ```
200
+
201
+ ## Evidence summary
202
+
203
+ Measured on public-style benchmarks during development:
204
+
205
+ | Setting | Typical gain |
206
+ |---|---|
207
+ | Imbalanced digits / Fashion minority classes | +2–4% minority accuracy vs uniform |
208
+ | vs inverse-frequency baseline (same imbalanced digits) | +0.7% minority, lower variance |
209
+ | Scarce digit subset with parent pool | up to +10% vs cold start |
210
+ | Scarce cat/dog (MLP / small CNN) | +2–3% with transfer pick |
211
+
212
+ **NumPy backbone benchmark** (`examples/benchmark_baselines.py`), 3 seeds, 30 epochs, 5:1 imbalance on digits:
213
+
214
+ ```
215
+ Method Overall Minority
216
+ uniform 93.7%±1.1% 87.9%±2.3%
217
+ inv_freq 94.4%±0.8% 90.1%±1.0%
218
+ vitalroute 95.1%±0.3% 90.8%±1.0% ← best overall + lowest variance
219
+ stasis_only 95.0%±0.7% 90.7%±1.5%
220
+ ```
221
+
222
+ **PyTorch benchmark** (`examples/torch_benchmark_fmnist.py`), 3 seeds, 20 epochs, 10:1 imbalance on Fashion-MNIST MLP:
223
+
224
+ ```
225
+ Method Overall Minority
226
+ uniform 80.1%±0.4% 72.8%±1.2%
227
+ inv_freq 81.7%±0.5% 77.6%±0.9%
228
+ focal 80.0%±0.3% 72.6%±0.2%
229
+ vitalroute 81.7%±0.2% 76.5%±0.6% ← matches inv_freq, beats focal/uniform
230
+ ```
231
+
232
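+ On Fashion-MNIST, VitalRoute matches inverse-frequency on overall accuracy (81.7% each) and trails it by about one point on minority accuracy (76.5% vs 77.6%), while showing the lowest run-to-run spread in overall accuracy of the four methods (±0.2%). On the digits backbone it gains an additional +0.7% minority over inv_freq, with a lower spread in overall accuracy (±0.3% vs ±0.8%) and the same spread in minority accuracy (±1.0%).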
233
+
234
+ ## PyTorch integration
235
+
236
+ ### Probe only (read vitality signals)
237
+
238
+ `VitalityProbe` attaches to any `torch.nn.Module` via forward hooks — no
239
+ changes to your model or optimizer required:
240
+
241
+ ```python
242
+ from vitalroute.torch_probe import VitalityProbe
243
+
244
+ probe = VitalityProbe(model) # attach once; pairs Linear→ReLU automatically
245
+
246
+ for epoch in range(epochs):
247
+ train_one_epoch(model, ...)
248
+ probe.observe(X_train) # one forward pass, no gradients
249
+ print(probe.summary()) # per-layer stasis + composite stress
250
+ print(f"mean stasis: {probe.mean_stasis():.3f}")
251
+
252
+ # Per-class and per-sample stress for custom sampling
253
+ class_scores = probe.per_class_stress(X_train, y_train, num_classes=10)
254
+ sample_scores = probe.per_sample_stress(X_train, y_train)
255
+
256
+ probe.detach() # clean up hooks
257
+ ```
258
+
259
+ ### Full adaptive controller
260
+
261
+ `torch_adaptive_controller` reads your class distribution and picks tactics automatically:
262
+
263
+ ```python
264
+ from vitalroute.torch_controller import torch_adaptive_controller
265
+ from torch.utils.data import DataLoader
266
+
267
+ ctrl = torch_adaptive_controller(y_train, num_classes=10, verbose=True)
268
+
269
+ # X_probe/y_probe: small stratified batch (~50/class) for the probe
270
+ # y_full: full training labels for the sampler's class pools
271
+ sampler = ctrl.setup(model, X_probe, y_probe, y_full=y_train_full,
272
+ num_classes=10)
273
+
274
+ loader = DataLoader(dataset, sampler=sampler, batch_size=64)
275
+
276
+ for epoch in range(epochs):
277
+ ctrl.on_epoch_start(model, X_probe, optimizer, epoch)
278
+ for X_batch, y_batch in loader:
279
+ ... # your normal loss + backward + step
280
+ ctrl.after_epoch(model, X_probe, y_probe)
281
+
282
+ ctrl.detach()
283
+ ```
284
+
285
+ See `examples/torch_probe_demo.py` and `examples/torch_benchmark_fmnist.py` for runnable examples.
286
+
287
+ ## License
288
+
289
+ MIT
290
+
291
+ ## Related Work
292
+
293
+ VitalRoute draws on or is informed by the following lines of research. Where VitalRoute differs is noted.
294
+
295
+ **Adaptive class resampling**
296
+ - [ART: Adaptive Resampling-based Training for Imbalanced Classification](https://arxiv.org/abs/2509.00955) (2025) — periodically refreshes class sampling weights using class-wise F1 scores. VitalRoute uses internal neuron health signals instead of output metrics.
297
+
298
+ **Dead neuron analysis and pruning**
299
+ - [When to Prune? A Policy towards Early Structural Pruning](https://openreview.net/pdf?id=2wFXD2upSQ) — uses dead-neuron rates to guide structured pruning during training. VitalRoute uses the same signal to drive *sampling*, not pruning.
300
+ - [Dead neurons in Deep Learning (overview)](https://medium.com/@abhishekjainindore24/dead-neurons-in-deep-learning-their-effects-and-remedies-to-solve-it-e63da4dd9212)
301
+
302
+ **Dynamic network structure for imbalanced learning**
303
+ - [Adaptive Neuron Growth/Pruning for Imbalanced Classification](https://arxiv.org/abs/2507.09940) (2025) — adds/removes neurons per class using gradient magnitude. Orthogonal to VitalRoute: modifies architecture rather than sampling.
304
+
305
+ **Per-layer learning rate scaling**
306
+ - [LENA: Layer-wise Adaptive LR Scaling](https://dl.acm.org/doi/fullHtml/10.1145/3485447.3511989) — scales per-layer LR by gradient variance. VitalRoute scales by stasis (dead unit fraction), a complementary signal.
307
+ - [LLR: Heavy-Tail Guided Layerwise LR for LLMs](https://arxiv.org/html/2605.22297v1) (2026) — uses weight spectrum heavy-tailedness. Same goal, different diagnostic.
308
+ - [AdaLip: Adaptive LR per Layer via Lipschitz Estimation](https://d-nb.info/1283272997/34) — Lipschitz-constant-based per-layer LR.
309
+ - [LARS](https://arxiv.org/abs/1708.03888) / [LAMB](https://arxiv.org/abs/1904.00962) — weight/gradient ratio scaling; used in large-batch distributed training.
310
+
311
+ **Label-free transfer model selection**
312
+ - [TURTLE: Unsupervised Transfer Learning](https://arxiv.org/html/2406.07236v1) (2024) — selects pretrained models without labels via representation-level generalization objectives. VitalRoute uses stasis rate on new data — simpler, different rationale.
313
+ - [DISCO: Spectral Component Distribution for Transfer Assessment](https://arxiv.org/html/2412.19085v2) (2024) — SVD of feature distributions for transferability scoring.
314
+
315
+ **Focal Loss (baseline used in benchmarks)**
316
+ - [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002) — Lin et al., 2017. Standard hard-example weighting via loss modulation.
317
+
318
+ **Curriculum / hard-sample learning**
319
+ - [Self-Paced Learning](https://papers.nips.cc/paper_files/paper/2010/hash/e57c6b956a6521b28495f2886ca0977a-Abstract.html) — Kumar et al., 2010. Foundation for curriculum-style training.
320
+ - [Online Hard Example Mining](https://arxiv.org/abs/1604.03540) — Shrivastava et al., 2016. Per-sample difficulty weighting from loss values.
321
+
@@ -0,0 +1,285 @@
1
+ # VitalRoute
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/vitalroute.svg)](https://pypi.org/project/vitalroute/)
4
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
5
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
6
+ [![Tests](https://img.shields.io/badge/tests-pytest-brightgreen.svg)](tests/)
7
+
8
+ **Task-aware training controller** for feed-forward classifiers. It sits on top of
9
+ your normal optimizer (Adam, SGD, etc.) and decides *when* to apply training
10
+ tactics based only on **how your training set is shaped** — not by
11
+ hand-tuning flags for every dataset.
12
+
13
+ ## Background
14
+
15
+ VitalRoute grew out of a research line that treated neural networks like
16
+ organisms: hidden units can show **stasis** (non-responding), **weak
17
+ coupling**, or **saturation**, and pretrained models can be **inherited**
18
+ into a child task the way biological structure carries over. The original
19
+ work framed those ideas as pathology and inheritance on a cell hierarchy;
20
+ here they are distilled into a small, practical library — vitality probes,
21
+ label-free parent choice, and class-aware sampling — without tying you to
22
+ any particular legacy codebase or naming scheme.
23
+
24
+ ## Idea (plain language)
25
+
26
+ A classic biological metaphor inspired this work: treat the network like a
27
+ body you can **examine** while it learns.
28
+
29
+ | Signal | Meaning |
30
+ |---|---|
31
+ | **Stasis** | Hidden unit barely responds (dead ReLU, etc.) |
32
+ | **Weak weights** | Weight column has collapsed |
33
+ | **Weak input** | Incoming activations are tiny vs weights |
34
+ | **Saturation** | Unit stuck near a constant output |
35
+
36
+ From those readings, VitalRoute can:
37
+
38
+ 1. **Vitality sampler** — For **imbalanced** data, oversample classes with high
39
+ **composite stress** (all four signals, not only stasis).
40
+ 2. **Transfer pick** — For **scarce** data, choose the best pretrained parent by
41
+ lowest stasis on the new inputs (no labels needed), then warm-start weights.
42
+ 3. **Hard-sample sampler** — When class rebalancing is off, oversample individual
43
+ examples with high per-sample stress (stasis + weak coupling + low confidence).
44
+ 4. **LR scale** — Slow learning on layers with high stasis:
45
+    `lr_l = base_lr / (1 + α · stasis_l)` (helps on hard tasks at high learning rates).
46
+ 5. **Monitor** — Watch layer health; **reset** stuck units only when stasis is
47
+ high (skipped for CNN-style models with a separate head).
48
+
49
+ An **adaptive router** turns (1)–(4) on or off from class counts and dataset size.
50
+
51
+ ## Install
52
+
53
+ ```powershell
54
+ cd vitalroute
55
+ pip install -r requirements.txt
56
+ pip install -e .
57
+ ```
58
+
59
+ Requires Python 3.10+ and NumPy.
60
+
61
+ ## Quick use
62
+
63
+ ```python
64
+ import numpy as np
65
+ from vitalroute import adaptive_controller, profile_task, route_plan
66
+ from vitalroute.backbone import MLP, LayerSpec, Adam
67
+
68
+ # Your training arrays
69
+ X_train, y_train = ...
70
+ num_classes = 10
71
+
72
+ # Preview routing (no training)
73
+ prof = profile_task(y_train, num_classes)
74
+ plan = route_plan(prof, parent_pool_available=False)
75
+ print(plan.label) # e.g. "imbalance", "transfer", "transfer+imbalance", "monitor"
76
+
77
+ # Attach to your training loop
78
+ ctrl = adaptive_controller(y_train, num_classes, parent_pool=None, verbose=True)
79
+ opt = ctrl.make_optimizer("adam", lr=1e-3) # vitality-scaled when route includes lr_scale
80
+
81
+ sampler, _ = ctrl.bootstrap(model, X_train, y_train, num_classes=num_classes)
82
+
83
+ for epoch in range(epochs):
84
+ ctrl.on_epoch_start(model, X_train, opt, epoch) # refresh LR scales if enabled
85
+ if sampler is not None:
86
+ idx = sampler.sample_indices(epoch, model, X_train, y_train, len(y_train))
87
+ X_ep, y_ep = X_train[idx], y_train[idx]
88
+ else:
89
+ X_ep, y_ep = X_train, y_train
90
+ # ... your batches, loss, optimizer step ...
91
+ ctrl.after_epoch(model, X_train, rng=np.random.default_rng(epoch))
92
+ ```
93
+
94
+ See `examples/digits_imbalanced_demo.py` for a runnable sketch.
95
+
96
+ ## Router rules (defaults)
97
+
98
+ | Condition | Enabled |
99
+ |---|---|
100
+ | `min_class / max_class < 0.25` and minority ≥ 15 samples | Vitality sampler (composite stress) |
101
+ | `n ≤ 200` or `min_class ≤ 12`, and parent pool provided | Transfer pick |
102
+ | Scarce **balanced** data | Transfer + hard-sample sampler (no class sampler) |
103
+ | `n ≥ 80`, sampler off | LR scale |
104
+ | `n ≥ 40`, sampler off | Hard-sample sampler |
105
+ | Always (when training) | Monitor (+ conditional reset) |
106
+
107
+ ## What this project is / is not
108
+
109
+ **Is:**
110
+
111
+ - A small library (NumPy + optional PyTorch) extracted from a larger neural-network research codebase
112
+ - Evidence-backed on imbalanced digits, Fashion-MNIST long-tail, and scarce transfer tasks
113
+ - Compatible with any PyTorch `nn.Module` via `VitalityProbe` forward hooks
114
+ - Compatible with the custom NumPy backbone via `adaptive_controller`
115
+
116
+ **Is not:**
117
+
118
+ - A replacement for backprop or PyTorch
119
+ - A guarantee of SOTA accuracy on vision (use a real CNN framework for that)
120
+ - A claim of novelty vs all of ML — curriculum and transfer learning exist; the hook is **vitality-driven routing**
121
+
122
+ ## How it compares to inverse-frequency weighting
123
+
124
+ On a clean long-tail benchmark, VitalRoute ≈ inverse-frequency (inv_freq). They converge to the same answer because rare classes and broken-neuron classes heavily overlap — the network sees minority classes less, so their neurons die more.
125
+
126
+ **Where VitalRoute has a real edge over inv_freq:**
127
+
128
+ | Scenario | Why VitalRoute helps |
129
+ |---|---|
130
+ | Imbalanced but not uniformly scarce | A class with enough samples but high confusability (broken neurons) gets oversampled; inv_freq ignores it |
131
+ | Difficulty shifts mid-training | VitalRoute refreshes stress every N epochs; inv_freq is static |
132
+ | Label-free transfer selection | Picks the best pretrained parent by stasis on new inputs — no labels needed. inv_freq has no equivalent |
133
+ | Hard-sample curriculum | Per-sample stress (stasis + low confidence) for scarce balanced data; inv_freq only works at class level |
134
+
135
+ If your problem is purely long-tail with clean class boundaries, inv_freq is simpler and nearly as good. If classes overlap, difficulty shifts, or you need transfer selection without labels, VitalRoute adds real value.
136
+
137
+ ## Package layout
138
+
139
+ ```text
140
+ vitalroute/
141
+ README.md
142
+ PAPER.md # research paper style writeup
143
+ INTEGRATION.md
144
+ pyproject.toml
145
+ vitalroute/
146
+ vitality.py # layer stress probes + per-class/per-sample stress
147
+ imbalance.py # composite vitality class sampler (NumPy)
148
+ hard_samples.py # per-sample stress sampler (NumPy)
149
+ lr_scale.py # vitality-scaled Adam / SGD (NumPy)
150
+ transfer.py # label-free parent pick
151
+ router.py # task profile + adaptive controller (NumPy)
152
+ torch_probe.py # VitalityProbe — forward hooks for any nn.Module
153
+ torch_samplers.py # TorchVitalitySampler + TorchHardSampleSampler
154
+ torch_controller.py # TorchTrainingController + torch_adaptive_controller
155
+ backbone/ # optional reference MLP for demos
156
+ examples/
157
+ digits_imbalanced_demo.py # NumPy backbone quick demo
158
+ benchmark_baselines.py # NumPy baseline comparison (digits)
159
+ torch_probe_demo.py # VitalityProbe on a PyTorch MLP
160
+ torch_benchmark_fmnist.py # PyTorch baseline comparison (Fashion-MNIST)
161
+ cifar10_resnet_benchmark.py # ResNet18 / CIFAR-10 (GPU recommended)
162
+ tests/
163
+ ```
164
+
165
+ ## Evidence summary
166
+
167
+ Measured on public-style benchmarks during development:
168
+
169
+ | Setting | Typical gain |
170
+ |---|---|
171
+ | Imbalanced digits / Fashion minority classes | +2–4% minority accuracy vs uniform |
172
+ | vs inverse-frequency baseline (same imbalanced digits) | +0.7% minority, lower variance |
173
+ | Scarce digit subset with parent pool | up to +10% vs cold start |
174
+ | Scarce cat/dog (MLP / small CNN) | +2–3% with transfer pick |
175
+
176
+ **NumPy backbone benchmark** (`examples/benchmark_baselines.py`), 3 seeds, 30 epochs, 5:1 imbalance on digits:
177
+
178
+ ```
179
+ Method Overall Minority
180
+ uniform 93.7%±1.1% 87.9%±2.3%
181
+ inv_freq 94.4%±0.8% 90.1%±1.0%
182
+ vitalroute 95.1%±0.3% 90.8%±1.0% ← best overall + lowest variance
183
+ stasis_only 95.0%±0.7% 90.7%±1.5%
184
+ ```
185
+
186
+ **PyTorch benchmark** (`examples/torch_benchmark_fmnist.py`), 3 seeds, 20 epochs, 10:1 imbalance on Fashion-MNIST MLP:
187
+
188
+ ```
189
+ Method Overall Minority
190
+ uniform 80.1%±0.4% 72.8%±1.2%
191
+ inv_freq 81.7%±0.5% 77.6%±0.9%
192
+ focal 80.0%±0.3% 72.6%±0.2%
193
+ vitalroute 81.7%±0.2% 76.5%±0.6% ← matches inv_freq, beats focal/uniform
194
+ ```
195
+
196
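+ On Fashion-MNIST, VitalRoute matches inverse-frequency on overall accuracy (81.7% each) and trails it by about one point on minority accuracy (76.5% vs 77.6%), while showing the lowest run-to-run spread in overall accuracy of the four methods (±0.2%). On the digits backbone it gains an additional +0.7% minority over inv_freq, with a lower spread in overall accuracy (±0.3% vs ±0.8%) and the same spread in minority accuracy (±1.0%).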
197
+
198
+ ## PyTorch integration
199
+
200
+ ### Probe only (read vitality signals)
201
+
202
+ `VitalityProbe` attaches to any `torch.nn.Module` via forward hooks — no
203
+ changes to your model or optimizer required:
204
+
205
+ ```python
206
+ from vitalroute.torch_probe import VitalityProbe
207
+
208
+ probe = VitalityProbe(model) # attach once; pairs Linear→ReLU automatically
209
+
210
+ for epoch in range(epochs):
211
+ train_one_epoch(model, ...)
212
+ probe.observe(X_train) # one forward pass, no gradients
213
+ print(probe.summary()) # per-layer stasis + composite stress
214
+ print(f"mean stasis: {probe.mean_stasis():.3f}")
215
+
216
+ # Per-class and per-sample stress for custom sampling
217
+ class_scores = probe.per_class_stress(X_train, y_train, num_classes=10)
218
+ sample_scores = probe.per_sample_stress(X_train, y_train)
219
+
220
+ probe.detach() # clean up hooks
221
+ ```
222
+
223
+ ### Full adaptive controller
224
+
225
+ `torch_adaptive_controller` reads your class distribution and picks tactics automatically:
226
+
227
+ ```python
228
+ from vitalroute.torch_controller import torch_adaptive_controller
229
+ from torch.utils.data import DataLoader
230
+
231
+ ctrl = torch_adaptive_controller(y_train, num_classes=10, verbose=True)
232
+
233
+ # X_probe/y_probe: small stratified batch (~50/class) for the probe
234
+ # y_full: full training labels for the sampler's class pools
235
+ sampler = ctrl.setup(model, X_probe, y_probe, y_full=y_train_full,
236
+ num_classes=10)
237
+
238
+ loader = DataLoader(dataset, sampler=sampler, batch_size=64)
239
+
240
+ for epoch in range(epochs):
241
+ ctrl.on_epoch_start(model, X_probe, optimizer, epoch)
242
+ for X_batch, y_batch in loader:
243
+ ... # your normal loss + backward + step
244
+ ctrl.after_epoch(model, X_probe, y_probe)
245
+
246
+ ctrl.detach()
247
+ ```
248
+
249
+ See `examples/torch_probe_demo.py` and `examples/torch_benchmark_fmnist.py` for runnable examples.
250
+
251
+ ## License
252
+
253
+ MIT
254
+
255
+ ## Related Work
256
+
257
+ VitalRoute draws on or is informed by the following lines of research. Where VitalRoute differs is noted.
258
+
259
+ **Adaptive class resampling**
260
+ - [ART: Adaptive Resampling-based Training for Imbalanced Classification](https://arxiv.org/abs/2509.00955) (2025) — periodically refreshes class sampling weights using class-wise F1 scores. VitalRoute uses internal neuron health signals instead of output metrics.
261
+
262
+ **Dead neuron analysis and pruning**
263
+ - [When to Prune? A Policy towards Early Structural Pruning](https://openreview.net/pdf?id=2wFXD2upSQ) — uses dead-neuron rates to guide structured pruning during training. VitalRoute uses the same signal to drive *sampling*, not pruning.
264
+ - [Dead neurons in Deep Learning (overview)](https://medium.com/@abhishekjainindore24/dead-neurons-in-deep-learning-their-effects-and-remedies-to-solve-it-e63da4dd9212)
265
+
266
+ **Dynamic network structure for imbalanced learning**
267
+ - [Adaptive Neuron Growth/Pruning for Imbalanced Classification](https://arxiv.org/abs/2507.09940) (2025) — adds/removes neurons per class using gradient magnitude. Orthogonal to VitalRoute: modifies architecture rather than sampling.
268
+
269
+ **Per-layer learning rate scaling**
270
+ - [LENA: Layer-wise Adaptive LR Scaling](https://dl.acm.org/doi/fullHtml/10.1145/3485447.3511989) — scales per-layer LR by gradient variance. VitalRoute scales by stasis (dead unit fraction), a complementary signal.
271
+ - [LLR: Heavy-Tail Guided Layerwise LR for LLMs](https://arxiv.org/html/2605.22297v1) (2026) — uses weight spectrum heavy-tailedness. Same goal, different diagnostic.
272
+ - [AdaLip: Adaptive LR per Layer via Lipschitz Estimation](https://d-nb.info/1283272997/34) — Lipschitz-constant-based per-layer LR.
273
+ - [LARS](https://arxiv.org/abs/1708.03888) / [LAMB](https://arxiv.org/abs/1904.00962) — weight/gradient ratio scaling; used in large-batch distributed training.
274
+
275
+ **Label-free transfer model selection**
276
+ - [TURTLE: Unsupervised Transfer Learning](https://arxiv.org/html/2406.07236v1) (2024) — selects pretrained models without labels via representation-level generalization objectives. VitalRoute uses stasis rate on new data — simpler, different rationale.
277
+ - [DISCO: Spectral Component Distribution for Transfer Assessment](https://arxiv.org/html/2412.19085v2) (2024) — SVD of feature distributions for transferability scoring.
278
+
279
+ **Focal Loss (baseline used in benchmarks)**
280
+ - [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002) — Lin et al., 2017. Standard hard-example weighting via loss modulation.
281
+
282
+ **Curriculum / hard-sample learning**
283
+ - [Self-Paced Learning](https://papers.nips.cc/paper_files/paper/2010/hash/e57c6b956a6521b28495f2886ca0977a-Abstract.html) — Kumar et al., 2010. Foundation for curriculum-style training.
284
+ - [Online Hard Example Mining](https://arxiv.org/abs/1604.03540) — Shrivastava et al., 2016. Per-sample difficulty weighting from loss values.
285
+
@@ -0,0 +1,50 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77.0.3"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "vitalroute"
7
+ version = "0.1.0"
8
+ description = "Task-aware training controller via layer vitality monitoring"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = "MIT"
12
+ license-files = ["LICENSE"]
13
+ dependencies = ["numpy>=1.24"]
14
+ keywords = [
15
+ "imbalanced learning",
16
+ "neural network",
17
+ "dead neurons",
18
+ "adaptive training",
19
+ "class sampling",
20
+ "transfer learning",
21
+ "pytorch",
22
+ "machine learning",
23
+ ]
24
+ classifiers = [
25
+ "Development Status :: 3 - Alpha",
26
+ "Intended Audience :: Science/Research",
27
+ "Intended Audience :: Developers",
28
+ "Programming Language :: Python :: 3",
29
+ "Programming Language :: Python :: 3.10",
30
+ "Programming Language :: Python :: 3.11",
31
+ "Programming Language :: Python :: 3.12",
32
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
33
+ "Topic :: Software Development :: Libraries :: Python Modules",
34
+ "Operating System :: OS Independent",
35
+ ]
36
+
37
+ [project.urls]
38
+ Homepage = "https://github.com/vitalroute/vitalroute"
39
+ Repository = "https://github.com/vitalroute/vitalroute"
40
+ "Bug Tracker" = "https://github.com/vitalroute/vitalroute/issues"
41
+ Changelog = "https://github.com/vitalroute/vitalroute/blob/main/CHANGELOG.md"
42
+
43
+ [project.optional-dependencies]
44
+ demo = ["scikit-learn>=1.3"]
45
+ torch = ["torch>=2.0", "torchvision>=0.15"]
46
+ dev = ["scikit-learn>=1.3", "torch>=2.0", "torchvision>=0.15", "pytest>=7.0"]
47
+
48
+ [tool.setuptools.packages.find]
49
+ where = ["."]
50
+ include = ["vitalroute*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+