wcdfa 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wcdfa-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Jimi Sadaki Kogura
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
wcdfa-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,302 @@
1
+ Metadata-Version: 2.4
2
+ Name: wcdfa
3
+ Version: 0.1.0
4
+ Summary: Weight-Change DFA: a real-time diagnostic for self-modifying systems
5
+ Author-email: Jimi Sadaki Kogura <jimi@caring-gap.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://caring-gap.com
8
+ Project-URL: Repository, https://github.com/jimikogura/wcdfa
9
+ Project-URL: Documentation, https://caring-gap.com/wcdfa
10
+ Project-URL: Issues, https://github.com/jimikogura/wcdfa/issues
11
+ Keywords: criticality,detrended-fluctuation-analysis,neural-networks,ai-safety,training-diagnostics,self-modifying-systems,weight-change-dfa
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.8
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Classifier: Topic :: Scientific/Engineering :: Physics
22
+ Requires-Python: >=3.8
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: numpy>=1.20
26
+ Provides-Extra: torch
27
+ Requires-Dist: torch>=1.9; extra == "torch"
28
+ Provides-Extra: viz
29
+ Requires-Dist: matplotlib>=3.4; extra == "viz"
30
+ Provides-Extra: all
31
+ Requires-Dist: torch>=1.9; extra == "all"
32
+ Requires-Dist: matplotlib>=3.4; extra == "all"
33
+ Dynamic: license-file
34
+
35
+ # wcdfa
36
+
37
+ [![tests](https://github.com/jimikogura/wcdfa/actions/workflows/tests.yml/badge.svg)](https://github.com/jimikogura/wcdfa/actions/workflows/tests.yml)
38
+
39
+ **Weight-Change DFA** — a real-time diagnostic for self-modifying systems.
40
+
41
+ Monitors the temporal structure of how a neural network modifies its own weights during training. Detects ordered-phase drift (sealing/rigidity), disordered-phase drift (dissolving/instability), and maintained criticality — using the DFA scaling exponent on the weight-update magnitude time series.
42
+
43
+ ```
44
+ α > ~1.2 → Ordered (sealing). System is consolidating rigidly.
45
+ α ≈ 1.0 → Critical. Simultaneously robust and flexible.
46
+ α < ~0.8 → Disordered (dissolving). System cannot consolidate.
47
+ ```
48
+
49
+ Unlike activity-based DFA, weight-change DFA measures self-modification dynamics directly and is not confounded by input statistics.
50
+
51
+ ## Install
52
+
53
+ ```bash
54
+ pip install wcdfa # numpy only
55
+ pip install wcdfa[torch] # + PyTorch integration
56
+ pip install wcdfa[all] # + PyTorch + matplotlib
57
+ ```
58
+
59
+ ## Quick start
60
+
61
+ ```python
62
+ from wcdfa import WeightChangeDFA
63
+
64
+ monitor = WeightChangeDFA(window=500)
65
+
66
+ for epoch in range(num_epochs):
67
+ train_one_epoch(model, optimizer)
68
+ monitor.update(model)
69
+
70
+ if monitor.ready:
71
+ print(f"Epoch {epoch}: α = {monitor.alpha:.3f} ({monitor.regime})")
72
+ ```
73
+
74
+ Three lines in your training loop. That's it.
75
+
76
+ ## What it measures
77
+
78
+ At each training step, `wcdfa` computes `||ΔW||` — the Frobenius norm of the weight update across all parameter tensors. Over a rolling window, it applies Detrended Fluctuation Analysis (DFA) to this time series to extract the scaling exponent α.
79
+
80
+ The exponent tells you about the *temporal structure* of self-modification:
81
+
82
+ - **α ≈ 1.0** — the weight updates have 1/f scaling (long-range correlations balanced with flexibility). The system is at criticality.
83
+ - **α > 1.2** — the updates are too regular, too correlated. The system is sealing into the ordered phase. Consolidation is winning over flexibility. The basin is deepening without widening.
84
+ - **α < 0.8** — the updates are too noisy, too uncorrelated. The system cannot consolidate. The basin is widening without deepening.
85
+
86
+ This is different from loss curves and gradient norms. A network can have a perfectly flat loss curve (it solved the task) while weight-change DFA shows α = 2.0 (it solved the task through rigid, sealed dynamics). The grokking literature confirms this: networks generalize ~12,000 epochs before their weight dynamics reach criticality.
87
+
88
+ ## API
89
+
90
+ ### `WeightChangeDFA`
91
+
92
+ ```python
93
+ monitor = WeightChangeDFA(
94
+ window=500, # samples before first DFA computation
95
+ stride=100, # recompute every N updates
96
+ thresholds=(1.2, 0.8), # (ordered, disordered) boundaries
97
+ )
98
+ ```
99
+
100
+ | Property | Type | Description |
101
+ |----------|------|-------------|
102
+ | `monitor.alpha` | `float \| None` | Current DFA exponent |
103
+ | `monitor.regime` | `str \| None` | `'ordered'`, `'critical'`, or `'disordered'` |
104
+ | `monitor.ready` | `bool` | Whether enough data for DFA |
105
+ | `monitor.history` | `list[float]` | All computed α values |
106
+ | `monitor.n_samples` | `int` | Samples collected so far |
107
+
108
+ | Method | Description |
109
+ |--------|-------------|
110
+ | `monitor.update(model)` | Record weight change from PyTorch model |
111
+ | `monitor.update(norm)` | Record pre-computed `\|\|ΔW\|\|` value |
112
+ | `monitor.reset()` | Clear all data |
113
+ | `monitor.get_signal()` | Return the weight-update time series |
114
+
115
+ ### `RollingWeightChangeDFA`
116
+
117
+ Extended version with epoch-level tracking, plotting, and logging:
118
+
119
+ ```python
120
+ from wcdfa import RollingWeightChangeDFA
121
+
122
+ monitor = RollingWeightChangeDFA(window=500)
123
+
124
+ for epoch in range(num_epochs):
125
+ for batch in dataloader:
126
+ train_step(model, batch, optimizer)
127
+ monitor.update(model)
128
+ monitor.end_epoch(epoch)
129
+
130
+ # After training
131
+ monitor.plot() # one-line visualization
132
+ monitor.plot(save_path="dfa.png") # save to file
133
+ epochs, alphas = monitor.epoch_history
134
+ print(monitor.summary())
135
+ ```
136
+
137
+ **Weights & Biases integration:**
138
+
139
+ ```python
140
+ import wandb
141
+ wandb.init(project="my-training-run")
142
+ monitor = RollingWeightChangeDFA(window=500)
143
+
144
+ for epoch in range(num_epochs):
145
+ train(model, optimizer)
146
+ monitor.update(model)
147
+ monitor.log_wandb(step=epoch) # logs alpha + regime to wandb
148
+ ```
149
+
150
+ ### `compute_dfa`
151
+
152
+ Standalone DFA computation for any 1D signal:
153
+
154
+ ```python
155
+ from wcdfa import compute_dfa
156
+
157
+ alpha, scales, fluctuations = compute_dfa(signal, min_box=4, max_box=None)
158
+
159
+ # With R² goodness-of-fit (how clean is the scaling?)
160
+ alpha, scales, fluctuations, r_sq = compute_dfa(signal, return_r_squared=True)
161
+ print(f"α = {alpha:.3f}, R² = {r_sq:.3f}") # R² > 0.95 = clean scaling
162
+ ```
163
+
164
+ ## Non-PyTorch usage
165
+
166
+ If you're using JAX, TensorFlow, or any other framework, compute `||ΔW||` yourself and pass it in:
167
+
168
+ ```python
169
+ monitor = WeightChangeDFA(window=500)
170
+
171
+ for step in range(num_steps):
172
+ # Your training step here
173
+ weight_norm = compute_your_weight_change_norm()
174
+ monitor.update(weight_norm)
175
+ ```
176
+
177
+ ## Interpreting results
178
+
179
+ ### The therapeutic window
180
+
181
+ The relationship between perturbation frequency and α traces a full phase transition:
182
+
183
+ ```
184
+ Zero correction: α ≈ 1.84 (deep ordered phase — sealed)
185
+ 2% correction: α ≈ 1.55 (27% of total effect from first 2%)
186
+ ~40% correction: α ≈ 1.02 (criticality)
187
+ 95% correction: α ≈ 0.80 (disordered phase — dissolved)
188
+ ```
189
+
190
+ The first increment of corrective perturbation has an outsized effect. The most dangerous configuration for a self-modifying system is not insufficient correction but *zero* correction.
191
+
192
+ ### Four failure modes
193
+
194
+ | Mode | α signature | Description | AI manifestation |
195
+ |------|------------|-------------|-----------------|
196
+ | Sealed return | α > 1.5 | Depth without width | Catastrophic forgetting, value lock-in |
197
+ | Dissolved return | α < 0.8 | Width without depth | Random exploration, training instability |
198
+ | Captured return | α ≈ 1.0 | Healthy dynamics, wrong target | Reward hacking, mesa-optimization |
199
+ | Return against self | α ≈ 1.0 | Healthy dynamics, self-directed | Adversarial vulnerability |
200
+
201
+ Note: the captured return and return against self are **not detectable by α alone** — they require measuring the coupling between the system's attractor and its intended objective.
202
+
203
+ ### Key finding: 95.5% clean-step retention
204
+
205
+ When perturbation steps are stripped from the analysis, 95.5% of the DFA effect persists. The gap changes how the system modifies itself *between* perturbation events, not just during them.
206
+
207
+ ## Examples
208
+
209
+ See `examples/` for:
210
+
211
+ - `grokking_example.py` — Reproducing the two-transition finding in modular addition
212
+ - `basic_usage.py` — Minimal PyTorch integration
213
+
214
+ ## Validation
215
+
216
+ The DFA implementation is validated against [`nolds`](https://github.com/CSchoel/nolds), an established DFA package:
217
+
218
+ | Signal | N | wcdfa α | nolds α | Δ |
219
+ |--------|---|---------|---------|---|
220
+ | White noise | 1000 | 0.539 | 0.494 | 0.045 |
221
+ | Brownian | 1000 | 1.481 | 1.434 | 0.047 |
222
+ | Pink 1/f | 1000 | 0.985 | 0.988 | 0.003 |
223
+
224
+ Over 50 white noise trials (N=1000): mean |Δ| = 0.018, max |Δ| = 0.057. Small differences are expected — `nolds` and `wcdfa` use slightly different scale selection strategies. Agreement within 0.05 is excellent for DFA.
225
+
226
+ ## Performance notes
227
+
228
+ **Memory**: The PyTorch integration clones all trainable parameters every step to compute `||ΔW||`. For large models this adds memory overhead — roughly equal to the model's parameter memory on CPU. For models over ~1B parameters, compute `||ΔW||` directly from the optimizer state:
229
+
230
+ ```python
231
+ # Large-model approach: compute norm from optimizer state
232
+ monitor = WeightChangeDFA(window=500)
233
+
234
+ for step in range(num_steps):
235
+ optimizer.zero_grad()
236
+ loss.backward()
237
+
238
+ # Compute ||ΔW|| from gradients × learning rate (approximation)
239
+ total_sq = sum(
240
+ (p.grad * lr).square().sum().item()
241
+ for p in model.parameters() if p.grad is not None
242
+ )
243
+ norm = total_sq ** 0.5
244
+
245
+ optimizer.step()
246
+ monitor.update(norm)
247
+ ```
248
+
249
+ **Speed**: DFA computation runs on a numpy array of length `window` (default 500). At 20 log-spaced scales, this takes <1ms — negligible compared to a training step. The bottleneck for large models is the parameter cloning, not the DFA.
250
+
251
+ **Frozen parameters**: Only parameters with `requires_grad=True` are tracked. Fine-tuning setups where most layers are frozen work correctly.
252
+
253
+ **Gradient accumulation**: If you use gradient accumulation, call `monitor.update()` after `optimizer.step()`, not after every `loss.backward()`. Between optimizer steps the weights don't change, producing zero norms that corrupt the DFA signal.
254
+
255
+ ## Background
256
+
257
+ Weight-change DFA was developed as part of a research program on constitutive gap dependence — the requirement that self-modifying systems periodically leave their operating regime and return to maintain dynamical criticality. The metric was introduced in:
258
+
259
+ > Kogura, J. S. (2026). *Does Your Model Need Sleep? Constitutive Gap Dependence and the Stability Problem in Self-Modifying AI.*
260
+
261
+ The two-transition finding in grokking (generalization precedes criticality by ~12,000 epochs) was reported in:
262
+
263
+ > Kogura, J. S. (2026). *Grokking Precedes Criticality: Weight-Change DFA Reveals a Delayed Phase Transition in Generalizing Networks.*
264
+
265
+ The theoretical framework is developed in:
266
+
267
+ > Kogura, J. S. (2026). *Constitutive Gap Dependence: A Temporal Mechanism for Criticality Maintenance in Self-Modifying Systems.* Submitted to J. R. Soc. Interface.
268
+
269
+ > Kogura, J. S. (2026). *The Arriving Breath: A Philosophical Conspiracy — The Temporal Ground of Caring.* ISBN 979-8-9954717-0-7.
270
+
271
+ More at [caring-gap.com](https://caring-gap.com).
272
+
273
+ ## Citation
274
+
275
+ If you use `wcdfa` in your research, please cite:
276
+
277
+ ```bibtex
278
+ @software{kogura2026wcdfa,
279
+ author = {Kogura, Jimi Sadaki},
280
+ title = {wcdfa: Weight-Change Detrended Fluctuation Analysis},
281
+ year = {2026},
282
+ url = {https://github.com/jimikogura/wcdfa},
283
+ }
284
+
285
+ @article{kogura2026sleep,
286
+ author = {Kogura, Jimi Sadaki},
287
+ title = {Does Your Model Need Sleep? Constitutive Gap Dependence and the Stability Problem in Self-Modifying AI},
288
+ year = {2026},
289
+ doi = {10.5281/zenodo.19389821},
290
+ }
291
+
292
+ @article{kogura2026grokking,
293
+ author = {Kogura, Jimi Sadaki},
294
+ title = {Grokking Precedes Criticality: Weight-Change DFA Reveals a Delayed Phase Transition in Generalizing Networks},
295
+ year = {2026},
296
+ }
297
+ ```
298
+
299
+
300
+ ## License
301
+
302
+ MIT. Use it, build on it, cite it.
wcdfa-0.1.0/README.md ADDED
@@ -0,0 +1,268 @@
1
+ # wcdfa
2
+
3
+ [![tests](https://github.com/jimikogura/wcdfa/actions/workflows/tests.yml/badge.svg)](https://github.com/jimikogura/wcdfa/actions/workflows/tests.yml)
4
+
5
+ **Weight-Change DFA** — a real-time diagnostic for self-modifying systems.
6
+
7
+ Monitors the temporal structure of how a neural network modifies its own weights during training. Detects ordered-phase drift (sealing/rigidity), disordered-phase drift (dissolving/instability), and maintained criticality — using the DFA scaling exponent on the weight-update magnitude time series.
8
+
9
+ ```
10
+ α > ~1.2 → Ordered (sealing). System is consolidating rigidly.
11
+ α ≈ 1.0 → Critical. Simultaneously robust and flexible.
12
+ α < ~0.8 → Disordered (dissolving). System cannot consolidate.
13
+ ```
14
+
15
+ Unlike activity-based DFA, weight-change DFA measures self-modification dynamics directly and is not confounded by input statistics.
16
+
17
+ ## Install
18
+
19
+ ```bash
20
+ pip install wcdfa # numpy only
21
+ pip install wcdfa[torch] # + PyTorch integration
22
+ pip install wcdfa[all] # + PyTorch + matplotlib
23
+ ```
24
+
25
+ ## Quick start
26
+
27
+ ```python
28
+ from wcdfa import WeightChangeDFA
29
+
30
+ monitor = WeightChangeDFA(window=500)
31
+
32
+ for epoch in range(num_epochs):
33
+ train_one_epoch(model, optimizer)
34
+ monitor.update(model)
35
+
36
+ if monitor.ready:
37
+ print(f"Epoch {epoch}: α = {monitor.alpha:.3f} ({monitor.regime})")
38
+ ```
39
+
40
+ Three lines in your training loop. That's it.
41
+
42
+ ## What it measures
43
+
44
+ At each training step, `wcdfa` computes `||ΔW||` — the Frobenius norm of the weight update across all parameter tensors. Over a rolling window, it applies Detrended Fluctuation Analysis (DFA) to this time series to extract the scaling exponent α.
45
+
46
+ The exponent tells you about the *temporal structure* of self-modification:
47
+
48
+ - **α ≈ 1.0** — the weight updates have 1/f scaling (long-range correlations balanced with flexibility). The system is at criticality.
49
+ - **α > 1.2** — the updates are too regular, too correlated. The system is sealing into the ordered phase. Consolidation is winning over flexibility. The basin is deepening without widening.
50
+ - **α < 0.8** — the updates are too noisy, too uncorrelated. The system cannot consolidate. The basin is widening without deepening.
51
+
52
+ This is different from loss curves and gradient norms. A network can have a perfectly flat loss curve (it solved the task) while weight-change DFA shows α = 2.0 (it solved the task through rigid, sealed dynamics). The grokking literature confirms this: networks generalize ~12,000 epochs before their weight dynamics reach criticality.
53
+
54
+ ## API
55
+
56
+ ### `WeightChangeDFA`
57
+
58
+ ```python
59
+ monitor = WeightChangeDFA(
60
+ window=500, # samples before first DFA computation
61
+ stride=100, # recompute every N updates
62
+ thresholds=(1.2, 0.8), # (ordered, disordered) boundaries
63
+ )
64
+ ```
65
+
66
+ | Property | Type | Description |
67
+ |----------|------|-------------|
68
+ | `monitor.alpha` | `float \| None` | Current DFA exponent |
69
+ | `monitor.regime` | `str \| None` | `'ordered'`, `'critical'`, or `'disordered'` |
70
+ | `monitor.ready` | `bool` | Whether enough data for DFA |
71
+ | `monitor.history` | `list[float]` | All computed α values |
72
+ | `monitor.n_samples` | `int` | Samples collected so far |
73
+
74
+ | Method | Description |
75
+ |--------|-------------|
76
+ | `monitor.update(model)` | Record weight change from PyTorch model |
77
+ | `monitor.update(norm)` | Record pre-computed `\|\|ΔW\|\|` value |
78
+ | `monitor.reset()` | Clear all data |
79
+ | `monitor.get_signal()` | Return the weight-update time series |
80
+
81
+ ### `RollingWeightChangeDFA`
82
+
83
+ Extended version with epoch-level tracking, plotting, and logging:
84
+
85
+ ```python
86
+ from wcdfa import RollingWeightChangeDFA
87
+
88
+ monitor = RollingWeightChangeDFA(window=500)
89
+
90
+ for epoch in range(num_epochs):
91
+ for batch in dataloader:
92
+ train_step(model, batch, optimizer)
93
+ monitor.update(model)
94
+ monitor.end_epoch(epoch)
95
+
96
+ # After training
97
+ monitor.plot() # one-line visualization
98
+ monitor.plot(save_path="dfa.png") # save to file
99
+ epochs, alphas = monitor.epoch_history
100
+ print(monitor.summary())
101
+ ```
102
+
103
+ **Weights & Biases integration:**
104
+
105
+ ```python
106
+ import wandb
107
+ wandb.init(project="my-training-run")
108
+ monitor = RollingWeightChangeDFA(window=500)
109
+
110
+ for epoch in range(num_epochs):
111
+ train(model, optimizer)
112
+ monitor.update(model)
113
+ monitor.log_wandb(step=epoch) # logs alpha + regime to wandb
114
+ ```
115
+
116
+ ### `compute_dfa`
117
+
118
+ Standalone DFA computation for any 1D signal:
119
+
120
+ ```python
121
+ from wcdfa import compute_dfa
122
+
123
+ alpha, scales, fluctuations = compute_dfa(signal, min_box=4, max_box=None)
124
+
125
+ # With R² goodness-of-fit (how clean is the scaling?)
126
+ alpha, scales, fluctuations, r_sq = compute_dfa(signal, return_r_squared=True)
127
+ print(f"α = {alpha:.3f}, R² = {r_sq:.3f}") # R² > 0.95 = clean scaling
128
+ ```
129
+
130
+ ## Non-PyTorch usage
131
+
132
+ If you're using JAX, TensorFlow, or any other framework, compute `||ΔW||` yourself and pass it in:
133
+
134
+ ```python
135
+ monitor = WeightChangeDFA(window=500)
136
+
137
+ for step in range(num_steps):
138
+ # Your training step here
139
+ weight_norm = compute_your_weight_change_norm()
140
+ monitor.update(weight_norm)
141
+ ```
142
+
143
+ ## Interpreting results
144
+
145
+ ### The therapeutic window
146
+
147
+ The relationship between perturbation frequency and α traces a full phase transition:
148
+
149
+ ```
150
+ Zero correction: α ≈ 1.84 (deep ordered phase — sealed)
151
+ 2% correction: α ≈ 1.55 (27% of total effect from first 2%)
152
+ ~40% correction: α ≈ 1.02 (criticality)
153
+ 95% correction: α ≈ 0.80 (disordered phase — dissolved)
154
+ ```
155
+
156
+ The first increment of corrective perturbation has an outsized effect. The most dangerous configuration for a self-modifying system is not insufficient correction but *zero* correction.
157
+
158
+ ### Four failure modes
159
+
160
+ | Mode | α signature | Description | AI manifestation |
161
+ |------|------------|-------------|-----------------|
162
+ | Sealed return | α > 1.5 | Depth without width | Catastrophic forgetting, value lock-in |
163
+ | Dissolved return | α < 0.8 | Width without depth | Random exploration, training instability |
164
+ | Captured return | α ≈ 1.0 | Healthy dynamics, wrong target | Reward hacking, mesa-optimization |
165
+ | Return against self | α ≈ 1.0 | Healthy dynamics, self-directed | Adversarial vulnerability |
166
+
167
+ Note: the captured return and return against self are **not detectable by α alone** — they require measuring the coupling between the system's attractor and its intended objective.
168
+
169
+ ### Key finding: 95.5% clean-step retention
170
+
171
+ When perturbation steps are stripped from the analysis, 95.5% of the DFA effect persists. The gap changes how the system modifies itself *between* perturbation events, not just during them.
172
+
173
+ ## Examples
174
+
175
+ See `examples/` for:
176
+
177
+ - `grokking_example.py` — Reproducing the two-transition finding in modular addition
178
+ - `basic_usage.py` — Minimal PyTorch integration
179
+
180
+ ## Validation
181
+
182
+ The DFA implementation is validated against [`nolds`](https://github.com/CSchoel/nolds), an established DFA package:
183
+
184
+ | Signal | N | wcdfa α | nolds α | Δ |
185
+ |--------|---|---------|---------|---|
186
+ | White noise | 1000 | 0.539 | 0.494 | 0.045 |
187
+ | Brownian | 1000 | 1.481 | 1.434 | 0.047 |
188
+ | Pink 1/f | 1000 | 0.985 | 0.988 | 0.003 |
189
+
190
+ Over 50 white noise trials (N=1000): mean |Δ| = 0.018, max |Δ| = 0.057. Small differences are expected — `nolds` and `wcdfa` use slightly different scale selection strategies. Agreement within 0.05 is excellent for DFA.
191
+
192
+ ## Performance notes
193
+
194
+ **Memory**: The PyTorch integration clones all trainable parameters every step to compute `||ΔW||`. For large models this adds memory overhead — roughly equal to the model's parameter memory on CPU. For models over ~1B parameters, compute `||ΔW||` directly from the optimizer state:
195
+
196
+ ```python
197
+ # Large-model approach: compute norm from optimizer state
198
+ monitor = WeightChangeDFA(window=500)
199
+
200
+ for step in range(num_steps):
201
+ optimizer.zero_grad()
202
+ loss.backward()
203
+
204
+ # Compute ||ΔW|| from gradients × learning rate (approximation)
205
+ total_sq = sum(
206
+ (p.grad * lr).square().sum().item()
207
+ for p in model.parameters() if p.grad is not None
208
+ )
209
+ norm = total_sq ** 0.5
210
+
211
+ optimizer.step()
212
+ monitor.update(norm)
213
+ ```
214
+
215
+ **Speed**: DFA computation runs on a numpy array of length `window` (default 500). At 20 log-spaced scales, this takes <1ms — negligible compared to a training step. The bottleneck for large models is the parameter cloning, not the DFA.
216
+
217
+ **Frozen parameters**: Only parameters with `requires_grad=True` are tracked. Fine-tuning setups where most layers are frozen work correctly.
218
+
219
+ **Gradient accumulation**: If you use gradient accumulation, call `monitor.update()` after `optimizer.step()`, not after every `loss.backward()`. Between optimizer steps the weights don't change, producing zero norms that corrupt the DFA signal.
220
+
221
+ ## Background
222
+
223
+ Weight-change DFA was developed as part of a research program on constitutive gap dependence — the requirement that self-modifying systems periodically leave their operating regime and return to maintain dynamical criticality. The metric was introduced in:
224
+
225
+ > Kogura, J. S. (2026). *Does Your Model Need Sleep? Constitutive Gap Dependence and the Stability Problem in Self-Modifying AI.*
226
+
227
+ The two-transition finding in grokking (generalization precedes criticality by ~12,000 epochs) was reported in:
228
+
229
+ > Kogura, J. S. (2026). *Grokking Precedes Criticality: Weight-Change DFA Reveals a Delayed Phase Transition in Generalizing Networks.*
230
+
231
+ The theoretical framework is developed in:
232
+
233
+ > Kogura, J. S. (2026). *Constitutive Gap Dependence: A Temporal Mechanism for Criticality Maintenance in Self-Modifying Systems.* Submitted to J. R. Soc. Interface.
234
+
235
+ > Kogura, J. S. (2026). *The Arriving Breath: A Philosophical Conspiracy — The Temporal Ground of Caring.* ISBN 979-8-9954717-0-7.
236
+
237
+ More at [caring-gap.com](https://caring-gap.com).
238
+
239
+ ## Citation
240
+
241
+ If you use `wcdfa` in your research, please cite:
242
+
243
+ ```bibtex
244
+ @software{kogura2026wcdfa,
245
+ author = {Kogura, Jimi Sadaki},
246
+ title = {wcdfa: Weight-Change Detrended Fluctuation Analysis},
247
+ year = {2026},
248
+ url = {https://github.com/jimikogura/wcdfa},
249
+ }
250
+
251
+ @article{kogura2026sleep,
252
+ author = {Kogura, Jimi Sadaki},
253
+ title = {Does Your Model Need Sleep? Constitutive Gap Dependence and the Stability Problem in Self-Modifying AI},
254
+ year = {2026},
255
+ doi = {10.5281/zenodo.19389821},
256
+ }
257
+
258
+ @article{kogura2026grokking,
259
+ author = {Kogura, Jimi Sadaki},
260
+ title = {Grokking Precedes Criticality: Weight-Change DFA Reveals a Delayed Phase Transition in Generalizing Networks},
261
+ year = {2026},
262
+ }
263
+ ```
264
+
265
+
266
+ ## License
267
+
268
+ MIT. Use it, build on it, cite it.
wcdfa-0.1.0/pyproject.toml ADDED
@@ -0,0 +1,55 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "wcdfa"
7
+ version = "0.1.0"
8
+ description = "Weight-Change DFA: a real-time diagnostic for self-modifying systems"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.8"
12
+ authors = [
13
+ {name = "Jimi Sadaki Kogura", email = "jimi@caring-gap.com"},
14
+ ]
15
+ keywords = [
16
+ "criticality",
17
+ "detrended-fluctuation-analysis",
18
+ "neural-networks",
19
+ "ai-safety",
20
+ "training-diagnostics",
21
+ "self-modifying-systems",
22
+ "weight-change-dfa",
23
+ ]
24
+ classifiers = [
25
+ "Development Status :: 3 - Alpha",
26
+ "Intended Audience :: Science/Research",
27
+ "Programming Language :: Python :: 3",
28
+ "Programming Language :: Python :: 3.8",
29
+ "Programming Language :: Python :: 3.9",
30
+ "Programming Language :: Python :: 3.10",
31
+ "Programming Language :: Python :: 3.11",
32
+ "Programming Language :: Python :: 3.12",
33
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
34
+ "Topic :: Scientific/Engineering :: Physics",
35
+ ]
36
+ dependencies = [
37
+ "numpy>=1.20",
38
+ ]
39
+
40
+ [project.optional-dependencies]
41
+ torch = ["torch>=1.9"]
42
+ viz = ["matplotlib>=3.4"]
43
+ all = ["torch>=1.9", "matplotlib>=3.4"]
44
+
45
+ [project.urls]
46
+ Homepage = "https://caring-gap.com"
47
+ Repository = "https://github.com/jimikogura/wcdfa"
48
+ Documentation = "https://caring-gap.com/wcdfa"
49
+ Issues = "https://github.com/jimikogura/wcdfa/issues"
50
+
51
+ [tool.setuptools.packages.find]
52
+ include = ["wcdfa*"]
53
+
54
+ [tool.pytest.ini_options]
55
+ testpaths = ["tests"]
wcdfa-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+