spdal 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spdal-0.1.0/LICENSE +28 -0
- spdal-0.1.0/PKG-INFO +267 -0
- spdal-0.1.0/README.md +245 -0
- spdal-0.1.0/pyproject.toml +28 -0
- spdal-0.1.0/setup.cfg +4 -0
- spdal-0.1.0/src/spdal/__init__.py +18 -0
- spdal-0.1.0/src/spdal/_base.py +370 -0
- spdal-0.1.0/src/spdal/_utils.py +58 -0
- spdal-0.1.0/src/spdal/d4.py +133 -0
- spdal-0.1.0/src/spdal/lrhe.py +118 -0
- spdal-0.1.0/src/spdal/ppvh.py +4 -0
- spdal-0.1.0/src/spdal/scil.py +231 -0
- spdal-0.1.0/src/spdal/shef.py +216 -0
- spdal-0.1.0/src/spdal/traced.py +427 -0
- spdal-0.1.0/src/spdal/vebf.py +104 -0
- spdal-0.1.0/src/spdal.egg-info/PKG-INFO +267 -0
- spdal-0.1.0/src/spdal.egg-info/SOURCES.txt +23 -0
- spdal-0.1.0/src/spdal.egg-info/dependency_links.txt +1 -0
- spdal-0.1.0/src/spdal.egg-info/requires.txt +13 -0
- spdal-0.1.0/src/spdal.egg-info/top_level.txt +1 -0
- spdal-0.1.0/tests/test_classifiers.py +1335 -0
- spdal-0.1.0/tests/test_edge_cases.py +461 -0
- spdal-0.1.0/tests/test_hyperparams.py +508 -0
- spdal-0.1.0/tests/test_phishing.py +261 -0
- spdal-0.1.0/tests/test_shef_formula.py +358 -0
spdal-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
BSD 3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Peemapat Wongsriphisant
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without modification,
|
|
6
|
+
are permitted provided that the following conditions are met:
|
|
7
|
+
|
|
8
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
9
|
+
list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice, this
|
|
12
|
+
list of conditions and the following disclaimer in the documentation and/or
|
|
13
|
+
other materials provided with the distribution.
|
|
14
|
+
|
|
15
|
+
3. Neither the name of the copyright holder nor the names of its contributors may
|
|
16
|
+
be used to endorse or promote products derived from this software without
|
|
17
|
+
specific prior written permission.
|
|
18
|
+
|
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
20
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
21
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
22
|
+
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
|
23
|
+
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
24
|
+
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
25
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
26
|
+
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
|
27
|
+
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
28
|
+
OF THE POSSIBILITY OF SUCH DAMAGE.
|
spdal-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: spdal
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Single-pass and discard-after-learn hyperellipsoid classifiers for online learning
|
|
5
|
+
Author-email: Peemapat Wongsriphisant <peemapat.w@gmail.com>
|
|
6
|
+
License: BSD-3-Clause
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: scikit-learn>=1.3
|
|
11
|
+
Requires-Dist: numpy>=1.24
|
|
12
|
+
Requires-Dist: scipy>=1.10
|
|
13
|
+
Requires-Dist: pandas>=2.0
|
|
14
|
+
Requires-Dist: tqdm>=4.0
|
|
15
|
+
Provides-Extra: dev
|
|
16
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
17
|
+
Provides-Extra: experiments
|
|
18
|
+
Requires-Dist: matplotlib; extra == "experiments"
|
|
19
|
+
Requires-Dist: river; extra == "experiments"
|
|
20
|
+
Requires-Dist: ucimlrepo; extra == "experiments"
|
|
21
|
+
Dynamic: license-file
|
|
22
|
+
|
|
23
|
+
# spdal
|
|
24
|
+
|
|
25
|
+
**Single-Pass Discard-After-Learn** — hyperellipsoid classifiers for online streaming data.
|
|
26
|
+
|
|
27
|
+
Each training sample is processed once and then discarded. No full dataset is ever stored. All classifiers implement scikit-learn's `partial_fit` / `predict` interface.
|
|
28
|
+
|
|
29
|
+
> **Corrigendum:** Theorem 2 of the D4 paper contains a sign error. See the correction and revised theoretical mechanism: [Markdown](docs/D4_corrigendum.md) · [PDF](docs/D4_corrigendum.pdf)
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Installation
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install spdal
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Development mode:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
git clone https://github.com/your-org/single-pass-discard-after-learn
|
|
43
|
+
cd single-pass-discard-after-learn
|
|
44
|
+
pip install -e ".[dev]"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Quick Start
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from sklearn.datasets import make_classification
|
|
53
|
+
from spdal import LRHE
|
|
54
|
+
|
|
55
|
+
X, y = make_classification(n_samples=500, random_state=42)
|
|
56
|
+
|
|
57
|
+
clf = LRHE()
|
|
58
|
+
clf.fit(X[:400], y[:400])
|
|
59
|
+
print(clf.predict(X[400:])) # array of class labels
|
|
60
|
+
print(len(clf.neuron_list)) # number of learned prototypes
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Incremental (chunk) learning
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from spdal import TRACED
|
|
67
|
+
import numpy as np
|
|
68
|
+
|
|
69
|
+
X, y = make_classification(n_samples=500, random_state=42)
|
|
70
|
+
classes = np.unique(y)
|
|
71
|
+
|
|
72
|
+
clf = TRACED()
|
|
73
|
+
for i in range(0, 400, 50):
|
|
74
|
+
clf.partial_fit(X[i:i+50], y[i:i+50], classes=classes)
|
|
75
|
+
|
|
76
|
+
print(clf.predict(X[400:]))
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Classifiers
|
|
82
|
+
|
|
83
|
+
| Class | Full name | Year | Key idea |
|
|
84
|
+
|-------|-----------|------|----------|
|
|
85
|
+
| `VEBF` | Versatile Elliptic Basis Function | 2010 | Foundation: PCA-axis hyperellipsoids, single-datum online learning |
|
|
86
|
+
| `LRHE` | Learning with Recoil in Hyperellipsoidal Structure | 2020 | Shrink-and-shift recoil to handle noisy boundary data |
|
|
87
|
+
| `SCIL` | Streaming Chunk Incremental Learning | 2019 | Neuron merging with parallel-axis covariance pooling |
|
|
88
|
+
| `SHEF` | Scalable Hyper-Ellipsoidal Function | 2020 | Regularized covariance + Mahalanobis-based prediction |
|
|
89
|
+
| `D4` | Diversion of Data Distribution Direction | 2026 | Hybrid width formula; principal-axis projection for coincident regions |
|
|
90
|
+
| `TRACED` | Trend-Adaptive Classification with Ellipsoidal Disambiguation | TBD | Adds EMA displacement/expansion tracking for exterior-region prediction |
|
|
91
|
+
|
|
92
|
+
### VEBF
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
from spdal import VEBF
|
|
96
|
+
clf = VEBF(theta=0, delta=1, epsilon=1e-10)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
| Parameter | Default | Description |
|
|
100
|
+
|-----------|---------|-------------|
|
|
101
|
+
| `theta` | `0` | Overlap threshold for neuron merging |
|
|
102
|
+
| `delta` | `1` | Width scaling from pairwise distances |
|
|
103
|
+
| `epsilon` | `1e-10` | Numerical floor |
|
|
104
|
+
|
|
105
|
+
### LRHE
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
from spdal import LRHE
|
|
109
|
+
clf = LRHE(alpha=0.5, theta=0, delta=1, epsilon=1e-10)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
| Parameter | Default | Description |
|
|
113
|
+
|-----------|---------|-------------|
|
|
114
|
+
| `alpha` | `0.5` | Shrink multiplier during recoil (0–1) |
|
|
115
|
+
| `theta` | `0` | Overlap threshold for merging |
|
|
116
|
+
| `delta` | `1` | Width scaling from pairwise distances |
|
|
117
|
+
| `epsilon` | `1e-10` | Numerical floor |
|
|
118
|
+
|
|
119
|
+
### SCIL
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
from spdal import SCIL
|
|
123
|
+
clf = SCIL(N0=3, eta=2, delta=1, theta=0, epsilon=1e-10)
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
| Parameter | Default | Description |
|
|
127
|
+
|-----------|---------|-------------|
|
|
128
|
+
| `N0` | `3` | Min samples for an active neuron |
|
|
129
|
+
| `eta` | `2` | Width expansion scaling factor |
|
|
130
|
+
| `delta` | `1` | Width scaling from pairwise distances |
|
|
131
|
+
| `theta` | `0` | Merge overlap threshold |
|
|
132
|
+
| `epsilon` | `1e-10` | Numerical floor |
|
|
133
|
+
|
|
134
|
+
### SHEF
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
from spdal import SHEF
|
|
138
|
+
clf = SHEF(M=3, r=1.5, epsilon=1e-10)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
| Parameter | Default | Description |
|
|
142
|
+
|-----------|---------|-------------|
|
|
143
|
+
| `M` | `3` | Min samples before adaptive threshold triggers |
|
|
144
|
+
| `r` | `1.5` | Ellipsoid radius scaling constant |
|
|
145
|
+
| `epsilon` | `1e-10` | Regularization / numerical floor |
|
|
146
|
+
|
|
147
|
+
### D4
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
from spdal import D4
|
|
151
|
+
clf = D4(width_parameter=1, reduce_dims=0, delta=1, norm=2, r=1.5, threshold=15, epsilon=1e-10)
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
| Parameter | Default | Description |
|
|
155
|
+
|-----------|---------|-------------|
|
|
156
|
+
| `width_parameter` | `1` | Blend: `1` = pure statistical width, `0` = pure expansion-based |
|
|
157
|
+
| `reduce_dims` | `0` | Principal axes to drop in disambiguation subspace |
|
|
158
|
+
| `delta` | `1` | Width scaling from pairwise distances |
|
|
159
|
+
| `norm` | `2` | Lp norm for projected distance |
|
|
160
|
+
| `r` | `1.5` | Radius scaling factor |
|
|
161
|
+
| `threshold` | `15` | Angle threshold (degrees) for axis pairing |
|
|
162
|
+
| `epsilon` | `1e-10` | Numerical floor |
|
|
163
|
+
|
|
164
|
+
D4 maintains **one neuron per class**. When two nearest neurons belong to different classes, it pairs their principal axes by smallest angle and assigns the class with the smaller projected distance in that subspace.
|
|
165
|
+
|
|
166
|
+
### TRACED
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
from spdal import TRACED
|
|
170
|
+
clf = TRACED(
|
|
171
|
+
alpha=0.5, beta=0.01, delta=2, width_parameter=1,
|
|
172
|
+
reduce_dims=1, N0=3, r=2.507, norm=2,
|
|
173
|
+
method='overlap-outside', distance_metric='boundary',
|
|
174
|
+
threshold=15, epsilon=1e-10,
|
|
175
|
+
)
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
| Parameter | Default | Description |
|
|
179
|
+
|-----------|---------|-------------|
|
|
180
|
+
| `alpha` | `0.5` | EMA weight for displacement smoothing (`0` = disabled) |
|
|
181
|
+
| `beta` | `0.01` | EMA weight for expansion-rate smoothing (`0` = disabled) |
|
|
182
|
+
| `delta` | `2` | Dynamic threshold scaling (mean NN distance × delta) |
|
|
183
|
+
| `width_parameter` | `1` | Blend: `1` = statistical, `0` = expansion-based |
|
|
184
|
+
| `reduce_dims` | `1` | Axes to drop in coincident-region disambiguation |
|
|
185
|
+
| `N0` | `3` | Min samples for an active neuron |
|
|
186
|
+
| `r` | `sqrt(2π)` | Statistical width scaling |
|
|
187
|
+
| `norm` | `2` | Lp norm for distance calculation |
|
|
188
|
+
| `method` | `'overlap-outside'` | Corrections to apply: `'overlap'`, `'outside'`, or both |
|
|
189
|
+
| `distance_metric` | `'boundary'` | `'boundary'` or `'center'` |
|
|
190
|
+
| `threshold` | `15` | Angle threshold (degrees) for axis pairing |
|
|
191
|
+
| `epsilon` | `1e-10` | Numerical floor |
|
|
192
|
+
|
|
193
|
+
TRACED resolves two ambiguous regions:
|
|
194
|
+
- **Coincident** (x inside multiple classes) — principal-axis subspace projection (like D4)
|
|
195
|
+
- **Exterior** (x outside all neurons) — predicts using EMA-smoothed displacement and expansion as a trend model
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## sklearn Interface
|
|
200
|
+
|
|
201
|
+
All classifiers are `sklearn.base.BaseEstimator` subclasses and support:
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
clf.fit(X, y) # full batch training
|
|
205
|
+
clf.partial_fit(X, y, classes=classes) # incremental update
|
|
206
|
+
clf.predict(X) # returns array of class labels
|
|
207
|
+
clf.classes_ # array of known class labels
|
|
208
|
+
clf.neuron_list # list of neuron dicts
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
Compatible with scikit-learn pipelines and cross-validation tools that support `partial_fit`.
|
|
212
|
+
|
|
213
|
+
---
|
|
214
|
+
|
|
215
|
+
## Neuron Schema
|
|
216
|
+
|
|
217
|
+
Learned prototypes are stored in `clf.neuron_list` as a list of dicts:
|
|
218
|
+
|
|
219
|
+
```python
|
|
220
|
+
{
|
|
221
|
+
'y': class_label,
|
|
222
|
+
'center': np.ndarray, # prototype position
|
|
223
|
+
'cov': np.ndarray, # covariance matrix
|
|
224
|
+
'eig_component': np.ndarray, # PCA eigenvectors
|
|
225
|
+
'width': np.ndarray, # semi-axis lengths
|
|
226
|
+
'n': int, # sample count
|
|
227
|
+
# SCIL, D4, TRACED only:
|
|
228
|
+
'variance': np.ndarray, # eigenvalues
|
|
229
|
+
# TRACED only:
|
|
230
|
+
'displacement': np.ndarray, # EMA displacement vector
|
|
231
|
+
'expansion': np.ndarray, # EMA per-axis expansion rates
|
|
232
|
+
}
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
---
|
|
236
|
+
|
|
237
|
+
## Development
|
|
238
|
+
|
|
239
|
+
```bash
|
|
240
|
+
# Run tests
|
|
241
|
+
pytest tests/ -v
|
|
242
|
+
|
|
243
|
+
# Run a single test class
|
|
244
|
+
pytest tests/test_classifiers.py::TestTRACED -v
|
|
245
|
+
|
|
246
|
+
# Build for PyPI
|
|
247
|
+
pip install build && python -m build
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
## References
|
|
253
|
+
|
|
254
|
+
1. **VEBF** — Jaiyen, S., Lursinsap, C., & Phimoltares, S. (2010). A New Versatile Elliptic Basis Function Neural Network. *IEEE Transactions on Neural Networks*, 21(3), 381–392.
|
|
255
|
+
2. **LRHE** — Jindadoungrut, K., Phimoltares, S., & Lursinsap, C. (2020). Neural Learning With Recoil Behavior in Hyperellipsoidal Structure. *IEEE Access*, 8, 114643–114655.
|
|
256
|
+
3. **SCIL** — Junsawang, P., Phimoltares, S., & Lursinsap, C. (2019). Streaming chunk incremental learning for class-wise data stream classification with fast learning speed and low structural complexity. *PLOS ONE*, 14(9), e0220624.
|
|
257
|
+
4. **SHEF** — Rungcharassang, P., & Lursinsap, C. (2020). Scalable Hyper-Ellipsoidal Function with Projection Ratio for Local Distributed Streaming Data Classification. *IEEE Access*. DOI: 10.1109/ACCESS.2020.2997944.
|
|
258
|
+
5. **D4** — Wongsriphisant, P., Plaimas, K., & Lursinsap, C. (2026). Markov-based continuous learning with diversion of data distribution direction for streaming data in limited memory. *Expert Systems With Applications*, 298, 129818.
|
|
259
|
+
- Corrigendum (Theorem 2): [docs/D4_corrigendum.md](docs/D4_corrigendum.md) · [PDF](docs/D4_corrigendum.pdf)
|
|
260
|
+
6. **TRACED** — Wongsriphisant, P., Plaimas, K., & Lursinsap, C. TRACED: Trend-Adaptive Classification with Ellipsoidal Disambiguation for Resolving Exterior and Coincident Regions in Data Streams. *Preprint submitted to Elsevier*.
|
|
261
|
+
|
|
262
|
+
---
|
|
263
|
+
|
|
264
|
+
## Notes
|
|
265
|
+
|
|
266
|
+
- The original monolithic implementation is preserved at [`deprecated/spdal.py`](deprecated/spdal.py) for reference.
|
|
267
|
+
- Refactoring into the modular `src/spdal/` package structure, docstrings, and parameter naming were performed by Claude (Anthropic) and reviewed by the project owner.
|
spdal-0.1.0/README.md
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
# spdal
|
|
2
|
+
|
|
3
|
+
**Single-Pass Discard-After-Learn** — hyperellipsoid classifiers for online streaming data.
|
|
4
|
+
|
|
5
|
+
Each training sample is processed once and then discarded. No full dataset is ever stored. All classifiers implement scikit-learn's `partial_fit` / `predict` interface.
|
|
6
|
+
|
|
7
|
+
> **Corrigendum:** Theorem 2 of the D4 paper contains a sign error. See the correction and revised theoretical mechanism: [Markdown](docs/D4_corrigendum.md) · [PDF](docs/D4_corrigendum.pdf)
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install spdal
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Development mode:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
git clone https://github.com/your-org/single-pass-discard-after-learn
|
|
21
|
+
cd single-pass-discard-after-learn
|
|
22
|
+
pip install -e ".[dev]"
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
from sklearn.datasets import make_classification
|
|
31
|
+
from spdal import LRHE
|
|
32
|
+
|
|
33
|
+
X, y = make_classification(n_samples=500, random_state=42)
|
|
34
|
+
|
|
35
|
+
clf = LRHE()
|
|
36
|
+
clf.fit(X[:400], y[:400])
|
|
37
|
+
print(clf.predict(X[400:])) # array of class labels
|
|
38
|
+
print(len(clf.neuron_list)) # number of learned prototypes
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### Incremental (chunk) learning
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from spdal import TRACED
|
|
45
|
+
import numpy as np
|
|
46
|
+
|
|
47
|
+
X, y = make_classification(n_samples=500, random_state=42)
|
|
48
|
+
classes = np.unique(y)
|
|
49
|
+
|
|
50
|
+
clf = TRACED()
|
|
51
|
+
for i in range(0, 400, 50):
|
|
52
|
+
clf.partial_fit(X[i:i+50], y[i:i+50], classes=classes)
|
|
53
|
+
|
|
54
|
+
print(clf.predict(X[400:]))
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## Classifiers
|
|
60
|
+
|
|
61
|
+
| Class | Full name | Year | Key idea |
|
|
62
|
+
|-------|-----------|------|----------|
|
|
63
|
+
| `VEBF` | Versatile Elliptic Basis Function | 2010 | Foundation: PCA-axis hyperellipsoids, single-datum online learning |
|
|
64
|
+
| `LRHE` | Learning with Recoil in Hyperellipsoidal Structure | 2020 | Shrink-and-shift recoil to handle noisy boundary data |
|
|
65
|
+
| `SCIL` | Streaming Chunk Incremental Learning | 2019 | Neuron merging with parallel-axis covariance pooling |
|
|
66
|
+
| `SHEF` | Scalable Hyper-Ellipsoidal Function | 2020 | Regularized covariance + Mahalanobis-based prediction |
|
|
67
|
+
| `D4` | Diversion of Data Distribution Direction | 2026 | Hybrid width formula; principal-axis projection for coincident regions |
|
|
68
|
+
| `TRACED` | Trend-Adaptive Classification with Ellipsoidal Disambiguation | TBD | Adds EMA displacement/expansion tracking for exterior-region prediction |
|
|
69
|
+
|
|
70
|
+
### VEBF
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
from spdal import VEBF
|
|
74
|
+
clf = VEBF(theta=0, delta=1, epsilon=1e-10)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
| Parameter | Default | Description |
|
|
78
|
+
|-----------|---------|-------------|
|
|
79
|
+
| `theta` | `0` | Overlap threshold for neuron merging |
|
|
80
|
+
| `delta` | `1` | Width scaling from pairwise distances |
|
|
81
|
+
| `epsilon` | `1e-10` | Numerical floor |
|
|
82
|
+
|
|
83
|
+
### LRHE
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from spdal import LRHE
|
|
87
|
+
clf = LRHE(alpha=0.5, theta=0, delta=1, epsilon=1e-10)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
| Parameter | Default | Description |
|
|
91
|
+
|-----------|---------|-------------|
|
|
92
|
+
| `alpha` | `0.5` | Shrink multiplier during recoil (0–1) |
|
|
93
|
+
| `theta` | `0` | Overlap threshold for merging |
|
|
94
|
+
| `delta` | `1` | Width scaling from pairwise distances |
|
|
95
|
+
| `epsilon` | `1e-10` | Numerical floor |
|
|
96
|
+
|
|
97
|
+
### SCIL
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
from spdal import SCIL
|
|
101
|
+
clf = SCIL(N0=3, eta=2, delta=1, theta=0, epsilon=1e-10)
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
| Parameter | Default | Description |
|
|
105
|
+
|-----------|---------|-------------|
|
|
106
|
+
| `N0` | `3` | Min samples for an active neuron |
|
|
107
|
+
| `eta` | `2` | Width expansion scaling factor |
|
|
108
|
+
| `delta` | `1` | Width scaling from pairwise distances |
|
|
109
|
+
| `theta` | `0` | Merge overlap threshold |
|
|
110
|
+
| `epsilon` | `1e-10` | Numerical floor |
|
|
111
|
+
|
|
112
|
+
### SHEF
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
from spdal import SHEF
|
|
116
|
+
clf = SHEF(M=3, r=1.5, epsilon=1e-10)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
| Parameter | Default | Description |
|
|
120
|
+
|-----------|---------|-------------|
|
|
121
|
+
| `M` | `3` | Min samples before adaptive threshold triggers |
|
|
122
|
+
| `r` | `1.5` | Ellipsoid radius scaling constant |
|
|
123
|
+
| `epsilon` | `1e-10` | Regularization / numerical floor |
|
|
124
|
+
|
|
125
|
+
### D4
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
from spdal import D4
|
|
129
|
+
clf = D4(width_parameter=1, reduce_dims=0, delta=1, norm=2, r=1.5, threshold=15, epsilon=1e-10)
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
| Parameter | Default | Description |
|
|
133
|
+
|-----------|---------|-------------|
|
|
134
|
+
| `width_parameter` | `1` | Blend: `1` = pure statistical width, `0` = pure expansion-based |
|
|
135
|
+
| `reduce_dims` | `0` | Principal axes to drop in disambiguation subspace |
|
|
136
|
+
| `delta` | `1` | Width scaling from pairwise distances |
|
|
137
|
+
| `norm` | `2` | Lp norm for projected distance |
|
|
138
|
+
| `r` | `1.5` | Radius scaling factor |
|
|
139
|
+
| `threshold` | `15` | Angle threshold (degrees) for axis pairing |
|
|
140
|
+
| `epsilon` | `1e-10` | Numerical floor |
|
|
141
|
+
|
|
142
|
+
D4 maintains **one neuron per class**. When two nearest neurons belong to different classes, it pairs their principal axes by smallest angle and assigns the class with the smaller projected distance in that subspace.
|
|
143
|
+
|
|
144
|
+
### TRACED
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
from spdal import TRACED
|
|
148
|
+
clf = TRACED(
|
|
149
|
+
alpha=0.5, beta=0.01, delta=2, width_parameter=1,
|
|
150
|
+
reduce_dims=1, N0=3, r=2.507, norm=2,
|
|
151
|
+
method='overlap-outside', distance_metric='boundary',
|
|
152
|
+
threshold=15, epsilon=1e-10,
|
|
153
|
+
)
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
| Parameter | Default | Description |
|
|
157
|
+
|-----------|---------|-------------|
|
|
158
|
+
| `alpha` | `0.5` | EMA weight for displacement smoothing (`0` = disabled) |
|
|
159
|
+
| `beta` | `0.01` | EMA weight for expansion-rate smoothing (`0` = disabled) |
|
|
160
|
+
| `delta` | `2` | Dynamic threshold scaling (mean NN distance × delta) |
|
|
161
|
+
| `width_parameter` | `1` | Blend: `1` = statistical, `0` = expansion-based |
|
|
162
|
+
| `reduce_dims` | `1` | Axes to drop in coincident-region disambiguation |
|
|
163
|
+
| `N0` | `3` | Min samples for an active neuron |
|
|
164
|
+
| `r` | `sqrt(2π)` | Statistical width scaling |
|
|
165
|
+
| `norm` | `2` | Lp norm for distance calculation |
|
|
166
|
+
| `method` | `'overlap-outside'` | Corrections to apply: `'overlap'`, `'outside'`, or both |
|
|
167
|
+
| `distance_metric` | `'boundary'` | `'boundary'` or `'center'` |
|
|
168
|
+
| `threshold` | `15` | Angle threshold (degrees) for axis pairing |
|
|
169
|
+
| `epsilon` | `1e-10` | Numerical floor |
|
|
170
|
+
|
|
171
|
+
TRACED resolves two ambiguous regions:
|
|
172
|
+
- **Coincident** (x inside multiple classes) — principal-axis subspace projection (like D4)
|
|
173
|
+
- **Exterior** (x outside all neurons) — predicts using EMA-smoothed displacement and expansion as a trend model
|
|
174
|
+
|
|
175
|
+
---
|
|
176
|
+
|
|
177
|
+
## sklearn Interface
|
|
178
|
+
|
|
179
|
+
All classifiers are `sklearn.base.BaseEstimator` subclasses and support:
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
clf.fit(X, y) # full batch training
|
|
183
|
+
clf.partial_fit(X, y, classes=classes) # incremental update
|
|
184
|
+
clf.predict(X) # returns array of class labels
|
|
185
|
+
clf.classes_ # array of known class labels
|
|
186
|
+
clf.neuron_list # list of neuron dicts
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
Compatible with scikit-learn pipelines and cross-validation tools that support `partial_fit`.
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
## Neuron Schema
|
|
194
|
+
|
|
195
|
+
Learned prototypes are stored in `clf.neuron_list` as a list of dicts:
|
|
196
|
+
|
|
197
|
+
```python
|
|
198
|
+
{
|
|
199
|
+
'y': class_label,
|
|
200
|
+
'center': np.ndarray, # prototype position
|
|
201
|
+
'cov': np.ndarray, # covariance matrix
|
|
202
|
+
'eig_component': np.ndarray, # PCA eigenvectors
|
|
203
|
+
'width': np.ndarray, # semi-axis lengths
|
|
204
|
+
'n': int, # sample count
|
|
205
|
+
# SCIL, D4, TRACED only:
|
|
206
|
+
'variance': np.ndarray, # eigenvalues
|
|
207
|
+
# TRACED only:
|
|
208
|
+
'displacement': np.ndarray, # EMA displacement vector
|
|
209
|
+
'expansion': np.ndarray, # EMA per-axis expansion rates
|
|
210
|
+
}
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
---
|
|
214
|
+
|
|
215
|
+
## Development
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
# Run tests
|
|
219
|
+
pytest tests/ -v
|
|
220
|
+
|
|
221
|
+
# Run a single test class
|
|
222
|
+
pytest tests/test_classifiers.py::TestTRACED -v
|
|
223
|
+
|
|
224
|
+
# Build for PyPI
|
|
225
|
+
pip install build && python -m build
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
---
|
|
229
|
+
|
|
230
|
+
## References
|
|
231
|
+
|
|
232
|
+
1. **VEBF** — Jaiyen, S., Lursinsap, C., & Phimoltares, S. (2010). A New Versatile Elliptic Basis Function Neural Network. *IEEE Transactions on Neural Networks*, 21(3), 381–392.
|
|
233
|
+
2. **LRHE** — Jindadoungrut, K., Phimoltares, S., & Lursinsap, C. (2020). Neural Learning With Recoil Behavior in Hyperellipsoidal Structure. *IEEE Access*, 8, 114643–114655.
|
|
234
|
+
3. **SCIL** — Junsawang, P., Phimoltares, S., & Lursinsap, C. (2019). Streaming chunk incremental learning for class-wise data stream classification with fast learning speed and low structural complexity. *PLOS ONE*, 14(9), e0220624.
|
|
235
|
+
4. **SHEF** — Rungcharassang, P., & Lursinsap, C. (2020). Scalable Hyper-Ellipsoidal Function with Projection Ratio for Local Distributed Streaming Data Classification. *IEEE Access*. DOI: 10.1109/ACCESS.2020.2997944.
|
|
236
|
+
5. **D4** — Wongsriphisant, P., Plaimas, K., & Lursinsap, C. (2026). Markov-based continuous learning with diversion of data distribution direction for streaming data in limited memory. *Expert Systems With Applications*, 298, 129818.
|
|
237
|
+
- Corrigendum (Theorem 2): [docs/D4_corrigendum.md](docs/D4_corrigendum.md) · [PDF](docs/D4_corrigendum.pdf)
|
|
238
|
+
6. **TRACED** — Wongsriphisant, P., Plaimas, K., & Lursinsap, C. TRACED: Trend-Adaptive Classification with Ellipsoidal Disambiguation for Resolving Exterior and Coincident Regions in Data Streams. *Preprint submitted to Elsevier*.
|
|
239
|
+
|
|
240
|
+
---
|
|
241
|
+
|
|
242
|
+
## Notes
|
|
243
|
+
|
|
244
|
+
- The original monolithic implementation is preserved at [`deprecated/spdal.py`](deprecated/spdal.py) for reference.
|
|
245
|
+
- Refactoring into the modular `src/spdal/` package structure, docstrings, and parameter naming were performed by Claude (Anthropic) and reviewed by the project owner.
|
|
spdal-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "spdal"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Single-pass and discard-after-learn hyperellipsoid classifiers for online learning"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "BSD-3-Clause"}
|
|
11
|
+
authors = [
|
|
12
|
+
{name = "Peemapat Wongsriphisant", email = "peemapat.w@gmail.com"},
|
|
13
|
+
]
|
|
14
|
+
requires-python = ">=3.9"
|
|
15
|
+
dependencies = [
|
|
16
|
+
"scikit-learn>=1.3",
|
|
17
|
+
"numpy>=1.24",
|
|
18
|
+
"scipy>=1.10",
|
|
19
|
+
"pandas>=2.0",
|
|
20
|
+
"tqdm>=4.0",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.optional-dependencies]
|
|
24
|
+
dev = ["pytest>=7.0"]
|
|
25
|
+
experiments = ["matplotlib", "river", "ucimlrepo"]
|
|
26
|
+
|
|
27
|
+
[tool.setuptools.packages.find]
|
|
28
|
+
where = ["src"]
|
spdal-0.1.0/src/spdal/__init__.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from .lrhe import LRHE
|
|
2
|
+
from .vebf import VEBF
|
|
3
|
+
from .scil import SCIL
|
|
4
|
+
from .shef import SHEF
|
|
5
|
+
from .d4 import D4
|
|
6
|
+
from .traced import TRACED
|
|
7
|
+
from ._utils import get_axis_edge_points, get_axis_sample_points
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"LRHE",
|
|
11
|
+
"VEBF",
|
|
12
|
+
"SCIL",
|
|
13
|
+
"SHEF",
|
|
14
|
+
"D4",
|
|
15
|
+
"TRACED",
|
|
16
|
+
"get_axis_edge_points",
|
|
17
|
+
"get_axis_sample_points",
|
|
18
|
+
]
|