ssbc 0.1.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ssbc/__init__.py +50 -2
- ssbc/bootstrap.py +411 -0
- ssbc/cli.py +0 -3
- ssbc/conformal.py +700 -1
- ssbc/cross_conformal.py +425 -0
- ssbc/mcp_server.py +93 -0
- ssbc/operational_bounds_simple.py +367 -0
- ssbc/rigorous_report.py +601 -0
- ssbc/statistics.py +70 -0
- ssbc/utils.py +72 -2
- ssbc/validation.py +409 -0
- ssbc/visualization.py +323 -300
- ssbc-1.1.0.dist-info/METADATA +337 -0
- ssbc-1.1.0.dist-info/RECORD +22 -0
- ssbc-1.1.0.dist-info/licenses/LICENSE +29 -0
- ssbc/ssbc.py +0 -1
- ssbc-0.1.0.dist-info/METADATA +0 -266
- ssbc-0.1.0.dist-info/RECORD +0 -17
- ssbc-0.1.0.dist-info/licenses/LICENSE +0 -21
- {ssbc-0.1.0.dist-info → ssbc-1.1.0.dist-info}/WHEEL +0 -0
- {ssbc-0.1.0.dist-info → ssbc-1.1.0.dist-info}/entry_points.txt +0 -0
- {ssbc-0.1.0.dist-info → ssbc-1.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,337 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: ssbc
|
3
|
+
Version: 1.1.0
|
4
|
+
Summary: Small Sample Beta Correction - PAC guarantees with small datasets
|
5
|
+
Author-email: Petrus H Zwart <phzwart@lbl.gov>
|
6
|
+
Maintainer-email: Petrus H Zwart <phzwart@lbl.gov>
|
7
|
+
License: BSD License
|
8
|
+
|
9
|
+
Copyright (c) 2025, Petrus H. Zwart / Lawrence Berkeley National Laboratory
|
10
|
+
All rights reserved.
|
11
|
+
|
12
|
+
Redistribution and use in source and binary forms, with or without modification,
|
13
|
+
are permitted provided that the following conditions are met:
|
14
|
+
|
15
|
+
* Redistributions of source code must retain the above copyright notice, this
|
16
|
+
list of conditions and the following disclaimer.
|
17
|
+
|
18
|
+
* Redistributions in binary form must reproduce the above copyright notice, this
|
19
|
+
list of conditions and the following disclaimer in the documentation and/or
|
20
|
+
other materials provided with the distribution.
|
21
|
+
|
22
|
+
* Neither the name of the copyright holder nor the names of its
|
23
|
+
contributors may be used to endorse or promote products derived from this
|
24
|
+
software without specific prior written permission.
|
25
|
+
|
26
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
27
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
28
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
29
|
+
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
30
|
+
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
31
|
+
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
32
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
33
|
+
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
34
|
+
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
35
|
+
OF THE POSSIBILITY OF SUCH DAMAGE.
|
36
|
+
|
37
|
+
Project-URL: bugs, https://github.com/phzwart/ssbc/issues
|
38
|
+
Project-URL: changelog, https://github.com/phzwart/ssbc/blob/master/changelog.md
|
39
|
+
Project-URL: homepage, https://github.com/phzwart/ssbc
|
40
|
+
Classifier: Development Status :: 4 - Beta
|
41
|
+
Classifier: Intended Audience :: Science/Research
|
42
|
+
Classifier: Intended Audience :: Developers
|
43
|
+
Classifier: Programming Language :: Python :: 3
|
44
|
+
Classifier: Programming Language :: Python :: 3.10
|
45
|
+
Classifier: Programming Language :: Python :: 3.11
|
46
|
+
Classifier: Programming Language :: Python :: 3.12
|
47
|
+
Classifier: Programming Language :: Python :: 3.13
|
48
|
+
Classifier: Topic :: Scientific/Engineering
|
49
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
50
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
51
|
+
Requires-Python: >=3.10
|
52
|
+
Description-Content-Type: text/markdown
|
53
|
+
License-File: LICENSE
|
54
|
+
Requires-Dist: joblib
|
55
|
+
Requires-Dist: matplotlib
|
56
|
+
Requires-Dist: numpy
|
57
|
+
Requires-Dist: pandas
|
58
|
+
Requires-Dist: plotly
|
59
|
+
Requires-Dist: rich
|
60
|
+
Requires-Dist: scipy
|
61
|
+
Requires-Dist: typer
|
62
|
+
Provides-Extra: test
|
63
|
+
Requires-Dist: coverage; extra == "test"
|
64
|
+
Requires-Dist: pytest; extra == "test"
|
65
|
+
Requires-Dist: pytest-cov; extra == "test"
|
66
|
+
Requires-Dist: ruff; extra == "test"
|
67
|
+
Requires-Dist: ty; extra == "test"
|
68
|
+
Requires-Dist: ipdb; extra == "test"
|
69
|
+
Provides-Extra: dev
|
70
|
+
Requires-Dist: pre-commit; extra == "dev"
|
71
|
+
Requires-Dist: bandit[toml]; extra == "dev"
|
72
|
+
Provides-Extra: docs
|
73
|
+
Requires-Dist: sphinx>=7.0; extra == "docs"
|
74
|
+
Requires-Dist: sphinx-rtd-theme; extra == "docs"
|
75
|
+
Requires-Dist: myst-parser; extra == "docs"
|
76
|
+
Provides-Extra: mcp
|
77
|
+
Requires-Dist: fastmcp; extra == "mcp"
|
78
|
+
Dynamic: license-file
|
79
|
+
|
80
|
+
# SSBC: Small-Sample Beta Correction
|
81
|
+
|
82
|
+

|
83
|
+
[Documentation Status](https://ssbc.readthedocs.io/en/latest/?version=latest)
|
84
|
+
|
85
|
+
**Small-Sample Beta Correction** provides PAC (Probably Approximately Correct) guarantees for conformal prediction with small calibration sets.
|
86
|
+
|
87
|
+
* PyPI package: https://pypi.org/project/ssbc/
|
88
|
+
* Free software: BSD License
|
89
|
+
* Documentation: https://ssbc.readthedocs.io.
|
90
|
+
|
91
|
+
## Overview
|
92
|
+
|
93
|
+
SSBC addresses the challenge of constructing valid prediction sets when you have limited calibration data. Traditional conformal prediction assumes large calibration sets, but in practice, data is often scarce. SSBC provides **finite-sample PAC guarantees** and **rigorous operational bounds** for deployment.
|
94
|
+
|
95
|
+
### What Makes SSBC Unique?
|
96
|
+
|
97
|
+
Unlike asymptotic methods, SSBC provides:
|
98
|
+
|
99
|
+
1. **Finite-Sample PAC Coverage** (via SSBC algorithm)
|
100
|
+
- Rigorous guarantees that hold for ANY sample size
|
101
|
+
- Automatically adapts to class imbalance via Mondrian conformal prediction
|
102
|
+
- Example: "≥90% coverage with 95% probability" even with n=50
|
103
|
+
|
104
|
+
2. **Rigorous Operational Bounds** (via LOO-CV + Clopper-Pearson)
|
105
|
+
- PAC-controlled bounds on automation rates, error rates, escalation rates
|
106
|
+
- Confidence intervals account for estimation uncertainty
|
107
|
+
- Example: "Singleton rate [0.85, 0.97] with 90% PAC guarantee"
|
108
|
+
|
109
|
+
3. **Uncertainty Quantification**
|
110
|
+
- Bootstrap analysis for recalibration uncertainty
|
111
|
+
- Cross-conformal validation for finite-sample diagnostics
|
112
|
+
- Empirical validation for verifying theoretical guarantees
|
113
|
+
|
114
|
+
4. **Contract-Ready Guarantees**
|
115
|
+
- Transform theory into deployable systems
|
116
|
+
- Resource planning (human oversight needs)
|
117
|
+
- SLA compliance (performance bounds)
|
118
|
+
|
119
|
+
### Core Statistical Properties
|
120
|
+
|
121
|
+
🎯 **Distribution-Free**: No assumptions about data distribution
|
122
|
+
🎯 **Model-Agnostic**: Works with ANY probabilistic classifier
|
123
|
+
🎯 **Frequentist**: Valid frequentist guarantees, no prior needed
|
124
|
+
🎯 **Non-Bayesian**: No Bayesian assumptions or hyperpriors
|
125
|
+
🎯 **Finite-Sample**: Exact guarantees for small n, not asymptotic
|
126
|
+
🎯 **Exchangeability Only**: Minimal assumption (test/calibration exchangeable)
|
127
|
+
|
128
|
+
**📖 For detailed theory and deployment guide, see [docs/theory.md](docs/theory.md)**
|
129
|
+
|
130
|
+
## Installation
|
131
|
+
|
132
|
+
```bash
|
133
|
+
pip install ssbc
|
134
|
+
```
|
135
|
+
|
136
|
+
Or from source:
|
137
|
+
|
138
|
+
```bash
|
139
|
+
git clone https://github.com/phzwart/ssbc.git
|
140
|
+
cd ssbc
|
141
|
+
pip install -e .
|
142
|
+
```
|
143
|
+
|
144
|
+
## Quick Start
|
145
|
+
|
146
|
+
### Unified Workflow (Recommended)
|
147
|
+
|
148
|
+
The complete workflow is available through a single function:
|
149
|
+
|
150
|
+
```python
|
151
|
+
from ssbc import BinaryClassifierSimulator, generate_rigorous_pac_report
|
152
|
+
|
153
|
+
# Generate or load calibration data
|
154
|
+
sim = BinaryClassifierSimulator(
|
155
|
+
p_class1=0.2,
|
156
|
+
beta_params_class0=(1, 7),
|
157
|
+
beta_params_class1=(5, 2),
|
158
|
+
seed=42
|
159
|
+
)
|
160
|
+
labels, probs = sim.generate(n_samples=100)
|
161
|
+
|
162
|
+
# Generate comprehensive PAC report with operational bounds
|
163
|
+
report = generate_rigorous_pac_report(
|
164
|
+
labels=labels,
|
165
|
+
probs=probs,
|
166
|
+
alpha_target=0.10, # Target 90% coverage
|
167
|
+
delta=0.10, # 90% PAC confidence
|
168
|
+
test_size=1000, # Expected deployment size
|
169
|
+
use_union_bound=True, # Simultaneous guarantees
|
170
|
+
|
171
|
+
# Optional uncertainty analyses
|
172
|
+
run_bootstrap=True, # Recalibration uncertainty
|
173
|
+
n_bootstrap=1000,
|
174
|
+
simulator=sim,
|
175
|
+
|
176
|
+
run_cross_conformal=True, # Finite-sample diagnostics
|
177
|
+
n_folds=10,
|
178
|
+
)
|
179
|
+
|
180
|
+
# Access results
|
181
|
+
pac_bounds = report['pac_bounds_marginal']
|
182
|
+
print(f"Singleton rate: {pac_bounds['singleton_rate_bounds']}")
|
183
|
+
print(f"Expected: {pac_bounds['expected_singleton_rate']:.3f}")
|
184
|
+
```
|
185
|
+
|
186
|
+
**Output includes:**
|
187
|
+
- ✅ PAC coverage guarantees (SSBC-corrected thresholds)
|
188
|
+
- ✅ Rigorous operational bounds (singleton, doublet, abstention, error rates)
|
189
|
+
- ✅ Per-class and marginal statistics
|
190
|
+
- ✅ Optional: Bootstrap uncertainty intervals
|
191
|
+
- ✅ Optional: Cross-conformal validation diagnostics
|
192
|
+
|
193
|
+
### Core SSBC Algorithm
|
194
|
+
|
195
|
+
For fine-grained control, use the core algorithm directly:
|
196
|
+
|
197
|
+
```python
|
198
|
+
from ssbc import ssbc_correct
|
199
|
+
|
200
|
+
result = ssbc_correct(
|
201
|
+
alpha_target=0.10, # Target 10% miscoverage
|
202
|
+
n=50, # Calibration set size
|
203
|
+
delta=0.10, # PAC parameter (90% confidence)
|
204
|
+
mode="beta" # Infinite test window
|
205
|
+
)
|
206
|
+
|
207
|
+
print(f"Corrected α: {result.alpha_corrected:.4f}")
|
208
|
+
print(f"u*: {result.u_star}")
|
209
|
+
```
|
210
|
+
|
211
|
+
### Validation and Diagnostics
|
212
|
+
|
213
|
+
Empirically validate your PAC bounds:
|
214
|
+
|
215
|
+
```python
|
216
|
+
from ssbc import generate_rigorous_pac_report, validate_pac_bounds, print_validation_results
|
217
|
+
|
218
|
+
# Generate report
|
219
|
+
report = generate_rigorous_pac_report(labels, probs, delta=0.10)
|
220
|
+
|
221
|
+
# Validate empirically
|
222
|
+
validation = validate_pac_bounds(
|
223
|
+
report=report,
|
224
|
+
simulator=sim,
|
225
|
+
test_size=1000,
|
226
|
+
n_trials=10000
|
227
|
+
)
|
228
|
+
|
229
|
+
# Print results
|
230
|
+
print_validation_results(validation)
|
231
|
+
```
|
232
|
+
|
233
|
+
Cross-conformal validation for calibration diagnostics:
|
234
|
+
|
235
|
+
```python
|
236
|
+
from ssbc import cross_conformal_validation
|
237
|
+
|
238
|
+
results = cross_conformal_validation(
|
239
|
+
labels=labels,
|
240
|
+
probs=probs,
|
241
|
+
n_folds=10,
|
242
|
+
alpha_target=0.10,
|
243
|
+
delta=0.10
|
244
|
+
)
|
245
|
+
|
246
|
+
print(f"Singleton rate: {results['marginal']['singleton']['mean']:.3f}")
|
247
|
+
print(f"Std dev: {results['marginal']['singleton']['std']:.3f}")
|
248
|
+
```
|
249
|
+
|
250
|
+
## Key Features
|
251
|
+
|
252
|
+
- ✅ **Small-Sample Correction**: PAC-valid conformal prediction for small calibration sets
|
253
|
+
- ✅ **Mondrian Conformal Prediction**: Per-class calibration for handling class imbalance
|
254
|
+
- ✅ **PAC Operational Bounds**: Rigorous bounds on deployment rates (LOO-CV + Clopper-Pearson)
|
255
|
+
- ✅ **Bootstrap Uncertainty**: Recalibration variability analysis
|
256
|
+
- ✅ **Cross-Conformal Validation**: Finite-sample diagnostics via K-fold
|
257
|
+
- ✅ **Empirical Validation**: Verify theoretical guarantees in practice
|
258
|
+
- ✅ **Comprehensive Statistics**: Detailed reporting with exact confidence intervals
|
259
|
+
- ✅ **Hyperparameter Tuning**: Interactive parallel coordinates visualization
|
260
|
+
- ✅ **Simulation Tools**: Built-in data generators for testing
|
261
|
+
|
262
|
+
## Examples
|
263
|
+
|
264
|
+
The `examples/` directory contains comprehensive demonstrations:
|
265
|
+
|
266
|
+
### Essential Examples
|
267
|
+
|
268
|
+
```bash
|
269
|
+
# Core algorithm
|
270
|
+
python examples/ssbc_core_example.py
|
271
|
+
|
272
|
+
# Mondrian conformal prediction
|
273
|
+
python examples/mondrian_conformal_example.py
|
274
|
+
|
275
|
+
# Complete workflow with all uncertainty analyses
|
276
|
+
python examples/complete_workflow_example.py
|
277
|
+
|
278
|
+
# SLA/deployment contracts
|
279
|
+
python examples/sla_example.py
|
280
|
+
|
281
|
+
# Alpha scanning across thresholds
|
282
|
+
python examples/alpha_scan_example.py
|
283
|
+
|
284
|
+
# Empirical validation
|
285
|
+
python examples/pac_validation_example.py
|
286
|
+
```
|
287
|
+
|
288
|
+
## Understanding the Output
|
289
|
+
|
290
|
+
### Per-Class Statistics (Conditioned on True Label)
|
291
|
+
|
292
|
+
For each class, the report shows:
|
293
|
+
- **Abstentions**: Empty prediction sets (no confident prediction)
|
294
|
+
- **Singletons**: Single-label predictions (automated decisions)
|
295
|
+
- **Doublets**: Both labels included (escalated to human review)
|
296
|
+
- **Singleton Error Rate**: P(error | singleton prediction)
|
297
|
+
|
298
|
+
### Marginal Statistics (Deployment View)
|
299
|
+
|
300
|
+
Overall performance metrics (deployment perspective):
|
301
|
+
- **Coverage**: Fraction of predictions containing the true label
|
302
|
+
- **Automation Rate**: Fraction of confident predictions (singletons)
|
303
|
+
- **Escalation Rate**: Fraction requiring human review (doublets + abstentions)
|
304
|
+
- **Error Rate**: Fraction of automated decisions (singletons) that are incorrect
|
305
|
+
|
306
|
+
### PAC Operational Bounds
|
307
|
+
|
308
|
+
Rigorous bounds on all operational metrics:
|
309
|
+
- Computed via Leave-One-Out Cross-Validation (LOO-CV)
|
310
|
+
- Clopper-Pearson confidence intervals account for estimation uncertainty
|
311
|
+
- Union bound ensures the bounds on all metrics hold simultaneously
|
312
|
+
- Valid for any future test set from the same distribution
|
313
|
+
|
314
|
+
## Citation
|
315
|
+
|
316
|
+
If you use SSBC in your research, please cite:
|
317
|
+
|
318
|
+
```bibtex
|
319
|
+
@software{ssbc2024,
|
320
|
+
author = {Zwart, Petrus H},
|
321
|
+
title = {SSBC: Small-Sample Beta Correction},
|
322
|
+
year = {2024},
|
323
|
+
url = {https://github.com/phzwart/ssbc}
|
324
|
+
}
|
325
|
+
```
|
326
|
+
|
327
|
+
## Contributing
|
328
|
+
|
329
|
+
Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
330
|
+
|
331
|
+
## License
|
332
|
+
|
333
|
+
BSD License - see [LICENSE](LICENSE) file for details.
|
334
|
+
|
335
|
+
## Credits
|
336
|
+
|
337
|
+
This package was created with [Cookiecutter](https://github.com/audreyfeldroy/cookiecutter) and the [audreyfeldroy/cookiecutter-pypackage](https://github.com/audreyfeldroy/cookiecutter-pypackage) project template.
|
@@ -0,0 +1,22 @@
|
|
1
|
+
ssbc/__init__.py,sha256=yPIygDSVgQc_T-kY28H-pFmgBtjUrDj-ZK70wmnlub4,2468
|
2
|
+
ssbc/__main__.py,sha256=Qd-f8z2Q2vpiEP2x6PBFsJrpACWDVxFKQk820MhFmHo,59
|
3
|
+
ssbc/bootstrap.py,sha256=r6XzDDWK7wZxk6aOOQoghq9-q4UVB4OhQlILKE3RYEs,13424
|
4
|
+
ssbc/cli.py,sha256=qLrFXorFCqaq07Z9iKpL8U95xOMujFcsNFUAaQQzh-w,380
|
5
|
+
ssbc/conformal.py,sha256=9aAYzfqCuEE0yiuJ-hHNPiakVqd4jCtt7-pTebBL4pc,39766
|
6
|
+
ssbc/core.py,sha256=2qDiMgBloevB4v62Cl_CKuGhwlYF_bUd2nI_V4dSPPo,6857
|
7
|
+
ssbc/cross_conformal.py,sha256=6e2Cd7bRALOoSN4BGULhDyhQCW65skHUldgglUXgNro,14318
|
8
|
+
ssbc/hyperparameter.py,sha256=e61j6koQ_l8sFvv5XRYa0x3RRPbhGIV3FxnXn1N3v28,9096
|
9
|
+
ssbc/mcp_server.py,sha256=ploZmktX_Vy6mMLJc5yOiNyi7ylqHmCij5GenGC1DS0,3142
|
10
|
+
ssbc/operational_bounds_simple.py,sha256=rBSnOMf_8JokdpqBI1FSgK85v5znf_QcrBeQExUvUJk,12890
|
11
|
+
ssbc/rigorous_report.py,sha256=ECWiVjkWyIeQBBCudaJaLw6MYnJYtmcYaVX6uevLn6k,24242
|
12
|
+
ssbc/simulation.py,sha256=V0nOUoxdDr5tPTCn1A5PRPJpwINqSi8kqwwk9msUwIo,5158
|
13
|
+
ssbc/statistics.py,sha256=W_Fk8RD2gOUgStP8UbaLeAFOKcdl4p2nTTRzoOF_XNI,6792
|
14
|
+
ssbc/utils.py,sha256=HLQ5JoEKrmn4Ny03vqWD-tnhV_RwocfOdTYAj2nOtFM,2808
|
15
|
+
ssbc/validation.py,sha256=z8sfFtEywLssPTS-pqJJHpgQ6XUmZGYI7pA52OM74uE,17169
|
16
|
+
ssbc/visualization.py,sha256=7y8i11LbCQOWGKQ_Xu3BeUsPySIjMWTbAeBHXsHGIHY,20819
|
17
|
+
ssbc-1.1.0.dist-info/licenses/LICENSE,sha256=1xdG5-3J-gq2HJ1wyR6Le6ZHfG6f1RuJuBY31N_EEpM,1546
|
18
|
+
ssbc-1.1.0.dist-info/METADATA,sha256=YLmA7B0ybQxYqYOzz0ncMSQbUDNlqCRypCcyrwqJeBs,11833
|
19
|
+
ssbc-1.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
20
|
+
ssbc-1.1.0.dist-info/entry_points.txt,sha256=hgp8rkP_J-wInF8y42DBMGmqJ1bDGMgHQGq12Y3tMs4,38
|
21
|
+
ssbc-1.1.0.dist-info/top_level.txt,sha256=jkM9L2hWrag3UXW32pWlFmuB_L-G1el4oUbnMTJdDv0,5
|
22
|
+
ssbc-1.1.0.dist-info/RECORD,,
|
@@ -0,0 +1,29 @@
|
|
1
|
+
BSD License
|
2
|
+
|
3
|
+
Copyright (c) 2025, Petrus H. Zwart / Lawrence Berkeley National Laboratory
|
4
|
+
All rights reserved.
|
5
|
+
|
6
|
+
Redistribution and use in source and binary forms, with or without modification,
|
7
|
+
are permitted provided that the following conditions are met:
|
8
|
+
|
9
|
+
* Redistributions of source code must retain the above copyright notice, this
|
10
|
+
list of conditions and the following disclaimer.
|
11
|
+
|
12
|
+
* Redistributions in binary form must reproduce the above copyright notice, this
|
13
|
+
list of conditions and the following disclaimer in the documentation and/or
|
14
|
+
other materials provided with the distribution.
|
15
|
+
|
16
|
+
* Neither the name of the copyright holder nor the names of its
|
17
|
+
contributors may be used to endorse or promote products derived from this
|
18
|
+
software without specific prior written permission.
|
19
|
+
|
20
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
21
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
22
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
23
|
+
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
24
|
+
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
25
|
+
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
26
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
27
|
+
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
28
|
+
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
29
|
+
OF THE POSSIBILITY OF SUCH DAMAGE.
|
ssbc/ssbc.py
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
"""Main module."""
|
ssbc-0.1.0.dist-info/METADATA
DELETED
@@ -1,266 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.4
|
2
|
-
Name: ssbc
|
3
|
-
Version: 0.1.0
|
4
|
-
Summary: Small Sample Beta Correction - PAC guarantees with small datasets
|
5
|
-
Author-email: Petrus H Zwart <phzwart@lbl.gov>
|
6
|
-
Maintainer-email: Petrus H Zwart <phzwart@lbl.gov>
|
7
|
-
License-Expression: MIT
|
8
|
-
Project-URL: bugs, https://github.com/phzwart/ssbc/issues
|
9
|
-
Project-URL: changelog, https://github.com/phzwart/ssbc/blob/master/changelog.md
|
10
|
-
Project-URL: homepage, https://github.com/phzwart/ssbc
|
11
|
-
Classifier: Development Status :: 4 - Beta
|
12
|
-
Classifier: Intended Audience :: Science/Research
|
13
|
-
Classifier: Intended Audience :: Developers
|
14
|
-
Classifier: Programming Language :: Python :: 3
|
15
|
-
Classifier: Programming Language :: Python :: 3.10
|
16
|
-
Classifier: Programming Language :: Python :: 3.11
|
17
|
-
Classifier: Programming Language :: Python :: 3.12
|
18
|
-
Classifier: Programming Language :: Python :: 3.13
|
19
|
-
Classifier: Topic :: Scientific/Engineering
|
20
|
-
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
21
|
-
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
22
|
-
Requires-Python: >=3.10
|
23
|
-
Description-Content-Type: text/markdown
|
24
|
-
License-File: LICENSE
|
25
|
-
Requires-Dist: matplotlib
|
26
|
-
Requires-Dist: numpy
|
27
|
-
Requires-Dist: pandas
|
28
|
-
Requires-Dist: plotly
|
29
|
-
Requires-Dist: rich
|
30
|
-
Requires-Dist: scipy
|
31
|
-
Requires-Dist: typer
|
32
|
-
Provides-Extra: test
|
33
|
-
Requires-Dist: coverage; extra == "test"
|
34
|
-
Requires-Dist: pytest; extra == "test"
|
35
|
-
Requires-Dist: pytest-cov; extra == "test"
|
36
|
-
Requires-Dist: ruff; extra == "test"
|
37
|
-
Requires-Dist: ty; extra == "test"
|
38
|
-
Requires-Dist: ipdb; extra == "test"
|
39
|
-
Provides-Extra: dev
|
40
|
-
Requires-Dist: pre-commit; extra == "dev"
|
41
|
-
Requires-Dist: bandit[toml]; extra == "dev"
|
42
|
-
Dynamic: license-file
|
43
|
-
|
44
|
-
# SSBC: Small-Sample Beta Correction
|
45
|
-
|
46
|
-

|
47
|
-
[](https://ssbc.readthedocs.io/en/latest/?version=latest)
|
48
|
-
|
49
|
-
**Small-Sample Beta Correction** provides PAC (Probably Approximately Correct) guarantees for conformal prediction with small calibration sets.
|
50
|
-
|
51
|
-
* PyPI package: https://pypi.org/project/ssbc/
|
52
|
-
* Free software: MIT License
|
53
|
-
* Documentation: https://ssbc.readthedocs.io.
|
54
|
-
|
55
|
-
## Overview
|
56
|
-
|
57
|
-
SSBC addresses the challenge of constructing valid prediction sets when you have limited calibration data. Traditional conformal prediction assumes large calibration sets, but in practice, data is often scarce. SSBC provides finite-sample correction with PAC guarantees.
|
58
|
-
|
59
|
-
### Key Features
|
60
|
-
|
61
|
-
- ✅ **Small-Sample Correction**: PAC-valid conformal prediction for small calibration sets
|
62
|
-
- ✅ **Mondrian Conformal Prediction**: Per-class calibration for handling class imbalance
|
63
|
-
- ✅ **Comprehensive Statistics**: Detailed reporting with Clopper-Pearson confidence intervals
|
64
|
-
- ✅ **Hyperparameter Tuning**: Interactive parallel coordinates visualization for parameter optimization
|
65
|
-
- ✅ **Simulation Tools**: Built-in data generators for testing and validation
|
66
|
-
|
67
|
-
## Installation
|
68
|
-
|
69
|
-
```bash
|
70
|
-
pip install ssbc
|
71
|
-
```
|
72
|
-
|
73
|
-
Or from source:
|
74
|
-
|
75
|
-
```bash
|
76
|
-
git clone https://github.com/yourusername/ssbc.git
|
77
|
-
cd ssbc
|
78
|
-
pip install -e .
|
79
|
-
```
|
80
|
-
|
81
|
-
## Quick Start
|
82
|
-
|
83
|
-
```python
|
84
|
-
import numpy as np
|
85
|
-
from ssbc import (
|
86
|
-
ssbc_correct,
|
87
|
-
BinaryClassifierSimulator,
|
88
|
-
split_by_class,
|
89
|
-
mondrian_conformal_calibrate,
|
90
|
-
report_prediction_stats,
|
91
|
-
)
|
92
|
-
|
93
|
-
# 1. Generate simulated data
|
94
|
-
sim = BinaryClassifierSimulator(
|
95
|
-
p_class1=0.1,
|
96
|
-
beta_params_class0=(2, 8),
|
97
|
-
beta_params_class1=(8, 2),
|
98
|
-
seed=42
|
99
|
-
)
|
100
|
-
labels, probs = sim.generate(n_samples=100)
|
101
|
-
|
102
|
-
# 2. Split by class for Mondrian CP
|
103
|
-
class_data = split_by_class(labels, probs)
|
104
|
-
|
105
|
-
# 3. Calibrate with SSBC correction
|
106
|
-
cal_result, pred_stats = mondrian_conformal_calibrate(
|
107
|
-
class_data=class_data,
|
108
|
-
alpha_target=0.10, # 10% miscoverage
|
109
|
-
delta=0.10, # 90% PAC guarantee
|
110
|
-
mode="beta"
|
111
|
-
)
|
112
|
-
|
113
|
-
# 4. Generate comprehensive report
|
114
|
-
summary = report_prediction_stats(pred_stats, cal_result, verbose=True)
|
115
|
-
```
|
116
|
-
|
117
|
-
## Core Algorithm: SSBC
|
118
|
-
|
119
|
-
The SSBC algorithm finds the optimal corrected miscoverage rate α' that satisfies:
|
120
|
-
|
121
|
-
**P(Coverage(α') ≥ 1 - α_target) ≥ 1 - δ**
|
122
|
-
|
123
|
-
```python
|
124
|
-
from ssbc import ssbc_correct
|
125
|
-
|
126
|
-
result = ssbc_correct(
|
127
|
-
alpha_target=0.10, # Target 10% miscoverage
|
128
|
-
n=50, # Calibration set size
|
129
|
-
delta=0.10, # PAC parameter (90% confidence)
|
130
|
-
mode="beta" # Infinite test window
|
131
|
-
)
|
132
|
-
|
133
|
-
print(f"Corrected α: {result.alpha_corrected:.4f}")
|
134
|
-
print(f"u*: {result.u_star}")
|
135
|
-
```
|
136
|
-
|
137
|
-
### Parameters
|
138
|
-
|
139
|
-
- `alpha_target`: Target miscoverage rate (e.g., 0.10 for 90% coverage)
|
140
|
-
- `n`: Calibration set size
|
141
|
-
- `delta`: PAC risk tolerance (probability of violating guarantee)
|
142
|
-
- `mode`: "beta" (infinite test) or "beta-binomial" (finite test)
|
143
|
-
|
144
|
-
## Module Structure
|
145
|
-
|
146
|
-
The library is organized into focused modules:
|
147
|
-
|
148
|
-
### Core Modules
|
149
|
-
|
150
|
-
- **`ssbc.core`**: Core SSBC algorithm (`ssbc_correct`, `SSBCResult`)
|
151
|
-
- **`ssbc.conformal`**: Mondrian conformal prediction (`mondrian_conformal_calibrate`, `split_by_class`)
|
152
|
-
- **`ssbc.statistics`**: Statistical utilities (`clopper_pearson_intervals`, `cp_interval`)
|
153
|
-
|
154
|
-
### Analysis & Visualization
|
155
|
-
|
156
|
-
- **`ssbc.visualization`**: Reporting and plotting (`report_prediction_stats`, `plot_parallel_coordinates_plotly`)
|
157
|
-
- **`ssbc.hyperparameter`**: Parameter tuning (`sweep_hyperparams_and_collect`, `sweep_and_plot_parallel_plotly`)
|
158
|
-
|
159
|
-
### Testing & Simulation
|
160
|
-
|
161
|
-
- **`ssbc.simulation`**: Data generators (`BinaryClassifierSimulator`)
|
162
|
-
|
163
|
-
## Examples
|
164
|
-
|
165
|
-
The `examples/` directory contains comprehensive demonstrations:
|
166
|
-
|
167
|
-
### 1. Core SSBC Algorithm
|
168
|
-
```bash
|
169
|
-
python examples/ssbc_core_example.py
|
170
|
-
```
|
171
|
-
Demonstrates the SSBC algorithm for different calibration set sizes.
|
172
|
-
|
173
|
-
### 2. Mondrian Conformal Prediction
|
174
|
-
```bash
|
175
|
-
python examples/mondrian_conformal_example.py
|
176
|
-
```
|
177
|
-
Complete workflow: simulation → calibration → reporting.
|
178
|
-
|
179
|
-
### 3. Hyperparameter Sweep
|
180
|
-
```bash
|
181
|
-
python examples/hyperparameter_sweep_example.py
|
182
|
-
```
|
183
|
-
Interactive parameter tuning with parallel coordinates visualization.
|
184
|
-
|
185
|
-
## Hyperparameter Tuning
|
186
|
-
|
187
|
-
Sweep over α and δ values to find optimal configurations:
|
188
|
-
|
189
|
-
```python
|
190
|
-
from ssbc import sweep_and_plot_parallel_plotly
|
191
|
-
import numpy as np
|
192
|
-
|
193
|
-
# Define grid
|
194
|
-
alpha_grid = np.arange(0.05, 0.20, 0.05)
|
195
|
-
delta_grid = np.arange(0.05, 0.20, 0.05)
|
196
|
-
|
197
|
-
# Run sweep and visualize
|
198
|
-
df, fig = sweep_and_plot_parallel_plotly(
|
199
|
-
class_data=class_data,
|
200
|
-
alpha_0=alpha_grid, delta_0=delta_grid,
|
201
|
-
alpha_1=alpha_grid, delta_1=delta_grid,
|
202
|
-
color='err_all' # Color by error rate
|
203
|
-
)
|
204
|
-
|
205
|
-
# Save interactive plot
|
206
|
-
fig.write_html("sweep_results.html")
|
207
|
-
|
208
|
-
# Analyze results
|
209
|
-
print(df[['a0', 'd0', 'cov', 'sing_rate', 'err_all']].head())
|
210
|
-
```
|
211
|
-
|
212
|
-
The interactive plot allows you to:
|
213
|
-
- Brush (select) ranges on any axis to filter configurations
|
214
|
-
- Explore trade-offs between coverage, automation, and error rates
|
215
|
-
- Identify Pareto-optimal hyperparameter settings
|
216
|
-
|
217
|
-
## Understanding the Output
|
218
|
-
|
219
|
-
### Per-Class Statistics (Conditioned on True Label)
|
220
|
-
|
221
|
-
For each class, the report shows:
|
222
|
-
- **Abstentions**: Empty prediction sets
|
223
|
-
- **Singletons**: Confident predictions (automated decisions)
|
224
|
-
- Correct: True label in singleton set
|
225
|
-
- Incorrect: True label not in singleton set
|
226
|
-
- **Doublets**: Both labels included (escalated to human review)
|
227
|
-
|
228
|
-
### Marginal Statistics (Deployment View)
|
229
|
-
|
230
|
-
Overall performance metrics ignoring true labels:
|
231
|
-
- **Coverage**: Fraction of predictions containing the true label
|
232
|
-
- **Singleton rate**: Fraction of confident predictions (automation level)
|
233
|
-
- **Escalation rate**: Fraction requiring human review
|
234
|
-
- **Error rates**: By predicted class and overall
|
235
|
-
|
236
|
-
### PAC Bounds
|
237
|
-
|
238
|
-
The report includes theoretical and observed singleton error rates:
|
239
|
-
- **α'_bound**: Theoretical upper bound from PAC analysis
|
240
|
-
- **α'_observed**: Observed error rate on calibration data
|
241
|
-
- ✓ if observed ≤ bound (PAC guarantee satisfied)
|
242
|
-
|
243
|
-
## Citation
|
244
|
-
|
245
|
-
If you use SSBC in your research, please cite:
|
246
|
-
|
247
|
-
```bibtex
|
248
|
-
@software{ssbc2024,
|
249
|
-
author = {Zwart, Petrus H},
|
250
|
-
title = {SSBC: Small-Sample Beta Correction},
|
251
|
-
year = {2024},
|
252
|
-
url = {https://github.com/yourusername/ssbc}
|
253
|
-
}
|
254
|
-
```
|
255
|
-
|
256
|
-
## Contributing
|
257
|
-
|
258
|
-
Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
259
|
-
|
260
|
-
## License
|
261
|
-
|
262
|
-
MIT License - see [LICENSE](LICENSE) file for details.
|
263
|
-
|
264
|
-
## Credits
|
265
|
-
|
266
|
-
This package was created with [Cookiecutter](https://github.com/audreyfeldroy/cookiecutter) and the [audreyfeldroy/cookiecutter-pypackage](https://github.com/audreyfeldroy/cookiecutter-pypackage) project template.
|
ssbc-0.1.0.dist-info/RECORD
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
ssbc/__init__.py,sha256=T5Xr2tMLW0SIsphFeAKqbjHK_cjixLA8yUgdqVK53Io,1236
|
2
|
-
ssbc/__main__.py,sha256=Qd-f8z2Q2vpiEP2x6PBFsJrpACWDVxFKQk820MhFmHo,59
|
3
|
-
ssbc/cli.py,sha256=i5PLSsXS3glAFH1yVdd0YJAQVDNLd8z3jPJiPo5IW8k,436
|
4
|
-
ssbc/conformal.py,sha256=4Xq6OXJ3_vm8A177_goIYPic-Bb-qzAWLnSRJhTZMik,12340
|
5
|
-
ssbc/core.py,sha256=2qDiMgBloevB4v62Cl_CKuGhwlYF_bUd2nI_V4dSPPo,6857
|
6
|
-
ssbc/hyperparameter.py,sha256=e61j6koQ_l8sFvv5XRYa0x3RRPbhGIV3FxnXn1N3v28,9096
|
7
|
-
ssbc/simulation.py,sha256=V0nOUoxdDr5tPTCn1A5PRPJpwINqSi8kqwwk9msUwIo,5158
|
8
|
-
ssbc/ssbc.py,sha256=h0hwdogXGFqerm-5ZPeT-irPn91pCcQRjiHThXsRzEk,19
|
9
|
-
ssbc/statistics.py,sha256=UzQe6kPBIVHsGe-NMt4nECdqqIpZRnsls1Bnl2GNBgI,4824
|
10
|
-
ssbc/utils.py,sha256=1RxiNQM7rpegUEPuFvOlbSGesR4gnWpXr82bZQCgELM,77
|
11
|
-
ssbc/visualization.py,sha256=aAjnDKYMPMFEyD965f_8DMccTrhWeEvtFvKuqy1Cflk,19647
|
12
|
-
ssbc-0.1.0.dist-info/licenses/LICENSE,sha256=YOKwrV5OLHoJ_e8T4lkylNpjDdEEe4vvMG0tmUxYxco,1072
|
13
|
-
ssbc-0.1.0.dist-info/METADATA,sha256=jFZIeDJz1p6BWVvF0985urA-2g0_jnD-ETi3z5BdWwI,8357
|
14
|
-
ssbc-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
15
|
-
ssbc-0.1.0.dist-info/entry_points.txt,sha256=hgp8rkP_J-wInF8y42DBMGmqJ1bDGMgHQGq12Y3tMs4,38
|
16
|
-
ssbc-0.1.0.dist-info/top_level.txt,sha256=jkM9L2hWrag3UXW32pWlFmuB_L-G1el4oUbnMTJdDv0,5
|
17
|
-
ssbc-0.1.0.dist-info/RECORD,,
|