suq 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- suq-0.1.0/LICENSE +21 -0
- suq-0.1.0/PKG-INFO +19 -0
- suq-0.1.0/README.md +137 -0
- suq-0.1.0/pyproject.toml +3 -0
- suq-0.1.0/setup.cfg +4 -0
- suq-0.1.0/setup.py +19 -0
- suq-0.1.0/suq/SUQ_MLP.py +10 -0
- suq-0.1.0/suq/SUQ_ViT.py +14 -0
- suq-0.1.0/suq/__init__.py +4 -0
- suq-0.1.0/suq/base_suq.py +181 -0
- suq-0.1.0/suq/diag_suq_mlp.py +308 -0
- suq-0.1.0/suq/diag_suq_transformer.py +627 -0
- suq-0.1.0/suq/streamline_layer.py +23 -0
- suq-0.1.0/suq.egg-info/PKG-INFO +19 -0
- suq-0.1.0/suq.egg-info/SOURCES.txt +16 -0
- suq-0.1.0/suq.egg-info/dependency_links.txt +1 -0
- suq-0.1.0/suq.egg-info/requires.txt +3 -0
- suq-0.1.0/suq.egg-info/top_level.txt +1 -0
suq-0.1.0/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 AaltoML

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
suq-0.1.0/PKG-INFO
ADDED
@@ -0,0 +1,19 @@
Metadata-Version: 2.4
Name: suq
Version: 0.1.0
Summary: Streamlined Uncertainty Quantification (SUQ)
Home-page: https://github.com/AaltoML/SUQ
Author: Rui Li, Marcus Klasson, Arno Solin, Martin Trapp
License: MIT
Classifier: Programming Language :: Python :: 3
License-File: LICENSE
Requires-Dist: torch>=1.10
Requires-Dist: numpy>=1.21
Requires-Dist: tqdm>=4.60
Dynamic: author
Dynamic: classifier
Dynamic: home-page
Dynamic: license
Dynamic: license-file
Dynamic: requires-dist
Dynamic: summary
suq-0.1.0/README.md
ADDED
@@ -0,0 +1,137 @@
# SUQ: Streamlined Uncertainty Quantification

This repository contains an open-source library implementation of Streamlined Uncertainty Quantification (SUQ) used in the paper *Streamlining Prediction in Bayesian Deep Learning*, accepted at ICLR 2025.

<table>
<tr>
<td>
<strong>Streamlining Prediction in Bayesian Deep Learning</strong><br>
Rui Li, Marcus Klasson, Arno Solin, Martin Trapp<br>
<strong>International Conference on Learning Representations (ICLR 2025)</strong><br>
<a href="https://arxiv.org/abs/2411.18425"><img alt="Paper" src="https://img.shields.io/badge/-Paper-gray"></a>
<a href="https://github.com/AaltoML/suq"><img alt="Code" src="https://img.shields.io/badge/-Code-gray"></a>
</td>
</tr>
</table>

## SUQ Library
### 📦 Installation
Install the stable version with `pip`:
```bash
pip install suq
```

Or install the latest development version from source:
```bash
git clone https://github.com/AaltoML/SUQ.git
cd SUQ
pip install -e .
```

### 🚀 Simple Usage
#### Streamline Whole Network
```python
from suq import streamline_mlp, streamline_vit

# Load your model and estimated posterior
model = ...
posterior = ...

# Wrap an MLP model with SUQ
suq_model = streamline_mlp(
    model=model,
    posterior=posterior,
    covariance_structure='diag',  # currently only 'diag' is supported
    likelihood='classification'   # or 'regression'
)

# Wrap a Vision Transformer with SUQ
suq_model = streamline_vit(
    model=model,
    posterior=posterior,
    covariance_structure='diag',  # currently only 'diag' is supported
    likelihood='classification',
    MLP_deterministic=True,
    Attn_deterministic=False,
    attention_diag_cov=False,
    num_det_blocks=10
)

# Fit scale factor
suq_model.fit(train_loader, scale_fit_epoch, scale_fit_lr)

# Make a prediction
pred = suq_model(X)
```
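
With the classification likelihood, the streamlined model returns class probabilities (via the probit approximation in `suq/base_suq.py`), so point predictions and simple uncertainty scores can be read off `pred` directly. A minimal sketch, assuming a classification model and a batched input `X`:

```python
import torch

probs = suq_model(X)                    # [batch, num_classes] class probabilities
pred_class = probs.argmax(dim=-1)       # hard label predictions
confidence = probs.max(dim=-1).values   # max-probability confidence
# predictive entropy as a per-example uncertainty score
entropy = -(probs * probs.clamp_min(1e-12).log()).sum(dim=-1)
```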

📄 See [`examples/mlp_la_example.py`](examples/mlp_la_example.py), [`examples/vit_la_example.py`](examples/vit_la_example.py), [`examples/mlp_vi_example.py`](examples/mlp_vi_example.py), and [`examples/vit_vi_example.py`](examples/vit_vi_example.py) for full, self-contained examples that cover:
- Training the MAP model
- Estimating the posterior with Laplace or IVON (mean-field VI); a minimal sketch of this step follows below
- Wrapping the model into a streamlined SUQ version
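
As a rough outline of the posterior-estimation step, assuming the `laplace-torch` package with a diagonal Hessian; the exact object that `streamline_mlp` / `streamline_vit` expect as `posterior` is defined by the example scripts above, so treat this only as a sketch:

```python
# Sketch only: obtain a diagonal Laplace posterior around the MAP weights
# (assumes the laplace-torch package; see the example scripts for the
# exact posterior format expected by SUQ).
from laplace import Laplace

la = Laplace(model, 'classification',
             subset_of_weights='all',
             hessian_structure='diag')
la.fit(train_loader)                 # fit the Laplace approximation at the MAP estimate
posterior = la.posterior_variance    # per-parameter (diagonal) posterior variances
```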

> ❗ **Note on Vision Transformer Support**

Currently, SUQ only supports Vision Transformers implemented in the same style as [`examples/vit_model.py`](examples/vit_model.py). If you're using a different ViT implementation, compatibility is not guaranteed.

#### Streamline Individual Layers

In addition to wrapping full models like MLPs or ViTs, SUQ allows you to manually wrap individual layers in your own networks.

You can directly import supported modules from `suq.streamline_layer`.

Supported Layers:

| Layer Type                 | SUQ Wrapper                   |
|----------------------------|-------------------------------|
| `nn.Linear`                | `SUQ_Linear_Diag`             |
| `nn.ReLU`, etc.            | `SUQ_Activation_Diag`         |
| `nn.BatchNorm1d`           | `SUQ_BatchNorm_Diag`          |
| `nn.LayerNorm`             | `SUQ_LayerNorm_Diag`          |
| MLP (Transformer block)    | `SUQ_TransformerMLP_Diag`     |
| Attention                  | `SUQ_Attention_Diag`          |
| Transformer block          | `SUQ_Transformer_Block_Diag`  |
| Final classifier           | `SUQ_Classifier_Diag`         |

Example:

```python
import torch
import torch.nn as nn

from suq.streamline_layer import SUQ_Linear_Diag

# Define a standard linear layer
linear_layer = nn.Linear(100, 50)
# Provide posterior variances for weights and biases
w_var = torch.rand(50, 100)
b_var = torch.rand(50)

# Wrap the layer with SUQ's linear module
streamlined_layer = SUQ_Linear_Diag(linear_layer, w_var, b_var)

# Provide input mean and variance (e.g., from a previous layer)
input_mean = torch.randn(32, 100)
input_var = torch.rand(32, 100)

# Forward pass through the streamlined layer
pred_mean, pred_var = streamlined_layer(input_mean, input_var)
```
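
For intuition, the moment matching that a diagonal linear wrapper performs can be written out by hand. The sketch below propagates an input mean and variance through an `nn.Linear` layer with diagonal weight and bias variances under the mean-field assumption; it is an illustrative derivation, not necessarily the package's exact implementation:

```python
import torch
import torch.nn as nn

def linear_moment_propagation(layer: nn.Linear, w_var, b_var, x_mean, x_var):
    """Propagate a factorised Gaussian through y = W x + b with diagonal
    weight/bias uncertainty (illustrative sketch)."""
    W, b = layer.weight, layer.bias   # W: [out, in], b: [out]
    out_mean = x_mean @ W.T + b
    # Var[y_i] = sum_j ( E[W_ij]^2 Var[x_j] + Var[W_ij] E[x_j]^2 + Var[W_ij] Var[x_j] ) + Var[b_i]
    out_var = (x_var @ (W ** 2).T
               + (x_mean ** 2) @ w_var.T
               + x_var @ w_var.T
               + b_var)
    return out_mean, out_var

# same shapes as the example above
m, v = linear_moment_propagation(linear_layer, w_var, b_var, input_mean, input_var)
```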

### 🛠️ TODO
- Extend support to other Transformer implementations
- Add Kronecker covariance
- Add full covariance

## Citation

```bibtex
@inproceedings{li2025streamlining,
  title     = {Streamlining Prediction in Bayesian Deep Learning},
  author    = {Rui Li and Marcus Klasson and Arno Solin and Martin Trapp},
  booktitle = {International Conference on Learning Representations ({ICLR})},
  year      = {2025}
}
```

## License
This software is provided under the MIT license.
suq-0.1.0/pyproject.toml
ADDED
suq-0.1.0/setup.cfg
ADDED
suq-0.1.0/setup.py
ADDED
@@ -0,0 +1,19 @@
from setuptools import setup, find_packages

setup(
    name='suq',
    version='0.1.0',
    description='Streamlined Uncertainty Quantification (SUQ)',
    author='Rui Li, Marcus Klasson, Arno Solin, Martin Trapp',
    url='https://github.com/AaltoML/SUQ',
    packages=find_packages(exclude=["examples*", "tests*"]),
    install_requires=[
        'torch>=1.10',
        'numpy>=1.21',
        'tqdm>=4.60'
    ],
    license='MIT',
    classifiers=[
        'Programming Language :: Python :: 3',
    ],
)
suq-0.1.0/suq/SUQ_MLP.py
ADDED
@@ -0,0 +1,10 @@
from .diag_suq_mlp import SUQ_MLP_Diag

def streamline_mlp(model, posterior, covariance_structure, likelihood, scale_init=1.0):
    if covariance_structure == 'diag':
        return SUQ_MLP_Diag(org_model=model,
                            posterior_variance=posterior,
                            likelihood=likelihood,
                            scale_init=scale_init)
    else:
        raise NotImplementedError(f"Covariance structure '{covariance_structure}' is not implemented.")
suq-0.1.0/suq/SUQ_ViT.py
ADDED
@@ -0,0 +1,14 @@
from .diag_suq_transformer import SUQ_ViT_Diag

def streamline_vit(model, posterior, covariance_structure, likelihood, MLP_deterministic, Attn_deterministic, attention_diag_cov, num_det_blocks, scale_init=1.0):
    if covariance_structure == 'diag':
        return SUQ_ViT_Diag(ViT=model,
                            posterior_variance=posterior,
                            MLP_determinstic=MLP_deterministic,
                            Attn_determinstic=Attn_deterministic,
                            likelihood=likelihood,
                            attention_diag_cov=attention_diag_cov,
                            num_det_blocks=num_det_blocks,
                            scale_init=scale_init)
    else:
        raise NotImplementedError(f"Covariance structure '{covariance_structure}' is not implemented.")
suq-0.1.0/suq/base_suq.py
ADDED
@@ -0,0 +1,181 @@
import torch
import torch.nn as nn
import numpy as np
from tqdm import tqdm
from torch.distributions import Categorical
from torch.distributions.normal import Normal
from torch.utils.data import DataLoader

from suq.utils.utils import torch_dataset

device = 'cuda' if torch.cuda.is_available() else 'cpu'

class SUQ_Base(nn.Module):
    """
    Base class for SUQ models.

    Provides core functionality for:
    - Managing likelihood type (regression or classification)
    - Probit-based approximation for classification
    - NLPD-based fitting of the scale factor

    Inputs:
        likelihood (str): Either 'classification' or 'regression'
        scale_init (float): Initial value for the scale factor parameter
    """

    def __init__(self, likelihood, scale_init):
        super().__init__()

        if likelihood not in ['classification', 'regression']:
            raise ValueError(f"Invalid likelihood type {likelihood}")

        self.likelihood = likelihood
        self.scale_factor = nn.Parameter(torch.Tensor([scale_init]).to(device))

    def probit_approximation(self, out_mean, out_var):
        """
        Applies a probit approximation to compute class probabilities from the latent Gaussian distribution.

        Inputs:
            out_mean (Tensor): Latent function mean, shape [B, C]
            out_var (Tensor): Latent function variance, shape [B, C] or [B, C, C]

        Outputs:
            posterior_predict_mean (Tensor): Predicted class probabilities, shape [B, C]
        """

        if out_var.dim() == 3:
            kappa = 1 / torch.sqrt(1. + np.pi / 8 * out_var.diagonal(dim1=1, dim2=2))
        else:
            kappa = 1 / torch.sqrt(1. + np.pi / 8 * out_var)

        posterior_predict_mean = torch.softmax(kappa * out_mean, dim=-1)
        return posterior_predict_mean

    def fit_scale_factor(self, data_loader, n_epoches, lr, speedup=True, verbose=False):
        """
        Fits the scale factor for predictive variance using negative log predictive density (NLPD).

        Inputs:
            data_loader (DataLoader): Dataloader containing (input, target) pairs
            n_epoches (int): Number of epochs for optimization
            lr (float): Learning rate for scale optimizer
            speedup (bool): If True (classification only), caches forward pass outputs to accelerate fitting
            verbose (bool): If True, prints NLPD at each epoch

        Outputs:
            total_train_nlpd (List[float]): Average NLPD per epoch over training data
        """
        print("fit scale factor")
        optimizer = torch.optim.Adam([self.scale_factor], lr)
        total_train_nlpd = []

        # store intermediate result and pack it into a data loader, so we only need to do one forward pass
        if speedup:

            if self.likelihood == 'regression':
                raise ValueError(f"Speed up not supported for regression atm")

            if self.likelihood == 'classification':

                f_mean = []
                f_var = []
                labels = []

                for (X, y) in tqdm(data_loader, desc="packing f_mean f_var into a dataloader"):
                    out_mean, out_var = self.forward_latent(X.to(device))
                    f_mean.append(out_mean.detach().cpu().numpy())
                    f_var.append(out_var.detach().cpu().numpy())
                    if y.dim() == 2:
                        labels.append(y.numpy().argmax(1).reshape(-1, 1))
                    if y.dim() == 1:
                        labels.append(y.numpy().reshape(-1, 1))

                f_mean = np.vstack(f_mean)
                f_var = np.vstack(f_var)
                labels = np.vstack(labels)

                scale_fit_dataset = torch_dataset(f_mean, f_var, labels)
                scale_fit_dataloader = DataLoader(scale_fit_dataset, batch_size=16, shuffle=True)

                for epoch in tqdm(range(n_epoches), desc="fitting scaling factor"):
                    running_nlpd = 0
                    for data_pair in scale_fit_dataloader:
                        x_mean, x_var_label = data_pair
                        num_class = x_mean.shape[1]
                        x_mean = x_mean.to(device)
                        x_var, label = x_var_label.split(num_class, dim=1)
                        x_var = x_var.to(device)
                        label = label.to(device)

                        optimizer.zero_grad()
                        # make prediction
                        x_var = x_var / self.scale_factor.to(device)
                        posterior_predict_mean = self.probit_approximation(x_mean, x_var)
                        # construct log posterior predictive distribution
                        posterior_predictive_dist = Categorical(posterior_predict_mean)
                        # calculate nlpd and update
                        nlpd = -posterior_predictive_dist.log_prob(label).mean()
                        nlpd.backward()
                        optimizer.step()
                        # log nlpd
                        running_nlpd += nlpd.item()
                    total_train_nlpd.append(running_nlpd / len(scale_fit_dataloader))
                    if verbose:
                        print(f"epoch {epoch + 1}, nlpd {total_train_nlpd[-1]}")

                del scale_fit_dataloader
                del scale_fit_dataset

        else:

            if self.likelihood == 'classification':
                for epoch in tqdm(range(n_epoches), desc="fitting scaling factor"):
                    running_nlpd = 0
                    for (data, label) in data_loader:

                        data = data.to(device)
                        label = label.to(device)

                        optimizer.zero_grad()
                        # make prediction
                        posterior_predict_mean = self.forward(data)
                        # construct log posterior predictive distribution
                        posterior_predictive_dist = Categorical(posterior_predict_mean)
                        # calculate nlpd and update
                        nlpd = -posterior_predictive_dist.log_prob(label).mean()
                        nlpd.backward()
                        optimizer.step()
                        # log nlpd
                        running_nlpd += nlpd.item()
                    total_train_nlpd.append(running_nlpd / len(data_loader))
                    if verbose:
                        print(f"epoch {epoch + 1}, nlpd {total_train_nlpd[-1]}")

            if self.likelihood == 'regression':
                for epoch in tqdm(range(n_epoches), desc="fitting scaling factor"):
                    running_nlpd = 0
                    for (data, label) in data_loader:
                        data = data.to(device)
                        label = label.to(device)

                        optimizer.zero_grad()
                        # make prediction
                        posterior_predict_mean, posterior_predict_var = self.forward(data)
                        # construct log posterior predictive distribution
                        posterior_predictive_dist = Normal(posterior_predict_mean, posterior_predict_var.sqrt())
                        # calculate nlpd and update
                        nlpd = -posterior_predictive_dist.log_prob(label).mean()
                        nlpd.backward()
                        optimizer.step()
                        # log nlpd
                        running_nlpd += nlpd.item()

                    total_train_nlpd.append(running_nlpd / len(data_loader))

                    if verbose:
                        print(f"epoch {epoch + 1}, nlpd {total_train_nlpd[-1]}")

        return total_train_nlpd
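
For reference, `SUQ_Base.probit_approximation` above encodes the standard multiclass probit/softmax moment match; in the notation of the code, with latent mean `out_mean` (μ) and diagonal latent variance `out_var` (σ²):

```latex
\kappa_c = \frac{1}{\sqrt{1 + \pi \sigma_c^2 / 8}}, \qquad
p(y = c \mid x) \approx \operatorname{softmax}(\kappa \odot \mu)_c
```

i.e. exactly the `kappa` scaling followed by `torch.softmax(kappa * out_mean, dim=-1)`.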