suq-0.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
suq-0.1.0/LICENSE ADDED
MIT License

Copyright (c) 2025 AaltoML

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
suq-0.1.0/PKG-INFO ADDED
Metadata-Version: 2.4
Name: suq
Version: 0.1.0
Summary: Streamlined Uncertainty Quantification (SUQ)
Home-page: https://github.com/AaltoML/SUQ
Author: Rui Li, Marcus Klasson, Arno Solin, Martin Trapp
License: MIT
Classifier: Programming Language :: Python :: 3
License-File: LICENSE
Requires-Dist: torch>=1.10
Requires-Dist: numpy>=1.21
Requires-Dist: tqdm>=4.60
Dynamic: author
Dynamic: classifier
Dynamic: home-page
Dynamic: license
Dynamic: license-file
Dynamic: requires-dist
Dynamic: summary
suq-0.1.0/README.md ADDED
# SUQ: Streamlined Uncertainty Quantification

![image](suq.png)

This repository contains the open-source implementation of Streamlined Uncertainty Quantification (SUQ), introduced in the paper *Streamlining Prediction in Bayesian Deep Learning*, accepted at ICLR 2025.

<table>
<tr>
<td>
<strong>Streamlining Prediction in Bayesian Deep Learning</strong><br>
Rui Li, Marcus Klasson, Arno Solin, Martin Trapp<br>
<strong>International Conference on Learning Representations (ICLR 2025)</strong><br>
<a href="https://arxiv.org/abs/2411.18425"><img alt="Paper" src="https://img.shields.io/badge/-Paper-gray"></a>
<a href="https://github.com/AaltoML/suq"><img alt="Code" src="https://img.shields.io/badge/-Code-gray"></a>
</td>
</tr>
</table>

## SUQ Library
### 📦 Installation
Install the stable version with `pip`:
```bash
pip install suq
```

Or install the latest development version from source:
```bash
git clone https://github.com/AaltoML/SUQ.git
cd SUQ
pip install -e .
```
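
Either install can be sanity-checked by importing the package's two public entry points, which are re-exported in `suq/__init__.py` (shown later in this listing):

```python
import suq

# both wrappers are exposed at the package top level
print(suq.streamline_mlp, suq.streamline_vit)
```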

### 🚀 Simple Usage
#### Streamline Whole Network
```python
from suq import streamline_mlp, streamline_vit

# Load your model and estimated posterior
model = ...
posterior = ...

# Wrap an MLP model with SUQ
suq_model = streamline_mlp(
    model=model,
    posterior=posterior,
    covariance_structure='diag',  # currently only 'diag' is supported
    likelihood='classification'   # or 'regression'
)

# Wrap a Vision Transformer with SUQ
suq_model = streamline_vit(
    model=model,
    posterior=posterior,
    covariance_structure='diag',  # currently only 'diag' is supported
    likelihood='classification',
    MLP_deterministic=True,
    Attn_deterministic=False,
    attention_diag_cov=False,
    num_det_blocks=10
)

# Fit the scale factor on training data (number of epochs, learning rate)
suq_model.fit(train_loader, scale_fit_epoch, scale_fit_lr)

# Make a prediction
pred = suq_model(X)
```
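
For classification the forward pass returns class probabilities (via a probit approximation), while for regression it returns a predictive mean and variance. A sketch of what to expect from the prediction, based on how the base class consumes `forward` later in this listing:

```python
probs = suq_model(X)      # classification: [batch, num_classes] probabilities
mean, var = suq_model(X)  # regression: predictive mean and variance
```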

📄 See [`examples/mlp_la_example.py`](examples/mlp_la_example.py), [`examples/vit_la_example.py`](examples/vit_la_example.py), [`examples/mlp_vi_example.py`](examples/mlp_vi_example.py), and [`examples/vit_vi_example.py`](examples/vit_vi_example.py) for full, self-contained examples that cover:
- Training the MAP model
- Estimating the posterior with Laplace or IVON (mean-field VI), as sketched below for the Laplace case
- Wrapping the model into a streamlined SUQ version
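
As a rough sketch of the Laplace route: the `laplace-torch` calls below (`Laplace`, `hessian_structure='diag'`, the `posterior_variance` property) are assumptions about that library, and the epoch count and learning rate are placeholders; the bundled examples are the authoritative reference.

```python
from laplace import Laplace  # assumed dependency: laplace-torch

from suq import streamline_mlp

# `model` is a trained MAP classifier, `train_loader` its training data
la = Laplace(model, 'classification',
             subset_of_weights='all', hessian_structure='diag')
la.fit(train_loader)

suq_model = streamline_mlp(
    model=model,
    posterior=la.posterior_variance,  # diagonal posterior variance
    covariance_structure='diag',
    likelihood='classification'
)
suq_model.fit(train_loader, 3, 1e-2)  # placeholder epochs / learning rate
```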

> ❗ **Note on Vision Transformer Support**
> Currently, SUQ only supports Vision Transformers implemented in the same style as [`examples/vit_model.py`](examples/vit_model.py). If you're using a different ViT implementation, compatibility is not guaranteed.

#### Streamline Individual Layers

In addition to wrapping full models such as MLPs or ViTs, SUQ lets you manually wrap individual layers in your own networks.

You can import the supported modules directly from `suq.streamline_layer`.

Supported layers:

| Layer Type                | SUQ Wrapper                  |
|---------------------------|------------------------------|
| `nn.Linear`               | `SUQ_Linear_Diag`            |
| `nn.ReLU`, etc.           | `SUQ_Activation_Diag`        |
| `nn.BatchNorm1d`          | `SUQ_BatchNorm_Diag`         |
| `nn.LayerNorm`            | `SUQ_LayerNorm_Diag`         |
| MLP (Transformer block)   | `SUQ_TransformerMLP_Diag`    |
| Attention                 | `SUQ_Attention_Diag`         |
| Transformer block         | `SUQ_Transformer_Block_Diag` |
| Final classifier          | `SUQ_Classifier_Diag`        |

Example:

```python
import torch
import torch.nn as nn

from suq.streamline_layer import SUQ_Linear_Diag

# Define a standard linear layer
linear_layer = nn.Linear(100, 50)
# Provide posterior variances for weights and biases
w_var = torch.rand(50, 100)
b_var = torch.rand(50)

# Wrap the layer with SUQ's linear module
streamlined_layer = SUQ_Linear_Diag(linear_layer, w_var, b_var)

# Provide input mean and variance (e.g., from a previous layer)
input_mean = torch.randn(32, 100)
input_var = torch.rand(32, 100)

# Forward pass through the streamlined layer
pred_mean, pred_var = streamlined_layer(input_mean, input_var)
```
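
The wrapped modules are composable: each takes an input mean and variance and returns an output mean and variance, so one layer's output can feed the next. A minimal sketch chaining the linear layer above with an activation wrapper; the `SUQ_Activation_Diag` constructor argument is an assumption based on the table above:

```python
from suq.streamline_layer import SUQ_Activation_Diag

# assumed: the wrapper takes the activation module to streamline
streamlined_act = SUQ_Activation_Diag(nn.ReLU())

# propagate the moments from the linear layer through the activation
act_mean, act_var = streamlined_act(pred_mean, pred_var)
```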

### 🛠️ TODO
- Extend support to other Transformer implementations
- Add Kronecker-factored covariance
- Add full covariance

## Citation

```bibtex
@inproceedings{li2025streamlining,
  title = {Streamlining Prediction in Bayesian Deep Learning},
  author = {Rui Li and Marcus Klasson and Arno Solin and Martin Trapp},
  booktitle = {International Conference on Learning Representations ({ICLR})},
  year = {2025}
}
```

## License
This software is provided under the MIT license.
suq-0.1.0/pyproject.toml ADDED
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"
suq-0.1.0/setup.cfg ADDED
[egg_info]
tag_build =
tag_date = 0
suq-0.1.0/setup.py ADDED
from setuptools import setup, find_packages

setup(
    name='suq',
    version='0.1.0',
    description='Streamlined Uncertainty Quantification (SUQ)',
    author='Rui Li, Marcus Klasson, Arno Solin, Martin Trapp',
    url='https://github.com/AaltoML/SUQ',
    packages=find_packages(exclude=["examples*", "tests*"]),
    install_requires=[
        'torch>=1.10',
        'numpy>=1.21',
        'tqdm>=4.60',
    ],
    license='MIT',
    classifiers=[
        'Programming Language :: Python :: 3',
    ],
)
suq-0.1.0/suq/SUQ_MLP.py ADDED
from .diag_suq_mlp import SUQ_MLP_Diag

def streamline_mlp(model, posterior, covariance_structure, likelihood, scale_init=1.0):
    """Wrap an MLP and its estimated posterior into a streamlined SUQ model."""
    if covariance_structure == 'diag':
        return SUQ_MLP_Diag(org_model=model,
                            posterior_variance=posterior,
                            likelihood=likelihood,
                            scale_init=scale_init)
    else:
        raise NotImplementedError(f"Covariance structure '{covariance_structure}' is not implemented.")
suq-0.1.0/suq/SUQ_ViT.py ADDED
from .diag_suq_transformer import SUQ_ViT_Diag

def streamline_vit(model, posterior, covariance_structure, likelihood, MLP_deterministic, Attn_deterministic, attention_diag_cov, num_det_blocks, scale_init=1.0):
    """Wrap a Vision Transformer and its estimated posterior into a streamlined SUQ model."""
    if covariance_structure == 'diag':
        # NOTE: SUQ_ViT_Diag spells these keyword arguments 'determinstic'
        return SUQ_ViT_Diag(ViT=model,
                            posterior_variance=posterior,
                            MLP_determinstic=MLP_deterministic,
                            Attn_determinstic=Attn_deterministic,
                            likelihood=likelihood,
                            attention_diag_cov=attention_diag_cov,
                            num_det_blocks=num_det_blocks,
                            scale_init=scale_init)
    else:
        raise NotImplementedError(f"Covariance structure '{covariance_structure}' is not implemented.")
suq-0.1.0/suq/__init__.py ADDED
from .SUQ_MLP import streamline_mlp
from .SUQ_ViT import streamline_vit

__all__ = ["streamline_mlp", "streamline_vit"]
@@ -0,0 +1,181 @@
import torch
import torch.nn as nn
import numpy as np
from tqdm import tqdm
from torch.distributions import Categorical
from torch.distributions.normal import Normal
from torch.utils.data import DataLoader

from suq.utils.utils import torch_dataset

device = 'cuda' if torch.cuda.is_available() else 'cpu'

class SUQ_Base(nn.Module):
    """
    Base class for SUQ models.

    Provides core functionality for:
    - Managing the likelihood type (regression or classification)
    - A probit-based approximation for classification
    - NLPD-based fitting of the scale factor

    Inputs:
        likelihood (str): Either 'classification' or 'regression'
        scale_init (float): Initial value for the scale factor parameter
    """

    def __init__(self, likelihood, scale_init):
        super().__init__()

        if likelihood not in ['classification', 'regression']:
            raise ValueError(f"Invalid likelihood type '{likelihood}'")

        self.likelihood = likelihood
        # keep the scale factor in float32 regardless of how scale_init is given
        self.scale_factor = nn.Parameter(torch.tensor([scale_init], dtype=torch.float32, device=device))

    def probit_approximation(self, out_mean, out_var):
        """
        Applies a probit approximation to compute class probabilities from the latent Gaussian distribution.

        Inputs:
            out_mean (Tensor): Latent function mean, shape [B, C]
            out_var (Tensor): Latent function variance, shape [B, C] or [B, C, C]

        Outputs:
            posterior_predict_mean (Tensor): Predicted class probabilities, shape [B, C]
        """

        # kappa = 1 / sqrt(1 + pi/8 * var) is the standard probit correction:
        # high-variance logits are shrunk toward zero before the softmax
        if out_var.dim() == 3:
            kappa = 1 / torch.sqrt(1. + np.pi / 8 * out_var.diagonal(dim1=1, dim2=2))
        else:
            kappa = 1 / torch.sqrt(1. + np.pi / 8 * out_var)

        posterior_predict_mean = torch.softmax(kappa * out_mean, dim=-1)
        return posterior_predict_mean

    def fit_scale_factor(self, data_loader, n_epoches, lr, speedup=True, verbose=False):
        """
        Fits the scale factor for the predictive variance by minimizing the negative log predictive density (NLPD).

        Inputs:
            data_loader (DataLoader): Dataloader containing (input, target) pairs
            n_epoches (int): Number of epochs for optimization
            lr (float): Learning rate for the scale optimizer
            speedup (bool): If True (classification only), caches forward-pass outputs to accelerate fitting
            verbose (bool): If True, prints the NLPD at each epoch

        Outputs:
            total_train_nlpd (List[float]): Average NLPD per epoch over the training data
        """
        print("Fitting scale factor")
        optimizer = torch.optim.Adam([self.scale_factor], lr)
        total_train_nlpd = []

        # store intermediate results and pack them into a data loader, so only one forward pass is needed
        if speedup:

            if self.likelihood == 'regression':
                raise ValueError("speedup is not supported for regression at the moment")

            if self.likelihood == 'classification':

                f_mean = []
                f_var = []
                labels = []

                for (X, y) in tqdm(data_loader, desc="packing f_mean, f_var into a dataloader"):
                    out_mean, out_var = self.forward_latent(X.to(device))
                    f_mean.append(out_mean.detach().cpu().numpy())
                    f_var.append(out_var.detach().cpu().numpy())
                    if y.dim() == 2:
                        labels.append(y.numpy().argmax(1).reshape(-1, 1))
                    if y.dim() == 1:
                        labels.append(y.numpy().reshape(-1, 1))

                f_mean = np.vstack(f_mean)
                f_var = np.vstack(f_var)
                labels = np.vstack(labels)

                scale_fit_dataset = torch_dataset(f_mean, f_var, labels)
                scale_fit_dataloader = DataLoader(scale_fit_dataset, batch_size=16, shuffle=True)

                for epoch in tqdm(range(n_epoches), desc="fitting scaling factor"):
                    running_nlpd = 0
                    for data_pair in scale_fit_dataloader:
                        x_mean, x_var_label = data_pair
                        num_class = x_mean.shape[1]
                        x_mean = x_mean.to(device)
                        # the cached dataset concatenates variance and label along dim 1
                        x_var, label = x_var_label.split(num_class, dim=1)
                        x_var = x_var.to(device)
                        # flatten [B, 1] labels to [B] so Categorical.log_prob scores one class per sample
                        label = label.to(device).squeeze(-1)

                        optimizer.zero_grad()
                        # make a prediction with the rescaled variance
                        x_var = x_var / self.scale_factor.to(device)
                        posterior_predict_mean = self.probit_approximation(x_mean, x_var)
                        # construct the posterior predictive distribution
                        posterior_predictive_dist = Categorical(posterior_predict_mean)
                        # calculate the NLPD and update the scale factor
                        nlpd = -posterior_predictive_dist.log_prob(label).mean()
                        nlpd.backward()
                        optimizer.step()
                        # log the NLPD
                        running_nlpd += nlpd.item()
                    total_train_nlpd.append(running_nlpd / len(scale_fit_dataloader))
                    if verbose:
                        print(f"epoch {epoch + 1}, nlpd {total_train_nlpd[-1]}")

                del scale_fit_dataloader
                del scale_fit_dataset

        else:

            if self.likelihood == 'classification':
                for epoch in tqdm(range(n_epoches), desc="fitting scaling factor"):
                    running_nlpd = 0
                    for (data, label) in data_loader:

                        data = data.to(device)
                        label = label.to(device)

                        optimizer.zero_grad()
                        # make a prediction
                        posterior_predict_mean = self.forward(data)
                        # construct the posterior predictive distribution
                        posterior_predictive_dist = Categorical(posterior_predict_mean)
                        # calculate the NLPD and update the scale factor
                        nlpd = -posterior_predictive_dist.log_prob(label).mean()
                        nlpd.backward()
                        optimizer.step()
                        # log the NLPD
                        running_nlpd += nlpd.item()
                    total_train_nlpd.append(running_nlpd / len(data_loader))
                    if verbose:
                        print(f"epoch {epoch + 1}, nlpd {total_train_nlpd[-1]}")

            if self.likelihood == 'regression':
                for epoch in tqdm(range(n_epoches), desc="fitting scaling factor"):
                    running_nlpd = 0
                    for (data, label) in data_loader:
                        data = data.to(device)
                        label = label.to(device)

                        optimizer.zero_grad()
                        # make a prediction
                        posterior_predict_mean, posterior_predict_var = self.forward(data)
                        # construct the posterior predictive distribution
                        posterior_predictive_dist = Normal(posterior_predict_mean, posterior_predict_var.sqrt())
                        # calculate the NLPD and update the scale factor
                        nlpd = -posterior_predictive_dist.log_prob(label).mean()
                        nlpd.backward()
                        optimizer.step()
                        # log the NLPD
                        running_nlpd += nlpd.item()

                    total_train_nlpd.append(running_nlpd / len(data_loader))

                    if verbose:
                        print(f"epoch {epoch + 1}, nlpd {total_train_nlpd[-1]}")

        return total_train_nlpd