unicorn-eval 1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ # Copyright 2025 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
@@ -0,0 +1,33 @@
1
+ # Copyright 2025 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from unicorn_eval.adaptors.classification import (
16
+ KNN,
17
+ WeightedKNN,
18
+ LogisticRegression,
19
+ LinearProbing,
20
+ MLP,
21
+ )
22
+ from unicorn_eval.adaptors.detection import DensityMap
23
+ from unicorn_eval.adaptors.segmentation import SegmentationUpsampling
24
+
25
+ __all__ = [
26
+ "KNN",
27
+ "WeightedKNN",
28
+ "LogisticRegression",
29
+ "LinearProbing",
30
+ "MLP",
31
+ "DensityMap",
32
+ "SegmentationUpsampling",
33
+ ]
@@ -0,0 +1,65 @@
1
+ from abc import ABC, abstractmethod
2
+ import numpy as np
3
+
4
+
5
class BaseAdaptor(ABC):
    """
    Abstract interface for adaptors solving coarse-grained tasks such as
    classification or regression.

    An adaptor is built from train_feats, train_labels and test_feats;
    concrete subclasses must implement `fit` and `predict`.
    """

    def __init__(self, train_feats: np.ndarray, train_labels: np.ndarray, test_feats: np.ndarray):
        # The inputs are stored exactly as given: no copy, no validation.
        self.train_feats, self.train_labels, self.test_feats = (
            train_feats,
            train_labels,
            test_feats,
        )

    @abstractmethod
    def fit(self):
        """
        Fit the model using train_feats and train_labels.
        """

    @abstractmethod
    def predict(self) -> np.ndarray:
        """
        Predict using test_feats.

        Returns:
            np.ndarray: Predictions for the test set.
        """
31
+
32
+
33
class DenseAdaptor(BaseAdaptor):
    """
    Abstract interface for adaptors solving dense prediction tasks such as
    detection or segmentation.

    On top of train_feats, train_labels and test_feats, a dense adaptor also
    receives train_coordinates and test_coordinates.
    """

    def __init__(
        self,
        train_feats: np.ndarray,
        train_labels: np.ndarray,
        test_feats: np.ndarray,
        train_coordinates: np.ndarray,
        test_coordinates: np.ndarray,
    ):
        # Features and labels are handled by the parent initializer.
        super().__init__(train_feats, train_labels, test_feats)
        # Coordinates are stored as given, without copy or validation.
        self.train_coordinates, self.test_coordinates = train_coordinates, test_coordinates

    @abstractmethod
    def fit(self):
        """
        Fit the model using train_feats, train_labels, and train_coordinates.
        """

    @abstractmethod
    def predict(self) -> np.ndarray:
        """
        Predict using test_feats and test_coordinates.

        Returns:
            np.ndarray: Predictions for the test set.
        """
@@ -0,0 +1,380 @@
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.optim as optim
5
+ import sklearn
6
+ from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
7
+ from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
8
+
9
+ from unicorn_eval.adaptors.base import BaseAdaptor
10
+
11
+
12
def preprocess_features(
    train_feats: np.ndarray,
    test_feats: np.ndarray,
    center: bool = True,
    normalize_feats: bool = True,
) -> tuple[np.ndarray, np.ndarray]:
    """
    Preprocess feature vectors by optional centering and L2 normalization.

    Args:
        train_feats: Training feature array (N_train, D)
        test_feats: Test feature array (N_test, D)
        center: Whether to subtract the mean of the training features
            (the training mean is subtracted from both train and test features)
        normalize_feats: Whether to scale each feature vector to unit L2 norm

    Returns:
        Preprocessed (train_feats, test_feats) as np.ndarray. The input arrays
        are not modified in place.
    """
    train_feats = np.asarray(train_feats)
    test_feats = np.asarray(test_feats)

    if center:
        # NumPy reductions take `axis`, not the torch-style `dim`.
        mean_feat = train_feats.mean(axis=0, keepdims=True)
        train_feats = train_feats - mean_feat
        test_feats = test_feats - mean_feat

    if normalize_feats:
        train_norms = np.linalg.norm(train_feats, axis=-1, keepdims=True)
        test_norms = np.linalg.norm(test_feats, axis=-1, keepdims=True)
        # All-zero vectors have norm 0; divide them by 1 so they stay zero
        # instead of turning into NaN.
        train_feats = train_feats / np.where(train_norms == 0, 1.0, train_norms)
        test_feats = test_feats / np.where(test_norms == 0, 1.0, test_norms)

    return train_feats, test_feats
40
+
41
+
42
class KNN(BaseAdaptor):
    """
    K-Nearest Neighbors (KNN) probing for classification or regression tasks.

    Features are optionally centered and/or L2-normalized with the module-level
    `preprocess_features` helper before a scikit-learn KNN model is fitted or queried.

    Attributes:
        k (int): Number of neighbors to consider for KNN.
        task_type (Literal["classification", "regression"]): The type of task to perform.
        num_workers (int): Passed to the sklearn model as `n_jobs`. Default is 8.
        center_feats (bool): Whether to subtract the training mean from features. Default is False.
        normalize_feats (bool): Whether to L2 normalize features. Default is False.
        model: The fitted sklearn estimator, or None before `fit` is called.
    Methods:
        fit():
            Fits the KNN model on the (preprocessed) training features and labels.
        predict() -> np.ndarray:
            Predicts the labels or values for the (preprocessed) test features.
    """
    def __init__(self, train_feats, train_labels, test_feats, k, task_type, num_workers=8, center_feats=False, normalize_feats=False):
        super().__init__(train_feats, train_labels, test_feats)
        self.k = k
        self.task_type = task_type
        self.num_workers = num_workers
        self.center_feats = center_feats
        self.normalize_feats = normalize_feats
        self.model = None

    def fit(self):
        """
        Fit the KNN model on the preprocessed training features.

        Raises:
            ValueError: If `task_type` is neither "classification" nor "regression".
        """
        # Validate the task type first so a bad configuration fails before any work is done.
        if self.task_type == "classification":
            model = KNeighborsClassifier(n_neighbors=self.k, n_jobs=self.num_workers)
        elif self.task_type == "regression":
            model = KNeighborsRegressor(n_neighbors=self.k, n_jobs=self.num_workers)
        else:
            raise ValueError(f"Unknown task type: {self.task_type}")

        train_feats, _ = preprocess_features(
            self.train_feats, self.test_feats, center=self.center_feats, normalize_feats=self.normalize_feats
        )

        model.fit(train_feats, self.train_labels)
        # Only expose the model once it has been fitted successfully.
        self.model = model

    def predict(self) -> np.ndarray:
        """
        Predict labels (classification) or values (regression) for the test features.

        Returns:
            np.ndarray: Predictions for the test set.

        Raises:
            ValueError: If `fit` has not been called yet.
        """
        # Check the fitted state before preprocessing the features.
        if self.model is None:
            raise ValueError("Model has not been fitted yet. Call `fit` before `predict`.")

        # The stored features are raw, so the test features are preprocessed again
        # here using the training statistics.
        _, test_feats = preprocess_features(
            self.train_feats, self.test_feats, center=self.center_feats, normalize_feats=self.normalize_feats
        )

        return self.model.predict(test_feats)
93
+
94
+
95
class WeightedKNN(BaseAdaptor):
    """
    WeightedKNN is a k-Nearest Neighbors (k-NN) based adaptor that weights neighbors by
    similarity for classification, ordinal classification, and regression tasks.

    Attributes:
        train_feats (np.ndarray): Training feature matrix (replaced by its preprocessed version in `fit`).
        train_labels (np.ndarray): Labels corresponding to the training features.
        test_feats (np.ndarray): Test feature matrix (replaced by its preprocessed version in `fit`).
        k (int): Number of nearest neighbors to consider.
        task_type (str): Type of task, one of ["classification", "ordinal-classification", "regression"].
        metric (str or callable): Similarity metric to use. Options are "cosine", "euclidean", or a
            callable `f(x, y)` returning pairwise similarities (higher means more similar).
        center_feats (bool): Whether to center the features during preprocessing.
        normalize_feats (bool): Whether to normalize the features during preprocessing.
        return_probabilities (bool): Whether to return class probabilities for classification tasks.
        class_values (np.ndarray or None): Optional array of allowed output values for regression;
            when given, each regression prediction is snapped to the closest of these values.
    Methods:
        fit():
            Validates the configuration, preprocesses the features and sets up the similarity
            function and class-related attributes based on the task type.
        predict() -> np.ndarray | tuple[np.ndarray, np.ndarray]:
            Predicts the output for the test features based on the k-nearest neighbors. For
            classification tasks, it can optionally return class probabilities.
    """
    def __init__(self, train_feats, train_labels, test_feats, k, task_type, metric="cosine", center_feats=False, normalize_feats=False, return_probabilities=False, class_values=None):
        super().__init__(train_feats, train_labels, test_feats)
        self.k = k
        self.task_type = task_type
        self.metric = metric
        self.center_feats = center_feats
        self.normalize_feats = normalize_feats
        self.return_probabilities = return_probabilities
        self.class_values = class_values
        self.similarity_fn = None
        self.unique_classes = None
        self.class_to_idx = None
        self.num_classes = None

        # Kept as an assert so the exception type seen by existing callers does not change.
        assert not (
            task_type == "regression" and return_probabilities
        ), "Cannot return probabilities for regression."

    def fit(self):
        """
        Prepare the adaptor for prediction.

        Raises:
            ValueError: If `task_type` or `metric` is not supported.
        """
        # Without this check an unknown task type would make `predict` silently
        # return an empty array.
        if self.task_type not in ("classification", "ordinal-classification", "regression"):
            raise ValueError(f"Unknown task type: {self.task_type}")

        # define similarity function (validated before the stored features are touched)
        if callable(self.metric):
            similarity_fn = self.metric
        elif self.metric == "cosine":
            similarity_fn = cosine_similarity
        elif self.metric == "euclidean":
            # Turn a distance into a similarity; the epsilon avoids division by zero
            # for identical points.
            similarity_fn = lambda x, y: 1.0 / (euclidean_distances(x, y) + 1e-8)
        else:
            raise ValueError(f"Unsupported metric: {self.metric}")

        self.train_feats, self.test_feats = preprocess_features(
            self.train_feats, self.test_feats, center=self.center_feats, normalize_feats=self.normalize_feats
        )
        self.similarity_fn = similarity_fn

        if self.task_type in ["classification", "ordinal-classification"]:
            self.unique_classes = np.unique(self.train_labels)
            self.class_to_idx = {cls: idx for idx, cls in enumerate(self.unique_classes)}
            self.num_classes = len(self.unique_classes)

    def predict(self) -> np.ndarray | tuple[np.ndarray, np.ndarray]:
        """
        Predict an output for every test feature vector from its k most similar training samples.

        Returns:
            np.ndarray: Predictions for the test set. When `return_probabilities` is set and the
            task is a classification variant, a tuple `(predictions, probabilities)` is returned
            instead, with one probability row per test sample and one column per class in
            `unique_classes`.

        Raises:
            ValueError: If `fit` has not been called yet, or `task_type` is not supported.
        """
        if self.train_feats is None or self.test_feats is None or self.similarity_fn is None:
            raise ValueError("Model has not been fitted yet. Call `fit` before `predict`.")

        is_classification = self.task_type in ["classification", "ordinal-classification"]
        # Array views make the fancy indexing / arithmetic below work for list inputs too.
        train_labels = np.asarray(self.train_labels)
        class_values = None if self.class_values is None else np.asarray(self.class_values)

        predictions = []
        all_probs = []

        for test_point in self.test_feats:
            sim = self.similarity_fn(test_point.reshape(1, -1), self.train_feats).flatten()
            # Indices of the k most similar training samples, most similar first.
            k_indices = np.argsort(-sim)[:self.k]
            k_labels = train_labels[k_indices]
            k_similarities = sim[k_indices]

            if self.task_type == "regression":
                # Similarity-weighted average of the neighbor targets.
                weighted_avg = np.sum(k_labels * k_similarities) / (np.sum(k_similarities) + 1e-8)
                if class_values is not None:
                    # Snap the continuous estimate to the closest allowed value.
                    diffs = np.abs(class_values - weighted_avg)
                    predictions.append(class_values[np.argmin(diffs)])
                else:
                    predictions.append(weighted_avg)

            elif is_classification:
                # Accumulate the similarity mass of the neighbors per class.
                class_weights = np.zeros(self.num_classes)
                for label, weight in zip(k_labels, k_similarities):
                    class_weights[self.class_to_idx[label]] += weight

                class_probs = class_weights / (np.sum(class_weights) + 1e-8)
                all_probs.append(class_probs)

                if self.task_type == "ordinal-classification":
                    # Expected class value under the neighbor distribution, rounded to an integer.
                    expected_val = np.dot(class_probs, self.unique_classes)
                    predicted_class = int(np.round(expected_val))
                else:
                    predicted_class = self.unique_classes[np.argmax(class_probs)]

                predictions.append(predicted_class)

            else:
                raise ValueError(f"Unknown task type: {self.task_type}")

        predictions = np.array(predictions)
        if self.return_probabilities and is_classification:
            # np.vstack cannot stack an empty list, so handle an empty test set explicitly.
            probs = np.vstack(all_probs) if all_probs else np.empty((0, self.num_classes))
            return predictions, probs
        return predictions
202
+
203
+
204
class LogisticRegression(BaseAdaptor):
    """
    An adaptor that trains a scikit-learn logistic regression model on the training
    features and predicts labels for the test features.

    Attributes:
        train_feats (np.ndarray): The feature matrix for training the model.
        train_labels (np.ndarray): The labels corresponding to the training features.
        test_feats (np.ndarray): The feature matrix for testing the model.
        max_iter (int): The maximum number of iterations for the solver to converge. Default is 1000.
        C (float): Inverse of regularization strength; smaller values specify stronger regularization. Default is 1.0.
        solver (str): The algorithm to use in the optimization problem. Default is "lbfgs".
        model: The fitted sklearn estimator, or None before `fit` is called.
    Methods:
        fit():
            Trains the logistic regression model using the training features and labels.
        predict() -> np.ndarray:
            Predicts the labels for the test features using the trained model.
    """
    def __init__(self, train_feats, train_labels, test_feats, max_iter=1000, C=1.0, solver="lbfgs"):
        super().__init__(train_feats, train_labels, test_feats)
        self.max_iter = max_iter
        self.C = C
        self.solver = solver
        self.model = None

    def fit(self):
        """
        Train the logistic regression model (with a fixed `random_state=0`).
        """
        # Import the submodule explicitly: a bare `import sklearn` is not guaranteed to
        # make `sklearn.linear_model` available as an attribute. The alias avoids a clash
        # with this adaptor's own class name.
        from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression

        self.model = SklearnLogisticRegression(
            C=self.C, max_iter=self.max_iter, solver=self.solver, random_state=0
        )
        self.model.fit(self.train_feats, self.train_labels)

    def predict(self) -> np.ndarray:
        """
        Predict labels for the test features.

        Returns:
            np.ndarray: Predictions for the test set.

        Raises:
            ValueError: If `fit` has not been called yet.
        """
        if self.model is None:
            raise ValueError("Model has not been fitted yet. Call `fit` before `predict`.")
        return self.model.predict(self.test_feats)
234
+
235
+
236
class LinearClassifier(nn.Module):
    """
    Single fully connected layer mapping `input_dim` features to `output_dim` outputs.
    """

    def __init__(self, input_dim: int, output_dim: int):
        super().__init__()
        self.fc = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # No activation is applied; the raw outputs of the linear layer are returned.
        outputs = self.fc(x)
        return outputs
247
+
248
+
249
class LinearProbing(BaseAdaptor):
    """
    Linear probing on pre-extracted features for classification or regression tasks.

    A single linear layer is trained on the training features with Adam, using the whole
    training set as one batch per epoch.

    Attributes:
        train_feats (np.ndarray): The training feature matrix of shape (n_samples, n_features).
        train_labels (np.ndarray): The training labels corresponding to the training features.
        test_feats (np.ndarray): The test feature matrix of shape (n_samples, n_features).
        task_type (str): The type of task, either "classification" or "regression".
        num_epochs (int): The number of epochs for training the linear model. Default is 100.
        learning_rate (float): The learning rate for the optimizer. Default is 0.001.
        model: The trained torch module, or None before `fit` is called.
        unique_classes (np.ndarray or None): Sorted distinct training labels (classification only).
    Methods:
        fit():
            Trains a linear model on the training features and labels using the specified task type.
        predict() -> np.ndarray:
            Predicts class labels (classification) or values (regression) for the test features.
    """
    def __init__(self, train_feats, train_labels, test_feats, task_type, num_epochs=100, learning_rate=0.001):
        super().__init__(train_feats, train_labels, test_feats)
        self.task_type = task_type
        self.num_epochs = num_epochs
        self.learning_rate = learning_rate
        self.model = None
        self.unique_classes = None

    def fit(self):
        """
        Train the linear model.

        Raises:
            ValueError: If `task_type` is neither "classification" nor "regression".
        """
        input_dim = self.train_feats.shape[1]
        labels = np.asarray(self.train_labels).reshape(-1)
        if self.task_type == "regression":
            self.num_classes = 1
            self.criterion = nn.MSELoss()
            # MSELoss needs float targets with the same (N, 1) shape as the model output.
            targets = torch.as_tensor(labels, dtype=torch.float32).reshape(-1, 1)
        elif self.task_type == "classification":
            # CrossEntropyLoss expects class indices in [0, C); map arbitrary label values
            # to indices so that non-contiguous labels work too.
            self.unique_classes, label_indices = np.unique(labels, return_inverse=True)
            self.num_classes = len(self.unique_classes)
            self.criterion = nn.CrossEntropyLoss()
            targets = torch.as_tensor(label_indices.reshape(-1), dtype=torch.long)
        else:
            raise ValueError(f"Unknown task type: {self.task_type}")

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # Local tensors are used so the stored arrays stay untouched and `fit` can be re-run.
        train_feats = torch.as_tensor(self.train_feats, dtype=torch.float32).to(self.device)
        targets = targets.to(self.device)

        self.model = LinearClassifier(input_dim, self.num_classes).to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)

        self.model.train()
        for _ in range(self.num_epochs):
            self.optimizer.zero_grad()
            outputs = self.model(train_feats)
            loss = self.criterion(outputs, targets)
            loss.backward()
            self.optimizer.step()

    def predict(self) -> np.ndarray:
        """
        Predict for the test features.

        Returns:
            np.ndarray: Predicted class labels for classification, or predicted values of
            shape (n_samples,) for regression.

        Raises:
            ValueError: If `fit` has not been called yet.
        """
        if self.model is None:
            raise ValueError("Model has not been fitted yet. Call `fit` before `predict`.")

        self.model.eval()
        test_feats = torch.as_tensor(self.test_feats, dtype=torch.float32).to(self.device)
        with torch.no_grad():
            test_outputs = self.model(test_feats)

        if self.task_type == "regression":
            # The single output unit is the regressed value itself.
            return test_outputs.reshape(-1).cpu().numpy()

        _, test_preds = torch.max(test_outputs, 1)
        # Map the predicted indices back to the original label values.
        return self.unique_classes[test_preds.cpu().numpy()]
306
+
307
+
308
class MLPClassifier(nn.Module):
    """
    Two-layer perceptron: linear layer, ReLU, linear layer.
    """

    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        hidden = self.fc1(x)
        activated = self.relu(hidden)
        # No activation after the second layer; raw outputs are returned.
        return self.fc2(activated)
322
+
323
+
324
class MLP(BaseAdaptor):
    """
    A PyTorch-based Multi-Layer Perceptron (MLP) adaptor for classification and regression tasks.

    A one-hidden-layer MLP is trained on the training features with Adam, using the whole
    training set as one batch per epoch.

    Attributes:
        train_feats (np.ndarray): Training feature matrix of shape (n_samples, n_features).
        train_labels (np.ndarray): Training labels corresponding to the training features.
        test_feats (np.ndarray): Test feature matrix of shape (n_samples, n_features).
        task_type (str): Type of task, either "classification" or "regression".
        hidden_dim (int): Number of hidden units in the MLP. Default is 64.
        num_epochs (int): Number of training epochs. Default is 100.
        learning_rate (float): Learning rate for the optimizer. Default is 0.001.
        model: The trained torch module, or None before `fit` is called.
        unique_classes (np.ndarray or None): Sorted distinct training labels (classification only).
    Methods:
        fit():
            Trains the MLP model using the provided training data.
        predict() -> np.ndarray:
            Predicts class labels (classification) or values (regression) for the test data.
    """
    def __init__(self, train_feats, train_labels, test_feats, task_type, hidden_dim=64, num_epochs=100, learning_rate=0.001):
        super().__init__(train_feats, train_labels, test_feats)
        self.task_type = task_type
        self.hidden_dim = hidden_dim
        self.num_epochs = num_epochs
        self.learning_rate = learning_rate
        self.model = None
        self.unique_classes = None

    def fit(self):
        """
        Train the MLP.

        Raises:
            ValueError: If `task_type` is neither "classification" nor "regression".
        """
        input_dim = self.train_feats.shape[1]
        labels = np.asarray(self.train_labels).reshape(-1)
        if self.task_type == "regression":
            self.num_classes = 1
            self.criterion = nn.MSELoss()
            # MSELoss needs float targets with the same (N, 1) shape as the model output.
            targets = torch.as_tensor(labels, dtype=torch.float32).reshape(-1, 1)
        elif self.task_type == "classification":
            # CrossEntropyLoss expects class indices in [0, C); map arbitrary label values
            # to indices so that non-contiguous labels work too.
            self.unique_classes, label_indices = np.unique(labels, return_inverse=True)
            self.num_classes = len(self.unique_classes)
            self.criterion = nn.CrossEntropyLoss()
            targets = torch.as_tensor(label_indices.reshape(-1), dtype=torch.long)
        else:
            raise ValueError(f"Unknown task type: {self.task_type}")

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # Local tensors are used so the stored arrays stay untouched and `fit` can be re-run.
        train_feats = torch.as_tensor(self.train_feats, dtype=torch.float32).to(self.device)
        targets = targets.to(self.device)

        self.model = MLPClassifier(input_dim, self.hidden_dim, self.num_classes).to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)

        self.model.train()
        for _ in range(self.num_epochs):
            self.optimizer.zero_grad()
            outputs = self.model(train_feats)
            loss = self.criterion(outputs, targets)
            loss.backward()
            self.optimizer.step()

    def predict(self) -> np.ndarray:
        """
        Predict for the test features.

        Returns:
            np.ndarray: Predicted class labels for classification, or predicted values of
            shape (n_samples,) for regression.

        Raises:
            ValueError: If `fit` has not been called yet.
        """
        if self.model is None:
            raise ValueError("Model has not been fitted yet. Call `fit` before `predict`.")

        self.model.eval()
        test_feats = torch.as_tensor(self.test_feats, dtype=torch.float32).to(self.device)
        with torch.no_grad():
            test_outputs = self.model(test_feats)

        if self.task_type == "regression":
            # The single output unit is the regressed value itself.
            return test_outputs.reshape(-1).cpu().numpy()

        _, test_preds = torch.max(test_outputs, 1)
        # Map the predicted indices back to the original label values.
        return self.unique_classes[test_preds.cpu().numpy()]