valor-lite 0.33.8__py3-none-any.whl → 0.33.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of valor-lite has been flagged as potentially problematic by the registry scanner.
- valor_lite/LICENSE +21 -0
- valor_lite/classification/annotation.py +24 -0
- valor_lite/classification/manager.py +189 -217
- valor_lite/classification/metric.py +266 -27
- valor_lite/{detection → object_detection}/annotation.py +144 -3
- valor_lite/{detection → object_detection}/computation.py +33 -9
- valor_lite/{detection → object_detection}/manager.py +289 -368
- valor_lite/object_detection/metric.py +795 -0
- valor_lite/semantic_segmentation/annotation.py +96 -0
- valor_lite/{segmentation → semantic_segmentation}/manager.py +33 -16
- valor_lite/semantic_segmentation/metric.py +278 -0
- valor_lite/text_generation/__init__.py +0 -0
- valor_lite-0.33.10.dist-info/METADATA +179 -0
- valor_lite-0.33.10.dist-info/RECORD +24 -0
- valor_lite/detection/metric.py +0 -380
- valor_lite/segmentation/annotation.py +0 -49
- valor_lite/segmentation/metric.py +0 -119
- valor_lite-0.33.8.dist-info/METADATA +0 -41
- valor_lite-0.33.8.dist-info/RECORD +0 -22
- /valor_lite/{detection → object_detection}/__init__.py +0 -0
- /valor_lite/{segmentation → semantic_segmentation}/__init__.py +0 -0
- /valor_lite/{segmentation → semantic_segmentation}/computation.py +0 -0
- {valor_lite-0.33.8.dist-info → valor_lite-0.33.10.dist-info}/LICENSE +0 -0
- {valor_lite-0.33.8.dist-info → valor_lite-0.33.10.dist-info}/WHEEL +0 -0
- {valor_lite-0.33.8.dist-info → valor_lite-0.33.10.dist-info}/top_level.txt +0 -0
valor_lite/LICENSE ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Striveworks
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
valor_lite/classification/annotation.py CHANGED
@@ -3,6 +3,30 @@ from dataclasses import dataclass
 
 @dataclass
 class Classification:
+    """
+    Classification data structure containing a ground truth label and a list of predictions.
+
+    Parameters
+    ----------
+    uid : str
+        Unique identifier for the instance.
+    groundtruth : str
+        The true label for the instance.
+    predictions : list of str
+        List of predicted labels.
+    scores : list of float
+        Confidence scores corresponding to each predicted label.
+
+    Examples
+    --------
+    >>> classification = Classification(
+    ...     uid='123',
+    ...     groundtruth='cat',
+    ...     predictions=['cat', 'dog', 'bird'],
+    ...     scores=[0.9, 0.05, 0.05]
+    ... )
+    """
+
     uid: str
     groundtruth: str
     predictions: list[str]
valor_lite/classification/manager.py CHANGED
@@ -191,12 +191,117 @@ class Evaluator:
             n_datums=n_datums,
         )
 
-    def evaluate(
+    def _unpack_confusion_matrix(
+        self,
+        confusion_matrix: NDArray[np.float64],
+        number_of_labels: int,
+        number_of_examples: int,
+    ) -> dict[
+        str,
+        dict[
+            str,
+            dict[
+                str,
+                int
+                | list[
+                    dict[
+                        str,
+                        str | float,
+                    ]
+                ],
+            ],
+        ],
+    ]:
+        """
+        Unpacks a numpy array of confusion matrix counts and examples.
+        """
+
+        datum_idx = lambda gt_label_idx, pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+            confusion_matrix[
+                gt_label_idx,
+                pd_label_idx,
+                example_idx * 2 + 1,
+            ]
+        )
+
+        score_idx = lambda gt_label_idx, pd_label_idx, example_idx: float(  # noqa: E731 - lambda fn
+            confusion_matrix[
+                gt_label_idx,
+                pd_label_idx,
+                example_idx * 2 + 2,
+            ]
+        )
+
+        return {
+            self.index_to_label[gt_label_idx]: {
+                self.index_to_label[pd_label_idx]: {
+                    "count": max(
+                        int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
+                        0,
+                    ),
+                    "examples": [
+                        {
+                            "datum": self.index_to_uid[
+                                datum_idx(
+                                    gt_label_idx, pd_label_idx, example_idx
+                                )
+                            ],
+                            "score": score_idx(
+                                gt_label_idx, pd_label_idx, example_idx
+                            ),
+                        }
+                        for example_idx in range(number_of_examples)
+                        if datum_idx(gt_label_idx, pd_label_idx, example_idx)
+                        >= 0
+                    ],
+                }
+                for pd_label_idx in range(number_of_labels)
+            }
+            for gt_label_idx in range(number_of_labels)
+        }
+
+    def _unpack_missing_predictions(
+        self,
+        missing_predictions: NDArray[np.int32],
+        number_of_labels: int,
+        number_of_examples: int,
+    ) -> dict[str, dict[str, int | list[dict[str, str]]]]:
+        """
+        Unpacks a numpy array of missing prediction counts and examples.
+        """
+
+        datum_idx = (
+            lambda gt_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+                missing_predictions[
+                    gt_label_idx,
+                    example_idx + 1,
+                ]
+            )
+        )
+
+        return {
+            self.index_to_label[gt_label_idx]: {
+                "count": max(
+                    int(missing_predictions[gt_label_idx, 0]),
+                    0,
+                ),
+                "examples": [
+                    {
+                        "datum": self.index_to_uid[
+                            datum_idx(gt_label_idx, example_idx)
+                        ]
+                    }
+                    for example_idx in range(number_of_examples)
+                    if datum_idx(gt_label_idx, example_idx) >= 0
+                ],
+            }
+            for gt_label_idx in range(number_of_labels)
+        }
+
+    def compute_precision_recall(
         self,
-        metrics_to_return: list[MetricType] = MetricType.base(),
         score_thresholds: list[float] = [0.0],
         hardmax: bool = True,
-        number_of_examples: int = 0,
         filter_: Filter | None = None,
         as_dict: bool = False,
     ) -> dict[MetricType, list]:
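The two `_unpack_*` helpers above decode a packed numpy layout: for each (ground truth, prediction) cell, index 0 along the last axis stores the count, slots `example_idx * 2 + 1` and `example_idx * 2 + 2` store (datum index, score) pairs, and -1 marks an unused example slot. A toy sketch of that layout (the shapes and sentinel follow the code above; the values are invented for illustration):

# toy illustration of the packed layout decoded by _unpack_confusion_matrix
import numpy as np

n_labels, n_examples = 2, 2
# last axis per (gt, pd) cell: [count, datum_0, score_0, datum_1, score_1]
cm = np.full((n_labels, n_labels, 1 + 2 * n_examples), -1.0)
cm[0, 1, 0] = 3.0          # three datums with ground truth 0 predicted as 1
cm[0, 1, 1:3] = (7, 0.81)  # first stored example: datum index 7, score 0.81

count = max(int(cm[0, 1, 0]), 0)
examples = [
    (int(cm[0, 1, i * 2 + 1]), float(cm[0, 1, i * 2 + 2]))
    for i in range(n_examples)
    if cm[0, 1, i * 2 + 1] >= 0  # -1 marks an unused example slot
]
print(count, examples)  # 3 [(7, 0.81)]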
@@ -205,14 +310,10 @@ class Evaluator:
 
         Parameters
         ----------
-        metrics_to_return : list[MetricType]
-            A list of metrics to return in the results.
         score_thresholds : list[float]
            A list of score thresholds to compute metrics over.
         hardmax : bool
             Toggles whether a hardmax is applied to predictions.
-        number_of_examples : int, default=0
-            Maximum number of annotation examples to return in ConfusionMatrix.
         filter_ : Filter, optional
             An optional filter object.
         as_dict : bool, default=False
@@ -253,7 +354,7 @@ class Evaluator:
 
             metrics[MetricType.ROCAUC] = [
                 ROCAUC(
-                    value=rocauc[label_idx],
+                    value=float(rocauc[label_idx]),
                     label=self.index_to_label[label_idx],
                 )
                 for label_idx in range(label_metadata.shape[0])
@@ -262,7 +363,7 @@ class Evaluator:
 
             metrics[MetricType.mROCAUC] = [
                 mROCAUC(
-                    value=mean_rocauc,
+                    value=float(mean_rocauc),
                 )
             ]
 
@@ -276,10 +377,10 @@ class Evaluator:
             row = counts[:, label_idx]
             metrics[MetricType.Counts].append(
                 Counts(
-                    tp=row[:, 0].tolist(),
-                    fp=row[:, 1].tolist(),
-                    fn=row[:, 2].tolist(),
-                    tn=row[:, 3].tolist(),
+                    tp=row[:, 0].astype(int).tolist(),
+                    fp=row[:, 1].astype(int).tolist(),
+                    fn=row[:, 2].astype(int).tolist(),
+                    tn=row[:, 3].astype(int).tolist(),
                     **kwargs,
                 )
             )
@@ -290,44 +391,29 @@ class Evaluator:
 
             metrics[MetricType.Precision].append(
                 Precision(
-                    value=precision[:, label_idx].tolist(),
+                    value=precision[:, label_idx].astype(float).tolist(),
                     **kwargs,
                 )
             )
             metrics[MetricType.Recall].append(
                 Recall(
-                    value=recall[:, label_idx].tolist(),
+                    value=recall[:, label_idx].astype(float).tolist(),
                     **kwargs,
                 )
             )
             metrics[MetricType.Accuracy].append(
                 Accuracy(
-                    value=accuracy[:, label_idx].tolist(),
+                    value=accuracy[:, label_idx].astype(float).tolist(),
                     **kwargs,
                 )
             )
             metrics[MetricType.F1].append(
                 F1(
-                    value=f1_score[:, label_idx].tolist(),
+                    value=f1_score[:, label_idx].astype(float).tolist(),
                     **kwargs,
                 )
             )
 
-        if MetricType.ConfusionMatrix in metrics_to_return:
-            metrics[
-                MetricType.ConfusionMatrix
-            ] = self._compute_confusion_matrix(
-                data=data,
-                label_metadata=label_metadata,
-                score_thresholds=score_thresholds,
-                hardmax=hardmax,
-                number_of_examples=number_of_examples,
-            )
-
-        for metric in set(metrics.keys()):
-            if metric not in metrics_to_return:
-                del metrics[metric]
-
         if as_dict:
             return {
                 mtype: [metric.to_dict() for metric in mvalues]
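The `.astype(int)`, `.astype(float)`, and `float(...)` changes in the hunks above normalize numpy values into builtin Python types before they reach the metric objects. A small illustration of the difference, using a toy array rather than the library's internals:

# the counts arrays are float-typed, so .tolist() alone yields floats where
# ints are expected, and plain indexing yields numpy scalars, not builtins
import numpy as np

row = np.array([[3.0, 1.0, 0.0, 5.0]])   # tp/fp/fn/tn stored as floats
print(row[:, 0].tolist())                 # [3.0]
print(row[:, 0].astype(int).tolist())     # [3]

rocauc = np.array([0.93, 0.87])
print(type(rocauc[0]))         # <class 'numpy.float64'>
print(type(float(rocauc[0])))  # <class 'float'>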
@@ -336,143 +422,43 @@ class Evaluator:
 
         return metrics
 
-    def _unpack_confusion_matrix(
+    def compute_confusion_matrix(
         self,
-        confusion_matrix: NDArray[np.float64],
-        number_of_labels: int,
-        number_of_examples: int,
-    ) -> dict[
-        str,
-        dict[
-            str,
-            dict[
-                str,
-                int
-                | list[
-                    dict[
-                        str,
-                        str | float,
-                    ]
-                ],
-            ],
-        ],
-    ]:
-        """
-        Unpacks a numpy array of confusion matrix counts and examples.
-        """
-
-        datum_idx = lambda gt_label_idx, pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
-            confusion_matrix[
-                gt_label_idx,
-                pd_label_idx,
-                example_idx * 2 + 1,
-            ]
-        )
-
-        score_idx = lambda gt_label_idx, pd_label_idx, example_idx: float(  # noqa: E731 - lambda fn
-            confusion_matrix[
-                gt_label_idx,
-                pd_label_idx,
-                example_idx * 2 + 2,
-            ]
-        )
-
-        return {
-            self.index_to_label[gt_label_idx]: {
-                self.index_to_label[pd_label_idx]: {
-                    "count": max(
-                        int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
-                        0,
-                    ),
-                    "examples": [
-                        {
-                            "datum": self.index_to_uid[
-                                datum_idx(
-                                    gt_label_idx, pd_label_idx, example_idx
-                                )
-                            ],
-                            "score": score_idx(
-                                gt_label_idx, pd_label_idx, example_idx
-                            ),
-                        }
-                        for example_idx in range(number_of_examples)
-                        if datum_idx(gt_label_idx, pd_label_idx, example_idx)
-                        >= 0
-                    ],
-                }
-                for pd_label_idx in range(number_of_labels)
-            }
-            for gt_label_idx in range(number_of_labels)
-        }
-
-    def _unpack_missing_predictions(
-        self,
-        missing_predictions: NDArray[np.int32],
-        number_of_labels: int,
-        number_of_examples: int,
-    ) -> dict[str, dict[str, int | list[dict[str, str]]]]:
-        """
-        Unpacks a numpy array of missing prediction counts and examples.
-        """
-
-        datum_idx = (
-            lambda gt_label_idx, example_idx: int(  # noqa: E731 - lambda fn
-                missing_predictions[
-                    gt_label_idx,
-                    example_idx + 1,
-                ]
-            )
-        )
-
-        return {
-            self.index_to_label[gt_label_idx]: {
-                "count": max(
-                    int(missing_predictions[gt_label_idx, 0]),
-                    0,
-                ),
-                "examples": [
-                    {
-                        "datum": self.index_to_uid[
-                            datum_idx(gt_label_idx, example_idx)
-                        ]
-                    }
-                    for example_idx in range(number_of_examples)
-                    if datum_idx(gt_label_idx, example_idx) >= 0
-                ],
-            }
-            for gt_label_idx in range(number_of_labels)
-        }
-
-    def _compute_confusion_matrix(
-        self,
-        data: NDArray[np.float64],
-        label_metadata: NDArray[np.int32],
-        score_thresholds: list[float],
-        hardmax: bool,
-        number_of_examples: int,
-    ) -> list[ConfusionMatrix]:
+        score_thresholds: list[float] = [0.0],
+        hardmax: bool = True,
+        number_of_examples: int = 0,
+        filter_: Filter | None = None,
+        as_dict: bool = False,
+    ) -> list:
         """
         Computes a detailed confusion matrix..
 
         Parameters
         ----------
-        data : NDArray[np.float64]
-            A data array containing classification pairs.
-        label_metadata : NDArray[np.int32]
-            An integer array containing label metadata.
         score_thresholds : list[float]
             A list of score thresholds to compute metrics over.
         hardmax : bool
             Toggles whether a hardmax is applied to predictions.
         number_of_examples : int, default=0
             The number of examples to return per count.
+        filter_ : Filter, optional
+            An optional filter object.
+        as_dict : bool, default=False
+            An option to return metrics as dictionaries.
 
         Returns
         -------
-        list[ConfusionMatrix]
-            A list of
+        list[ConfusionMatrix] | list[dict]
+            A list of confusion matrices.
         """
 
+        # apply filters
+        data = self._detailed_pairs
+        label_metadata = self._label_metadata
+        if filter_ is not None:
+            data = data[filter_.indices]
+            label_metadata = filter_.label_metadata
+
         if data.size == 0:
             return list()
 
@@ -485,7 +471,7 @@ class Evaluator:
         )
 
         n_scores, n_labels, _, _ = confusion_matrix.shape
-        return [
+        results = [
             ConfusionMatrix(
                 score_threshold=score_thresholds[score_idx],
                 number_of_examples=number_of_examples,
@@ -503,6 +489,56 @@ class Evaluator:
             for score_idx in range(n_scores)
         ]
 
+        if as_dict:
+            return [m.to_dict() for m in results]
+
+        return results
+
+    def evaluate(
+        self,
+        score_thresholds: list[float] = [0.0],
+        hardmax: bool = True,
+        number_of_examples: int = 0,
+        filter_: Filter | None = None,
+        as_dict: bool = False,
+    ) -> dict[MetricType, list]:
+        """
+        Computes a detailed confusion matrix..
+
+        Parameters
+        ----------
+        score_thresholds : list[float]
+            A list of score thresholds to compute metrics over.
+        hardmax : bool
+            Toggles whether a hardmax is applied to predictions.
+        number_of_examples : int, default=0
+            The number of examples to return per count.
+        filter_ : Filter, optional
+            An optional filter object.
+        as_dict : bool, default=False
+            An option to return metrics as dictionaries.
+
+        Returns
+        -------
+        list[ConfusionMatrix] | list[dict]
+            A list of confusion matrices.
+        """
+
+        results = self.compute_precision_recall(
+            score_thresholds=score_thresholds,
+            hardmax=hardmax,
+            filter_=filter_,
+            as_dict=as_dict,
+        )
+        results[MetricType.ConfusionMatrix] = self.compute_confusion_matrix(
+            score_thresholds=score_thresholds,
+            hardmax=hardmax,
+            number_of_examples=number_of_examples,
+            filter_=filter_,
+            as_dict=as_dict,
+        )
+        return results
+
 
 class DataLoader:
     """
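Taken together, these manager.py hunks replace the single 0.33.8 entry point, which took `metrics_to_return` and `number_of_examples`, with `compute_precision_recall`, a public `compute_confusion_matrix`, and an `evaluate` wrapper that runs both. A hedged migration sketch, assuming an `Evaluator` built through the usual `DataLoader.finalize()` flow (the setup itself is not shown in this diff):

# sketch only: method names and MetricType come from the diff above
metrics = evaluator.compute_precision_recall(score_thresholds=[0.25, 0.75])
confusion = evaluator.compute_confusion_matrix(
    score_thresholds=[0.25, 0.75],
    number_of_examples=2,  # examples embedded per confusion-matrix count
)
# or both at once; the result dict now always includes ConfusionMatrix
results = evaluator.evaluate(score_thresholds=[0.25, 0.75], number_of_examples=2)
precisions = results[MetricType.Precision]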
@@ -644,70 +680,6 @@ class DataLoader:
             predictions=predictions,
         )
 
-    def add_data_from_valor_dict(
-        self,
-        classifications: list[tuple[dict, dict]],
-        show_progress: bool = False,
-    ):
-        """
-        Adds Valor-format classifications to the cache.
-
-        Parameters
-        ----------
-        classifications : list[tuple[dict, dict]]
-            A list of groundtruth, prediction pairs in Valor-format dictionaries.
-        show_progress : bool, default=False
-            Toggle for tqdm progress bar.
-        """
-
-        disable_tqdm = not show_progress
-        for groundtruth, prediction in tqdm(
-            classifications, disable=disable_tqdm
-        ):
-
-            # update metadata
-            self._evaluator.n_datums += 1
-            self._evaluator.n_groundtruths += len(groundtruth["annotations"])
-            self._evaluator.n_predictions += len(prediction["annotations"])
-
-            # update datum uid index
-            uid_index = self._add_datum(uid=groundtruth["datum"]["uid"])
-
-            # cache labels and annotations
-            predictions = list()
-            groundtruths = None
-            for gann in groundtruth["annotations"]:
-                for valor_label in gann["labels"]:
-                    glabel = f'{valor_label["key"]}_{valor_label["value"]}'
-                    label_idx = self._add_label(glabel)
-                    self.groundtruth_count[label_idx][uid_index] += 1
-                    groundtruths = label_idx
-            for pann in prediction["annotations"]:
-                for valor_label in pann["labels"]:
-                    plabel = f'{valor_label["key"]}_{valor_label["value"]}'
-                    pscore = valor_label["score"]
-                    label_idx = self._add_label(plabel)
-                    self.prediction_count[label_idx][uid_index] += 1
-                    predictions.append(
-                        (
-                            label_idx,
-                            pscore,
-                        )
-                    )
-
-            # fix type error where groundtruths can possibly be unbound now that it's a float
-            # in practice, this error should never be hit since groundtruths can't be empty without throwing a ValueError earlier in the flow
-            if groundtruths is None:
-                raise ValueError(
-                    "Expected a value for groundtruths, but got None."
-                )
-
-            self._add_data(
-                uid_index=uid_index,
-                groundtruth=groundtruths,
-                predictions=predictions,
-            )
-
     def finalize(self) -> Evaluator:
         """
         Performs data finalization and some preprocessing steps.