supervisely-6.73.294-py3-none-any.whl → supervisely-6.73.296-py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of supervisely might be problematic.
- supervisely/cli/release/run.py +34 -51
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/avg_precision_by_class.py +1 -1
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/calibration_score.py +10 -0
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/explore_predictions.py +2 -2
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/outcome_counts.py +1 -1
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/overview.py +14 -8
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/pr_curve.py +1 -1
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/precision_recal_f1.py +2 -2
- supervisely/nn/benchmark/instance_segmentation/evaluation_params.yaml +6 -1
- supervisely/nn/benchmark/instance_segmentation/text_templates.py +4 -4
- supervisely/nn/benchmark/object_detection/base_vis_metric.py +1 -1
- supervisely/nn/benchmark/object_detection/evaluation_params.yaml +6 -1
- supervisely/nn/benchmark/object_detection/evaluator.py +1 -3
- supervisely/nn/benchmark/object_detection/metric_provider.py +59 -46
- supervisely/nn/benchmark/object_detection/text_templates.py +4 -4
- supervisely/nn/benchmark/object_detection/vis_metrics/confidence_distribution.py +20 -2
- supervisely/nn/benchmark/object_detection/vis_metrics/confidence_score.py +16 -0
- supervisely/nn/benchmark/object_detection/vis_metrics/explore_predictions.py +10 -5
- supervisely/nn/benchmark/object_detection/vis_metrics/key_metrics.py +1 -0
- supervisely/nn/benchmark/object_detection/vis_metrics/model_predictions.py +1 -1
- supervisely/nn/benchmark/object_detection/vis_metrics/outcome_counts.py +2 -57
- supervisely/nn/benchmark/object_detection/vis_metrics/outcome_counts_per_class.py +1 -1
- supervisely/nn/benchmark/object_detection/vis_metrics/overview.py +11 -3
- supervisely/nn/benchmark/object_detection/vis_metrics/pr_curve.py +1 -1
- supervisely/nn/benchmark/object_detection/vis_metrics/precision.py +18 -8
- supervisely/nn/benchmark/object_detection/vis_metrics/recall.py +13 -3
- supervisely/nn/benchmark/object_detection/visualizer.py +1 -1
- supervisely/nn/benchmark/utils/__init__.py +0 -1
- supervisely/nn/benchmark/utils/detection/__init__.py +1 -2
- supervisely/nn/benchmark/utils/detection/calculate_metrics.py +31 -37
- supervisely/nn/benchmark/visualization/evaluation_result.py +2 -4
- supervisely/nn/benchmark/visualization/vis_click_data.py +1 -3
- {supervisely-6.73.294.dist-info → supervisely-6.73.296.dist-info}/METADATA +1 -1
- {supervisely-6.73.294.dist-info → supervisely-6.73.296.dist-info}/RECORD +38 -39
- supervisely/nn/benchmark/utils/detection/metric_provider.py +0 -533
- {supervisely-6.73.294.dist-info → supervisely-6.73.296.dist-info}/LICENSE +0 -0
- {supervisely-6.73.294.dist-info → supervisely-6.73.296.dist-info}/WHEEL +0 -0
- {supervisely-6.73.294.dist-info → supervisely-6.73.296.dist-info}/entry_points.txt +0 -0
- {supervisely-6.73.294.dist-info → supervisely-6.73.296.dist-info}/top_level.txt +0 -0
supervisely/nn/benchmark/utils/detection/metric_provider.py (removed, -533 lines)
@@ -1,533 +0,0 @@
-import warnings
-from copy import deepcopy
-
-import numpy as np
-import pandas as pd
-
-from supervisely.nn.benchmark.utils.detection import metrics
-
-METRIC_NAMES = {
-    "mAP": "mAP",
-    "f1": "F1-score",
-    "precision": "Precision",
-    "recall": "Recall",
-    "iou": "Avg. IoU",
-    "classification_accuracy": "Classification Accuracy",
-    "calibration_score": "Calibration Score",
-}
-
-
-def _get_outcomes_per_image(matches, cocoGt):
-    """
-    type cocoGt: COCO
-    """
-    img_ids = sorted(cocoGt.getImgIds())
-    img_id_enum = {img_id: idx for idx, img_id in enumerate(img_ids)}
-    outcomes_per_image = np.zeros((len(img_ids), 3), dtype=float)
-    for m in matches:
-        img_id = m["image_id"]
-        idx = img_id_enum[img_id]
-        if m["type"] == "TP":
-            outcomes_per_image[idx, 0] += 1
-        elif m["type"] == "FP":
-            outcomes_per_image[idx, 1] += 1
-        elif m["type"] == "FN":
-            outcomes_per_image[idx, 2] += 1
-    return img_ids, outcomes_per_image
-
-
-def filter_by_conf(matches: list, conf: float):
-    matches_filtered = []
-    for m in matches:
-        if m["score"] is not None and m["score"] < conf:
-            if m["type"] == "TP":
-                # TP becomes FN
-                m = deepcopy(m)
-                m["type"] = "FN"
-                m["score"] = None
-                m["dt_id"] = None
-                m["iou"] = None
-            elif m["type"] == "FP":
-                continue
-            else:
-                raise ValueError("Invalid match type")
-        matches_filtered.append(m)
-    return matches_filtered
-
-
-class MetricProvider:
-    def __init__(self, matches: list, coco_metrics: dict, params: dict, cocoGt, cocoDt):
-        """
-        Main class for calculating prediction metrics.
-
-        :param matches: dictionary with matches between ground truth and predicted objects
-        :type matches: list
-        :param coco_metrics: dictionary with COCO metrics
-        :type coco_metrics: dict
-        :param params: dictionary with evaluation parameters
-        :type params: dict
-        :param cocoGt: COCO object with ground truth annotations
-        :type cocoGt: COCO
-        :param cocoDt: COCO object with predicted annotations
-        :type cocoDt: COCO
-        """
-        self.matches = matches
-        self.coco_metrics = coco_metrics
-        self.params = params
-        self.cocoGt = cocoGt
-        self.cocoDt = cocoDt
-
-        self.metric_names = METRIC_NAMES
-
-        # metainfo
-        self.cat_ids = cocoGt.getCatIds()
-        self.cat_names = [cocoGt.cats[cat_id]["name"] for cat_id in self.cat_ids]
-
-        # eval_data
-        self.matches = matches
-        self.coco_mAP = coco_metrics["mAP"]
-        self.coco_precision = coco_metrics["precision"]
-        self.iouThrs = params["iouThrs"]
-        self.recThrs = params["recThrs"]
-
-        eval_params = params.get("evaluation_params", {})
-        self.iou_threshold = eval_params.get("iou_threshold", 0.5)
-        self.iou_threshold_idx = np.searchsorted(self.iouThrs, self.iou_threshold)
-
-    def calculate(self):
-        self.m_full = _MetricProvider(
-            self.matches, self.coco_metrics, self.params, self.cocoGt, self.cocoDt
-        )
-        self.m_full._calculate_score_profile()
-
-        # Find optimal confidence threshold
-        self.f1_optimal_conf, self.best_f1 = self.m_full.get_f1_optimal_conf()
-
-        # Filter by optimal confidence threshold
-        if self.f1_optimal_conf is not None:
-            matches_filtered = filter_by_conf(self.matches, self.f1_optimal_conf)
-        else:
-            matches_filtered = self.matches
-        self.m = _MetricProvider(
-            matches_filtered, self.coco_metrics, self.params, self.cocoGt, self.cocoDt
-        )
-        self.matches_filtered = matches_filtered
-        self.m._init_counts()
-
-        self.ious = self.m.ious
-        self.TP_count = self.m.TP_count
-        self.FP_count = self.m.FP_count
-        self.FN_count = self.m.FN_count
-        self.true_positives = self.m.true_positives
-        self.false_negatives = self.m.false_negatives
-        self.false_positives = self.m.false_positives
-        self.confused_matches = self.m.confused_matches
-
-        self.score_profile_f1s = self.m_full.score_profile_f1s
-
-        # base metrics
-        self._base_metrics = self.m.base_metrics()
-        self._per_class_metrics = self.m.per_class_metrics()
-        self._pr_curve = self.m.pr_curve()
-        self._prediction_table = self.m.prediction_table()
-        self._confusion_matrix = self.m.confusion_matrix()
-        self._frequently_confused = self.m.frequently_confused(self._confusion_matrix)
-        # calibration metrics
-        self._confidence_score_profile = self.m_full.confidence_score_profile()
-        self._calibration_curve = self.m_full.calibration_curve()
-        self._scores_tp_and_fp = self.m_full.scores_tp_and_fp()
-        self._maximum_calibration_error = self.m_full.maximum_calibration_error()
-        self._expected_calibration_error = self.m_full.expected_calibration_error()
-
-    def json_metrics(self):
-        base = self.base_metrics()
-        iou_name = int(self.iou_threshold * 100)
-        ap_by_class = self.AP_per_class().tolist()
-        ap_by_class = dict(zip(self.cat_names, ap_by_class))
-        ap_custom_by_class = self.AP_custom_per_class().tolist()
-        ap_custom_by_class = dict(zip(self.cat_names, ap_custom_by_class))
-        return {
-            "mAP": base["mAP"],
-            "AP50": self.coco_metrics.get("AP50"),
-            "AP75": self.coco_metrics.get("AP75"),
-            f"AP{iou_name}": self.AP_custom(),
-            "f1": base["f1"],
-            "precision": base["precision"],
-            "recall": base["recall"],
-            "iou": base["iou"],
-            "classification_accuracy": base["classification_accuracy"],
-            "calibration_score": base["calibration_score"],
-            "f1_optimal_conf": self.f1_optimal_conf,
-            "expected_calibration_error": self.expected_calibration_error(),
-            "maximum_calibration_error": self.maximum_calibration_error(),
-            "AP_by_class": ap_by_class,
-            f"AP{iou_name}_by_class": ap_custom_by_class,
-        }
-
-    def metric_table(self):
-        table = self.json_metrics()
-        iou_name = int(self.iou_threshold * 100)
-        return {
-            "mAP": table["mAP"],
-            "AP50": table["AP50"],
-            "AP75": table["AP75"],
-            f"AP{iou_name}": table[f"AP{iou_name}"],
-            "f1": table["f1"],
-            "precision": table["precision"],
-            "recall": table["recall"],
-            "Avg. IoU": table["iou"],
-            "Classification Acc.": table["classification_accuracy"],
-            "Calibration Score": table["calibration_score"],
-            "optimal confidence threshold": table["f1_optimal_conf"],
-        }
-
-    def AP_per_class(self):
-        s = self.coco_precision[:, :, :, 0, 2]
-        s[s == -1] = np.nan
-        ap = np.nanmean(s, axis=(0, 1))
-        return ap
-
-    def AP_custom_per_class(self):
-        s = self.coco_precision[self.iou_threshold_idx, :, :, 0, 2]
-        s[s == -1] = np.nan
-        ap = np.nanmean(s, axis=0)
-        return ap
-
-    def AP_custom(self):
-        return np.nanmean(self.AP_custom_per_class())
-
-    def base_metrics(self):
-        base = self._base_metrics
-        calibration_score = 1 - self._expected_calibration_error
-        return {**base, "calibration_score": calibration_score}
-
-    def per_class_metrics(self):
-        return self._per_class_metrics
-
-    def pr_curve(self):
-        return self._pr_curve
-
-    def prediction_table(self):
-        return self._prediction_table
-
-    def confusion_matrix(self):
-        return self._confusion_matrix
-
-    def frequently_confused(self):
-        return self._frequently_confused
-
-    def confidence_score_profile(self):
-        return self._confidence_score_profile
-
-    def calibration_curve(self):
-        return self._calibration_curve
-
-    def scores_tp_and_fp(self):
-        return self._scores_tp_and_fp
-
-    def maximum_calibration_error(self):
-        return self._maximum_calibration_error
-
-    def expected_calibration_error(self):
-        return self._expected_calibration_error
-
-    def get_f1_optimal_conf(self):
-        return self.f1_optimal_conf, self.best_f1
-
-
-class _MetricProvider:
-    def __init__(self, matches: list, coco_metrics: dict, params: dict, cocoGt, cocoDt):
-        """
-        type cocoGt: COCO
-        type cocoDt: COCO
-        """
-
-        self.cocoGt = cocoGt
-
-        # metainfo
-        self.cat_ids = cocoGt.getCatIds()
-        self.cat_names = [cocoGt.cats[cat_id]["name"] for cat_id in self.cat_ids]
-
-        # eval_data
-        self.matches = matches
-        self.coco_mAP = coco_metrics["mAP"]
-        self.coco_precision = coco_metrics["precision"]
-        self.iouThrs = params["iouThrs"]
-        self.recThrs = params["recThrs"]
-
-        # Matches
-        self.tp_matches = [m for m in self.matches if m["type"] == "TP"]
-        self.fp_matches = [m for m in self.matches if m["type"] == "FP"]
-        self.fn_matches = [m for m in self.matches if m["type"] == "FN"]
-        self.confused_matches = [m for m in self.fp_matches if m["miss_cls"]]
-        self.fp_not_confused_matches = [m for m in self.fp_matches if not m["miss_cls"]]
-        self.ious = np.array([m["iou"] for m in self.tp_matches])
-
-    def _init_counts(self):
-        cat_ids = self.cat_ids
-        iouThrs = self.iouThrs
-        cat_id_to_idx = {cat_id: idx for idx, cat_id in enumerate(cat_ids)}
-        ious = []
-        cats = []
-        for match in self.tp_matches:
-            ious.append(match["iou"])
-            cats.append(cat_id_to_idx[match["category_id"]])
-        ious = np.array(ious) + np.spacing(1)
-        iou_idxs = np.searchsorted(iouThrs, ious) - 1
-        cats = np.array(cats)
-        # TP
-        true_positives = np.histogram2d(
-            cats,
-            iou_idxs,
-            bins=(len(cat_ids), len(iouThrs)),
-            range=((0, len(cat_ids)), (0, len(iouThrs))),
-        )[0].astype(int)
-        true_positives = true_positives[:, ::-1].cumsum(1)[:, ::-1]
-        tp_count = true_positives[:, 0]
-        # FN
-        cats_fn = np.array([cat_id_to_idx[match["category_id"]] for match in self.fn_matches])
-        if cats_fn.size == 0:
-            fn_count = np.zeros((len(cat_ids),), dtype=int)
-        else:
-            fn_count = np.bincount(cats_fn, minlength=len(cat_ids)).astype(int)
-        gt_count = fn_count + tp_count
-        false_negatives = gt_count[:, None] - true_positives
-        # FP
-        cats_fp = np.array([cat_id_to_idx[match["category_id"]] for match in self.fp_matches])
-        if cats_fp.size == 0:
-            fp_count = np.zeros((len(cat_ids),), dtype=int)
-        else:
-            fp_count = np.bincount(cats_fp, minlength=len(cat_ids)).astype(int)
-        dt_count = fp_count + tp_count
-        false_positives = dt_count[:, None] - true_positives
-
-        self.true_positives = true_positives
-        self.false_negatives = false_negatives
-        self.false_positives = false_positives
-        self.TP_count = int(self.true_positives[:, 0].sum(0))
-        self.FP_count = int(self.false_positives[:, 0].sum(0))
-        self.FN_count = int(self.false_negatives[:, 0].sum(0))
-
-    def base_metrics(self):
-        tp = self.true_positives
-        fp = self.false_positives
-        fn = self.false_negatives
-        confuse_count = len(self.confused_matches)
-
-        mAP = self.coco_mAP
-        precision = tp / (tp + fp)
-        recall = tp / (tp + fn)
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            f1 = 2 * precision * recall / (precision + recall)
-            f1[(precision + recall) == 0.0] = 0.0
-        iou = np.mean(self.ious)
-        classification_accuracy = self.TP_count / (self.TP_count + confuse_count)
-
-        return {
-            "mAP": mAP,
-            "f1": np.nanmean(f1),
-            "precision": np.nanmean(precision),
-            "recall": np.nanmean(recall),
-            "iou": iou,
-            "classification_accuracy": classification_accuracy,
-        }
-
-    def per_class_metrics(self):
-        tp = self.true_positives.mean(1)
-        fp = self.false_positives.mean(1)
-        fn = self.false_negatives.mean(1)
-        pr = tp / (tp + fp)
-        rc = tp / (tp + fn)
-        f1 = 2 * pr * rc / (pr + rc)
-        return pd.DataFrame({"category": self.cat_names, "precision": pr, "recall": rc, "f1": f1})
-
-    def pr_curve(self):
-        pr_curve = self.coco_precision[:, :, :, 0, 2].mean(0)
-        return pr_curve
-
-    def prediction_table(self):
-        img_ids, outcomes_per_image = _get_outcomes_per_image(self.matches, self.cocoGt)
-        sly_ids = [self.cocoGt.imgs[img_id]["sly_id"] for img_id in img_ids]
-        image_names = [self.cocoGt.imgs[img_id]["file_name"] for img_id in img_ids]
-        n_gt = outcomes_per_image[:, 0] + outcomes_per_image[:, 2]
-        n_dt = outcomes_per_image[:, 0] + outcomes_per_image[:, 1]
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            precision_per_image = outcomes_per_image[:, 0] / n_dt
-            recall_per_image = outcomes_per_image[:, 0] / n_gt
-            f1_per_image = (
-                2
-                * precision_per_image
-                * recall_per_image
-                / (precision_per_image + recall_per_image)
-            )
-        prediction_table = pd.DataFrame(
-            {
-                "Sly ID": sly_ids,
-                "Image name": image_names,
-                "GT objects": n_gt,
-                "Predictions": n_dt,
-                "TP": outcomes_per_image[:, 0],
-                "FP": outcomes_per_image[:, 1],
-                "FN": outcomes_per_image[:, 2],
-                "Precision": precision_per_image,
-                "Recall": recall_per_image,
-                "F1": f1_per_image,
-            }
-        )
-        return prediction_table
-
-    def confusion_matrix(self):
-        K = len(self.cat_ids)
-        cat_id_to_idx = {cat_id: i for i, cat_id in enumerate(self.cat_ids)}
-
-        confusion_matrix = np.zeros((K + 1, K + 1), dtype=int)
-
-        for m in self.confused_matches:
-            cat_idx_pred = cat_id_to_idx[m["category_id"]]
-            cat_idx_gt = cat_id_to_idx[self.cocoGt.anns[m["gt_id"]]["category_id"]]
-            confusion_matrix[cat_idx_pred, cat_idx_gt] += 1
-
-        for m in self.tp_matches:
-            cat_idx = cat_id_to_idx[m["category_id"]]
-            confusion_matrix[cat_idx, cat_idx] += 1
-
-        for m in self.fp_not_confused_matches:
-            cat_idx_pred = cat_id_to_idx[m["category_id"]]
-            confusion_matrix[cat_idx_pred, -1] += 1
-
-        for m in self.fn_matches:
-            cat_idx_gt = cat_id_to_idx[m["category_id"]]
-            confusion_matrix[-1, cat_idx_gt] += 1
-
-        return confusion_matrix
-
-    def frequently_confused(self, confusion_matrix, topk_pairs=20):
-        # Frequently confused class pairs
-        cat_id_enum = {i: cat_id for i, cat_id in enumerate(self.cat_ids)}
-        cm = confusion_matrix[:-1, :-1]
-        cm_l = np.tril(cm, -1)
-        cm_u = np.triu(cm, 1)
-        cm = cm_l + cm_u.T
-        cm_flat = cm.flatten()
-        inds_sort = np.argsort(-cm_flat)[:topk_pairs]
-        inds_sort = inds_sort[cm_flat[inds_sort] > 0]  # remove zeros
-        inds_sort = np.unravel_index(inds_sort, cm.shape)
-
-        # probability of confusion: (predicted A, actually B + predicted B, actually A) / (predicted A + predicted B)
-        confused_counts = cm[inds_sort]
-        dt_total = confusion_matrix.sum(1)
-        dt_pair_sum = np.array([dt_total[i] + dt_total[j] for i, j in zip(*inds_sort)])
-        confused_prob = confused_counts / dt_pair_sum
-        inds_sort2 = np.argsort(-confused_prob)
-
-        confused_idxs = np.array(inds_sort).T[inds_sort2]
-        confused_name_pairs = [(self.cat_names[i], self.cat_names[j]) for i, j in confused_idxs]
-        confused_counts = confused_counts[inds_sort2]
-        confused_prob = confused_prob[inds_sort2]
-        confused_catIds = [(cat_id_enum[i], cat_id_enum[j]) for i, j in confused_idxs]
-
-        return pd.DataFrame(
-            {
-                "category_pair": confused_name_pairs,
-                "category_id_pair": confused_catIds,
-                "count": confused_counts,
-                "probability": confused_prob,
-            }
-        )
-
-    def _calculate_score_profile(self):
-        iouThrs = self.iouThrs
-        n_gt = len(self.tp_matches) + len(self.fn_matches)
-        matches_sorted = sorted(
-            self.tp_matches + self.fp_matches, key=lambda x: x["score"], reverse=True
-        )
-        scores = np.array([m["score"] for m in matches_sorted])
-        ious = np.array([m["iou"] if m["type"] == "TP" else 0.0 for m in matches_sorted])
-        iou_idxs = np.searchsorted(iouThrs, ious + np.spacing(1))
-
-        # Check
-        tps = np.array([m["type"] == "TP" for m in matches_sorted])
-        assert np.all(iou_idxs[tps] > 0)
-        assert np.all(iou_idxs[~tps] == 0)
-
-        f1s = []
-        pr_line = np.zeros(len(scores))
-        rc_line = np.zeros(len(scores))
-        for iou_idx, iou_th in enumerate(iouThrs):
-            tps = iou_idxs > iou_idx
-            fps = ~tps
-            tps_sum = np.cumsum(tps)
-            fps_sum = np.cumsum(fps)
-            precision = tps_sum / (tps_sum + fps_sum)
-            recall = tps_sum / n_gt
-            f1 = 2 * precision * recall / (precision + recall)
-            pr_line = pr_line + precision
-            rc_line = rc_line + recall
-            f1s.append(f1)
-        pr_line /= len(iouThrs)
-        rc_line /= len(iouThrs)
-        f1s = np.array(f1s)
-        # f1_line = f1s.mean(axis=0)
-        f1_line = np.nanmean(f1s, axis=0)
-        self.score_profile = {
-            "scores": scores,
-            "precision": pr_line,
-            "recall": rc_line,
-            "f1": f1_line,
-        }
-        self.score_profile_f1s = f1s
-
-        self.iou_idxs = iou_idxs
-        self.scores = scores
-        self.y_true = iou_idxs > 0
-
-    # def confidence_score_profile_v0(self):
-    #     n_gt = len(self.tp_matches) + len(self.fn_matches)
-    #     matches_sorted = sorted(self.tp_matches + self.fp_matches, key=lambda x: x['score'], reverse=True)
-    #     scores = np.array([m["score"] for m in matches_sorted])
-    #     tps = np.array([m["type"] == "TP" for m in matches_sorted])
-    #     fps = ~tps
-    #     tps_sum = np.cumsum(tps)
-    #     fps_sum = np.cumsum(fps)
-    #     precision = tps_sum / (tps_sum + fps_sum)
-    #     recall = tps_sum / n_gt
-    #     f1 = 2 * precision * recall / (precision + recall)
-    #     return {
-    #         "scores": scores,
-    #         "precision": precision,
-    #         "recall": recall,
-    #         "f1": f1
-    #     }
-
-    def confidence_score_profile(self):
-        return self.score_profile
-
-    def get_f1_optimal_conf(self):
-        if (~np.isnan(self.score_profile["f1"])).sum() == 0:
-            return None, None
-        argmax = np.nanargmax(self.score_profile["f1"])
-        f1_optimal_conf = self.score_profile["scores"][argmax]
-        best_f1 = self.score_profile["f1"][argmax]
-        return f1_optimal_conf, best_f1

-    def calibration_curve(self):
-        from sklearn.calibration import (  # pylint: disable=import-error
-            calibration_curve,
-        )
-
-        true_probs, pred_probs = calibration_curve(self.y_true, self.scores, n_bins=10)
-        return true_probs, pred_probs
-
-    def maximum_calibration_error(self):
-        return metrics.maximum_calibration_error(self.y_true, self.scores, n_bins=10)
-
-    def expected_calibration_error(self):
-        return metrics.expected_calibration_error(self.y_true, self.scores, n_bins=10)
-
-    def scores_tp_and_fp(self):
-        tps = self.y_true
-        scores_tp = self.scores[tps]
-        scores_fp = self.scores[~tps]
-        return scores_tp, scores_fp
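For orientation: the deleted module's public entry point was MetricProvider, which consumed COCO-style ground-truth/prediction matches and produced the benchmark's metric tables. The sketch below shows how that class was driven, based only on the signatures visible in the removed file above; it is a minimal usage sketch, not the library's documentation. The matches, coco_metrics, and params inputs are assumed to come from the benchmark's evaluation step (calculate_metrics.py in the file list), cocoGt/cocoDt are pycocotools-style COCO objects, and the import path is only valid for releases prior to this diff, where the module still exists (this release also modifies supervisely/nn/benchmark/object_detection/metric_provider.py, per the file list above).

# Minimal usage sketch of the removed MetricProvider (assumptions noted above):
from supervisely.nn.benchmark.utils.detection.metric_provider import MetricProvider

mp = MetricProvider(matches, coco_metrics, params, cocoGt, cocoDt)
mp.calculate()  # builds the full and confidence-filtered providers and caches all metrics

overview = mp.json_metrics()              # mAP, AP50/AP75, f1, precision, recall, iou, calibration, ...
table = mp.metric_table()                 # same values with display-friendly keys
per_class = mp.per_class_metrics()        # pandas DataFrame: category, precision, recall, f1
conf, best_f1 = mp.get_f1_optimal_conf()  # confidence threshold maximizing the IoU-averaged F1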
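The removed provider reports calibration_score as 1 - expected_calibration_error and delegates ECE/MCE to supervisely.nn.benchmark.utils.detection.metrics, whose implementation is not part of this diff. As a reference for what those numbers measure, below is a minimal NumPy sketch of the standard equal-width-binning definitions; it is an illustration under that assumption, not the library's code. Here y_true marks TP predictions and scores are their confidences, matching the y_true/scores arrays built in _calculate_score_profile above.

import numpy as np

def calibration_errors(y_true, scores, n_bins=10):
    """Binned calibration errors: ECE (sample-weighted mean gap) and MCE (max gap)."""
    y_true = np.asarray(y_true, dtype=float)   # 1.0 for TP predictions, 0.0 for FP
    scores = np.asarray(scores, dtype=float)   # predicted confidences in [0, 1]
    bins = np.linspace(0.0, 1.0, n_bins + 1)
    ece, mce = 0.0, 0.0
    for lo, hi in zip(bins[:-1], bins[1:]):
        in_bin = (scores > lo) & (scores <= hi)
        if not in_bin.any():
            continue
        gap = abs(y_true[in_bin].mean() - scores[in_bin].mean())  # |accuracy - avg confidence|
        ece += in_bin.mean() * gap  # weight the gap by the fraction of samples in the bin
        mce = max(mce, gap)
    return ece, mce

# Under this reading, the calibration_score returned by base_metrics() would be 1 - ece.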