supervisely-6.73.293-py3-none-any.whl → supervisely-6.73.295-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- supervisely/api/image_api.py +5 -1
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/avg_precision_by_class.py +1 -1
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/calibration_score.py +10 -0
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/explore_predictions.py +2 -2
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/outcome_counts.py +1 -1
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/overview.py +14 -8
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/pr_curve.py +1 -1
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/precision_recal_f1.py +2 -2
- supervisely/nn/benchmark/instance_segmentation/evaluation_params.yaml +6 -1
- supervisely/nn/benchmark/instance_segmentation/text_templates.py +4 -4
- supervisely/nn/benchmark/object_detection/base_vis_metric.py +1 -1
- supervisely/nn/benchmark/object_detection/evaluation_params.yaml +6 -1
- supervisely/nn/benchmark/object_detection/evaluator.py +1 -3
- supervisely/nn/benchmark/object_detection/metric_provider.py +59 -46
- supervisely/nn/benchmark/object_detection/text_templates.py +4 -4
- supervisely/nn/benchmark/object_detection/vis_metrics/confidence_distribution.py +20 -2
- supervisely/nn/benchmark/object_detection/vis_metrics/confidence_score.py +16 -0
- supervisely/nn/benchmark/object_detection/vis_metrics/explore_predictions.py +10 -5
- supervisely/nn/benchmark/object_detection/vis_metrics/key_metrics.py +1 -0
- supervisely/nn/benchmark/object_detection/vis_metrics/model_predictions.py +1 -1
- supervisely/nn/benchmark/object_detection/vis_metrics/outcome_counts.py +2 -57
- supervisely/nn/benchmark/object_detection/vis_metrics/outcome_counts_per_class.py +1 -1
- supervisely/nn/benchmark/object_detection/vis_metrics/overview.py +11 -3
- supervisely/nn/benchmark/object_detection/vis_metrics/pr_curve.py +1 -1
- supervisely/nn/benchmark/object_detection/vis_metrics/precision.py +18 -8
- supervisely/nn/benchmark/object_detection/vis_metrics/recall.py +13 -3
- supervisely/nn/benchmark/object_detection/visualizer.py +1 -1
- supervisely/nn/benchmark/utils/__init__.py +0 -1
- supervisely/nn/benchmark/utils/detection/__init__.py +1 -2
- supervisely/nn/benchmark/utils/detection/calculate_metrics.py +31 -37
- supervisely/nn/benchmark/visualization/evaluation_result.py +2 -4
- supervisely/nn/benchmark/visualization/vis_click_data.py +1 -3
- {supervisely-6.73.293.dist-info → supervisely-6.73.295.dist-info}/METADATA +1 -1
- {supervisely-6.73.293.dist-info → supervisely-6.73.295.dist-info}/RECORD +38 -39
- supervisely/nn/benchmark/utils/detection/metric_provider.py +0 -533
- {supervisely-6.73.293.dist-info → supervisely-6.73.295.dist-info}/LICENSE +0 -0
- {supervisely-6.73.293.dist-info → supervisely-6.73.295.dist-info}/WHEEL +0 -0
- {supervisely-6.73.293.dist-info → supervisely-6.73.295.dist-info}/entry_points.txt +0 -0
- {supervisely-6.73.293.dist-info → supervisely-6.73.295.dist-info}/top_level.txt +0 -0
supervisely/api/image_api.py
CHANGED
@@ -391,6 +391,7 @@ class ImageApi(RemoveableBulkModuleApi):
  project_id: Optional[int] = None,
  only_labelled: Optional[bool] = False,
  fields: Optional[List[str]] = None,
+ recursive: Optional[bool] = False,
  ) -> List[ImageInfo]:
  """
  List of Images in the given :class:`Dataset<supervisely.project.project.Dataset>`.
@@ -415,6 +416,8 @@ class ImageApi(RemoveableBulkModuleApi):
  :type only_labelled: bool, optional
  :param fields: List of fields to return. If None, returns all fields.
  :type fields: List[str], optional
+ :param recursive: If True, returns all images from dataset recursively (including images in nested datasets).
+ :type recursive: bool, optional
  :return: Objects with image information from Supervisely.
  :rtype: :class:`List[ImageInfo]<ImageInfo>`
  :Usage example:
@@ -474,6 +477,7 @@ class ImageApi(RemoveableBulkModuleApi):
  ApiField.SORT: sort,
  ApiField.SORT_ORDER: sort_order,
  ApiField.FORCE_METADATA_FOR_LINKS: force_metadata_for_links,
+ ApiField.RECURSIVE: recursive,
  }
  if only_labelled:
  data[ApiField.FILTERS] = [
@@ -4537,7 +4541,7 @@ class ImageApi(RemoveableBulkModuleApi):
  loop = sly.utils.get_or_create_event_loop()
  images = loop.run_until_complete(api.image.get_list_async(123456, per_page=600))
  """
-
+
  method = "images.list"
  dataset_info = kwargs.get("dataset_info", None)
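The new `recursive` flag is forwarded to the `images.list` request through `ApiField.RECURSIVE`. A minimal usage sketch, assuming the extended method is `ImageApi.get_list` and that the Supervisely server address and API token are exported as environment variables (the dataset id below is made up):

import supervisely as sly

# Hedged sketch: `recursive` comes from the hunk above; everything else is
# standard SDK usage. SERVER_ADDRESS and API_TOKEN must be set in the env.
api = sly.Api.from_env()

dataset_id = 123456  # hypothetical dataset that contains nested datasets
# With recursive=True the listing also includes images from nested datasets.
images = api.image.get_list(dataset_id, recursive=True)
print(f"Found {len(images)} images (nested datasets included)")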
supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/avg_precision_by_class.py
CHANGED
@@ -114,7 +114,7 @@ class AveragePrecisionByClass(BaseVisMetrics):
  {
  "type": "tag",
  "tagId": "confidence",
- "value": [eval_result.mp.
+ "value": [eval_result.mp.conf_threshold, 1],
  },
  {"type": "tag", "tagId": "outcome", "value": "TP"},
  {"type": "specific_objects", "tagId": None, "value": list(obj_ids)},
supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/calibration_score.py
CHANGED
@@ -154,6 +154,16 @@ class CalibrationScore(BaseVisMetrics):
  line=dict(color="gray", width=2, dash="dash"),
  name=f"F1-optimal threshold ({eval_result.name})",
  )
+ if eval_result.mp.custom_conf_threshold is not None:
+     fig.add_shape(
+         type="line",
+         x0=eval_result.mp.custom_conf_threshold,
+         x1=eval_result.mp.custom_conf_threshold,
+         y0=0,
+         y1=eval_result.mp.custom_f1,
+         line=dict(color="black", width=2, dash="dash"),
+         name=f"Confidence threshold ({eval_result.name})",
+     )

  # Update the layout
  fig.update_layout(
supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/explore_predictions.py
CHANGED
@@ -76,7 +76,7 @@ class ExplorePredictions(BaseVisMetrics):
  anns = eval_res.api.annotation.download_batch(dataset_info.id, images_ids, force_metadata_for_links=False)
  annotations.append(anns)
  skip_tags_filtering.append(False)
- min_conf = min(min_conf, eval_res.mp.
+ min_conf = min(min_conf, eval_res.mp.conf_threshold)

  images = list(i for x in zip(*images) for i in x)
  annotations = list(i for x in zip(*annotations) for i in x)
@@ -127,7 +127,7 @@ class ExplorePredictions(BaseVisMetrics):
  current_images_infos = sorted(current_images_infos, key=lambda x: names.index(x.name))
  images_ids.append([image_info.id for image_info in current_images_infos])

- min_conf = min(min_conf, eval_res.mp.
+ min_conf = min(min_conf, eval_res.mp.conf_threshold)

  explore["imagesIds"] = list(i for x in zip(*images_ids) for i in x)
  explore["filters"] = [{"type": "tag", "tagId": "confidence", "value": [min_conf, 1]}]

supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/outcome_counts.py
CHANGED
@@ -276,7 +276,7 @@ class OutcomeCounts(BaseVisMetrics):
  title = f"{model_name}. {outcome}: {len(obj_ids)} object{'s' if len(obj_ids) > 1 else ''}"
  outcome_dict["title"] = title
  outcome_dict["imagesIds"] = list(img_ids)
- thr = eval_result.mp.
+ thr = eval_result.mp.conf_threshold
  if outcome == "FN":
  outcome_dict["filters"] = [
  {"type": "specific_objects", "tagId": None, "value": list(obj_ids)},

supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/overview.py
CHANGED
@@ -27,6 +27,7 @@ class Overview(BaseVisMetrics):
  evaluation result metrics displayed
  """
  super().__init__(vis_texts, eval_results)
+ self.team_id = None # will be set in the visualizer

  @property
  def overview_md(self) -> List[MarkdownWidget]:
@@ -120,8 +121,7 @@ class Overview(BaseVisMetrics):
  if idx == 3 and not same_iou_thr:
  continue
  metric_name = metric_renames_map.get(metric, metric)
- values = [m
- values = [v if v is not None else "―" for v in values]
+ values = [m.get(metric, "―") for m in all_metrics]
  values = [round(v, 2) if isinstance(v, float) else v for v in values]
  row = [metric_name] + values
  dct = {"row": row, "id": metric, "items": row}
@@ -247,12 +247,18 @@ class Overview(BaseVisMetrics):

  iou_thrs_map = defaultdict(set)
  matched = True
- (6 removed lines; their content was not captured in this extract)
+
+ if not all([not r.different_iou_thresholds_per_class for r in self.eval_results]):
+     matched = False
+ else:
+     for eval_result in self.eval_results:
+         iou_thrs_per_class = eval_result.mp.iou_threshold_per_class
+         if iou_thrs_per_class is not None:
+             for cat_id, iou_thr in eval_result.mp.iou_threshold_per_class.items():
+                 iou_thrs_map[cat_id].add(iou_thr)
+                 if len(iou_thrs_map[cat_id]) > 1:
+                     matched = False
+                     break

  if matched:
  return None

supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/pr_curve.py
CHANGED
@@ -19,7 +19,7 @@ class PrCurve(BaseVisMetrics):
  @property
  def markdown_widget(self) -> MarkdownWidget:
  text: str = getattr(self.vis_texts, self.MARKDOWN_PR_CURVE).format(
- self.vis_texts.definitions.
+ self.vis_texts.definitions.about_pr_tradeoffs
  )
  return MarkdownWidget(
  name=self.MARKDOWN_PR_CURVE, title="mAP & Precision-Recall Curve", text=text
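The new block in Overview collects per-class IoU thresholds across all evaluation results and flags a mismatch as soon as one class is seen with two different values. Below is a standalone sketch of that check; the helper name and its input shape are illustrative, not SDK API:

from collections import defaultdict

def iou_thresholds_matched(per_class_thresholds):
    """per_class_thresholds: list of {cat_id: iou_thr} dicts, one per eval result."""
    iou_thrs_map = defaultdict(set)
    for thresholds in per_class_thresholds:
        for cat_id, iou_thr in (thresholds or {}).items():
            iou_thrs_map[cat_id].add(iou_thr)
            if len(iou_thrs_map[cat_id]) > 1:
                return False  # same class evaluated with different IoU thresholds
    return True

print(iou_thresholds_matched([{"car": 0.5}, {"car": 0.5}]))   # True
print(iou_thresholds_matched([{"car": 0.5}, {"car": 0.75}]))  # False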
supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/precision_recal_f1.py
CHANGED
@@ -205,7 +205,7 @@ class PrecisionRecallF1(BaseVisMetrics):
  {
  "type": "tag",
  "tagId": "confidence",
- "value": [eval_result.mp.
+ "value": [eval_result.mp.conf_threshold, 1],
  },
  {"type": "tag", "tagId": "outcome", "value": "TP"},
  {"type": "specific_objects", "tagId": None, "value": list(obj_ids)},
@@ -293,7 +293,7 @@ class PrecisionRecallF1(BaseVisMetrics):
  {
  "type": "tag",
  "tagId": "confidence",
- "value": [eval_result.mp.
+ "value": [eval_result.mp.conf_threshold, 1],
  },
  {"type": "tag", "tagId": "outcome", "value": "TP"},
  {"type": "specific_objects", "tagId": None, "value": list(obj_ids)},
supervisely/nn/benchmark/instance_segmentation/evaluation_params.yaml
CHANGED
@@ -1,2 +1,7 @@
- # Intersection over Union threshold that will be used for
+ # Intersection over Union threshold that will be used for object matching.
+ # It mostly affects visualizations, such as Outcome Counts, Confusion Matrix, and image previews.
  iou_threshold: 0.5
+
+ # Confidence threshold.
+ # Set 'auto' to calculate the optimal confidence threshold.
+ confidence_threshold: auto
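The same two defaults are added to both evaluation_params.yaml files (instance segmentation here, object detection below). A quick sketch of reading them and pinning a fixed threshold instead of 'auto'; it uses PyYAML directly rather than the SDK's own loader:

import yaml

# The defaults shipped in this release (both task types use the same ones).
defaults = yaml.safe_load(
    "iou_threshold: 0.5\n"
    "confidence_threshold: auto\n"
)

# 'auto' -> the F1-optimal threshold is computed; a float pins the report to a
# fixed confidence instead.
custom_params = dict(defaults, confidence_threshold=0.35)
print(custom_params)  # {'iou_threshold': 0.5, 'confidence_threshold': 0.35}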
supervisely/nn/benchmark/instance_segmentation/text_templates.py
CHANGED
@@ -43,7 +43,7 @@ markdown_overview = """
  - **Ground Truth project**: <a href="/projects/{}/datasets" target="_blank">{}</a>, {}{}
  {}
  - **IoU threshold**: {}
-
+ {}
  - **Averaging across IoU thresholds:** {}, <a href="{}" target="_blank">learn more</a>.

  Learn more about Model Benchmark, implementation details, and how to use the charts in our <a href="{}" target="_blank">Technical Report</a>.
@@ -73,7 +73,7 @@ In this section you can visually assess the model performance through examples.

  > Click on the image to view the **Ground Truth**, **Prediction**, and **Difference** annotations side-by-side.

- > Filtering options allow you to adjust the confidence threshold (only for predictions) and the model's false outcomes (only for differences).
+ > Filtering options allow you to adjust the confidence threshold (only for predictions) and the model's false outcomes (only for differences). {}
  """

  markdown_predictions_table = """### Prediction details for every image
@@ -100,7 +100,7 @@ To measure this, we calculate **Recall**. Recall counts errors, when the model d
  """

  notification_recall = {
- "title": "
+ "title": "{}",
  "description": "The model correctly found <b>{} of {}</b> total instances in the dataset.",
  }

@@ -123,7 +123,7 @@ To measure this, we calculate **Precision**. Precision counts errors, when the m
  """

  notification_precision = {
- "title": "
+ "title": "{}",
  "description": "The model correctly predicted <b>{} of {}</b> predictions made by the model in total.",
  }

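The notification templates above now receive their title through str.format() as well, alongside the existing description placeholders. A tiny sketch with made-up numbers:

notification_recall = {
    "title": "{}",
    "description": "The model correctly found <b>{} of {}</b> total instances in the dataset.",
}
print(notification_recall["title"].format("Recall"))
print(notification_recall["description"].format(870, 1000))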
supervisely/nn/benchmark/object_detection/base_vis_metric.py
CHANGED
@@ -42,7 +42,7 @@ class DetectionVisMetric(BaseVisMetric):
  {
  "type": "tag",
  "tagId": "confidence",
- "value": [self.eval_result.mp.
+ "value": [self.eval_result.mp.conf_threshold, 1],
  },
  {"type": "tag", "tagId": "outcome", "value": "TP"},
  {"type": "specific_objects", "tagId": None, "value": list(obj_ids)},
supervisely/nn/benchmark/object_detection/evaluation_params.yaml
CHANGED
@@ -1,2 +1,7 @@
- # Intersection over Union threshold that will be used for
+ # Intersection over Union threshold that will be used for object matching.
+ # It mostly affects visualizations, such as Outcome Counts, Confusion Matrix, and image previews.
  iou_threshold: 0.5
+
+ # Confidence threshold.
+ # Set 'auto' to calculate the optimal confidence threshold.
+ confidence_threshold: auto
supervisely/nn/benchmark/object_detection/evaluator.py
CHANGED
@@ -55,9 +55,7 @@ class ObjectDetectionEvalResult(BaseEvalResult):
  self.coco_gt, self.coco_dt = read_coco_datasets(self.coco_gt, self.coco_dt)

  self.mp = MetricProvider(
- self.eval_data
- self.eval_data["coco_metrics"],
- self.eval_data["params"],
+ self.eval_data,
  self.coco_gt,
  self.coco_dt,
  )
supervisely/nn/benchmark/object_detection/metric_provider.py
CHANGED
@@ -56,7 +56,7 @@ def filter_by_conf(matches: list, conf: float):


  class MetricProvider:
- def __init__(self,
+ def __init__(self, eval_data: dict, cocoGt, cocoDt):
  """
  Main class for calculating prediction metrics.

@@ -71,11 +71,16 @@ class MetricProvider:
  :param cocoDt: COCO object with predicted annotations
  :type cocoDt: COCO
  """
- self.
- self.
- self.
+ self.eval_data = eval_data
+ self.matches = eval_data["matches"]
+ self.coco_metrics = eval_data["coco_metrics"]
+ self.params = eval_data["params"]
  self.cocoGt = cocoGt
  self.cocoDt = cocoDt
+ self.coco_mAP = self.coco_metrics["mAP"]
+ self.coco_precision = self.coco_metrics["precision"]
+ self.iouThrs = self.params["iouThrs"]
+ self.recThrs = self.params["recThrs"]

  self.metric_names = METRIC_NAMES

@@ -83,41 +88,31 @@ class MetricProvider:
  self.cat_ids = cocoGt.getCatIds()
  self.cat_names = [cocoGt.cats[cat_id]["name"] for cat_id in self.cat_ids]

- # eval_data
- self.matches = matches
- self.coco_mAP = coco_metrics["mAP"]
- self.coco_precision = coco_metrics["precision"]
- self.iouThrs = params["iouThrs"]
- self.recThrs = params["recThrs"]
-
  # Evaluation params
- eval_params = params.get("evaluation_params", {})
+ eval_params = self.params.get("evaluation_params", {})
  self.iou_threshold = eval_params.get("iou_threshold", 0.5)
  self.iou_threshold_idx = np.where(np.isclose(self.iouThrs, self.iou_threshold))[0][0]
  self.iou_threshold_per_class = eval_params.get("iou_threshold_per_class")
- self.iou_idx_per_class = params.get("iou_idx_per_class") # {cat id: iou_idx}
- if self.iou_threshold_per_class is not None:
- # TODO: temporary solution
- eval_params["average_across_iou_thresholds"] = False
+ self.iou_idx_per_class = self.params.get("iou_idx_per_class") # {cat id: iou_idx}
  self.average_across_iou_thresholds = eval_params.get("average_across_iou_thresholds", True)

  def calculate(self):
- self.m_full = _MetricProvider(
- self.matches, self.coco_metrics, self.params, self.cocoGt, self.cocoDt
- )
+ self.m_full = _MetricProvider(self.matches, self.eval_data, self.cocoGt, self.cocoDt)
  self.m_full._calculate_score_profile()

  # Find optimal confidence threshold
  self.f1_optimal_conf, self.best_f1 = self.m_full.get_f1_optimal_conf()
+ self.custom_conf_threshold, self.custom_f1 = self.m_full.get_custom_conf_threshold()
+
+ # Confidence threshold that will be used in visualizations
+ self.conf_threshold = self.custom_conf_threshold or self.f1_optimal_conf

  # Filter by optimal confidence threshold
- if self.
- matches_filtered = filter_by_conf(self.matches, self.
+ if self.conf_threshold is not None:
+     matches_filtered = filter_by_conf(self.matches, self.conf_threshold)
  else:
  matches_filtered = self.matches
- self.m = _MetricProvider(
- matches_filtered, self.coco_metrics, self.params, self.cocoGt, self.cocoDt
- )
+ self.m = _MetricProvider(matches_filtered, self.eval_data, self.cocoGt, self.cocoDt)
  self.matches_filtered = matches_filtered
  self.m._init_counts()

@@ -155,7 +150,7 @@ class MetricProvider:
  ap_by_class = dict(zip(self.cat_names, ap_by_class))
  ap_custom_by_class = self.AP_custom_per_class().tolist()
  ap_custom_by_class = dict(zip(self.cat_names, ap_custom_by_class))
-
+ data = {
  "mAP": base["mAP"],
  "AP50": self.coco_metrics.get("AP50"),
  "AP75": self.coco_metrics.get("AP75"),
@@ -172,6 +167,9 @@ class MetricProvider:
  "AP_by_class": ap_by_class,
  f"AP{iou_name}_by_class": ap_custom_by_class,
  }
+ if self.custom_conf_threshold is not None:
+     data["custom_confidence_threshold"] = self.custom_conf_threshold
+ return data

  def key_metrics(self):
  iou_name = int(self.iou_threshold * 100)
@@ -187,7 +185,7 @@ class MetricProvider:
  iou_name = int(self.iou_threshold * 100)
  if self.iou_threshold_per_class is not None:
  iou_name = "_custom"
-
+ data = {
  "mAP": table["mAP"],
  "AP50": table["AP50"],
  "AP75": table["AP75"],
@@ -198,8 +196,11 @@ class MetricProvider:
  "Avg. IoU": table["iou"],
  "Classification Acc.": table["classification_accuracy"],
  "Calibration Score": table["calibration_score"],
- "
+ "Optimal confidence threshold": table["f1_optimal_conf"],
  }
+ if self.custom_conf_threshold is not None:
+     data["Custom confidence threshold"] = table["custom_confidence_threshold"]
+ return data

  def AP_per_class(self):
  s = self.coco_precision[:, :, :, 0, 2].copy()
@@ -262,25 +263,27 @@


  class _MetricProvider:
- def __init__(self, matches: list,
+ def __init__(self, matches: list, eval_data: dict, cocoGt, cocoDt):
  """
  type cocoGt: COCO
  type cocoDt: COCO
  """

+ self.matches = matches
+ self.eval_data = eval_data
+ self.coco_metrics = eval_data["coco_metrics"]
+ self.params = eval_data["params"]
  self.cocoGt = cocoGt
+ self.cocoDt = cocoDt
+ self.coco_mAP = self.coco_metrics["mAP"]
+ self.coco_precision = self.coco_metrics["precision"]
+ self.iouThrs = self.params["iouThrs"]
+ self.recThrs = self.params["recThrs"]

  # metainfo
  self.cat_ids = cocoGt.getCatIds()
  self.cat_names = [cocoGt.cats[cat_id]["name"] for cat_id in self.cat_ids]

- # eval_data
- self.matches = matches
- self.coco_mAP = coco_metrics["mAP"]
- self.coco_precision = coco_metrics["precision"]
- self.iouThrs = params["iouThrs"]
- self.recThrs = params["recThrs"]
-
  # Matches
  self.tp_matches = [m for m in self.matches if m["type"] == "TP"]
  self.fp_matches = [m for m in self.matches if m["type"] == "FP"]
@@ -290,13 +293,12 @@ class _MetricProvider:
  self.ious = np.array([m["iou"] for m in self.tp_matches])

  # Evaluation params
- self.params = params
  self.iou_idx_per_class = np.array(
- [params["iou_idx_per_class"][cat_id] for cat_id in self.cat_ids]
+ [self.params["iou_idx_per_class"][cat_id] for cat_id in self.cat_ids]
  )[:, None]
- eval_params = params.get("evaluation_params", {})
+ eval_params = self.params.get("evaluation_params", {})
  self.average_across_iou_thresholds = eval_params.get("average_across_iou_thresholds", True)
-
+
  def _init_counts(self):
  cat_ids = self.cat_ids
  iouThrs = self.iouThrs
@@ -307,9 +309,6 @@ class _MetricProvider:
  ious.append(match["iou"])
  cats.append(cat_id_to_idx[match["category_id"]])
  ious = np.array(ious) + np.spacing(1)
- if 0.8999999999999999 in iouThrs:
- iouThrs = iouThrs.copy()
- iouThrs[iouThrs == 0.8999999999999999] = 0.9
  iou_idxs = np.searchsorted(iouThrs, ious) - 1
  cats = np.array(cats)
  # TP
@@ -345,9 +344,16 @@ class _MetricProvider:
  self.FP_count = int(self._take_iou_thresholds(false_positives).sum())
  self.FN_count = int(self._take_iou_thresholds(false_negatives).sum())

+ # self.true_positives = self.eval_data["true_positives"]
+ # self.false_negatives = self.eval_data["false_negatives"]
+ # self.false_positives = self.eval_data["false_positives"]
+ # self.TP_count = int(self._take_iou_thresholds(self.true_positives).sum())
+ # self.FP_count = int(self._take_iou_thresholds(self.false_positives).sum())
+ # self.FN_count = int(self._take_iou_thresholds(self.false_negatives).sum())
+
  def _take_iou_thresholds(self, x):
  return np.take_along_axis(x, self.iou_idx_per_class, axis=1)
-
+
  def base_metrics(self):
  if self.average_across_iou_thresholds:
  tp = self.true_positives
@@ -495,9 +501,6 @@ class _MetricProvider:
  )
  scores = np.array([m["score"] for m in matches_sorted])
  ious = np.array([m["iou"] if m["type"] == "TP" else 0.0 for m in matches_sorted])
- if 0.8999999999999999 in iouThrs:
- iouThrs = iouThrs.copy()
- iouThrs[iouThrs == 0.8999999999999999] = 0.9
  iou_idxs = np.searchsorted(iouThrs, ious + np.spacing(1))

  # Check
@@ -565,6 +568,16 @@ class _MetricProvider:
  best_f1 = self.score_profile["f1"][argmax]
  return f1_optimal_conf, best_f1

+ def get_custom_conf_threshold(self):
+     if (~np.isnan(self.score_profile["f1"])).sum() == 0:
+         return None, None
+     conf_threshold = self.params.get("evaluation_params", {}).get("confidence_threshold")
+     if conf_threshold is not None and conf_threshold != "auto":
+         idx = np.argmin(np.abs(self.score_profile["scores"] - conf_threshold))
+         custom_f1 = self.score_profile["f1"][idx]
+         return conf_threshold, custom_f1
+     return None, None
+
  def calibration_curve(self):
  from sklearn.calibration import ( # pylint: disable=import-error
  calibration_curve,
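Taken together, the MetricProvider changes mean a fixed confidence_threshold from the evaluation params overrides the F1-optimal one, and its F1 value is looked up at the nearest score in the score profile. A standalone sketch of that resolution logic; the function name and flat arguments are illustrative, not the SDK API:

import numpy as np

def resolve_conf_threshold(scores, f1, evaluation_params, f1_optimal_conf):
    # A custom threshold (anything other than "auto") wins over the F1-optimal
    # one; its F1 is read from the score profile at the nearest available score.
    conf = evaluation_params.get("confidence_threshold")
    if conf is not None and conf != "auto":
        idx = int(np.argmin(np.abs(np.asarray(scores) - conf)))
        return conf, float(f1[idx])
    return f1_optimal_conf, None

thr, f1_at_thr = resolve_conf_threshold(
    scores=[0.1, 0.3, 0.5, 0.7],
    f1=[0.2, 0.6, 0.7, 0.5],
    evaluation_params={"confidence_threshold": 0.55},
    f1_optimal_conf=0.5,
)
print(thr, f1_at_thr)  # 0.55 0.7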
supervisely/nn/benchmark/object_detection/text_templates.py
CHANGED
@@ -43,7 +43,7 @@ markdown_overview = """
  - **Ground Truth project**: <a href="/projects/{}/datasets" target="_blank">{}</a>, {}{}
  {}
  - **IoU threshold**: {}
-
+ {}
  - **Averaging across IoU thresholds:** {}, <a href="{}" target="_blank">learn more</a>.

  Learn more about Model Benchmark, implementation details, and how to use the charts in our <a href="{}" target="_blank">Technical Report</a>.
@@ -78,7 +78,7 @@ In this section you can visually assess the model performance through examples.

  > Click on the image to view the **Ground Truth**, **Prediction**, and **Difference** annotations side-by-side.

- > Filtering options allow you to adjust the confidence threshold (only for predictions) and the model's false outcomes (only for differences).
+ > Filtering options allow you to adjust the confidence threshold (only for predictions) and the model's false outcomes (only for differences). {}
  """

  markdown_predictions_gallery = """
@@ -125,7 +125,7 @@ To measure this, we calculate **Recall**. Recall counts errors, when the model d
  """

  notification_recall = {
- "title": "
+ "title": "{}",
  "description": "The model correctly found <b>{} of {}</b> total instances in the dataset.",
  }

@@ -148,7 +148,7 @@ To measure this, we calculate **Precision**. Precision counts errors, when the m
  """

  notification_precision = {
- "title": "
+ "title": "{}",
  "description": "The model correctly predicted <b>{} of {}</b> predictions made by the model in total.",
  }

supervisely/nn/benchmark/object_detection/vis_metrics/confidence_distribution.py
CHANGED
@@ -28,7 +28,8 @@ class ConfidenceDistribution(DetectionVisMetric):
  def _get_figure(self): # -> go.Figure:
  import plotly.graph_objects as go # pylint: disable=import-error

- f1_optimal_conf
+ f1_optimal_conf = self.eval_result.mp.f1_optimal_conf
+ custom_conf_threshold = self.eval_result.mp.custom_conf_threshold

  # Histogram of confidence scores (TP vs FP)
  scores_tp, scores_fp = self.eval_result.mp.m_full.scores_tp_and_fp()
@@ -88,7 +89,7 @@ class ConfidenceDistribution(DetectionVisMetric):
  x1=f1_optimal_conf,
  y0=0,
  y1=tp_y.max() * 1.3,
- line=dict(color="orange", width=
+ line=dict(color="orange", width=2, dash="dash"),
  )
  fig.add_annotation(
  x=f1_optimal_conf,
@@ -104,4 +105,21 @@ class ConfidenceDistribution(DetectionVisMetric):
  )
  fig.update_xaxes(title_text="Confidence Score", range=[0, 1])
  fig.update_yaxes(title_text="Count", range=[0, tp_y.max() * 1.3])
+
+ if custom_conf_threshold is not None:
+     # Custom threshold
+     fig.add_shape(
+         type="line",
+         x0=custom_conf_threshold,
+         x1=custom_conf_threshold,
+         y0=0,
+         y1=tp_y.max() * 1.3,
+         line=dict(color="orange", width=2, dash="dash"),
+     )
+     fig.add_annotation(
+         x=custom_conf_threshold,
+         y=tp_y.max() * 1.3,
+         text=f"Confidence threshold: {custom_conf_threshold:.2f}",
+         showarrow=False,
+     )
  return fig
supervisely/nn/benchmark/object_detection/vis_metrics/confidence_score.py
CHANGED
@@ -101,6 +101,22 @@ class ConfidenceScore(DetectionVisMetric):
  text=f"F1-optimal threshold: {self.eval_result.mp.f1_optimal_conf:.2f}",
  showarrow=False,
  )
+ if self.eval_result.mp.custom_conf_threshold is not None:
+     # Add vertical line for the custom threshold
+     fig.add_shape(
+         type="line",
+         x0=self.eval_result.mp.custom_conf_threshold,
+         x1=self.eval_result.mp.custom_conf_threshold,
+         y0=0,
+         y1=self.eval_result.mp.custom_f1,
+         line=dict(color="black", width=2, dash="dash"),
+     )
+     fig.add_annotation(
+         x=self.eval_result.mp.custom_conf_threshold,
+         y=self.eval_result.mp.custom_f1 + 0.04,
+         text=f"Confidence threshold: {self.eval_result.mp.custom_conf_threshold:.2f}",
+         showarrow=False,
+     )
  fig.update_layout(
  dragmode=False,
  modebar=dict(
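Both widgets above mark the custom threshold with the same Plotly pattern: a dashed vertical line plus a text annotation. A self-contained sketch with made-up values:

import plotly.graph_objects as go

custom_conf_threshold, custom_f1 = 0.45, 0.62  # placeholder values

fig = go.Figure(
    go.Scatter(x=[0.0, 0.25, 0.5, 0.75, 1.0], y=[0.1, 0.5, 0.65, 0.55, 0.2], name="F1")
)
# Dashed vertical line at the chosen confidence threshold.
fig.add_shape(
    type="line",
    x0=custom_conf_threshold, x1=custom_conf_threshold,
    y0=0, y1=custom_f1,
    line=dict(color="black", width=2, dash="dash"),
)
# Label just above the top of the line.
fig.add_annotation(
    x=custom_conf_threshold, y=custom_f1 + 0.04,
    text=f"Confidence threshold: {custom_conf_threshold:.2f}",
    showarrow=False,
)
fig.show()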
supervisely/nn/benchmark/object_detection/vis_metrics/explore_predictions.py
CHANGED
@@ -14,12 +14,17 @@ class ExplorePredictions(DetectionVisMetric):

  @property
  def md(self) -> MarkdownWidget:
-
+ conf_threshold_info = "Differences are calculated only for the optimal confidence threshold, allowing you to focus on the most accurate predictions made by the model."
+ if self.eval_result.mp.custom_conf_threshold is not None:
+     conf_threshold_info = (
+         "Differences are calculated for the custom confidence threshold (set manually)."
+     )
+ text = self.vis_texts.markdown_explorer.format(conf_threshold_info)
+
  return MarkdownWidget(self.MARKDOWN, "Explore Predictions", text)

  def gallery(self, opacity) -> GalleryWidget:
-
- default_filters = [{"confidence": [optimal_conf, 1]}]
+ default_filters = [{"confidence": [self.eval_result.mp.conf_threshold, 1]}]
  gallery = GalleryWidget(
  self.GALLERY, columns_number=3, filters=default_filters, opacity=opacity
  )
@@ -62,7 +67,7 @@ class ExplorePredictions(DetectionVisMetric):
  {
  "type": "tag",
  "tagId": "confidence",
- "value": [self.eval_result.mp.
+ "value": [self.eval_result.mp.conf_threshold, 1],
  }
  ]
  explore["title"] = "Explore all predictions"
@@ -89,7 +94,7 @@ class ExplorePredictions(DetectionVisMetric):
  {
  "type": "tag",
  "tagId": "confidence",
- "value": [self.eval_result.mp.
+ "value": [self.eval_result.mp.conf_threshold, 1],
  },
  ]
  for pairs_data in self.eval_result.matched_pair_data.values():
supervisely/nn/benchmark/object_detection/vis_metrics/model_predictions.py
CHANGED
@@ -99,7 +99,7 @@ class ModelPredictions(DetectionVisMetric):
  {
  "type": "tag",
  "tagId": "confidence",
- "value": [self.eval_result.mp.
+ "value": [self.eval_result.mp.conf_threshold, 1],
  },
  # {"type": "tag", "tagId": "outcome", "value": "FP"},
  ]