supervisely 6.73.238__py3-none-any.whl → 6.73.240__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- supervisely/annotation/annotation.py +2 -2
- supervisely/api/entity_annotation/tag_api.py +11 -4
- supervisely/api/file_api.py +17 -3
- supervisely/nn/__init__.py +1 -0
- supervisely/nn/benchmark/__init__.py +14 -2
- supervisely/nn/benchmark/base_benchmark.py +84 -37
- supervisely/nn/benchmark/base_evaluator.py +120 -0
- supervisely/nn/benchmark/base_visualizer.py +265 -0
- supervisely/nn/benchmark/comparison/detection_visualization/text_templates.py +5 -5
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/calibration_score.py +2 -2
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/explore_predicttions.py +39 -16
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/localization_accuracy.py +1 -1
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/outcome_counts.py +4 -4
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/overview.py +12 -11
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/pr_curve.py +1 -1
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/precision_recal_f1.py +6 -6
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/speedtest.py +3 -3
- supervisely/nn/benchmark/{instance_segmentation_benchmark.py → instance_segmentation/benchmark.py} +9 -3
- supervisely/nn/benchmark/instance_segmentation/evaluator.py +58 -0
- supervisely/nn/benchmark/{visualization/text_templates/instance_segmentation_text.py → instance_segmentation/text_templates.py} +53 -69
- supervisely/nn/benchmark/instance_segmentation/visualizer.py +18 -0
- supervisely/nn/benchmark/object_detection/__init__.py +0 -0
- supervisely/nn/benchmark/object_detection/base_vis_metric.py +51 -0
- supervisely/nn/benchmark/{object_detection_benchmark.py → object_detection/benchmark.py} +4 -2
- supervisely/nn/benchmark/object_detection/evaluation_params.yaml +2 -0
- supervisely/nn/benchmark/{evaluation/object_detection_evaluator.py → object_detection/evaluator.py} +67 -9
- supervisely/nn/benchmark/{evaluation/coco → object_detection}/metric_provider.py +13 -14
- supervisely/nn/benchmark/{visualization/text_templates/object_detection_text.py → object_detection/text_templates.py} +49 -41
- supervisely/nn/benchmark/object_detection/vis_metrics/__init__.py +48 -0
- supervisely/nn/benchmark/{visualization → object_detection}/vis_metrics/confidence_distribution.py +20 -24
- supervisely/nn/benchmark/object_detection/vis_metrics/confidence_score.py +119 -0
- supervisely/nn/benchmark/{visualization → object_detection}/vis_metrics/confusion_matrix.py +34 -22
- supervisely/nn/benchmark/object_detection/vis_metrics/explore_predictions.py +129 -0
- supervisely/nn/benchmark/{visualization → object_detection}/vis_metrics/f1_score_at_different_iou.py +21 -26
- supervisely/nn/benchmark/object_detection/vis_metrics/frequently_confused.py +137 -0
- supervisely/nn/benchmark/object_detection/vis_metrics/iou_distribution.py +106 -0
- supervisely/nn/benchmark/object_detection/vis_metrics/key_metrics.py +136 -0
- supervisely/nn/benchmark/{visualization → object_detection}/vis_metrics/model_predictions.py +53 -49
- supervisely/nn/benchmark/object_detection/vis_metrics/outcome_counts.py +188 -0
- supervisely/nn/benchmark/object_detection/vis_metrics/outcome_counts_per_class.py +191 -0
- supervisely/nn/benchmark/object_detection/vis_metrics/overview.py +116 -0
- supervisely/nn/benchmark/object_detection/vis_metrics/pr_curve.py +106 -0
- supervisely/nn/benchmark/object_detection/vis_metrics/pr_curve_by_class.py +49 -0
- supervisely/nn/benchmark/object_detection/vis_metrics/precision.py +72 -0
- supervisely/nn/benchmark/object_detection/vis_metrics/precision_avg_per_class.py +59 -0
- supervisely/nn/benchmark/object_detection/vis_metrics/recall.py +71 -0
- supervisely/nn/benchmark/object_detection/vis_metrics/recall_vs_precision.py +56 -0
- supervisely/nn/benchmark/object_detection/vis_metrics/reliability_diagram.py +110 -0
- supervisely/nn/benchmark/object_detection/vis_metrics/speedtest.py +151 -0
- supervisely/nn/benchmark/object_detection/visualizer.py +697 -0
- supervisely/nn/benchmark/semantic_segmentation/__init__.py +9 -0
- supervisely/nn/benchmark/semantic_segmentation/base_vis_metric.py +55 -0
- supervisely/nn/benchmark/semantic_segmentation/benchmark.py +32 -0
- supervisely/nn/benchmark/semantic_segmentation/evaluation_params.yaml +0 -0
- supervisely/nn/benchmark/semantic_segmentation/evaluator.py +162 -0
- supervisely/nn/benchmark/semantic_segmentation/metric_provider.py +153 -0
- supervisely/nn/benchmark/semantic_segmentation/text_templates.py +130 -0
- supervisely/nn/benchmark/semantic_segmentation/vis_metrics/__init__.py +0 -0
- supervisely/nn/benchmark/semantic_segmentation/vis_metrics/acknowledgement.py +15 -0
- supervisely/nn/benchmark/semantic_segmentation/vis_metrics/classwise_error_analysis.py +57 -0
- supervisely/nn/benchmark/semantic_segmentation/vis_metrics/confusion_matrix.py +92 -0
- supervisely/nn/benchmark/semantic_segmentation/vis_metrics/explore_predictions.py +84 -0
- supervisely/nn/benchmark/semantic_segmentation/vis_metrics/frequently_confused.py +101 -0
- supervisely/nn/benchmark/semantic_segmentation/vis_metrics/iou_eou.py +45 -0
- supervisely/nn/benchmark/semantic_segmentation/vis_metrics/key_metrics.py +60 -0
- supervisely/nn/benchmark/semantic_segmentation/vis_metrics/model_predictions.py +107 -0
- supervisely/nn/benchmark/semantic_segmentation/vis_metrics/overview.py +112 -0
- supervisely/nn/benchmark/semantic_segmentation/vis_metrics/renormalized_error_ou.py +48 -0
- supervisely/nn/benchmark/semantic_segmentation/vis_metrics/speedtest.py +178 -0
- supervisely/nn/benchmark/semantic_segmentation/vis_metrics/vis_texts.py +21 -0
- supervisely/nn/benchmark/semantic_segmentation/visualizer.py +304 -0
- supervisely/nn/benchmark/utils/__init__.py +12 -0
- supervisely/nn/benchmark/utils/detection/__init__.py +2 -0
- supervisely/nn/benchmark/{evaluation/coco → utils/detection}/calculate_metrics.py +6 -4
- supervisely/nn/benchmark/utils/detection/metric_provider.py +533 -0
- supervisely/nn/benchmark/{coco_utils → utils/detection}/sly2coco.py +4 -4
- supervisely/nn/benchmark/{coco_utils/utils.py → utils/detection/utlis.py} +11 -0
- supervisely/nn/benchmark/utils/semantic_segmentation/__init__.py +0 -0
- supervisely/nn/benchmark/utils/semantic_segmentation/calculate_metrics.py +35 -0
- supervisely/nn/benchmark/utils/semantic_segmentation/evaluator.py +804 -0
- supervisely/nn/benchmark/utils/semantic_segmentation/loader.py +65 -0
- supervisely/nn/benchmark/utils/semantic_segmentation/utils.py +109 -0
- supervisely/nn/benchmark/visualization/evaluation_result.py +17 -3
- supervisely/nn/benchmark/visualization/vis_click_data.py +1 -1
- supervisely/nn/benchmark/visualization/widgets/__init__.py +3 -0
- supervisely/nn/benchmark/visualization/widgets/chart/chart.py +12 -4
- supervisely/nn/benchmark/visualization/widgets/gallery/gallery.py +35 -8
- supervisely/nn/benchmark/visualization/widgets/gallery/template.html +8 -4
- supervisely/nn/benchmark/visualization/widgets/markdown/markdown.py +1 -1
- supervisely/nn/benchmark/visualization/widgets/notification/notification.py +11 -7
- supervisely/nn/benchmark/visualization/widgets/radio_group/__init__.py +0 -0
- supervisely/nn/benchmark/visualization/widgets/radio_group/radio_group.py +34 -0
- supervisely/nn/benchmark/visualization/widgets/table/table.py +9 -3
- supervisely/nn/benchmark/visualization/widgets/widget.py +4 -0
- supervisely/project/project.py +18 -6
- {supervisely-6.73.238.dist-info → supervisely-6.73.240.dist-info}/METADATA +3 -1
- {supervisely-6.73.238.dist-info → supervisely-6.73.240.dist-info}/RECORD +104 -82
- supervisely/nn/benchmark/coco_utils/__init__.py +0 -2
- supervisely/nn/benchmark/evaluation/__init__.py +0 -3
- supervisely/nn/benchmark/evaluation/base_evaluator.py +0 -64
- supervisely/nn/benchmark/evaluation/coco/__init__.py +0 -2
- supervisely/nn/benchmark/evaluation/instance_segmentation_evaluator.py +0 -88
- supervisely/nn/benchmark/utils.py +0 -13
- supervisely/nn/benchmark/visualization/inference_speed/__init__.py +0 -19
- supervisely/nn/benchmark/visualization/inference_speed/speedtest_batch.py +0 -161
- supervisely/nn/benchmark/visualization/inference_speed/speedtest_intro.py +0 -28
- supervisely/nn/benchmark/visualization/inference_speed/speedtest_overview.py +0 -141
- supervisely/nn/benchmark/visualization/inference_speed/speedtest_real_time.py +0 -63
- supervisely/nn/benchmark/visualization/text_templates/inference_speed_text.py +0 -23
- supervisely/nn/benchmark/visualization/vis_metric_base.py +0 -337
- supervisely/nn/benchmark/visualization/vis_metrics/__init__.py +0 -67
- supervisely/nn/benchmark/visualization/vis_metrics/classwise_error_analysis.py +0 -55
- supervisely/nn/benchmark/visualization/vis_metrics/confidence_score.py +0 -93
- supervisely/nn/benchmark/visualization/vis_metrics/explorer_grid.py +0 -144
- supervisely/nn/benchmark/visualization/vis_metrics/frequently_confused.py +0 -115
- supervisely/nn/benchmark/visualization/vis_metrics/iou_distribution.py +0 -86
- supervisely/nn/benchmark/visualization/vis_metrics/outcome_counts.py +0 -119
- supervisely/nn/benchmark/visualization/vis_metrics/outcome_counts_per_class.py +0 -148
- supervisely/nn/benchmark/visualization/vis_metrics/overall_error_analysis.py +0 -109
- supervisely/nn/benchmark/visualization/vis_metrics/overview.py +0 -189
- supervisely/nn/benchmark/visualization/vis_metrics/percision_avg_per_class.py +0 -57
- supervisely/nn/benchmark/visualization/vis_metrics/pr_curve.py +0 -101
- supervisely/nn/benchmark/visualization/vis_metrics/pr_curve_by_class.py +0 -46
- supervisely/nn/benchmark/visualization/vis_metrics/precision.py +0 -56
- supervisely/nn/benchmark/visualization/vis_metrics/recall.py +0 -54
- supervisely/nn/benchmark/visualization/vis_metrics/recall_vs_precision.py +0 -57
- supervisely/nn/benchmark/visualization/vis_metrics/reliability_diagram.py +0 -88
- supervisely/nn/benchmark/visualization/vis_metrics/what_is.py +0 -23
- supervisely/nn/benchmark/visualization/vis_templates.py +0 -241
- supervisely/nn/benchmark/visualization/vis_widgets.py +0 -128
- supervisely/nn/benchmark/visualization/visualizer.py +0 -729
- /supervisely/nn/benchmark/{visualization/text_templates → instance_segmentation}/__init__.py +0 -0
- /supervisely/nn/benchmark/{evaluation/coco → instance_segmentation}/evaluation_params.yaml +0 -0
- /supervisely/nn/benchmark/{evaluation/coco → utils/detection}/metrics.py +0 -0
- {supervisely-6.73.238.dist-info → supervisely-6.73.240.dist-info}/LICENSE +0 -0
- {supervisely-6.73.238.dist-info → supervisely-6.73.240.dist-info}/WHEEL +0 -0
- {supervisely-6.73.238.dist-info → supervisely-6.73.240.dist-info}/entry_points.txt +0 -0
- {supervisely-6.73.238.dist-info → supervisely-6.73.240.dist-info}/top_level.txt +0 -0
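
The bulk of this release is a restructuring of `supervisely.nn.benchmark` into task-specific subpackages (`object_detection`, `instance_segmentation`, `semantic_segmentation`). Below is a minimal sketch of the import-path migration implied by the renames above; the path mapping is taken directly from the file list, while the lookup helper itself is purely illustrative and not part of the package.

```python
# Illustrative only: dotted module paths derived from the renames listed above.
# The helper function is hypothetical and is not shipped with supervisely.
RENAMED_MODULES = {
    "supervisely.nn.benchmark.object_detection_benchmark":
        "supervisely.nn.benchmark.object_detection.benchmark",
    "supervisely.nn.benchmark.instance_segmentation_benchmark":
        "supervisely.nn.benchmark.instance_segmentation.benchmark",
    "supervisely.nn.benchmark.evaluation.object_detection_evaluator":
        "supervisely.nn.benchmark.object_detection.evaluator",
    "supervisely.nn.benchmark.visualization.text_templates.object_detection_text":
        "supervisely.nn.benchmark.object_detection.text_templates",
}


def new_module_path(old_path: str) -> str:
    """Return the 6.73.240 module path for a module renamed in this release."""
    return RENAMED_MODULES.get(old_path, old_path)


print(new_module_path("supervisely.nn.benchmark.object_detection_benchmark"))
# -> supervisely.nn.benchmark.object_detection.benchmark
```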
supervisely/nn/benchmark/{visualization/text_templates/object_detection_text.py → object_detection/text_templates.py}
RENAMED
@@ -13,6 +13,10 @@ definitions = SimpleNamespace(
     iou_threshold="The IoU threshold is a predefined value (set to 0.5 in many benchmarks) that determines the minimum acceptable IoU score for a predicted bounding box to be considered a correct prediction. When the IoU of a predicted box and actual box is higher than this IoU threshold, the prediction is considered correct. Some metrics will evaluate the model with different IoU thresholds to provide more insights about the model's performance.",
 )
 
+docs_url = (
+    "https://docs.supervisely.com/neural-networks/model-evaluation-benchmark/object-detection"
+)
+
 # <i class="zmdi zmdi-check-circle" style="color: #13ce66; margin-right: 5px"></i>
 clickable_label = """
 > <span style="color: #5a6772">
@@ -35,9 +39,11 @@ markdown_overview = """
 - **Architecture**: {}
 - **Task type**: {}
 - **Runtime**: {}
-- **Checkpoint file**: <a href="{}" target="_blank">{}</a>
+- **Checkpoint file**: <a class="checkpoint-url" href="{}" target="_blank">{}</a>
 - **Ground Truth project**: <a href="/projects/{}/datasets" target="_blank">{}</a>, {}{}
 {}
+- **IoU threshold**: {}
+- **Optimal confidence threshold**: {} (calculated automatically), <a href="{}" target="_blank">learn more</a>.
 
 Learn more about Model Benchmark, implementation details, and how to use the charts in our <a href="{}" target="_blank">Technical Report</a>.
 """
@@ -97,18 +103,15 @@ markdown_experts = """
 markdown_how_to_use = """
 """
 
-markdown_outcome_counts =
-    """## Outcome Counts
+markdown_outcome_counts = """## Outcome Counts
 
 This chart is used to evaluate the overall model performance by breaking down all predictions into <abbr title="{}">True Positives</abbr> (TP), <abbr title="{}">False Positives</abbr> (FP), and <abbr title="{}">False Negatives</abbr> (FN). This helps to visually assess the type of errors the model often encounters.
 
 """
-    + clickable_label
-)
 
 markdown_R = """## Recall
 
-This section measures the ability of the model to detect **all relevant instances in the dataset**. In other words, it answers the question:
+This section measures the ability of the model to detect **all relevant instances in the dataset**. In other words, it answers the question: "Of all instances in the dataset, how many of them is the model managed to find out?"
 
 To measure this, we calculate **Recall**. Recall counts errors, when the model does not detect an object that actually is present in a dataset and should be detected. Recall is calculated as the portion of correct predictions (true positives) over all instances in the dataset (true positives + false negatives).
 """
@@ -118,8 +121,7 @@ notification_recall = {
     "description": "The model correctly found <b>{} of {}</b> total instances in the dataset.",
 }
 
-markdown_R_perclass =
-    """### Per-class Recall
+markdown_R_perclass = """### Per-class Recall
 
 This chart further analyzes Recall, breaking it down to each class in separate.
 
@@ -128,15 +130,13 @@ Since the overall recall is calculated as an average across all classes, we prov
 _Bars in the chart are sorted by <abbr title="{}">F1-score</abbr> to keep a unified order of classes between different charts._
 
 """
-    + clickable_label
-)
 
 
 markdown_P = """## Precision
 
-This section measures the accuracy of all predictions made by the model. In other words, it answers the question:
+This section measures the accuracy of all predictions made by the model. In other words, it answers the question: "Of all predictions made by the model, how many of them are actually correct?".
 
-To measure this, we calculate **Precision**. Precision counts errors, when the model predicts an object (bounding box), but the image has no objects of the predicted class in this place. Precision is calculated as a portion of correct predictions (true positives) over all model
+To measure this, we calculate **Precision**. Precision counts errors, when the model predicts an object (bounding box), but the image has no objects of the predicted class in this place. Precision is calculated as a portion of correct predictions (true positives) over all model's predictions (true positives + false positives).
 """
 
 notification_precision = {
@@ -144,8 +144,7 @@ notification_precision = {
     "description": "The model correctly predicted <b>{} of {}</b> predictions made by the model in total.",
 }
 
-markdown_P_perclass =
-    """### Per-class Precision
+markdown_P_perclass = """### Per-class Precision
 
 This chart further analyzes Precision, breaking it down to each class in separate.
 
@@ -154,20 +153,15 @@ Since the overall precision is computed as an average across all classes, we pro
 _Bars in the chart are sorted by <abbr title="{}">F1-score</abbr> to keep a unified order of classes between different charts._
 
 """
-    + clickable_label
-)
 
 
-markdown_PR =
-    """## Recall vs. Precision
+markdown_PR = """## Recall vs. Precision
 
 This section compares Precision and Recall in one graph, identifying **imbalance** between these two.
 
 _Bars in the chart are sorted by <abbr title="{}">F1-score</abbr> to keep a unified order of classes between different charts._
 
 """
-    + clickable_label
-)
 
 
 markdown_pr_curve = """## Precision-Recall Curve
@@ -199,39 +193,34 @@ notification_ap = {
     "description": "",
 }
 
-markdown_pr_by_class =
-    """### Precision-Recall Curve by Class
+markdown_pr_by_class = """### Precision-Recall Curve by Class
 
 In this plot, you can evaluate PR curve for each class individually.
 
 """
-    + clickable_label
-)
 
-markdown_confusion_matrix =
-    """## Confusion Matrix
+markdown_confusion_matrix = """## Confusion Matrix
 
 Confusion matrix helps to find the number of confusions between different classes made by the model.
 Each row of the matrix represents the instances in a ground truth class, while each column represents the instances in a predicted class.
 The diagonal elements represent the number of correct predictions for each class (True Positives), and the off-diagonal elements show misclassifications.
 
 """
-    + clickable_label
-)
 
+markdown_frequently_confused_empty = """### Frequently Confused Classes
+
+No frequently confused class pairs found
+"""
 
-markdown_frequently_confused =
-    """### Frequently Confused Classes
+markdown_frequently_confused = """### Frequently Confused Classes
 
 This chart displays the most frequently confused pairs of classes. In general, it finds out which classes visually seem very similar to the model.
 
-The chart calculates the **probability of confusion** between different pairs of classes. For instance, if the probability of confusion for the pair
+The chart calculates the **probability of confusion** between different pairs of classes. For instance, if the probability of confusion for the pair "{} - {}" is {}, this means that when the model predicts either "{}" or "{}", there is a {}% chance that the model might mistakenly predict one instead of the other.
 
 The measure is class-symmetric, meaning that the probability of confusing a {} with a {} is equal to the probability of confusing a {} with a {}.
 
 """
-    + clickable_label
-)
 
 
 markdown_localization_accuracy = """## Localization Accuracy (IoU)
@@ -317,14 +306,11 @@ Additionally, it provides a view of how predicted probabilities are distributed.
 Ideally, the green histogram (TP predictions) should have higher confidence scores and be shifted to the right, indicating that the model is sure about its correct predictions, and the red histogram (FP predictions) should have lower confidence scores and be shifted to the left.
 """
 
-markdown_class_ap =
-    """## Average Precision by Class
+markdown_class_ap = """## Average Precision by Class
 
 A quick visual comparison of the model performance across all classes. Each axis in the chart represents a different class, and the distance to the center indicates the <abbr title="{}">Average Precision</abbr> (AP) for that class.
 
 """
-    + clickable_label
-)
 
 
 markdown_class_outcome_counts_1 = """### Outcome Counts by Class
@@ -338,16 +324,38 @@ markdown_normalization = """Normalization is used for better interclass comparis
 If normalization is off, the chart will display the total count of instances that correspond to outcome type (one of TP, FP or FN). This mode is identical to the main Outcome Counts graph on the top of the page. However, when normalization is off, you may encounter a class imbalance problem. Visually, bars that correspond to classes with many instances in the dataset will be much larger than others. This complicates the visual analysis.
 """
 
-markdown_class_outcome_counts_2 =
-    """You can switch the plot view between normalized and absolute values.
+markdown_class_outcome_counts_2 = """You can switch the plot view between normalized and absolute values.
 
 _Bars in the chart are sorted by <abbr title="{}">F1-score</abbr> to keep a unified order of classes between different charts._
 
 """
-    + clickable_label
-)
 
 empty = """### {}
 
 > {}
 """
+
+
+markdown_speedtest_intro = """## Inference Speed
+
+This is a speed test benchmark for this model. The model was tested with the following configuration:
+
+- **Device**: {}
+- **Hardware**: {}
+- **Runtime**: {}
+"""
+
+markdown_speedtest_table = """
+The table below shows the speed test results. For each test, the time taken to process one batch of images is shown, as well as the model's throughput (i.e, the number of images processed per second, or FPS). Results are averaged across **{}** iterations.
+"""
+
+markdown_real_time_inference = """## Real-time Inference
+
+This chart compares different runtimes and devices (CPU or GPU)."""
+
+# We additionally divide **predict** procedure into three stages: pre-process, inference, and post-process. Each bar in this chart consists of these three stages. For example, in the chart you can find how long the post-process phase lasts in a CPU device with an ONNXRuntime environment."""
+
+
+markdown_speedtest_chart = """
+This chart shows how the model's speed changes with different batch sizes . As the batch size increases, you can observe an increase in FPS (images per second).
+"""
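
The updated templates describe Recall and Precision verbally (true positives over all instances, and true positives over all predictions). For reference, a tiny worked example of that arithmetic; the numbers are invented and the snippet is not part of the package.

```python
# Worked example of the Recall/Precision arithmetic described in the templates above
# (illustrative numbers only; not part of supervisely).
tp, fp, fn = 80, 10, 20  # true positives, false positives, false negatives

recall = tp / (tp + fn)      # fraction of ground-truth instances the model found
precision = tp / (tp + fp)   # fraction of predictions that are actually correct
f1 = 2 * precision * recall / (precision + recall)

print(f"recall={recall:.2f} precision={precision:.2f} f1={f1:.2f}")
# recall=0.80 precision=0.89 f1=0.84
```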
supervisely/nn/benchmark/object_detection/vis_metrics/__init__.py
ADDED
@@ -0,0 +1,48 @@
+from supervisely.nn.benchmark.object_detection.vis_metrics.confidence_distribution import (
+    ConfidenceDistribution,
+)
+from supervisely.nn.benchmark.object_detection.vis_metrics.confidence_score import (
+    ConfidenceScore,
+)
+from supervisely.nn.benchmark.object_detection.vis_metrics.confusion_matrix import (
+    ConfusionMatrix,
+)
+from supervisely.nn.benchmark.object_detection.vis_metrics.explore_predictions import (
+    ExplorePredictions,
+)
+from supervisely.nn.benchmark.object_detection.vis_metrics.f1_score_at_different_iou import (
+    F1ScoreAtDifferentIOU,
+)
+from supervisely.nn.benchmark.object_detection.vis_metrics.frequently_confused import (
+    FrequentlyConfused,
+)
+from supervisely.nn.benchmark.object_detection.vis_metrics.iou_distribution import (
+    IOUDistribution,
+)
+from supervisely.nn.benchmark.object_detection.vis_metrics.key_metrics import KeyMetrics
+from supervisely.nn.benchmark.object_detection.vis_metrics.model_predictions import (
+    ModelPredictions,
+)
+from supervisely.nn.benchmark.object_detection.vis_metrics.outcome_counts import (
+    OutcomeCounts,
+)
+from supervisely.nn.benchmark.object_detection.vis_metrics.outcome_counts_per_class import (
+    PerClassOutcomeCounts,
+)
+from supervisely.nn.benchmark.object_detection.vis_metrics.overview import Overview
+from supervisely.nn.benchmark.object_detection.vis_metrics.pr_curve import PRCurve
+from supervisely.nn.benchmark.object_detection.vis_metrics.pr_curve_by_class import (
+    PRCurveByClass,
+)
+from supervisely.nn.benchmark.object_detection.vis_metrics.precision import Precision
+from supervisely.nn.benchmark.object_detection.vis_metrics.precision_avg_per_class import (
+    PerClassAvgPrecision,
+)
+from supervisely.nn.benchmark.object_detection.vis_metrics.recall import Recall
+from supervisely.nn.benchmark.object_detection.vis_metrics.recall_vs_precision import (
+    RecallVsPrecision,
+)
+from supervisely.nn.benchmark.object_detection.vis_metrics.reliability_diagram import (
+    ReliabilityDiagram,
+)
+from supervisely.nn.benchmark.object_detection.vis_metrics.speedtest import Speedtest
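
The new `vis_metrics/__init__.py` flattens the per-metric modules into a single namespace, so downstream code can import the metric classes from the package itself. A short usage sketch; it assumes supervisely >= 6.73.240 is installed.

```python
# Assumes supervisely >= 6.73.240; the re-exports come from the new
# vis_metrics/__init__.py shown above.
from supervisely.nn.benchmark.object_detection.vis_metrics import (
    ConfusionMatrix,
    Overview,
    Speedtest,
)

print(ConfusionMatrix.__module__)
# supervisely.nn.benchmark.object_detection.vis_metrics.confusion_matrix
```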
supervisely/nn/benchmark/{visualization → object_detection}/vis_metrics/confidence_distribution.py
RENAMED
@@ -1,40 +1,37 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
-
 import numpy as np
 
-from supervisely.nn.benchmark.
-from supervisely.nn.benchmark.visualization.
-
-if TYPE_CHECKING:
-    from supervisely.nn.benchmark.visualization.visualizer import Visualizer
+from supervisely.nn.benchmark.object_detection.base_vis_metric import DetectionVisMetric
+from supervisely.nn.benchmark.visualization.widgets import ChartWidget, MarkdownWidget
 
 
-class ConfidenceDistribution(
+class ConfidenceDistribution(DetectionVisMetric):
+    MARKDOWN = "confidence_distribution"
+    CHART = "confidence_distribution"
 
-
-
-
-        self.
-
-
-
-
-                self._loader.vis_texts.definitions.true_positives,
-                self._loader.vis_texts.definitions.false_positives,
-            ],
+    @property
+    def md(self) -> MarkdownWidget:
+        return MarkdownWidget(
+            self.MARKDOWN,
+            "Confidence Distribution",
+            self.vis_texts.markdown_confidence_distribution.format(
+                self.vis_texts.definitions.true_positives,
+                self.vis_texts.definitions.false_positives,
             ),
-            chart=Widget.Chart(),
         )
 
-
+    @property
+    def chart(self) -> ChartWidget:
+        return ChartWidget(self.CHART, self._get_figure())
+
+    def _get_figure(self):  # -> go.Figure:
         import plotly.graph_objects as go  # pylint: disable=import-error
 
-        f1_optimal_conf, best_f1 = self.
+        f1_optimal_conf, best_f1 = self.eval_result.mp.m_full.get_f1_optimal_conf()
 
         # Histogram of confidence scores (TP vs FP)
-        scores_tp, scores_fp = self.
+        scores_tp, scores_fp = self.eval_result.mp.m_full.scores_tp_and_fp()
 
         tp_y, tp_x = np.histogram(scores_tp, bins=40, range=[0, 1])
         fp_y, fp_x = np.histogram(scores_fp, bins=40, range=[0, 1])
@@ -102,7 +99,6 @@ class ConfidenceDistribution(MetricVis):
 
         fig.update_layout(
             barmode="overlay",
-            # title="Histogram of Confidence Scores (TP vs FP)",
             width=800,
             height=500,
         )
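
The refactored metric builds an overlaid TP/FP histogram of confidence scores via `np.histogram(..., bins=40, range=[0, 1])`. Here is a self-contained sketch of that kind of figure on synthetic scores, using only numpy and plotly; the synthetic distributions, colors, and trace names are assumptions for illustration, not the package's exact styling.

```python
import numpy as np
import plotly.graph_objects as go

rng = np.random.default_rng(0)
# Synthetic stand-ins for scores_tp / scores_fp used by the metric above.
scores_tp = rng.beta(5, 2, size=500)  # TP confidences skew high
scores_fp = rng.beta(2, 5, size=300)  # FP confidences skew low

tp_y, tp_x = np.histogram(scores_tp, bins=40, range=[0, 1])
fp_y, fp_x = np.histogram(scores_fp, bins=40, range=[0, 1])
centers = (tp_x[:-1] + tp_x[1:]) / 2  # bin midpoints for the bar positions

fig = go.Figure()
fig.add_trace(go.Bar(x=centers, y=tp_y, name="TP", marker_color="#1fb466", opacity=0.7))
fig.add_trace(go.Bar(x=centers, y=fp_y, name="FP", marker_color="#d9534f", opacity=0.7))
fig.update_layout(barmode="overlay", width=800, height=500,
                  xaxis_title="Confidence score", yaxis_title="Count")
fig.show()
```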
supervisely/nn/benchmark/object_detection/vis_metrics/confidence_score.py
ADDED
@@ -0,0 +1,119 @@
+from __future__ import annotations
+
+from supervisely.nn.benchmark.object_detection.base_vis_metric import DetectionVisMetric
+from supervisely.nn.benchmark.visualization.widgets import (
+    ChartWidget,
+    CollapseWidget,
+    MarkdownWidget,
+    NotificationWidget,
+)
+
+
+class ConfidenceScore(DetectionVisMetric):
+    MARKDOWN_CONFIDENCE_SCORE = "confidence_score"
+    NOTIFICATION = "confidence_score"
+    MARKDOWN_CONFIDENCE_SCORE_2 = "confidence_score_2"
+    MARKDOWN_CONFIDENCE_SCORE_3 = "calibration_score_3"
+    COLLAPSE_TITLE = "confidence_score_collapse"
+    CHART = "confidence_score"
+
+    @property
+    def md_confidence_score(self) -> MarkdownWidget:
+        text = self.vis_texts.markdown_confidence_score_1
+        text = text.format(self.vis_texts.definitions.confidence_threshold)
+        return MarkdownWidget(self.MARKDOWN_CONFIDENCE_SCORE, "Confidence Score Profile", text)
+
+    @property
+    def notification(self) -> NotificationWidget:
+        title, _ = self.vis_texts.notification_f1.values()
+        return NotificationWidget(
+            self.NOTIFICATION,
+            title.format(self.eval_result.mp.f1_optimal_conf.round(4)),
+        )
+
+    @property
+    def chart(self) -> ChartWidget:
+        return ChartWidget(name=self.CHART, figure=self._get_figure())
+
+    @property
+    def md_confidence_score_2(self) -> MarkdownWidget:
+        return MarkdownWidget(
+            self.MARKDOWN_CONFIDENCE_SCORE_2,
+            "",
+            self.vis_texts.markdown_confidence_score_2,
+        )
+
+    @property
+    def collapse_conf_score(self) -> CollapseWidget:
+        md = MarkdownWidget(
+            self.COLLAPSE_TITLE,
+            "How to plot Confidence Profile?",
+            self.vis_texts.markdown_plot_confidence_profile,
+        )
+        return CollapseWidget([md])
+
+    @property
+    def md_confidence_score_3(self) -> MarkdownWidget:
+        return MarkdownWidget(
+            self.MARKDOWN_CONFIDENCE_SCORE_3,
+            "",
+            self.vis_texts.markdown_calibration_score_3,
+        )
+
+    def _get_figure(self):  # -> go.Figure:
+        import plotly.express as px  # pylint: disable=import-error
+
+        color_map = {
+            "Precision": "#1f77b4",
+            "Recall": "orange",
+        }
+
+        fig = px.line(
+            self.eval_result.dfsp_down,
+            x="scores",
+            y=["precision", "recall", "f1"],
+            labels={"value": "Value", "variable": "Metric", "scores": "Confidence Score"},
+            width=None,
+            height=500,
+            color_discrete_map=color_map,
+        )
+        fig.update_traces(
+            hovertemplate="Confidence Score: %{x:.2f}<br>Value: %{y:.2f}<extra></extra>"
+        )
+        fig.update_layout(yaxis=dict(range=[0, 1]), xaxis=dict(range=[0, 1], tick0=0, dtick=0.1))
+
+        if (
+            self.eval_result.mp.f1_optimal_conf is not None
+            and self.eval_result.mp.best_f1 is not None
+        ):
+            # Add vertical line for the best threshold
+            fig.add_shape(
+                type="line",
+                x0=self.eval_result.mp.f1_optimal_conf,
+                x1=self.eval_result.mp.f1_optimal_conf,
+                y0=0,
+                y1=self.eval_result.mp.best_f1,
+                line=dict(color="gray", width=2, dash="dash"),
+            )
+            fig.add_annotation(
+                x=self.eval_result.mp.f1_optimal_conf,
+                y=self.eval_result.mp.best_f1 + 0.04,
+                text=f"F1-optimal threshold: {self.eval_result.mp.f1_optimal_conf:.2f}",
+                showarrow=False,
+            )
+        fig.update_layout(
+            dragmode=False,
+            modebar=dict(
+                remove=[
+                    "zoom2d",
+                    "pan2d",
+                    "select2d",
+                    "lasso2d",
+                    "zoomIn2d",
+                    "zoomOut2d",
+                    "autoScale2d",
+                    "resetScale2d",
+                ]
+            ),
+        )
+        return fig
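
`ConfidenceScore` plots precision, recall, and F1 against the confidence threshold and marks the F1-optimal confidence (`f1_optimal_conf`, `best_f1`). A hedged sketch of how such a threshold can be derived from per-threshold precision/recall arrays follows; the toy curves are invented, and the package's actual computation lives in its metric provider and may differ in detail.

```python
import numpy as np

# Hypothetical per-threshold curves; in the package these come from the metric provider.
scores = np.linspace(0.0, 1.0, 101)            # candidate confidence thresholds
precision = np.clip(0.5 + 0.5 * scores, 0, 1)  # toy: precision rises with threshold
recall = np.clip(1.0 - 0.8 * scores, 0, 1)     # toy: recall falls with threshold

f1 = 2 * precision * recall / np.clip(precision + recall, 1e-9, None)
best_idx = int(np.argmax(f1))

f1_optimal_conf, best_f1 = scores[best_idx], f1[best_idx]
print(f"F1-optimal threshold: {f1_optimal_conf:.2f} (F1={best_f1:.2f})")
```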
supervisely/nn/benchmark/{visualization → object_detection}/vis_metrics/confusion_matrix.py
RENAMED
@@ -1,36 +1,44 @@
 from __future__ import annotations
 
-from typing import
+from typing import Dict
 
 import numpy as np
 import pandas as pd
 
-from supervisely.nn.benchmark.
-from supervisely.nn.benchmark.visualization.
+from supervisely.nn.benchmark.object_detection.base_vis_metric import DetectionVisMetric
+from supervisely.nn.benchmark.visualization.widgets import ChartWidget, MarkdownWidget
 
-if TYPE_CHECKING:
-    from supervisely.nn.benchmark.visualization.visualizer import Visualizer
 
+class ConfusionMatrix(DetectionVisMetric):
+    MARKDOWN = "confusion_matrix"
+    CHART = "confusion_matrix"
 
-
-
-    def __init__(self, loader: Visualizer) -> None:
-        super().__init__(loader)
-
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
         self.clickable = True
-        self.
-
-
-
+        self._keypair_sep: str = "-"
+
+    @property
+    def md(self) -> MarkdownWidget:
+        text = self.vis_texts.markdown_confusion_matrix
+        return MarkdownWidget(self.MARKDOWN, "Confusion Matrix", text)
+
+    @property
+    def chart(self) -> ChartWidget:
+        chart = ChartWidget(self.CHART, self._get_figure())
+        chart.set_click_data(
+            self.explore_modal_table.id,
+            self.get_click_data(),
+            chart_click_extra="'getKey': (payload) => `${payload.points[0].x}${'-'}${payload.points[0].y}`, 'keySeparator': '-',",
        )
+        return chart
 
-    def
+    def _get_figure(self):  # -> go.Figure:
         import plotly.express as px  # pylint: disable=import-error
 
-        confusion_matrix = self.
-        # Confusion Matrix
+        confusion_matrix = self.eval_result.mp.confusion_matrix()
         # TODO: Green-red
-        cat_names = self.
+        cat_names = self.eval_result.mp.cat_names
         none_name = "(None)"
 
         with np.errstate(divide="ignore"):
@@ -53,6 +61,7 @@ class ConfusionMatrix(MetricVis):
         fig.update_traces(
             customdata=confusion_matrix,
             hovertemplate="Objects Count: %{customdata}<br>Predicted: %{y}<br>Ground Truth: %{x}",
+            colorscale="Viridis",
         )
 
         # Text on cells
@@ -62,18 +71,21 @@ class ConfusionMatrix(MetricVis):
         # fig.show()
         return fig
 
-    def get_click_data(self
-        res = dict(projectMeta=self.
+    def get_click_data(self) -> Dict:
+        res = dict(projectMeta=self.eval_result.pred_project_meta.to_json())
         res["layoutTemplate"] = [None, None, None]
         res["clickData"] = {}
 
-        for (
+        for (
+            pred_key,
+            gt_key,
+        ), matches_data in self.eval_result.click_data.confusion_matrix.items():
             key = gt_key + self._keypair_sep + pred_key
             res["clickData"][key] = {}
             res["clickData"][key]["imagesIds"] = []
             gt_title = f"GT: '{gt_key}'" if gt_key != "(None)" else "No GT Objects"
             pred_title = f"Predicted: '{pred_key}'" if pred_key != "(None)" else "No Predictions"
-            res["clickData"][key]["title"] = f"Confusion Matrix. {gt_title}
+            res["clickData"][key]["title"] = f"Confusion Matrix. {gt_title} ― {pred_title}"
 
             img_ids = set()
             obj_ids = set()
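
The chart's click handler builds a key from the clicked cell (`${x}-${y}`, i.e. ground truth then prediction), and the Python side registers click data under `gt_key + "-" + pred_key`. A toy sketch of that key convention; the class names and counts are invented, and the real data comes from `eval_result.click_data`.

```python
# Toy illustration of the click-data keys used by the confusion matrix above.
keypair_sep = "-"
cells = {
    ("cat", "cat"): 42,    # (gt_key, pred_key) -> number of matched objects
    ("cat", "dog"): 3,
    ("dog", "(None)"): 5,  # ground-truth objects with no prediction
}

click_data = {}
for (gt_key, pred_key), count in cells.items():
    key = gt_key + keypair_sep + pred_key  # same convention as `getKey` in the chart
    gt_title = f"GT: '{gt_key}'" if gt_key != "(None)" else "No GT Objects"
    pred_title = f"Predicted: '{pred_key}'" if pred_key != "(None)" else "No Predictions"
    click_data[key] = {"title": f"Confusion Matrix. {gt_title} - {pred_title}", "count": count}

print(list(click_data))  # ['cat-cat', 'cat-dog', 'dog-(None)']
```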
supervisely/nn/benchmark/object_detection/vis_metrics/explore_predictions.py
ADDED
@@ -0,0 +1,129 @@
+from typing import Dict
+
+from supervisely.nn.benchmark.object_detection.base_vis_metric import DetectionVisMetric
+from supervisely.nn.benchmark.visualization.widgets import GalleryWidget, MarkdownWidget
+
+
+class ExplorePredictions(DetectionVisMetric):
+    MARKDOWN = "explore_predictions"
+    GALLERY = "explore_predictions"
+
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self.clickable = True
+
+    @property
+    def md(self) -> MarkdownWidget:
+        text = self.vis_texts.markdown_explorer
+        return MarkdownWidget(self.MARKDOWN, "Explore Predictions", text)
+
+    def gallery(self, opacity) -> GalleryWidget:
+        optimal_conf = self.eval_result.mp.f1_optimal_conf
+        default_filters = [{"confidence": [optimal_conf, 1]}]
+        gallery = GalleryWidget(
+            self.GALLERY, columns_number=3, filters=default_filters, opacity=opacity
+        )
+        gallery.add_image_left_header("Compare with GT")
+        gallery.set_project_meta(self.eval_result.filtered_project_meta)
+
+        gallery.set_images(
+            image_infos=self.eval_result.sample_images,
+            ann_infos=self.eval_result.sample_anns,
+        )
+        gallery._gallery._update_filters()
+
+        # set click data for diff gallery
+        self.explore_modal_table.set_click_data(
+            self.diff_modal_table.id,
+            self.get_diff_data(),
+            get_key="(payload) => `${payload.annotation.imageId}`",
+        )
+
+        # set click data for explore gallery
+        gallery.set_click_data(
+            self.diff_modal_table.id,
+            self.get_diff_data(),
+            get_key="(payload) => `${payload.annotation.image_id}`",
+        )
+
+        gallery.set_show_all_data(
+            self.explore_modal_table.id,
+            self.get_click_data(),
+        )
+        return gallery
+
+    def get_click_data(self) -> dict:
+        res = {}
+
+        res["layoutTemplate"] = [{"skipObjectTagsFiltering": ["outcome"]}] * 3
+        click_data = res.setdefault("clickData", {})
+        explore = click_data.setdefault("explore", {})
+        explore["filters"] = [
+            {
+                "type": "tag",
+                "tagId": "confidence",
+                "value": [self.eval_result.mp.f1_optimal_conf, 1],
+            }
+        ]
+        explore["title"] = "Explore all predictions"
+        images_ids = explore.setdefault("imagesIds", [])
+
+        images_ids.extend(
+            [d.pred_image_info.id for d in self.eval_result.matched_pair_data.values()]
+        )
+
+        return res
+
+    def get_diff_data(self) -> Dict:
+        res = {}
+
+        res["layoutTemplate"] = [
+            {"skipObjectTagsFiltering": True, "columnTitle": "Ground Truth"},
+            {"skipObjectTagsFiltering": ["outcome"], "columnTitle": "Prediction"},
+            {"skipObjectTagsFiltering": ["confidence"], "columnTitle": "Difference"},
+        ]
+
+        click_data = res.setdefault("clickData", {})
+
+        default_filters = [
+            {
+                "type": "tag",
+                "tagId": "confidence",
+                "value": [self.eval_result.mp.f1_optimal_conf, 1],
+            },
+        ]
+        for pairs_data in self.eval_result.matched_pair_data.values():
+            gt = pairs_data.gt_image_info
+            pred = pairs_data.pred_image_info
+            diff = pairs_data.diff_image_info
+            assert gt.name == pred.name == diff.name
+            for img_id in [gt.id, pred.id, diff.id]:
+                key = click_data.setdefault(str(img_id), {})
+                key["imagesIds"] = [gt.id, pred.id, diff.id]
+                key["filters"] = default_filters
+                key["title"] = f"Image: {gt.name}"
+
+                object_bindings = []
+                for img in [pred, diff]:
+                    if img == pred:
+                        ann_json = pairs_data.pred_annotation.to_json()
+                    else:
+                        ann_json = pairs_data.diff_annotation.to_json()
+                    for obj in ann_json["objects"]:
+                        for tag in obj["tags"]:
+                            if tag["name"] == "matched_gt_id":
+                                object_bindings.append(
+                                    [
+                                        {
+                                            "id": obj["id"],
+                                            "annotationKey": img.id,
+                                        },
+                                        {
+                                            "id": int(tag["value"]),
+                                            "annotationKey": gt.id if img == pred else pred.id,
+                                        },
+                                    ]
+                                )
+                key["objectsBindings"] = object_bindings
+
+        return res
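
For every image id in a GT/prediction/difference triple, `get_diff_data` registers the same three-image entry plus `objectsBindings` pairs that link a predicted object to its ground-truth match via the `matched_gt_id` tag. A sketch of the shape of one resulting `clickData` entry; all ids, the file name, and the threshold value are invented for illustration.

```python
# Approximate shape of a single clickData entry produced by get_diff_data above
# (ids and values are invented).
click_data_entry = {
    "imagesIds": [101, 202, 303],  # GT, prediction, and difference image ids
    "filters": [
        {"type": "tag", "tagId": "confidence", "value": [0.45, 1]},  # f1-optimal conf .. 1
    ],
    "title": "Image: IMG_0001.jpg",
    "objectsBindings": [
        [
            {"id": 9001, "annotationKey": 202},  # predicted object on the prediction image
            {"id": 8001, "annotationKey": 101},  # its matched GT object (from matched_gt_id)
        ],
    ],
}
print(click_data_entry["objectsBindings"][0])
```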