supervisely 6.73.254__py3-none-any.whl → 6.73.255__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this version of supervisely has been flagged as a potentially problematic release.
- supervisely/api/file_api.py +16 -5
- supervisely/api/task_api.py +4 -2
- supervisely/app/widgets/field/field.py +10 -7
- supervisely/app/widgets/grid_gallery_v2/grid_gallery_v2.py +3 -1
- supervisely/nn/benchmark/base_benchmark.py +33 -35
- supervisely/nn/benchmark/base_evaluator.py +27 -1
- supervisely/nn/benchmark/base_visualizer.py +8 -11
- supervisely/nn/benchmark/comparison/base_visualizer.py +147 -0
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/__init__.py +1 -1
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/avg_precision_by_class.py +5 -7
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/calibration_score.py +4 -6
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/{explore_predicttions.py → explore_predictions.py} +17 -17
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/localization_accuracy.py +3 -5
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/outcome_counts.py +7 -9
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/overview.py +11 -22
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/pr_curve.py +3 -5
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/precision_recal_f1.py +22 -20
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/speedtest.py +12 -6
- supervisely/nn/benchmark/comparison/detection_visualization/visualizer.py +31 -76
- supervisely/nn/benchmark/comparison/model_comparison.py +112 -19
- supervisely/nn/benchmark/comparison/semantic_segmentation/__init__.py +0 -0
- supervisely/nn/benchmark/comparison/semantic_segmentation/text_templates.py +128 -0
- supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/__init__.py +21 -0
- supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/classwise_error_analysis.py +68 -0
- supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/explore_predictions.py +141 -0
- supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/frequently_confused.py +71 -0
- supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/iou_eou.py +68 -0
- supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/overview.py +223 -0
- supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/renormalized_error_ou.py +57 -0
- supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/speedtest.py +314 -0
- supervisely/nn/benchmark/comparison/semantic_segmentation/visualizer.py +159 -0
- supervisely/nn/benchmark/instance_segmentation/evaluator.py +1 -1
- supervisely/nn/benchmark/object_detection/evaluator.py +1 -1
- supervisely/nn/benchmark/object_detection/vis_metrics/overview.py +1 -3
- supervisely/nn/benchmark/object_detection/vis_metrics/precision.py +3 -0
- supervisely/nn/benchmark/object_detection/vis_metrics/recall.py +3 -0
- supervisely/nn/benchmark/object_detection/vis_metrics/recall_vs_precision.py +1 -1
- supervisely/nn/benchmark/object_detection/visualizer.py +5 -10
- supervisely/nn/benchmark/semantic_segmentation/evaluator.py +12 -2
- supervisely/nn/benchmark/semantic_segmentation/metric_provider.py +8 -9
- supervisely/nn/benchmark/semantic_segmentation/text_templates.py +2 -2
- supervisely/nn/benchmark/semantic_segmentation/vis_metrics/key_metrics.py +31 -1
- supervisely/nn/benchmark/semantic_segmentation/vis_metrics/overview.py +1 -3
- supervisely/nn/benchmark/semantic_segmentation/visualizer.py +7 -6
- supervisely/nn/benchmark/utils/semantic_segmentation/evaluator.py +3 -21
- supervisely/nn/benchmark/visualization/renderer.py +25 -10
- supervisely/nn/benchmark/visualization/widgets/gallery/gallery.py +1 -0
- supervisely/nn/inference/inference.py +1 -0
- supervisely/nn/training/gui/gui.py +32 -10
- supervisely/nn/training/gui/training_artifacts.py +145 -0
- supervisely/nn/training/gui/training_process.py +3 -19
- supervisely/nn/training/train_app.py +179 -70
- {supervisely-6.73.254.dist-info → supervisely-6.73.255.dist-info}/METADATA +1 -1
- {supervisely-6.73.254.dist-info → supervisely-6.73.255.dist-info}/RECORD +58 -46
- supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/vis_metric.py +0 -19
- {supervisely-6.73.254.dist-info → supervisely-6.73.255.dist-info}/LICENSE +0 -0
- {supervisely-6.73.254.dist-info → supervisely-6.73.255.dist-info}/WHEEL +0 -0
- {supervisely-6.73.254.dist-info → supervisely-6.73.255.dist-info}/entry_points.txt +0 -0
- {supervisely-6.73.254.dist-info → supervisely-6.73.255.dist-info}/top_level.txt +0 -0
supervisely/nn/benchmark/comparison/model_comparison.py
@@ -1,16 +1,35 @@
 import random
 from pathlib import Path
-from typing import List, Optional
+from typing import List, Optional, Union
 
 from supervisely.api.api import Api
 from supervisely.app.widgets import SlyTqdm
 from supervisely.imaging.color import get_predefined_colors, rgb2hex
+from supervisely.io import env
+from supervisely.io.fs import dir_empty, mkdir
+from supervisely.io.json import load_json_file
 from supervisely.nn.benchmark.comparison.detection_visualization.visualizer import (
     DetectionComparisonVisualizer,
 )
-from supervisely.nn.benchmark.
+from supervisely.nn.benchmark.comparison.semantic_segmentation.visualizer import (
+    SemanticSegmentationComparisonVisualizer,
+)
+from supervisely.nn.benchmark.cv_tasks import CVTask
+from supervisely.nn.benchmark.object_detection.evaluator import (
+    ObjectDetectionEvalResult,
+)
+from supervisely.nn.benchmark.semantic_segmentation.evaluator import (
+    SemanticSegmentationEvalResult,
+)
+from supervisely.nn.task_type import TaskType
+from supervisely.sly_logger import logger
 from supervisely.task.progress import tqdm_sly
 
+ComparisonVisualizer = Union[
+    DetectionComparisonVisualizer, SemanticSegmentationComparisonVisualizer
+]
+ComparisonEvalResult = Union[ObjectDetectionEvalResult, SemanticSegmentationEvalResult]
+
 
 class ModelComparison:
 
@@ -20,25 +39,36 @@ class ModelComparison:
         remote_eval_dirs: List[str],
         progress: Optional[SlyTqdm] = None,
         workdir: Optional[str] = "./benchmark/comparison",
+        cv_task: Optional[TaskType] = None,
+        team_id: Optional[int] = None,
     ):
         self.api = api
         self.progress = progress or tqdm_sly
         self.workdir = workdir
         self.remote_eval_dirs = remote_eval_dirs
-        self.
+        self.eval_results: List[ComparisonEvalResult] = []
+        self.task_type = cv_task
+        self.team_id = team_id or env.team_id()
+
+        eval_cls = SemanticSegmentationEvalResult
+        eval_cls = ObjectDetectionEvalResult
 
         colors = get_predefined_colors(len(remote_eval_dirs) * 5) # for better visualizations
         random.shuffle(colors)
         for i, eval_dir in enumerate(remote_eval_dirs):
-            local_path =
-
-
+            local_path = Path(self.workdir) / "eval_data" / Path(eval_dir).name
+            self._load_eval_data(eval_dir, str(local_path))
+
+            eval_cls = self._get_eval_cls(str(local_path))
+            eval_result = eval_cls(local_path / "evaluation")
+            eval_result.report_path = Path(eval_dir, "visualizations", "template.vue").as_posix()
             eval_result.color = rgb2hex(colors[i])
 
-
+            self.eval_results.append(eval_result)
+
         self._validate_eval_data()
 
-        self.visualizer:
+        self.visualizer: ComparisonVisualizer = None
         self.remote_dir = None
 
     def _validate_eval_data(self):
@@ -49,28 +79,41 @@
         task_type = None
         img_names = None
         cat_names = None
-        for eval_result in self.
+        for eval_result in self.eval_results:
             next_task_type = eval_result.cv_task
             if not task_type is None:
                 assert task_type == next_task_type, "Task types are different in the evaluations."
             task_type = next_task_type
-
-
-
+            if task_type == TaskType.SEMANTIC_SEGMENTATION:
+                next_img_names = set(eval_result.mp.per_image_metrics.index)
+            else:
+                next_img_names = set(
+                    [img.get("file_name") for img in eval_result.coco_gt.imgs.values()]
+                )
             if not img_names is None:
                 assert img_names == next_img_names, "Images are different in the evaluations."
             img_names = next_img_names
-
+            if task_type == TaskType.SEMANTIC_SEGMENTATION:
+                next_cat_names = set(eval_result.mp.class_names)
+            else:
+                next_cat_names = set(eval_result.mp.cat_names)
             if not cat_names is None:
                 assert cat_names == next_cat_names, "Categories are different in the evaluations."
             cat_names = next_cat_names
 
-    def get_metrics(self):
-        pass
-
     def visualize(self):
+        task_type = self.eval_results[0].cv_task
+        if task_type in [
+            TaskType.OBJECT_DETECTION,
+            TaskType.INSTANCE_SEGMENTATION,
+        ]:
+            vis_cls = DetectionComparisonVisualizer
+        elif task_type == TaskType.SEMANTIC_SEGMENTATION:
+            vis_cls = SemanticSegmentationComparisonVisualizer
+        else:
+            raise ValueError(f"Unsupported task type: {task_type}")
         if self.visualizer is None:
-            self.visualizer =
+            self.visualizer = vis_cls(self)
         self.visualizer.visualize()
 
     def upload_results(self, team_id: int, remote_dir: str, progress=None) -> str:
@@ -80,5 +123,55 @@
     def get_report_link(self) -> str:
         if self.remote_dir is None:
             raise ValueError("Results are not uploaded yet.")
-
-
+        return self.visualizer.renderer._get_report_link(self.api, self.team_id, self.remote_dir)
+
+    @property
+    def report(self):
+        return self.visualizer.renderer.report
+
+    @property
+    def lnk(self):
+        return self.visualizer.renderer.lnk
+
+    def _load_eval_data(self, src_path: str, dst_path: str) -> None:
+        dir_name = Path(src_path).name
+        if not dir_empty(dst_path):
+            logger.info(f"Directory {dst_path} is not empty. Skipping download.")
+            return
+        if not self.api.storage.dir_exists(self.team_id, src_path):
+            raise ValueError(f"Directory {src_path} not found in storage.")
+        mkdir(dst_path)
+        with self.progress(
+            message=f"Downloading evaluation data from {dir_name}",
+            total=self.api.storage.get_directory_size(self.team_id, src_path),
+            unit="B",
+            unit_scale=True,
+            unit_divisor=1024,
+        ) as pbar:
+            self.api.storage.download_directory(
+                self.team_id, src_path, dst_path, progress_cb=pbar.update
+            )
+
+    def _get_cv_task(self, eval_dir: str) -> CVTask:
+        try:
+            eval_data = load_json_file(Path(eval_dir, "evaluation", "inference_info.json"))
+            task_type = eval_data.get("task_type")
+            return CVTask(task_type.replace(" ", "_").lower())
+        except Exception as e:
+            raise ValueError(
+                f"Could not get CV task from `inference_info.json`, try to set it manually. {e}"
+            )
+
+    def _get_eval_cls(self, eval_dir: str) -> ComparisonEvalResult:
+        if self.task_type is None:
+            self.task_type = self._get_cv_task(eval_dir)
+        if self.task_type in [
+            CVTask.OBJECT_DETECTION,
+            CVTask.INSTANCE_SEGMENTATION,
+        ]:
+            eval_cls = ObjectDetectionEvalResult
+        elif self.task_type == CVTask.SEMANTIC_SEGMENTATION:
+            eval_cls = SemanticSegmentationEvalResult
+        else:
+            raise ValueError(f"Unsupported task type: {self.task_type}")
+        return eval_cls
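For orientation, a minimal usage sketch of the updated comparison entry point. It assumes the constructor takes the `Api` instance as its first argument (only the tail of the signature, from `remote_eval_dirs` onward, is visible in the hunk above); the evaluation directories, team id, and report destination are placeholders, not values from the package.

```python
# Hypothetical usage sketch of ModelComparison with the new cv_task / team_id parameters.
import supervisely as sly
from supervisely.nn.benchmark.comparison.model_comparison import ModelComparison
from supervisely.nn.task_type import TaskType

api = sly.Api.from_env()

comparison = ModelComparison(
    api,
    remote_eval_dirs=[
        "/model-benchmark/eval_1",  # placeholder remote directories in Team Files
        "/model-benchmark/eval_2",
    ],
    cv_task=TaskType.OBJECT_DETECTION,  # optional; otherwise inferred from inference_info.json
    team_id=42,                         # optional; falls back to env.team_id()
)
comparison.visualize()
comparison.upload_results(team_id=42, remote_dir="/model-comparison/report")  # placeholder path
print(comparison.get_report_link())
```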
supervisely/nn/benchmark/comparison/semantic_segmentation/__init__.py
File without changes
supervisely/nn/benchmark/comparison/semantic_segmentation/text_templates.py
ADDED
@@ -0,0 +1,128 @@
+docs_url = (
+    "https://docs.supervisely.com/neural-networks/model-evaluation-benchmark/semantic-segmentation"
+)
+
+# <i class="zmdi zmdi-check-circle" style="color: #13ce66; margin-right: 5px"></i>
+clickable_label = """
+> <span style="color: #5a6772">
+> Click on the chart to explore corresponding images.
+> </span>
+"""
+
+markdown_header = """
+<h1>{}</h1>
+
+<div class="model-info-block">
+    <div>Created by <b>{}</b></div>
+    <div><i class="zmdi zmdi-calendar-alt"></i><span>{}</span></div>
+</div>
+"""
+
+markdown_common_overview = """
+- **Models**: {}
+- **Evaluation Dataset**: <a href="/projects/{}/datasets" target="_blank">{}</a>
+- **Task type**: {}
+"""
+
+markdown_overview_info = """
+<h3>{}</h3>
+- **Model**: {}
+- **Checkpoint**: {}
+- **Architecture**: {}
+- **Runtime**: {}
+- **Checkpoint file**: <a class="checkpoint-url" href="{}" target="_blank">{}</a>
+- **Evaluation Report**: <a href="{}" target="_blank">View Report</a>
+
+"""
+
+markdown_key_metrics = """## Key Metrics
+
+We provide a comprehensive analysis of models' performance using a set of metrics, including both basic (precision, recall, F1-score, IoU, etc.) and advanced (boundary IoU, error over union decomposition, etc.) metrics.
+
+- **Pixel accuracy**: reflects the percent of image pixels which were correctly classified.
+- **Precision**: reflects the number of correctly predicted positive segmentations divided by the total number of predicted positive segmentations.
+- **Recall**: reflects the number of correctly predicted positive segmentations divided by the number of all samples that should have been segmented as positive.
+- **F1-score**: reflects the tradeoff between precision and recall. It is equivalent to the Dice coefficient and calculated as a harmonic mean of precision and recall.
+- **Intersection over union (IoU, also known as the Jaccard index)**: measures the overlap between ground truth mask and predicted mask. It is calculated as the ratio of the intersection of the two masks areas to their combined areas.
+- **Boundary intersection over union**: a segmentation consistency measure that first computes the sets of ground truth and predicted masks pixels that are located within the distance d from each contour and then computes intersection over union of these two sets. Pixel distance parameter d (pixel width of the boundary region) controls the sensitivity of the metric, it is usually set as 2% of the image diagonal for normal resolution images and 0.5% of the image diagonal for high resolution images.
+- **Error over union and its components (boundary, extent, segment)**: a metric opposite to intersection over union and can be interpreted as what the model lacked in order to show the perfect performance with IoU = 1. It reflects the ratio of incorrectly segmented pixels of ground truth and predicted masks to their combined areas. It is usually decomposed into boundary, extent and segment errors over union in order to get exhaustive information about the model's strengths and weaknesses.
+- **Renormalized error over union**: postprocessed variant of error over union which takes into consideration cause and effect relationships between different types of segmentation errors.
+"""
+
+markdown_explorer = """## Explore Predictions
+This section contains visual comparison of predictions made by different models and ground truth annotations. Sometimes a simple visualization can be more informative than any performance metric.
+
+> Click on the image to view the **Original Image** with **Ground Truth** and **Predictions** annotations side-by-side.
+"""
+
+markdown_explore_difference = """## Explore Predictions
+
+In this section, you can explore predictions made by different models side-by-side. This helps you to understand the differences in predictions made by each model, and to identify which model performs better in different scenarios.
+
+
+> Click on the image to view the **Ground Truth**, and **Prediction** annotations side-by-side.
+"""
+
+
+### Difference in Predictions
+
+# markdown_explore_same_errors = """
+# ### Same Errors
+
+# This section helps you to identify samples where all models made the same errors. It is useful for understanding the limitations of the models and the common challenges they face.
+
+# > Click on the image to view the **Ground Truth**, and **Prediction** annotations side-by-side.
+# """
+
+
+# """
+markdown_iou = """## Intersection & Error Over Union
+
+Pie charts below demonstrate performance metrics of each model in terms of Intersection over Union (IoU) and Error over Union (EoU). It is done with the help of Error over Union (EoU) decomposition into boundary, extent, and segment errors over union. These charts help to draw conclusions on the model's strongest and weakest sides.
+"""
+
+markdown_renormalized_error_ou = """## Renormalized Error over Union
+
+Charts below are dedicated to the decomposition of the post-processed variant of Error over Union, which takes into consideration cause and effect relationships between different types of segmentation errors. Error over Union decomposition has its own pitfalls. It is important to understand that models which tend to produce segment errors (when entire segments are mispredicted and there is no intersection between ground truth and predicted mask) will face fewer occasions to produce boundary and extent errors - as a result, boundary and extent error over union values will be underestimated.
+
+In terms of localization, segment error is more fundamental than extent, while extent error is more fundamental than boundary. In order to overcome this problem, renormalized error over union proposes a slightly different calculation method - by removing more fundamental errors from the denominator - read more in our <a href="{}" target="_blank">technical report</a>
+""".format(
+    docs_url
+)
+
+markdown_eou_per_class = """## Classwise Segmentation Error Analysis
+
+This section contains information about classwise segmentation error decomposition. For each model, each column of the chart represents a certain class from the training dataset, demonstrating model performance in terms of segmenting this specific class on images and what model lacked in order to show the perfect performance.
+"""
+
+markdown_frequently_confused_empty = """### Frequently Confused Classes
+
+No frequently confused class pairs found
+"""
+
+markdown_frequently_confused = """## Frequently Confused Classes
+
+The bar chart below reveals pairs of classes which were most frequently confused for each model. Each column of the chart demonstrates the probability of confusion of a given pair of classes. It is necessary to remember that this probability is not symmetric: the probability of confusing class A with class B is not equal to the probability of confusing class B with class A.
+"""
+
+empty = """### {}
+
+> {}
+"""
+
+markdown_speedtest_intro = """## Inference Speed
+
+This is a speed test benchmark for compared models. Models were tested with the following configurations:
+"""
+
+markdown_speedtest_overview_ms = """### Latency (Inference Time)
+The table below shows the speed test results. For each test, the time taken to process one batch of images is shown. Results are averaged across **{}** iterations.
+"""
+
+markdown_speedtest_overview_fps = """### Frames per Second (FPS)
+The table below shows the speed test results. For each test, the number of frames processed per second is shown. Results are averaged across **{}** iterations.
+"""
+
+markdown_batch_inference = """
+This chart shows how the model's speed changes with different batch sizes . As the batch size increases, you can observe an increase in FPS (images per second).
+"""
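These templates are ordinary `str.format()` strings; the module itself formats `markdown_renormalized_error_ou` with `docs_url`. A small illustrative sketch of filling the remaining placeholders follows; the argument order is inferred from the placeholders, and the values are invented for the example.

```python
# Illustrative only: fill the format placeholders of two of the templates above.
from supervisely.nn.benchmark.comparison.semantic_segmentation import text_templates

# one placeholder: the number of averaged speedtest iterations (value is made up)
print(text_templates.markdown_speedtest_overview_ms.format(100))

# four placeholders: models summary, project id, dataset name, task type (values are made up)
print(
    text_templates.markdown_common_overview.format(
        "2 models", 123, "COCO-128 val", "semantic segmentation"
    )
)
```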
supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/__init__.py
ADDED
@@ -0,0 +1,21 @@
+from supervisely.nn.benchmark.comparison.semantic_segmentation.vis_metrics.classwise_error_analysis import (
+    ClasswiseErrorAnalysis,
+)
+from supervisely.nn.benchmark.comparison.semantic_segmentation.vis_metrics.explore_predictions import (
+    ExplorePredictions,
+)
+from supervisely.nn.benchmark.comparison.semantic_segmentation.vis_metrics.frequently_confused import (
+    FrequentlyConfused,
+)
+from supervisely.nn.benchmark.comparison.semantic_segmentation.vis_metrics.iou_eou import (
+    IntersectionErrorOverUnion,
+)
+from supervisely.nn.benchmark.comparison.semantic_segmentation.vis_metrics.overview import (
+    Overview,
+)
+from supervisely.nn.benchmark.comparison.semantic_segmentation.vis_metrics.renormalized_error_ou import (
+    RenormalizedErrorOverUnion,
+)
+from supervisely.nn.benchmark.comparison.semantic_segmentation.vis_metrics.speedtest import (
+    Speedtest,
+)
supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/classwise_error_analysis.py
ADDED
@@ -0,0 +1,68 @@
+from supervisely.nn.benchmark.base_visualizer import BaseVisMetrics
+from supervisely.nn.benchmark.visualization.widgets import ChartWidget, MarkdownWidget
+
+
+class ClasswiseErrorAnalysis(BaseVisMetrics):
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self.clickable = True
+
+    @property
+    def md(self) -> MarkdownWidget:
+        return MarkdownWidget(
+            "classwise_error_analysis",
+            "Classwise Segmentation Error Analysis",
+            text=self.vis_texts.markdown_eou_per_class,
+        )
+
+    @property
+    def chart(self) -> ChartWidget:
+        return ChartWidget("classwise_error_analysis", self.get_figure())
+
+    def get_figure(self):
+        import numpy as np
+        import plotly.graph_objects as go # pylint: disable=import-error
+
+        fig = go.Figure()
+
+        # Color palette for the metrics
+        color_palette = ["#8ACAA1", "#FFE4B5", "#F7ADAA", "#dd3f3f"]
+        metrics = ["IoU", "E_extent_oU", "E_boundary_oU", "E_segment_oU"]
+
+        group_width = 0.7
+
+        for model_idx, eval_result in enumerate(self.eval_results):
+            bar_data, labels = eval_result.mp.classwise_segm_error_data
+            model_name = eval_result.name
+
+            for metric_idx, metric_name in enumerate(metrics):
+                # hover_customdata = [f"metric: {metric_name} for class '{l}' ({model_name})" for l in labels]
+                hover_customdata = [
+                    f"class: {l}<br>model: {model_name}<br>{metric_name}" for l in labels
+                ]
+                fig.add_trace(
+                    go.Bar(
+                        name=metric_name,
+                        x=np.arange(len(labels)) + model_idx * group_width * 0.3,
+                        y=bar_data[metric_name],
+                        customdata=hover_customdata,
+                        hovertemplate="%{customdata}: %{y:.2f}<extra></extra>",
+                        marker=dict(color=color_palette[metric_idx]),
+                        width=group_width / len(metrics),
+                        offsetgroup=model_idx,
+                        base=bar_data[metrics[:metric_idx]].sum(axis=1) if metric_idx > 0 else None,
+                    )
+                )
+
+        fig.update_layout(
+            showlegend=False,
+            barmode="stack",
+            xaxis=dict(
+                title="Classes",
+                tickvals=np.arange(len(labels)) + (len(self.eval_results) - 1 - group_width) / 4,
+                ticktext=labels,
+            ),
+            width=800 if len(labels) < 10 else 1000,
+        )
+
+        return fig
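A standalone sketch of the Plotly layout trick used in `get_figure` above: each model gets its own `offsetgroup`, and the per-metric bars within a group are stacked by passing the running sum as `base`. Class names and metric values below are invented for illustration.

```python
# Toy, self-contained version of the grouped-stacked-bar chart (values are made up).
import numpy as np
import plotly.graph_objects as go

labels = ["road", "car", "person"]
per_metric = {
    "IoU": np.array([0.80, 0.60, 0.50]),
    "E_extent_oU": np.array([0.10, 0.20, 0.20]),
    "E_boundary_oU": np.array([0.05, 0.10, 0.20]),
    "E_segment_oU": np.array([0.05, 0.10, 0.10]),
}
group_width = 0.7
n_models = 2

fig = go.Figure()
for model_idx in range(n_models):
    base = np.zeros(len(labels))
    for metric_name, values in per_metric.items():
        fig.add_trace(
            go.Bar(
                name=f"model {model_idx + 1}: {metric_name}",
                x=np.arange(len(labels)) + model_idx * group_width * 0.3,  # shift this model's group
                y=values,
                base=base.copy(),               # stack this metric on top of the previous ones
                width=group_width / len(per_metric),
                offsetgroup=model_idx,          # keep the two models side by side
            )
        )
        base += values
fig.update_layout(barmode="stack", xaxis=dict(tickvals=np.arange(len(labels)), ticktext=labels))
fig.show()
```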
supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/explore_predictions.py
ADDED
@@ -0,0 +1,141 @@
+from typing import List, Tuple
+
+from supervisely.annotation.annotation import Annotation
+from supervisely.api.image_api import ImageInfo
+from supervisely.api.module_api import ApiField
+from supervisely.nn.benchmark.base_visualizer import BaseVisMetrics
+from supervisely.nn.benchmark.visualization.widgets import GalleryWidget, MarkdownWidget
+from supervisely.project.project_meta import ProjectMeta
+
+
+class ExplorePredictions(BaseVisMetrics):
+
+    MARKDOWN = "markdown_explorer"
+    GALLERY_DIFFERENCE = "explore_difference_gallery"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.meta = None
+
+    def _merged_meta(self) -> ProjectMeta:
+        if self.meta is not None:
+            return self.meta
+        self.meta = self.eval_results[0].gt_project_meta
+        for eval_res in self.eval_results:
+            self.meta = self.meta.merge(eval_res.pred_project_meta)
+        return self.meta
+
+    @property
+    def difference_predictions_md(self) -> MarkdownWidget:
+        text = self.vis_texts.markdown_explorer
+        return MarkdownWidget(self.MARKDOWN, "Explore Predictions", text)
+
+    @property
+    def explore_gallery(self) -> GalleryWidget:
+        columns_number = len(self.eval_results) + 1
+        images, annotations = self._get_sample_data()
+        gallery = GalleryWidget(self.GALLERY_DIFFERENCE, columns_number=columns_number)
+        gallery.add_image_left_header("Click to explore more")
+        gallery.set_project_meta(self._merged_meta())
+        gallery.set_images(images, annotations)
+        click_data = self.get_click_data_explore_all()
+        gallery.set_click_data(self.explore_modal_table.id, click_data)
+        gallery.set_show_all_data(self.explore_modal_table.id, click_data)
+        gallery._gallery._update_filters()
+
+        return gallery
+
+    def _get_sample_data(self) -> Tuple[List[ImageInfo], List[Annotation], List[ProjectMeta]]:
+        images = []
+        annotations = []
+        api = self.eval_results[0].api
+        names = None
+        ds_name = None
+        for idx, eval_res in enumerate(self.eval_results):
+            if idx == 0:
+                dataset_info = eval_res.gt_dataset_infos[0]
+                infos = api.image.get_list(dataset_info.id, limit=5, force_metadata_for_links=False)
+                ds_name = dataset_info.name
+                images_ids = [image_info.id for image_info in infos]
+                names = [image_info.name for image_info in infos]
+                images.append(infos)
+                from supervisely.api.api import Api
+
+                api: Api
+                anns = api.annotation.download_batch(
+                    dataset_info.id, images_ids, force_metadata_for_links=False
+                )
+                annotations.append(anns)
+            assert ds_name is not None, "Failed to get GT dataset name for gallery"
+
+            dataset_info = api.dataset.get_info_by_name(eval_res.pred_project_id, ds_name)
+
+            assert names is not None, "Failed to get GT image names for gallery"
+            infos = api.image.get_list(
+                dataset_info.id,
+                filters=[
+                    {ApiField.FIELD: ApiField.NAME, ApiField.OPERATOR: "in", ApiField.VALUE: names}
+                ],
+                force_metadata_for_links=False,
+            )
+            images_ids = [image_info.id for image_info in infos]
+            images.append(infos)
+            anns = api.annotation.download_batch(
+                dataset_info.id, images_ids, force_metadata_for_links=False
+            )
+            annotations.append(anns)
+
+        images = list(i for x in zip(*images) for i in x)
+        annotations = list(i for x in zip(*annotations) for i in x)
+        return images, annotations
+
+    def get_click_data_explore_all(self) -> dict:
+        res = {}
+
+        res["projectMeta"] = self._merged_meta().to_json()
+        res["layoutTemplate"] = [{"columnTitle": "Ground Truth"}]
+        for idx, eval_res in enumerate(self.eval_results, 1):
+            res["layoutTemplate"].append({"columnTitle": f"[{idx}] {eval_res.short_name}"})
+
+        click_data = res.setdefault("clickData", {})
+        explore = click_data.setdefault("explore", {})
+        explore["title"] = "Explore all predictions"
+
+        image_names = set()
+        for eval_res in self.eval_results:
+            eval_res.mp.per_image_metrics["img_names"].apply(image_names.add)
+
+        filters = [{"field": "name", "operator": "in", "value": list(image_names)}]
+
+        images_ids = []
+        api = self.eval_results[0].api
+        names = None
+        ds_names = None
+        for idx, eval_res in enumerate(self.eval_results):
+            if idx == 0:
+                dataset_infos = eval_res.gt_dataset_infos
+                ds_names = [ds.name for ds in dataset_infos]
+                current_images_ids = []
+                current_images_names = []
+                for ds in dataset_infos:
+                    image_infos = api.image.get_list(ds.id, filters, force_metadata_for_links=False)
+                    image_infos = sorted(image_infos, key=lambda x: x.name)
+                    current_images_names.extend([image_info.name for image_info in image_infos])
+                    current_images_ids.extend([image_info.id for image_info in image_infos])
+                images_ids.append(current_images_ids)
+                names = current_images_names
+
+            dataset_infos = api.dataset.get_list(eval_res.pred_project_id)
+            dataset_infos = [ds for ds in dataset_infos if ds.name in ds_names]
+            dataset_infos = sorted(dataset_infos, key=lambda x: ds_names.index(x.name))
+            current_images_infos = []
+            for ds in dataset_infos:
+                image_infos = api.image.get_list(ds.id, filters, force_metadata_for_links=False)
+                image_infos = [image_info for image_info in image_infos if image_info.name in names]
+                current_images_infos.extend(image_infos)
+            current_images_infos = sorted(current_images_infos, key=lambda x: names.index(x.name))
+            images_ids.append([image_info.id for image_info in current_images_infos])
+
+        explore["imagesIds"] = list(i for x in zip(*images_ids) for i in x)
+
+        return res
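The `zip(*lists)` expression at the end of `_get_sample_data` and `get_click_data_explore_all` interleaves the per-source lists so gallery items come out as GT, model 1, model 2, GT, model 1, model 2, and so on. A tiny illustration with made-up ids:

```python
# Toy illustration of the interleaving pattern used above (ids are invented).
gt_ids = [1, 2, 3]
model1_ids = [11, 12, 13]
model2_ids = [21, 22, 23]

interleaved = list(i for x in zip(gt_ids, model1_ids, model2_ids) for i in x)
print(interleaved)  # [1, 11, 21, 2, 12, 22, 3, 13, 23]
```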
supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/frequently_confused.py
ADDED
@@ -0,0 +1,71 @@
+from typing import List
+
+from supervisely.nn.benchmark.base_visualizer import BaseVisMetrics
+from supervisely.nn.benchmark.semantic_segmentation.evaluator import (
+    SemanticSegmentationEvalResult,
+)
+from supervisely.nn.benchmark.visualization.widgets import ChartWidget, MarkdownWidget
+
+
+class FrequentlyConfused(BaseVisMetrics):
+
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self.eval_results: List[SemanticSegmentationEvalResult]
+        self.clickable = True
+        self._keypair_sep = "-"
+
+    @property
+    def md(self) -> MarkdownWidget:
+        if self.is_empty:
+            text = self.vis_texts.markdown_frequently_confused_empty
+        else:
+            text = self.vis_texts.markdown_frequently_confused
+        return MarkdownWidget("frequently_confused", "Frequently Confused Classes", text=text)
+
+    @property
+    def chart(self) -> ChartWidget:
+        return ChartWidget("frequently_confused", self.get_figure())
+
+    @property
+    def is_empty(self) -> bool:
+        return all(len(e.mp.frequently_confused[0]) == 0 for e in self.eval_results)
+
+    def get_figure(self):
+        import numpy as np
+        import plotly.graph_objects as go # pylint: disable=import-error
+
+        fig = go.Figure()
+
+        classes = self.eval_results[0].classes_whitelist
+
+        model_cnt = len(self.eval_results)
+        all_models_cmat = np.zeros((model_cnt, len(classes), len(classes)))
+        for model_idx, eval_result in enumerate(self.eval_results):
+            cmat, _ = eval_result.mp.confusion_matrix
+            all_models_cmat[model_idx] = cmat[::-1].copy()
+
+        sum_cmat = all_models_cmat.sum(axis=0)
+        np.fill_diagonal(sum_cmat, 0)
+        sum_cmat_flat = sum_cmat.flatten()
+        sorted_indices = np.argsort(sum_cmat_flat)[::-1]
+        n_pairs = min(10, len(classes) * (len(classes) - 1))
+        sorted_indices = sorted_indices[:n_pairs]
+        rows = sorted_indices // len(classes)
+        cols = sorted_indices % len(classes)
+        labels = [f"{classes[rows[i]]}-{classes[cols[i]]}" for i in range(n_pairs)]
+        for model_idx, eval_result in enumerate(self.eval_results):
+            cmat = all_models_cmat[model_idx]
+            probs = cmat[rows, cols]
+            probs = probs * 100
+            fig.add_trace(
+                go.Bar(
+                    name=eval_result.name,
+                    x=labels,
+                    y=probs,
+                    hovertemplate="%{x}: %{y:.2f}%<extra></extra>",
+                    marker=dict(color=eval_result.color, line=dict(width=0.7)),
+                )
+            )
+
+        return fig
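The pair selection above boils down to zeroing the diagonal of the models' summed confusion matrix, flattening it, and keeping the largest remaining entries. A simplified, self-contained illustration with an invented matrix (the real code additionally reverses the matrix rows and plots one bar per model for each pair):

```python
# Toy illustration of picking the most frequently confused class pairs (matrix values are made up).
import numpy as np

classes = ["road", "car", "person"]
sum_cmat = np.array(
    [
        [0.90, 0.05, 0.05],
        [0.20, 0.70, 0.10],
        [0.02, 0.30, 0.68],
    ]
)

np.fill_diagonal(sum_cmat, 0)                 # ignore correct predictions
flat = sum_cmat.flatten()
order = np.argsort(flat)[::-1][:3]            # indices of the top-3 off-diagonal entries
rows, cols = order // len(classes), order % len(classes)
for r, c in zip(rows, cols):
    print(f"{classes[r]} confused with {classes[c]}: {sum_cmat[r, c] * 100:.1f}%")
```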