supervisely-6.73.254-py3-none-any.whl → supervisely-6.73.256-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of supervisely might be problematic.

Files changed (61)
  1. supervisely/api/api.py +16 -8
  2. supervisely/api/file_api.py +16 -5
  3. supervisely/api/task_api.py +4 -2
  4. supervisely/app/widgets/field/field.py +10 -7
  5. supervisely/app/widgets/grid_gallery_v2/grid_gallery_v2.py +3 -1
  6. supervisely/io/network_exceptions.py +14 -2
  7. supervisely/nn/benchmark/base_benchmark.py +33 -35
  8. supervisely/nn/benchmark/base_evaluator.py +27 -1
  9. supervisely/nn/benchmark/base_visualizer.py +8 -11
  10. supervisely/nn/benchmark/comparison/base_visualizer.py +147 -0
  11. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/__init__.py +1 -1
  12. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/avg_precision_by_class.py +5 -7
  13. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/calibration_score.py +4 -6
  14. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/{explore_predicttions.py → explore_predictions.py} +17 -17
  15. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/localization_accuracy.py +3 -5
  16. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/outcome_counts.py +7 -9
  17. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/overview.py +11 -22
  18. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/pr_curve.py +3 -5
  19. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/precision_recal_f1.py +22 -20
  20. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/speedtest.py +12 -6
  21. supervisely/nn/benchmark/comparison/detection_visualization/visualizer.py +31 -76
  22. supervisely/nn/benchmark/comparison/model_comparison.py +112 -19
  23. supervisely/nn/benchmark/comparison/semantic_segmentation/__init__.py +0 -0
  24. supervisely/nn/benchmark/comparison/semantic_segmentation/text_templates.py +128 -0
  25. supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/__init__.py +21 -0
  26. supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/classwise_error_analysis.py +68 -0
  27. supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/explore_predictions.py +141 -0
  28. supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/frequently_confused.py +71 -0
  29. supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/iou_eou.py +68 -0
  30. supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/overview.py +223 -0
  31. supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/renormalized_error_ou.py +57 -0
  32. supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/speedtest.py +314 -0
  33. supervisely/nn/benchmark/comparison/semantic_segmentation/visualizer.py +159 -0
  34. supervisely/nn/benchmark/instance_segmentation/evaluator.py +1 -1
  35. supervisely/nn/benchmark/object_detection/evaluator.py +1 -1
  36. supervisely/nn/benchmark/object_detection/vis_metrics/overview.py +1 -3
  37. supervisely/nn/benchmark/object_detection/vis_metrics/precision.py +3 -0
  38. supervisely/nn/benchmark/object_detection/vis_metrics/recall.py +3 -0
  39. supervisely/nn/benchmark/object_detection/vis_metrics/recall_vs_precision.py +1 -1
  40. supervisely/nn/benchmark/object_detection/visualizer.py +5 -10
  41. supervisely/nn/benchmark/semantic_segmentation/evaluator.py +12 -2
  42. supervisely/nn/benchmark/semantic_segmentation/metric_provider.py +8 -9
  43. supervisely/nn/benchmark/semantic_segmentation/text_templates.py +2 -2
  44. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/key_metrics.py +31 -1
  45. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/overview.py +1 -3
  46. supervisely/nn/benchmark/semantic_segmentation/visualizer.py +7 -6
  47. supervisely/nn/benchmark/utils/semantic_segmentation/evaluator.py +3 -21
  48. supervisely/nn/benchmark/visualization/renderer.py +25 -10
  49. supervisely/nn/benchmark/visualization/widgets/gallery/gallery.py +1 -0
  50. supervisely/nn/inference/inference.py +1 -0
  51. supervisely/nn/training/gui/gui.py +32 -10
  52. supervisely/nn/training/gui/training_artifacts.py +145 -0
  53. supervisely/nn/training/gui/training_process.py +3 -19
  54. supervisely/nn/training/train_app.py +179 -70
  55. {supervisely-6.73.254.dist-info → supervisely-6.73.256.dist-info}/METADATA +1 -1
  56. {supervisely-6.73.254.dist-info → supervisely-6.73.256.dist-info}/RECORD +60 -48
  57. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/vis_metric.py +0 -19
  58. {supervisely-6.73.254.dist-info → supervisely-6.73.256.dist-info}/LICENSE +0 -0
  59. {supervisely-6.73.254.dist-info → supervisely-6.73.256.dist-info}/WHEEL +0 -0
  60. {supervisely-6.73.254.dist-info → supervisely-6.73.256.dist-info}/entry_points.txt +0 -0
  61. {supervisely-6.73.254.dist-info → supervisely-6.73.256.dist-info}/top_level.txt +0 -0
supervisely/nn/benchmark/comparison/detection_visualization/visualizer.py
@@ -1,7 +1,7 @@
-import datetime
-from pathlib import Path
+from typing import List
 
-import supervisely.nn.benchmark.comparison.detection_visualization.text_templates as vis_texts
+import supervisely.nn.benchmark.comparison.detection_visualization.text_templates as texts
+from supervisely.nn.benchmark.comparison.base_visualizer import BaseComparisonVisualizer
 from supervisely.nn.benchmark.comparison.detection_visualization.vis_metrics import (
     AveragePrecisionByClass,
     CalibrationScore,
@@ -13,7 +13,9 @@ from supervisely.nn.benchmark.comparison.detection_visualization.vis_metrics import (
     PrecisionRecallF1,
     Speedtest,
 )
-from supervisely.nn.benchmark.visualization.renderer import Renderer
+from supervisely.nn.benchmark.object_detection.evaluator import (
+    ObjectDetectionEvalResult,
+)
 from supervisely.nn.benchmark.visualization.widgets import (
     ContainerWidget,
     GalleryWidget,
@@ -22,22 +24,13 @@ from supervisely.nn.benchmark.visualization.widgets import (
 )
 
 
-class DetectionComparisonVisualizer:
-    def __init__(self, comparison):
-        self.comparison = comparison
-        self.api = comparison.api
-        self.vis_texts = vis_texts
-
-        self._create_widgets()
-        layout = self._create_layout()
-
-        self.renderer = Renderer(layout, str(Path(self.comparison.workdir, "visualizations")))
-
-    def visualize(self):
-        return self.renderer.visualize()
+class DetectionComparisonVisualizer(BaseComparisonVisualizer):
+    vis_texts = texts
+    ann_opacity = 0.5
 
-    def upload_results(self, team_id: int, remote_dir: str, progress=None):
-        return self.renderer.upload_results(self.api, team_id, remote_dir, progress)
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.eval_results: List[ObjectDetectionEvalResult]
 
     def _create_widgets(self):
         # Modal Gellery
@@ -48,10 +41,11 @@ class DetectionComparisonVisualizer:
         self.clickable_label = self._create_clickable_label()
 
         # Speedtest init here for overview
-        speedtest = Speedtest(self.vis_texts, self.comparison.evaluation_results)
+        speedtest = Speedtest(self.vis_texts, self.comparison.eval_results)
 
         # Overview
-        overview = Overview(self.vis_texts, self.comparison.evaluation_results)
+        overview = Overview(self.vis_texts, self.comparison.eval_results)
+        overview.team_id = self.comparison.team_id
         self.header = self._create_header()
         self.overviews = self._create_overviews(overview)
         self.overview_md = overview.overview_md
@@ -61,11 +55,11 @@
         )
         self.overview_chart = overview.chart_widget
 
-        columns_number = len(self.comparison.evaluation_results) + 1  # +1 for GT
+        columns_number = len(self.comparison.eval_results) + 1  # +1 for GT
         self.explore_predictions_modal_gallery = self._create_explore_modal_table(columns_number)
         explore_predictions = ExplorePredictions(
             self.vis_texts,
-            self.comparison.evaluation_results,
+            self.comparison.eval_results,
             explore_modal_table=self.explore_predictions_modal_gallery,
         )
         self.explore_predictions_md = explore_predictions.difference_predictions_md
@@ -74,7 +68,7 @@
         # Outcome Counts
         outcome_counts = OutcomeCounts(
             self.vis_texts,
-            self.comparison.evaluation_results,
+            self.comparison.eval_results,
             explore_modal_table=self.explore_modal_table,
         )
         self.outcome_counts_md = self._create_outcome_counts_md()
@@ -83,7 +77,7 @@
         self.outcome_counts_comparison = outcome_counts.chart_widget_comparison
 
         # Precision-Recall Curve
-        pr_curve = PrCurve(self.vis_texts, self.comparison.evaluation_results)
+        pr_curve = PrCurve(self.vis_texts, self.comparison.eval_results)
         self.pr_curve_md = pr_curve.markdown_widget
         self.pr_curve_collapsed_widgets = pr_curve.collapsed_widget
         self.pr_curve_table = pr_curve.table_widget
@@ -92,7 +86,7 @@
         # Average Precision by Class
         avg_prec_by_class = AveragePrecisionByClass(
             self.vis_texts,
-            self.comparison.evaluation_results,
+            self.comparison.eval_results,
             explore_modal_table=self.explore_modal_table,
         )
         self.avg_prec_by_class_md = avg_prec_by_class.markdown_widget
@@ -101,7 +95,7 @@
         # Precision, Recall, F1
         precision_recall_f1 = PrecisionRecallF1(
             self.vis_texts,
-            self.comparison.evaluation_results,
+            self.comparison.eval_results,
             explore_modal_table=self.explore_modal_table,
         )
         self.precision_recall_f1_md = precision_recall_f1.markdown_widget
@@ -118,14 +112,14 @@
         # TODO: ???
 
         # Localization Accuracy (IoU)
-        loc_acc = LocalizationAccuracyIoU(self.vis_texts, self.comparison.evaluation_results)
+        loc_acc = LocalizationAccuracyIoU(self.vis_texts, self.comparison.eval_results)
         self.loc_acc_header_md = loc_acc.header_md
         self.loc_acc_iou_distribution_md = loc_acc.iou_distribution_md
         self.loc_acc_chart = loc_acc.chart
         self.loc_acc_table = loc_acc.table_widget
 
         # Calibration Score
-        cal_score = CalibrationScore(self.vis_texts, self.comparison.evaluation_results)
+        cal_score = CalibrationScore(self.vis_texts, self.comparison.eval_results)
         self.cal_score_md = cal_score.header_md
         self.cal_score_md_2 = cal_score.header_md_2
         self.cal_score_collapse_tip = cal_score.collapse_tip
@@ -140,6 +134,7 @@
 
         # SpeedTest
         self.speedtest_present = False
+        self.speedtest_multiple_batch_sizes = False
         if not speedtest.is_empty():
             self.speedtest_present = True
             self.speedtest_md_intro = speedtest.md_intro
@@ -148,8 +143,10 @@
             self.speed_inference_time_table = speedtest.inference_time_table
             self.speed_fps_md = speedtest.fps_md
             self.speed_fps_table = speedtest.fps_table
-            self.speed_batch_inference_md = speedtest.batch_inference_md
-            self.speed_chart = speedtest.chart
+            self.speedtest_multiple_batch_sizes = speedtest.multiple_batche_sizes()
+            if self.speedtest_multiple_batch_sizes:
+                self.speed_batch_inference_md = speedtest.batch_inference_md
+                self.speed_chart = speedtest.chart
 
     def _create_layout(self):
         is_anchors_widgets = [
@@ -216,10 +213,11 @@
                     (0, self.speed_inference_time_table),
                     (0, self.speed_fps_md),
                     (0, self.speed_fps_table),
-                    (0, self.speed_batch_inference_md),
-                    (0, self.speed_chart),
                 ]
             )
+            if self.speedtest_multiple_batch_sizes:
+                is_anchors_widgets.append((0, self.speed_batch_inference_md))
+                is_anchors_widgets.append((0, self.speed_chart))
         anchors = []
         for is_anchor, widget in is_anchors_widgets:
             if is_anchor:
@@ -232,30 +230,6 @@
         )
         return layout
 
-    def _create_header(self) -> MarkdownWidget:
-        me = self.api.user.get_my_info().login
-        current_date = datetime.datetime.now().strftime("%d %B %Y, %H:%M")
-        header_main_text = " ∣ ".join(  # vs. or | or ∣
-            eval_res.name for eval_res in self.comparison.evaluation_results
-        )
-        header_text = self.vis_texts.markdown_header.format(header_main_text, me, current_date)
-        header = MarkdownWidget("markdown_header", "Header", text=header_text)
-        return header
-
-    def _create_overviews(self, vm: Overview) -> ContainerWidget:
-        grid_cols = 2
-        if len(vm.overview_widgets) > 2:
-            grid_cols = 3
-        if len(vm.overview_widgets) % 4 == 0:
-            grid_cols = 4
-        return ContainerWidget(
-            vm.overview_widgets,
-            name="overview_container",
-            title="Overview",
-            grid=True,
-            grid_cols=grid_cols,
-        )
-
     def _create_key_metrics(self) -> MarkdownWidget:
         key_metrics_text = self.vis_texts.markdown_key_metrics.format(
             self.vis_texts.definitions.average_precision,
@@ -277,22 +251,3 @@
         return MarkdownWidget(
             "markdown_outcome_counts_diff", "Outcome Counts Differences", text=outcome_counts_text
         )
-
-    def _create_explore_modal_table(self, columns_number=3):
-        # TODO: table for each evaluation?
-        all_predictions_modal_gallery = GalleryWidget(
-            "all_predictions_modal_gallery", is_modal=True, columns_number=columns_number
-        )
-        all_predictions_modal_gallery.set_project_meta(
-            self.comparison.evaluation_results[0].dt_project_meta
-        )
-        return all_predictions_modal_gallery
-
-    def _create_diff_modal_table(self, columns_number=3) -> GalleryWidget:
-        diff_modal_gallery = GalleryWidget(
-            "diff_predictions_modal_gallery", is_modal=True, columns_number=columns_number
-        )
-        return diff_modal_gallery
-
-    def _create_clickable_label(self):
-        return MarkdownWidget("clickable_label", "", text=self.vis_texts.clickable_label)
supervisely/nn/benchmark/comparison/model_comparison.py
@@ -1,16 +1,35 @@
 import random
 from pathlib import Path
-from typing import List, Optional
+from typing import List, Optional, Union
 
 from supervisely.api.api import Api
 from supervisely.app.widgets import SlyTqdm
 from supervisely.imaging.color import get_predefined_colors, rgb2hex
+from supervisely.io import env
+from supervisely.io.fs import dir_empty, mkdir
+from supervisely.io.json import load_json_file
 from supervisely.nn.benchmark.comparison.detection_visualization.visualizer import (
     DetectionComparisonVisualizer,
 )
-from supervisely.nn.benchmark.visualization.evaluation_result import EvalResult
+from supervisely.nn.benchmark.comparison.semantic_segmentation.visualizer import (
+    SemanticSegmentationComparisonVisualizer,
+)
+from supervisely.nn.benchmark.cv_tasks import CVTask
+from supervisely.nn.benchmark.object_detection.evaluator import (
+    ObjectDetectionEvalResult,
+)
+from supervisely.nn.benchmark.semantic_segmentation.evaluator import (
+    SemanticSegmentationEvalResult,
+)
+from supervisely.nn.task_type import TaskType
+from supervisely.sly_logger import logger
 from supervisely.task.progress import tqdm_sly
 
+ComparisonVisualizer = Union[
+    DetectionComparisonVisualizer, SemanticSegmentationComparisonVisualizer
+]
+ComparisonEvalResult = Union[ObjectDetectionEvalResult, SemanticSegmentationEvalResult]
+
 
 class ModelComparison:
 
@@ -20,25 +39,36 @@ class ModelComparison:
         remote_eval_dirs: List[str],
         progress: Optional[SlyTqdm] = None,
         workdir: Optional[str] = "./benchmark/comparison",
+        cv_task: Optional[TaskType] = None,
+        team_id: Optional[int] = None,
     ):
         self.api = api
         self.progress = progress or tqdm_sly
         self.workdir = workdir
         self.remote_eval_dirs = remote_eval_dirs
-        self.evaluation_results: List[EvalResult] = []
+        self.eval_results: List[ComparisonEvalResult] = []
+        self.task_type = cv_task
+        self.team_id = team_id or env.team_id()
+
+        eval_cls = SemanticSegmentationEvalResult
+        eval_cls = ObjectDetectionEvalResult
 
         colors = get_predefined_colors(len(remote_eval_dirs) * 5)  # for better visualizations
         random.shuffle(colors)
         for i, eval_dir in enumerate(remote_eval_dirs):
-            local_path = str(Path(self.workdir, "eval_data"))
-            eval_result = EvalResult(eval_dir, local_path, self.api, self.progress)
-            self.evaluation_results.append(eval_result)
+            local_path = Path(self.workdir) / "eval_data" / Path(eval_dir).name
+            self._load_eval_data(eval_dir, str(local_path))
+
+            eval_cls = self._get_eval_cls(str(local_path))
+            eval_result = eval_cls(local_path / "evaluation")
+            eval_result.report_path = Path(eval_dir, "visualizations", "template.vue").as_posix()
             eval_result.color = rgb2hex(colors[i])
 
-        self.task_type = self.evaluation_results[0].inference_info.get("task_type")
+            self.eval_results.append(eval_result)
+
         self._validate_eval_data()
 
-        self.visualizer: DetectionComparisonVisualizer = None
+        self.visualizer: ComparisonVisualizer = None
        self.remote_dir = None
 
     def _validate_eval_data(self):
@@ -49,28 +79,41 @@
         task_type = None
         img_names = None
         cat_names = None
-        for eval_result in self.evaluation_results:
+        for eval_result in self.eval_results:
             next_task_type = eval_result.cv_task
             if not task_type is None:
                 assert task_type == next_task_type, "Task types are different in the evaluations."
             task_type = next_task_type
-            next_img_names = set(
-                [img.get("file_name") for img in eval_result.coco_gt.imgs.values()]
-            )
+            if task_type == TaskType.SEMANTIC_SEGMENTATION:
+                next_img_names = set(eval_result.mp.per_image_metrics.index)
+            else:
+                next_img_names = set(
+                    [img.get("file_name") for img in eval_result.coco_gt.imgs.values()]
+                )
             if not img_names is None:
                 assert img_names == next_img_names, "Images are different in the evaluations."
             img_names = next_img_names
-            next_cat_names = set([cat.get("name") for cat in eval_result.coco_gt.cats.values()])
+            if task_type == TaskType.SEMANTIC_SEGMENTATION:
+                next_cat_names = set(eval_result.mp.class_names)
+            else:
+                next_cat_names = set(eval_result.mp.cat_names)
            if not cat_names is None:
                 assert cat_names == next_cat_names, "Categories are different in the evaluations."
             cat_names = next_cat_names
 
-    def get_metrics(self):
-        pass
-
     def visualize(self):
+        task_type = self.eval_results[0].cv_task
+        if task_type in [
+            TaskType.OBJECT_DETECTION,
+            TaskType.INSTANCE_SEGMENTATION,
+        ]:
+            vis_cls = DetectionComparisonVisualizer
+        elif task_type == TaskType.SEMANTIC_SEGMENTATION:
+            vis_cls = SemanticSegmentationComparisonVisualizer
+        else:
+            raise ValueError(f"Unsupported task type: {task_type}")
         if self.visualizer is None:
-            self.visualizer = DetectionComparisonVisualizer(self)
+            self.visualizer = vis_cls(self)
         self.visualizer.visualize()
 
     def upload_results(self, team_id: int, remote_dir: str, progress=None) -> str:
@@ -80,5 +123,55 @@
     def get_report_link(self) -> str:
         if self.remote_dir is None:
             raise ValueError("Results are not uploaded yet.")
-        report_link = self.remote_dir.rstrip("/") + "/template.vue"
-        return report_link
+        return self.visualizer.renderer._get_report_link(self.api, self.team_id, self.remote_dir)
+
+    @property
+    def report(self):
+        return self.visualizer.renderer.report
+
+    @property
+    def lnk(self):
+        return self.visualizer.renderer.lnk
+
+    def _load_eval_data(self, src_path: str, dst_path: str) -> None:
+        dir_name = Path(src_path).name
+        if not dir_empty(dst_path):
+            logger.info(f"Directory {dst_path} is not empty. Skipping download.")
+            return
+        if not self.api.storage.dir_exists(self.team_id, src_path):
+            raise ValueError(f"Directory {src_path} not found in storage.")
+        mkdir(dst_path)
+        with self.progress(
+            message=f"Downloading evaluation data from {dir_name}",
+            total=self.api.storage.get_directory_size(self.team_id, src_path),
+            unit="B",
+            unit_scale=True,
+            unit_divisor=1024,
+        ) as pbar:
+            self.api.storage.download_directory(
+                self.team_id, src_path, dst_path, progress_cb=pbar.update
+            )
+
+    def _get_cv_task(self, eval_dir: str) -> CVTask:
+        try:
+            eval_data = load_json_file(Path(eval_dir, "evaluation", "inference_info.json"))
+            task_type = eval_data.get("task_type")
+            return CVTask(task_type.replace(" ", "_").lower())
+        except Exception as e:
+            raise ValueError(
+                f"Could not get CV task from `inference_info.json`, try to set it manually. {e}"
+            )
+
+    def _get_eval_cls(self, eval_dir: str) -> ComparisonEvalResult:
+        if self.task_type is None:
+            self.task_type = self._get_cv_task(eval_dir)
+        if self.task_type in [
+            CVTask.OBJECT_DETECTION,
+            CVTask.INSTANCE_SEGMENTATION,
+        ]:
+            eval_cls = ObjectDetectionEvalResult
+        elif self.task_type == CVTask.SEMANTIC_SEGMENTATION:
+            eval_cls = SemanticSegmentationEvalResult
+        else:
+            raise ValueError(f"Unsupported task type: {self.task_type}")
+        return eval_cls
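
The model_comparison.py changes above turn ModelComparison into a task-aware entry point: it downloads each remote evaluation directory, infers the CV task from inference_info.json unless cv_task is passed, and picks the matching eval-result and visualizer classes. Below is a minimal usage sketch based only on the constructor and methods visible in this diff; the team-files paths and team ID are placeholders, not values from this release.

import supervisely as sly
from supervisely.nn.benchmark.comparison.model_comparison import ModelComparison
from supervisely.nn.task_type import TaskType

api = sly.Api.from_env()

comparison = ModelComparison(
    api,
    remote_eval_dirs=[
        "/model-benchmark/project/model_a",  # placeholder team-files paths
        "/model-benchmark/project/model_b",
    ],
    cv_task=TaskType.SEMANTIC_SEGMENTATION,  # optional; otherwise read from inference_info.json
    team_id=42,  # optional; otherwise taken from env.team_id()
)

comparison.visualize()  # dispatches to the detection or semantic segmentation visualizer
remote_dir = comparison.upload_results(team_id=42, remote_dir="/model-comparison/report")
print(comparison.get_report_link())

Note that get_report_link() now delegates to the renderer rather than concatenating "/template.vue" onto the remote directory, and it still requires upload_results() to have been called first.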
supervisely/nn/benchmark/comparison/semantic_segmentation/text_templates.py
@@ -0,0 +1,128 @@
+docs_url = (
+    "https://docs.supervisely.com/neural-networks/model-evaluation-benchmark/semantic-segmentation"
+)
+
+# <i class="zmdi zmdi-check-circle" style="color: #13ce66; margin-right: 5px"></i>
+clickable_label = """
+> <span style="color: #5a6772">
+>   Click on the chart to explore corresponding images.
+> </span>
+"""
+
+markdown_header = """
+<h1>{}</h1>
+
+<div class="model-info-block">
+    <div>Created by <b>{}</b></div>
+    <div><i class="zmdi zmdi-calendar-alt"></i><span>{}</span></div>
+</div>
+"""
+
+markdown_common_overview = """
+- **Models**: {}
+- **Evaluation Dataset**: <a href="/projects/{}/datasets" target="_blank">{}</a>
+- **Task type**: {}
+"""
+
+markdown_overview_info = """
+<h3>{}</h3>
+- **Model**: {}
+- **Checkpoint**: {}
+- **Architecture**: {}
+- **Runtime**: {}
+- **Checkpoint file**: <a class="checkpoint-url" href="{}" target="_blank">{}</a>
+- **Evaluation Report**: <a href="{}" target="_blank">View Report</a>
+
+"""
+
+markdown_key_metrics = """## Key Metrics
+
+We provide a comprehensive analysis of models' performance using a set of metrics, including both basic (precision, recall, F1-score, IoU, etc.) and advanced (boundary IoU, error over union decomposition, etc.) metrics.
+
+- **Pixel accuracy**: reflects the percent of image pixels which were correctly classified.
+- **Precision**: reflects the number of correctly predicted positive segmentations divided by the total number of predicted positive segmentations.
+- **Recall**: reflects the number of correctly predicted positive segmentations divided by the number of all samples that should have been segmented as positive.
+- **F1-score**: reflects the tradeoff between precision and recall. It is equivalent to the Dice coefficient and calculated as a harmonic mean of precision and recall.
+- **Intersection over union (IoU, also known as the Jaccard index)**: measures the overlap between ground truth mask and predicted mask. It is calculated as the ratio of the intersection of the two masks areas to their combined areas.
+- **Boundary intersection over union**: a segmentation consistency measure that first computes the sets of ground truth and predicted masks pixels that are located within the distance d from each contour and then computes intersection over union of these two sets. Pixel distance parameter d (pixel width of the boundary region) controls the sensitivity of the metric, it is usually set as 2% of the image diagonal for normal resolution images and 0.5% of the image diagonal for high resolution images.
+- **Error over union and its components (boundary, extent, segment)**: a metric opposite to intersection over union and can be interpreted as what the model lacked in order to show the perfect performance with IoU = 1. It reflects the ratio of incorrectly segmented pixels of ground truth and predicted masks to their combined areas. It is usually decomposed into boundary, extent and segment errors over union in order to get exhaustive information about the model's strengths and weaknesses.
+- **Renormalized error over union**: postprocessed variant of error over union which takes into consideration cause and effect relationships between different types of segmentation errors.
+"""
+
+markdown_explorer = """## Explore Predictions
+This section contains visual comparison of predictions made by different models and ground truth annotations. Sometimes a simple visualization can be more informative than any performance metric.
+
+> Click on the image to view the **Original Image** with **Ground Truth** and **Predictions** annotations side-by-side.
+"""
+
+markdown_explore_difference = """## Explore Predictions
+
+In this section, you can explore predictions made by different models side-by-side. This helps you to understand the differences in predictions made by each model, and to identify which model performs better in different scenarios.
+
+
+> Click on the image to view the **Ground Truth**, and **Prediction** annotations side-by-side.
+"""
+
+
+### Difference in Predictions
+
+# markdown_explore_same_errors = """
+# ### Same Errors
+
+# This section helps you to identify samples where all models made the same errors. It is useful for understanding the limitations of the models and the common challenges they face.
+
+# > Click on the image to view the **Ground Truth**, and **Prediction** annotations side-by-side.
+# """
+
+
+
+# """
+markdown_iou = """## Intersection & Error Over Union
+
+Pie charts below demonstrate performance metrics of each model in terms of Intersection over Union (IoU) and Error over Union (EoU). It is done with the help of Error over Union (EoU) decomposition into boundary, extent, and segment errors over union. These charts help to draw conclusions on the model's strongest and weakest sides.
+"""
+
+markdown_renormalized_error_ou = """## Renormalized Error over Union
+
+Charts below are dedicated to the decomposition of the post-processed variant of Error over Union, which takes into consideration cause and effect relationships between different types of segmentation errors. Error over Union decomposition has its own pitfalls. It is important to understand that models which tend to produce segment errors (when entire segments are mispredicted and there is no intersection between ground truth and predicted mask) will face fewer occasions to produce boundary and extent errors - as a result, boundary and extent error over union values will be underestimated.
+
+In terms of localization, segment error is more fundamental than extent, while extent error is more fundamental than boundary. In order to overcome this problem, renormalized error over union proposes a slightly different calculation method - by removing more fundamental errors from the denominator - read more in our <a href="{}" target="_blank">technical report</a>
+""".format(
+    docs_url
+)
+
+markdown_eou_per_class = """## Classwise Segmentation Error Analysis
+
+This section contains information about classwise segmentation error decomposition. For each model, each column of the chart represents a certain class from the training dataset, demonstrating model performance in terms of segmenting this specific class on images and what model lacked in order to show the perfect performance.
+"""
+
+markdown_frequently_confused_empty = """### Frequently Confused Classes
+
+No frequently confused class pairs found
+"""
+
+markdown_frequently_confused = """## Frequently Confused Classes
+
+The bar chart below reveals pairs of classes which were most frequently confused for each model. Each column of the chart demonstrates the probability of confusion of a given pair of classes. It is necessary to remember that this probability is not symmetric: the probability of confusing class A with class B is not equal to the probability of confusing class B with class A.
+"""
+empty = """### {}
+
+> {}
+"""
+
+markdown_speedtest_intro = """## Inference Speed
+
+This is a speed test benchmark for compared models. Models were tested with the following configurations:
+"""
+
+markdown_speedtest_overview_ms = """### Latency (Inference Time)
+The table below shows the speed test results. For each test, the time taken to process one batch of images is shown. Results are averaged across **{}** iterations.
+"""
+
+markdown_speedtest_overview_fps = """### Frames per Second (FPS)
+The table below shows the speed test results. For each test, the number of frames processed per second is shown. Results are averaged across **{}** iterations.
+"""
+
+markdown_batch_inference = """
+This chart shows how the model's speed changes with different batch sizes . As the batch size increases, you can observe an increase in FPS (images per second).
+"""
supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/__init__.py
@@ -0,0 +1,21 @@
+from supervisely.nn.benchmark.comparison.semantic_segmentation.vis_metrics.classwise_error_analysis import (
+    ClasswiseErrorAnalysis,
+)
+from supervisely.nn.benchmark.comparison.semantic_segmentation.vis_metrics.explore_predictions import (
+    ExplorePredictions,
+)
+from supervisely.nn.benchmark.comparison.semantic_segmentation.vis_metrics.frequently_confused import (
+    FrequentlyConfused,
+)
+from supervisely.nn.benchmark.comparison.semantic_segmentation.vis_metrics.iou_eou import (
+    IntersectionErrorOverUnion,
+)
+from supervisely.nn.benchmark.comparison.semantic_segmentation.vis_metrics.overview import (
+    Overview,
+)
+from supervisely.nn.benchmark.comparison.semantic_segmentation.vis_metrics.renormalized_error_ou import (
+    RenormalizedErrorOverUnion,
+)
+from supervisely.nn.benchmark.comparison.semantic_segmentation.vis_metrics.speedtest import (
+    Speedtest,
+)
supervisely/nn/benchmark/comparison/semantic_segmentation/vis_metrics/classwise_error_analysis.py
@@ -0,0 +1,68 @@
+from supervisely.nn.benchmark.base_visualizer import BaseVisMetrics
+from supervisely.nn.benchmark.visualization.widgets import ChartWidget, MarkdownWidget
+
+
+class ClasswiseErrorAnalysis(BaseVisMetrics):
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self.clickable = True
+
+    @property
+    def md(self) -> MarkdownWidget:
+        return MarkdownWidget(
+            "classwise_error_analysis",
+            "Classwise Segmentation Error Analysis",
+            text=self.vis_texts.markdown_eou_per_class,
+        )
+
+    @property
+    def chart(self) -> ChartWidget:
+        return ChartWidget("classwise_error_analysis", self.get_figure())
+
+    def get_figure(self):
+        import numpy as np
+        import plotly.graph_objects as go  # pylint: disable=import-error
+
+        fig = go.Figure()
+
+        # Color palette for the metrics
+        color_palette = ["#8ACAA1", "#FFE4B5", "#F7ADAA", "#dd3f3f"]
+        metrics = ["IoU", "E_extent_oU", "E_boundary_oU", "E_segment_oU"]
+
+        group_width = 0.7
+
+        for model_idx, eval_result in enumerate(self.eval_results):
+            bar_data, labels = eval_result.mp.classwise_segm_error_data
+            model_name = eval_result.name
+
+            for metric_idx, metric_name in enumerate(metrics):
+                # hover_customdata = [f"metric: {metric_name} for class '{l}' ({model_name})" for l in labels]
+                hover_customdata = [
+                    f"class: {l}<br>model: {model_name}<br>{metric_name}" for l in labels
+                ]
+                fig.add_trace(
+                    go.Bar(
+                        name=metric_name,
+                        x=np.arange(len(labels)) + model_idx * group_width * 0.3,
+                        y=bar_data[metric_name],
+                        customdata=hover_customdata,
+                        hovertemplate="%{customdata}: %{y:.2f}<extra></extra>",
+                        marker=dict(color=color_palette[metric_idx]),
+                        width=group_width / len(metrics),
+                        offsetgroup=model_idx,
+                        base=bar_data[metrics[:metric_idx]].sum(axis=1) if metric_idx > 0 else None,
+                    )
+                )
+
+        fig.update_layout(
+            showlegend=False,
+            barmode="stack",
+            xaxis=dict(
+                title="Classes",
+                tickvals=np.arange(len(labels)) + (len(self.eval_results) - 1 - group_width) / 4,
+                ticktext=labels,
+            ),
+            width=800 if len(labels) < 10 else 1000,
+        )
+
+        return fig
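
To preview the chart this class builds outside the full report layout, a sketch like the following should work. It assumes BaseVisMetrics accepts (vis_texts, eval_results) positionally, as the detection visualizer calls earlier in this diff suggest, and it reuses the comparison object from the ModelComparison sketch above.

from supervisely.nn.benchmark.comparison.semantic_segmentation import text_templates
from supervisely.nn.benchmark.comparison.semantic_segmentation.vis_metrics import (
    ClasswiseErrorAnalysis,
)

vis_metric = ClasswiseErrorAnalysis(text_templates, comparison.eval_results)
fig = vis_metric.get_figure()  # plotly Figure: stacked IoU/error-over-union bars, grouped per model
fig.write_html("classwise_errors.html")  # or fig.show() in a notebook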