supervisely 6.73.238__py3-none-any.whl → 6.73.240__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. supervisely/annotation/annotation.py +2 -2
  2. supervisely/api/entity_annotation/tag_api.py +11 -4
  3. supervisely/api/file_api.py +17 -3
  4. supervisely/nn/__init__.py +1 -0
  5. supervisely/nn/benchmark/__init__.py +14 -2
  6. supervisely/nn/benchmark/base_benchmark.py +84 -37
  7. supervisely/nn/benchmark/base_evaluator.py +120 -0
  8. supervisely/nn/benchmark/base_visualizer.py +265 -0
  9. supervisely/nn/benchmark/comparison/detection_visualization/text_templates.py +5 -5
  10. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/calibration_score.py +2 -2
  11. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/explore_predicttions.py +39 -16
  12. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/localization_accuracy.py +1 -1
  13. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/outcome_counts.py +4 -4
  14. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/overview.py +12 -11
  15. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/pr_curve.py +1 -1
  16. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/precision_recal_f1.py +6 -6
  17. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/speedtest.py +3 -3
  18. supervisely/nn/benchmark/{instance_segmentation_benchmark.py → instance_segmentation/benchmark.py} +9 -3
  19. supervisely/nn/benchmark/instance_segmentation/evaluator.py +58 -0
  20. supervisely/nn/benchmark/{visualization/text_templates/instance_segmentation_text.py → instance_segmentation/text_templates.py} +53 -69
  21. supervisely/nn/benchmark/instance_segmentation/visualizer.py +18 -0
  22. supervisely/nn/benchmark/object_detection/__init__.py +0 -0
  23. supervisely/nn/benchmark/object_detection/base_vis_metric.py +51 -0
  24. supervisely/nn/benchmark/{object_detection_benchmark.py → object_detection/benchmark.py} +4 -2
  25. supervisely/nn/benchmark/object_detection/evaluation_params.yaml +2 -0
  26. supervisely/nn/benchmark/{evaluation/object_detection_evaluator.py → object_detection/evaluator.py} +67 -9
  27. supervisely/nn/benchmark/{evaluation/coco → object_detection}/metric_provider.py +13 -14
  28. supervisely/nn/benchmark/{visualization/text_templates/object_detection_text.py → object_detection/text_templates.py} +49 -41
  29. supervisely/nn/benchmark/object_detection/vis_metrics/__init__.py +48 -0
  30. supervisely/nn/benchmark/{visualization → object_detection}/vis_metrics/confidence_distribution.py +20 -24
  31. supervisely/nn/benchmark/object_detection/vis_metrics/confidence_score.py +119 -0
  32. supervisely/nn/benchmark/{visualization → object_detection}/vis_metrics/confusion_matrix.py +34 -22
  33. supervisely/nn/benchmark/object_detection/vis_metrics/explore_predictions.py +129 -0
  34. supervisely/nn/benchmark/{visualization → object_detection}/vis_metrics/f1_score_at_different_iou.py +21 -26
  35. supervisely/nn/benchmark/object_detection/vis_metrics/frequently_confused.py +137 -0
  36. supervisely/nn/benchmark/object_detection/vis_metrics/iou_distribution.py +106 -0
  37. supervisely/nn/benchmark/object_detection/vis_metrics/key_metrics.py +136 -0
  38. supervisely/nn/benchmark/{visualization → object_detection}/vis_metrics/model_predictions.py +53 -49
  39. supervisely/nn/benchmark/object_detection/vis_metrics/outcome_counts.py +188 -0
  40. supervisely/nn/benchmark/object_detection/vis_metrics/outcome_counts_per_class.py +191 -0
  41. supervisely/nn/benchmark/object_detection/vis_metrics/overview.py +116 -0
  42. supervisely/nn/benchmark/object_detection/vis_metrics/pr_curve.py +106 -0
  43. supervisely/nn/benchmark/object_detection/vis_metrics/pr_curve_by_class.py +49 -0
  44. supervisely/nn/benchmark/object_detection/vis_metrics/precision.py +72 -0
  45. supervisely/nn/benchmark/object_detection/vis_metrics/precision_avg_per_class.py +59 -0
  46. supervisely/nn/benchmark/object_detection/vis_metrics/recall.py +71 -0
  47. supervisely/nn/benchmark/object_detection/vis_metrics/recall_vs_precision.py +56 -0
  48. supervisely/nn/benchmark/object_detection/vis_metrics/reliability_diagram.py +110 -0
  49. supervisely/nn/benchmark/object_detection/vis_metrics/speedtest.py +151 -0
  50. supervisely/nn/benchmark/object_detection/visualizer.py +697 -0
  51. supervisely/nn/benchmark/semantic_segmentation/__init__.py +9 -0
  52. supervisely/nn/benchmark/semantic_segmentation/base_vis_metric.py +55 -0
  53. supervisely/nn/benchmark/semantic_segmentation/benchmark.py +32 -0
  54. supervisely/nn/benchmark/semantic_segmentation/evaluation_params.yaml +0 -0
  55. supervisely/nn/benchmark/semantic_segmentation/evaluator.py +162 -0
  56. supervisely/nn/benchmark/semantic_segmentation/metric_provider.py +153 -0
  57. supervisely/nn/benchmark/semantic_segmentation/text_templates.py +130 -0
  58. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/__init__.py +0 -0
  59. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/acknowledgement.py +15 -0
  60. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/classwise_error_analysis.py +57 -0
  61. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/confusion_matrix.py +92 -0
  62. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/explore_predictions.py +84 -0
  63. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/frequently_confused.py +101 -0
  64. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/iou_eou.py +45 -0
  65. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/key_metrics.py +60 -0
  66. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/model_predictions.py +107 -0
  67. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/overview.py +112 -0
  68. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/renormalized_error_ou.py +48 -0
  69. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/speedtest.py +178 -0
  70. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/vis_texts.py +21 -0
  71. supervisely/nn/benchmark/semantic_segmentation/visualizer.py +304 -0
  72. supervisely/nn/benchmark/utils/__init__.py +12 -0
  73. supervisely/nn/benchmark/utils/detection/__init__.py +2 -0
  74. supervisely/nn/benchmark/{evaluation/coco → utils/detection}/calculate_metrics.py +6 -4
  75. supervisely/nn/benchmark/utils/detection/metric_provider.py +533 -0
  76. supervisely/nn/benchmark/{coco_utils → utils/detection}/sly2coco.py +4 -4
  77. supervisely/nn/benchmark/{coco_utils/utils.py → utils/detection/utlis.py} +11 -0
  78. supervisely/nn/benchmark/utils/semantic_segmentation/__init__.py +0 -0
  79. supervisely/nn/benchmark/utils/semantic_segmentation/calculate_metrics.py +35 -0
  80. supervisely/nn/benchmark/utils/semantic_segmentation/evaluator.py +804 -0
  81. supervisely/nn/benchmark/utils/semantic_segmentation/loader.py +65 -0
  82. supervisely/nn/benchmark/utils/semantic_segmentation/utils.py +109 -0
  83. supervisely/nn/benchmark/visualization/evaluation_result.py +17 -3
  84. supervisely/nn/benchmark/visualization/vis_click_data.py +1 -1
  85. supervisely/nn/benchmark/visualization/widgets/__init__.py +3 -0
  86. supervisely/nn/benchmark/visualization/widgets/chart/chart.py +12 -4
  87. supervisely/nn/benchmark/visualization/widgets/gallery/gallery.py +35 -8
  88. supervisely/nn/benchmark/visualization/widgets/gallery/template.html +8 -4
  89. supervisely/nn/benchmark/visualization/widgets/markdown/markdown.py +1 -1
  90. supervisely/nn/benchmark/visualization/widgets/notification/notification.py +11 -7
  91. supervisely/nn/benchmark/visualization/widgets/radio_group/__init__.py +0 -0
  92. supervisely/nn/benchmark/visualization/widgets/radio_group/radio_group.py +34 -0
  93. supervisely/nn/benchmark/visualization/widgets/table/table.py +9 -3
  94. supervisely/nn/benchmark/visualization/widgets/widget.py +4 -0
  95. supervisely/project/project.py +18 -6
  96. {supervisely-6.73.238.dist-info → supervisely-6.73.240.dist-info}/METADATA +3 -1
  97. {supervisely-6.73.238.dist-info → supervisely-6.73.240.dist-info}/RECORD +104 -82
  98. supervisely/nn/benchmark/coco_utils/__init__.py +0 -2
  99. supervisely/nn/benchmark/evaluation/__init__.py +0 -3
  100. supervisely/nn/benchmark/evaluation/base_evaluator.py +0 -64
  101. supervisely/nn/benchmark/evaluation/coco/__init__.py +0 -2
  102. supervisely/nn/benchmark/evaluation/instance_segmentation_evaluator.py +0 -88
  103. supervisely/nn/benchmark/utils.py +0 -13
  104. supervisely/nn/benchmark/visualization/inference_speed/__init__.py +0 -19
  105. supervisely/nn/benchmark/visualization/inference_speed/speedtest_batch.py +0 -161
  106. supervisely/nn/benchmark/visualization/inference_speed/speedtest_intro.py +0 -28
  107. supervisely/nn/benchmark/visualization/inference_speed/speedtest_overview.py +0 -141
  108. supervisely/nn/benchmark/visualization/inference_speed/speedtest_real_time.py +0 -63
  109. supervisely/nn/benchmark/visualization/text_templates/inference_speed_text.py +0 -23
  110. supervisely/nn/benchmark/visualization/vis_metric_base.py +0 -337
  111. supervisely/nn/benchmark/visualization/vis_metrics/__init__.py +0 -67
  112. supervisely/nn/benchmark/visualization/vis_metrics/classwise_error_analysis.py +0 -55
  113. supervisely/nn/benchmark/visualization/vis_metrics/confidence_score.py +0 -93
  114. supervisely/nn/benchmark/visualization/vis_metrics/explorer_grid.py +0 -144
  115. supervisely/nn/benchmark/visualization/vis_metrics/frequently_confused.py +0 -115
  116. supervisely/nn/benchmark/visualization/vis_metrics/iou_distribution.py +0 -86
  117. supervisely/nn/benchmark/visualization/vis_metrics/outcome_counts.py +0 -119
  118. supervisely/nn/benchmark/visualization/vis_metrics/outcome_counts_per_class.py +0 -148
  119. supervisely/nn/benchmark/visualization/vis_metrics/overall_error_analysis.py +0 -109
  120. supervisely/nn/benchmark/visualization/vis_metrics/overview.py +0 -189
  121. supervisely/nn/benchmark/visualization/vis_metrics/percision_avg_per_class.py +0 -57
  122. supervisely/nn/benchmark/visualization/vis_metrics/pr_curve.py +0 -101
  123. supervisely/nn/benchmark/visualization/vis_metrics/pr_curve_by_class.py +0 -46
  124. supervisely/nn/benchmark/visualization/vis_metrics/precision.py +0 -56
  125. supervisely/nn/benchmark/visualization/vis_metrics/recall.py +0 -54
  126. supervisely/nn/benchmark/visualization/vis_metrics/recall_vs_precision.py +0 -57
  127. supervisely/nn/benchmark/visualization/vis_metrics/reliability_diagram.py +0 -88
  128. supervisely/nn/benchmark/visualization/vis_metrics/what_is.py +0 -23
  129. supervisely/nn/benchmark/visualization/vis_templates.py +0 -241
  130. supervisely/nn/benchmark/visualization/vis_widgets.py +0 -128
  131. supervisely/nn/benchmark/visualization/visualizer.py +0 -729
  132. /supervisely/nn/benchmark/{visualization/text_templates → instance_segmentation}/__init__.py +0 -0
  133. /supervisely/nn/benchmark/{evaluation/coco → instance_segmentation}/evaluation_params.yaml +0 -0
  134. /supervisely/nn/benchmark/{evaluation/coco → utils/detection}/metrics.py +0 -0
  135. {supervisely-6.73.238.dist-info → supervisely-6.73.240.dist-info}/LICENSE +0 -0
  136. {supervisely-6.73.238.dist-info → supervisely-6.73.240.dist-info}/WHEEL +0 -0
  137. {supervisely-6.73.238.dist-info → supervisely-6.73.240.dist-info}/entry_points.txt +0 -0
  138. {supervisely-6.73.238.dist-info → supervisely-6.73.240.dist-info}/top_level.txt +0 -0
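Most of the churn in this release is a restructuring of supervisely.nn.benchmark: the old evaluation/, coco_utils/ and visualization/vis_metrics/ modules give way to per-task packages (object_detection/, instance_segmentation/, and a new semantic_segmentation/) plus shared base_evaluator.py and base_visualizer.py modules. A rough sketch of the import moves, based only on the renames listed above (public re-exports through supervisely.nn.benchmark may also cover some of these paths):

# 6.73.238
# from supervisely.nn.benchmark.object_detection_benchmark import ObjectDetectionBenchmark
# from supervisely.nn.benchmark.instance_segmentation_benchmark import InstanceSegmentationBenchmark
# from supervisely.nn.benchmark.evaluation import InstanceSegmentationEvaluator

# 6.73.240
from supervisely.nn.benchmark.object_detection.benchmark import ObjectDetectionBenchmark
from supervisely.nn.benchmark.instance_segmentation.benchmark import InstanceSegmentationBenchmark
from supervisely.nn.benchmark.instance_segmentation.evaluator import InstanceSegmentationEvaluator
# semantic_segmentation/ is entirely new in 6.73.240; its exported class names are not visible
# in this diff, so they are not listed here.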
--- /dev/null
+++ b/supervisely/nn/benchmark/base_visualizer.py
@@ -0,0 +1,265 @@
+from typing import List, Tuple
+
+from supervisely.annotation.annotation import Annotation
+from supervisely.api.api import Api
+from supervisely.api.image_api import ImageInfo
+from supervisely.api.module_api import ApiField
+from supervisely.api.project_api import ProjectInfo
+from supervisely.nn.benchmark.base_evaluator import BaseEvalResult
+from supervisely.nn.benchmark.cv_tasks import CVTask
+from supervisely.nn.benchmark.visualization.renderer import Renderer
+from supervisely.nn.benchmark.visualization.widgets import GalleryWidget
+from supervisely.project.project_meta import ProjectMeta
+from supervisely.task.progress import tqdm_sly
+
+
+class MatchedPairData:
+    def __init__(
+        self,
+        gt_image_info: ImageInfo = None,
+        pred_image_info: ImageInfo = None,
+        diff_image_info: ImageInfo = None,
+        gt_annotation: Annotation = None,
+        pred_annotation: Annotation = None,
+        diff_annotation: Annotation = None,
+    ):
+        self.gt_image_info = gt_image_info
+        self.pred_image_info = pred_image_info
+        self.diff_image_info = diff_image_info
+        self.gt_annotation = gt_annotation
+        self.pred_annotation = pred_annotation
+        self.diff_annotation = diff_annotation
+
+
+class BaseVisMetrics:
+
+    def __init__(
+        self,
+        vis_texts,
+        eval_results: List[BaseEvalResult],
+        explore_modal_table: GalleryWidget = None,
+        diff_modal_table: GalleryWidget = None,
+    ) -> None:
+        self.vis_texts = vis_texts
+        self.eval_results = eval_results
+        self.explore_modal_table = explore_modal_table
+        self.diff_modal_table = diff_modal_table
+        self.clickable = False
+
+
+class BaseVisMetric(BaseVisMetrics):
+    def __init__(
+        self,
+        vis_texts,
+        eval_result: BaseEvalResult,
+        explore_modal_table: GalleryWidget = None,
+        diff_modal_table: GalleryWidget = None,
+    ) -> None:
+        super().__init__(vis_texts, [eval_result], explore_modal_table, diff_modal_table)
+        self.eval_result = eval_result
+
+
+class BaseVisualizer:
+    cv_task = None
+
+    def __init__(
+        self,
+        api: Api,
+        eval_results: List[BaseEvalResult],
+        workdir="./visualizations",
+        progress=None,
+    ):
+        self.api = api
+        self.workdir = workdir
+        self.eval_result = eval_results[0]  # for evaluation
+        self.eval_results = eval_results  # for comparison
+
+        self.renderer = None
+        self.gt_project_info = None
+        self.gt_project_meta = None
+        self.gt_dataset_infos = None
+        self.pbar = progress or tqdm_sly
+        self.ann_opacity = 0.4
+
+        with self.pbar(message="Fetching project and dataset infos", total=len(eval_results)) as p:
+            for eval_result in self.eval_results:
+                self._get_eval_project_infos(eval_result)
+                p.update(1)
+
+    def _get_eval_project_infos(self, eval_result):
+        # get project infos
+        if self.gt_project_info is None:
+            self.gt_project_info = self.api.project.get_info_by_id(eval_result.gt_project_id)
+        eval_result.gt_project_info = self.gt_project_info
+        eval_result.pred_project_info = self.api.project.get_info_by_id(eval_result.pred_project_id)
+
+        # get project metas
+        if self.gt_project_meta is None:
+            self.gt_project_meta = ProjectMeta.from_json(
+                self.api.project.get_meta(eval_result.gt_project_id)
+            )
+        eval_result.gt_project_meta = self.gt_project_meta
+        eval_result.pred_project_meta = ProjectMeta.from_json(
+            self.api.project.get_meta(eval_result.pred_project_id)
+        )
+
+        # get dataset infos
+        filters = None
+        if eval_result.gt_dataset_ids is not None:
+            filters = [
+                {
+                    ApiField.FIELD: ApiField.ID,
+                    ApiField.OPERATOR: "in",
+                    ApiField.VALUE: eval_result.gt_dataset_ids,
+                }
+            ]
+        if self.gt_dataset_infos is None:
+            self.gt_dataset_infos = self.api.dataset.get_list(
+                eval_result.gt_project_id,
+                filters=filters,
+                recursive=True,
+            )
+        eval_result.gt_dataset_infos = self.gt_dataset_infos
+        filters = [
+            {
+                ApiField.FIELD: ApiField.NAME,
+                ApiField.OPERATOR: "in",
+                ApiField.VALUE: [ds.name for ds in self.gt_dataset_infos],
+            }
+        ]
+        eval_result.pred_dataset_infos = self.api.dataset.get_list(
+            eval_result.pred_project_id, filters=filters, recursive=True
+        )
+
+        # get train task info
+        train_info = eval_result.train_info
+        if train_info:
+            train_task_id = train_info.get("app_session_id")
+            if train_task_id:
+                eval_result.task_info = self.api.task.get_info_by_id(int(train_task_id))
+
+    def visualize(self):
+        if self.renderer is None:
+            layout = self._create_layout()
+            self.renderer = Renderer(layout, self.workdir)
+        return self.renderer.visualize()
+
+    def upload_results(self, team_id: int, remote_dir: str, progress=None):
+        if self.renderer is None:
+            raise RuntimeError("Visualize first")
+        return self.renderer.upload_results(self.api, team_id, remote_dir, progress)
+
+    def _create_layout(self):
+        raise NotImplementedError("Implement this method in a subclass")
+
+    def _get_or_create_diff_project(self) -> Tuple[ProjectInfo, List, bool]:
+        """
+        Get or create a project for diff visualizations.
+        Dataset hierarchy is copied from the prediction project.
+        """
+
+        pred_ds_id_to_diff_ds_info = {}
+        diff_ds_infos = []
+
+        def _get_or_create_diff_dataset(pred_dataset_id, pred_datasets):
+            if pred_dataset_id in pred_ds_id_to_diff_ds_info:
+                return pred_ds_id_to_diff_ds_info[pred_dataset_id]
+            pred_dataset = pred_datasets[pred_dataset_id]
+            if pred_dataset.parent_id is None:
+                diff_dataset = self.api.dataset.create(project_info.id, pred_dataset.name)
+            else:
+                parent_dataset = _get_or_create_diff_dataset(pred_dataset.parent_id, pred_datasets)
+                diff_dataset = self.api.dataset.create(
+                    project_info.id,
+                    pred_dataset.name,
+                    parent_id=parent_dataset.id,
+                )
+            pred_ds_id_to_diff_ds_info[pred_dataset_id] = diff_dataset
+            diff_ds_infos.append(diff_dataset)
+            return diff_dataset
+
+        project_name = self._generate_diff_project_name(self.eval_result.pred_project_info.name)
+        workspace_id = self.eval_result.pred_project_info.workspace_id
+        project_info = self.api.project.get_info_by_name(
+            workspace_id, project_name, raise_error=False
+        )
+        is_existed = project_info is not None
+        if not is_existed:
+            project_info = self.api.project.create(
+                workspace_id, project_name, change_name_if_conflict=True
+            )
+            pred_datasets = {ds.id: ds for ds in self.eval_result.pred_dataset_infos}
+            for dataset in pred_datasets:
+                _get_or_create_diff_dataset(dataset, pred_datasets)
+        return project_info, diff_ds_infos, is_existed
+
+    def _generate_diff_project_name(self, pred_project_name):
+        return "[diff]: " + pred_project_name
+
+    def _create_explore_modal_table(
+        self, columns_number=3, click_gallery_id=None, hover_text=None
+    ) -> GalleryWidget:
+        gallery = GalleryWidget(
+            "all_predictions_modal_gallery",
+            is_modal=True,
+            columns_number=columns_number,
+            click_gallery_id=click_gallery_id,
+            opacity=self.ann_opacity,
+        )
+        gallery.set_project_meta(self.eval_results[0].filtered_project_meta)
+        if hover_text:
+            gallery.add_image_left_header(hover_text)
+        return gallery
+
+    def _create_diff_modal_table(self, columns_number=3) -> GalleryWidget:
+        gallery = GalleryWidget(
+            "diff_predictions_modal_gallery",
+            is_modal=True,
+            columns_number=columns_number,
+            opacity=self.ann_opacity,
+        )
+        gallery.set_project_meta(self.eval_results[0].filtered_project_meta)
+        return gallery
+
+    def _get_filtered_project_meta(self, eval_result) -> ProjectMeta:
+        remove_classes = []
+        meta = eval_result.pred_project_meta.clone()
+        if eval_result.classes_whitelist:
+            for obj_class in meta.obj_classes:
+                if obj_class.name not in eval_result.classes_whitelist:
+                    remove_classes.append(obj_class.name)
+            if remove_classes:
+                meta = meta.delete_obj_classes(remove_classes)
+        return meta
+
+    def _update_match_data(
+        self,
+        gt_image_id: int,
+        gt_image_info: ImageInfo = None,
+        pred_image_info: ImageInfo = None,
+        diff_image_info: ImageInfo = None,
+        gt_annotation: Annotation = None,
+        pred_annotation: Annotation = None,
+        diff_annotation: Annotation = None,
+    ):
+        match_data = self.eval_result.matched_pair_data.get(gt_image_id, None)
+        if match_data is None:
+            self.eval_result.matched_pair_data[gt_image_id] = MatchedPairData(
+                gt_image_info=gt_image_info,
+                pred_image_info=pred_image_info,
+                diff_image_info=diff_image_info,
+                gt_annotation=gt_annotation,
+                pred_annotation=pred_annotation,
+                diff_annotation=diff_annotation,
+            )
+        else:
+            for attr, value in {
+                "gt_image_info": gt_image_info,
+                "pred_image_info": pred_image_info,
+                "diff_image_info": diff_image_info,
+                "gt_annotation": gt_annotation,
+                "pred_annotation": pred_annotation,
+                "diff_annotation": diff_annotation,
+            }.items():
+                if value is not None:
+                    setattr(match_data, attr, value)
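base_visualizer.py is a new shared module: MatchedPairData holds the matched GT/prediction/difference images and annotations, BaseVisMetrics/BaseVisMetric are the bases for individual report widgets, and BaseVisualizer owns project/dataset fetching, diff-project creation and rendering, leaving only _create_layout() to task-specific subclasses. A minimal usage sketch, assuming the concrete visualizers (e.g. InstanceSegmentationVisualizer) keep the BaseVisualizer constructor signature shown above; the function below is illustrative, not part of the package:

from typing import List

from supervisely.api.api import Api
from supervisely.nn.benchmark.base_evaluator import BaseEvalResult
from supervisely.nn.benchmark.instance_segmentation.visualizer import (
    InstanceSegmentationVisualizer,
)


def render_report(api: Api, eval_results: List[BaseEvalResult], team_id: int, remote_dir: str):
    # Assumption: the subclass keeps BaseVisualizer's (api, eval_results, workdir, progress) signature.
    visualizer = InstanceSegmentationVisualizer(api, eval_results, workdir="./visualizations")
    visualizer.visualize()  # builds the layout via _create_layout() and renders it into workdir
    # upload_results() raises RuntimeError("Visualize first") if visualize() was not called
    return visualizer.upload_results(team_id, remote_dir)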
--- a/supervisely/nn/benchmark/comparison/detection_visualization/text_templates.py
+++ b/supervisely/nn/benchmark/comparison/detection_visualization/text_templates.py
@@ -160,7 +160,7 @@ markdown_f1_per_class_title = """### F1-score by Class"""
 
 markdown_R = """## Recall
 
-This section measures the ability of the model to detect **all relevant instances in the dataset**. In other words, it answers the question: Of all instances in the dataset, how many of them is the model managed to find out?”
+This section measures the ability of the model to detect **all relevant instances in the dataset**. In other words, it answers the question: "Of all instances in the dataset, how many of them is the model managed to find out?"
 
 To measure this, we calculate **Recall**. Recall counts errors, when the model does not detect an object that actually is present in a dataset and should be detected. Recall is calculated as the portion of correct predictions (true positives) over all instances in the dataset (true positives + false negatives).
 """
@@ -186,9 +186,9 @@ _Bars in the chart are sorted by <abbr title="{}">F1-score</abbr> to keep a unif
 
 markdown_P = """## Precision
 
-This section measures the accuracy of all predictions made by the model. In other words, it answers the question: Of all predictions made by the model, how many of them are actually correct?”.
+This section measures the accuracy of all predictions made by the model. In other words, it answers the question: "Of all predictions made by the model, how many of them are actually correct?".
 
-To measure this, we calculate **Precision**. Precision counts errors, when the model predicts an object (bounding box), but the image has no objects of the predicted class in this place. Precision is calculated as a portion of correct predictions (true positives) over all models predictions (true positives + false positives).
+To measure this, we calculate **Precision**. Precision counts errors, when the model predicts an object (bounding box), but the image has no objects of the predicted class in this place. Precision is calculated as a portion of correct predictions (true positives) over all model's predictions (true positives + false positives).
 """
 
 notification_precision = {
@@ -283,7 +283,7 @@ markdown_frequently_confused = (
 
 This chart displays the most frequently confused pairs of classes. In general, it finds out which classes visually seem very similar to the model.
 
-The chart calculates the **probability of confusion** between different pairs of classes. For instance, if the probability of confusion for the pair {} - {} is {}, this means that when the model predicts either {} or {}”, there is a {}% chance that the model might mistakenly predict one instead of the other.
+The chart calculates the **probability of confusion** between different pairs of classes. For instance, if the probability of confusion for the pair "{} - {}" is {}, this means that when the model predicts either "{}" or "{}", there is a {}% chance that the model might mistakenly predict one instead of the other.
 
 The measure is class-symmetric, meaning that the probability of confusing a {} with a {} is equal to the probability of confusing a {} with a {}.
 
@@ -421,7 +421,7 @@ empty = """### {}
 
 markdown_speedtest_intro = """## Inference Speed
 
-This is a speed test benchmark for compared models. Models was tested with the following configurations:
+This is a speed test benchmark for compared models. Models were tested with the following configurations:
 """
 
 markdown_speedtest_overview_ms = """### Latency (Inference Time)
--- a/supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/calibration_score.py
+++ b/supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/calibration_score.py
@@ -139,7 +139,7 @@ class CalibrationScore(BaseVisMetric):
 x=eval_result.dfsp_down["scores"],
 y=eval_result.dfsp_down["f1"],
 mode="lines",
-name=f"[{i+1}] {eval_result.name}",
+name=f"[{i+1}] {eval_result.model_name}",
 line=dict(color=eval_result.color),
 hovertemplate="Confidence Score: %{x:.2f}<br>Value: %{y:.2f}<extra></extra>",
 )
@@ -194,7 +194,7 @@ class CalibrationScore(BaseVisMetric):
 x=pred_probs,
 y=true_probs,
 mode="lines+markers",
-name=f"[{i+1}] {eval_result.name}",
+name=f"[{i+1}] {eval_result.model_name}",
 line=dict(color=eval_result.color),
 hovertemplate=f"{eval_result.name}<br>"
 + "Confidence Score: %{x:.2f}<br>Fraction of True Positives: %{y:.2f}<extra></extra>",
--- a/supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/explore_predicttions.py
+++ b/supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/explore_predicttions.py
@@ -2,6 +2,7 @@ from typing import List, Tuple
 
 from supervisely.annotation.annotation import Annotation
 from supervisely.api.image_api import ImageInfo
+from supervisely.api.module_api import ApiField
 from supervisely.nn.benchmark.comparison.detection_visualization.vis_metrics.vis_metric import (
     BaseVisMetric,
 )
@@ -30,13 +31,10 @@ class ExplorePredictions(BaseVisMetric):
     self.GALLERY_DIFFERENCE, columns_number=columns_number, filters=default_filters
 )
 gallery.add_image_left_header("Click to explore more")
-gallery.show_all_button = True
 gallery.set_project_meta(self.eval_results[0].gt_project_meta)
 gallery.set_images(*data)
-gallery.add_on_click(
-    self.explore_modal_table.id, self.get_click_data_explore_all(), columns_number * 3
-)
-gallery._gallery._filters
+gallery.set_click_data(self.explore_modal_table.id, self.get_click_data_explore_all())
+gallery.set_show_all_data(self.explore_modal_table.id, self.get_click_data_explore_all())
 gallery._gallery._update_filters()
 
 return gallery
@@ -48,18 +46,31 @@ class ExplorePredictions(BaseVisMetric):
 skip_tags_filtering = []
 api = self.eval_results[0].api
 min_conf = float("inf")
+names = None
+ds_name = None
 for idx, eval_res in enumerate(self.eval_results):
     if idx == 0:
-        dataset_info = api.dataset.get_list(eval_res.gt_project_id)[0]
+        dataset_info = eval_res.gt_dataset_infos[0]
         image_infos = api.image.get_list(dataset_info.id, limit=5)
+        ds_name = dataset_info.name
         images_ids = [image_info.id for image_info in image_infos]
+        names = [image_info.name for image_info in image_infos]
         images.append(image_infos)
         anns = api.annotation.download_batch(dataset_info.id, images_ids)
         annotations.append(anns)
         skip_tags_filtering.append(True)
     metas.append(eval_res.dt_project_meta)
-    dataset_info = api.dataset.get_list(eval_res.dt_project_id)[0]
-    image_infos = eval_res.api.image.get_list(dataset_info.id, limit=5)
+    assert ds_name is not None, "Failed to get GT dataset name for gallery"
+
+    dataset_info = api.dataset.get_info_by_name(eval_res.dt_project_id, ds_name)
+
+    assert names is not None, "Failed to get GT image names for gallery"
+    image_infos = eval_res.api.image.get_list(
+        dataset_info.id,
+        filters=[
+            {ApiField.FIELD: ApiField.NAME, ApiField.OPERATOR: "in", ApiField.VALUE: names}
+        ],
+    )
     images_ids = [image_info.id for image_info in image_infos]
     images.append(image_infos)
     anns = eval_res.api.annotation.download_batch(dataset_info.id, images_ids)
@@ -78,8 +89,9 @@ class ExplorePredictions(BaseVisMetric):
 res["layoutTemplate"] = [None, None, None]
 
 res["layoutTemplate"] = [{"skipObjectTagsFiltering": True, "columnTitle": "Ground Truth"}]
-for i in range(len(self.eval_results)):
-    res["layoutTemplate"].append({"columnTitle": f"Model {i + 1}"})
+# for i in range(len(self.eval_results)):
+for idx, eval_res in enumerate(self.eval_results, 1):
+    res["layoutTemplate"].append({"columnTitle": f"[{idx}] {eval_res.model_name}"})
 
 click_data = res.setdefault("clickData", {})
 explore = click_data.setdefault("explore", {})
@@ -88,21 +100,32 @@ class ExplorePredictions(BaseVisMetric):
 images_ids = []
 api = self.eval_results[0].api
 min_conf = float("inf")
+names = None
+ds_names = None
 for idx, eval_res in enumerate(self.eval_results):
     if idx == 0:
-        dataset_infos = api.dataset.get_list(eval_res.gt_project_id)
+        dataset_infos = eval_res.gt_dataset_infos
+        ds_names = [ds.name for ds in dataset_infos]
         current_images_ids = []
+        current_images_names = []
         for ds in dataset_infos:
-            image_infos = eval_res.api.image.get_list(ds.id)
+            image_infos = eval_res.api.image.get_list(ds.id, force_metadata_for_links=False)
+            image_infos = sorted(image_infos, key=lambda x: x.name)
+            current_images_names.extend([image_info.name for image_info in image_infos])
             current_images_ids.extend([image_info.id for image_info in image_infos])
         images_ids.append(current_images_ids)
+        names = current_images_names
 
-    current_images_ids = []
     dataset_infos = api.dataset.get_list(eval_res.dt_project_id)
+    dataset_infos = [ds for ds in dataset_infos if ds.name in ds_names]
+    dataset_infos = sorted(dataset_infos, key=lambda x: ds_names.index(x.name))
+    current_images_infos = []
     for ds in dataset_infos:
-        image_infos = eval_res.api.image.get_list(ds.id)
-        current_images_ids.extend([image_info.id for image_info in image_infos])
-    images_ids.append(current_images_ids)
+        image_infos = eval_res.api.image.get_list(ds.id, force_metadata_for_links=False)
+        image_infos = [image_info for image_info in image_infos if image_info.name in names]
+        current_images_infos.extend(image_infos)
+    current_images_infos = sorted(current_images_infos, key=lambda x: names.index(x.name))
+    images_ids.append([image_info.id for image_info in current_images_infos])
 
     min_conf = min(min_conf, eval_res.f1_optimal_conf)
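The reworked ExplorePredictions gallery no longer assumes that prediction images come back in the same order as the ground truth; it resolves the prediction dataset by name and then fetches only the images whose names match the ground-truth sample. A standalone sketch of that name-based lookup, using only API calls that appear in the hunk above (the dataset ids are placeholders):

from supervisely.api.api import Api
from supervisely.api.module_api import ApiField

api = Api.from_env()

gt_dataset_id = 111  # placeholder ids, not taken from the diff
pred_dataset_id = 222

gt_images = api.image.get_list(gt_dataset_id, limit=5)
names = [info.name for info in gt_images]

# fetch only the prediction images whose names match the ground-truth sample
pred_images = api.image.get_list(
    pred_dataset_id,
    filters=[{ApiField.FIELD: ApiField.NAME, ApiField.OPERATOR: "in", ApiField.VALUE: names}],
)
# keep the prediction infos in the same order as the ground-truth names
pred_images = sorted(pred_images, key=lambda info: names.index(info.name))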
--- a/supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/localization_accuracy.py
+++ b/supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/localization_accuracy.py
@@ -90,7 +90,7 @@ class LocalizationAccuracyIoU(BaseVisMetric):
 bin_width = min([bin_edges[1] - bin_edges[0] for _, bin_edges in hist_data])
 
 for i, (eval_result, (hist, bin_edges)) in enumerate(zip(self.eval_results, hist_data)):
-    name = f"[{i+1}] {eval_result.name}"
+    name = f"[{i+1}] {eval_result.model_name}"
     kde = gaussian_kde(eval_result.mp.ious)
     density = kde(x_range)
 
--- a/supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/outcome_counts.py
+++ b/supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/outcome_counts.py
@@ -97,7 +97,7 @@ class OutcomeCounts(BaseVisMetric):
 tp_counts = [eval_result.mp.TP_count for eval_result in self.eval_results][::-1]
 fn_counts = [eval_result.mp.FN_count for eval_result in self.eval_results][::-1]
 fp_counts = [eval_result.mp.FP_count for eval_result in self.eval_results][::-1]
-model_names = [f"Model {idx}" for idx in range(1, len(self.eval_results) + 1)][::-1]
+model_names = [f"[{i}] {e.model_name}" for i, e in enumerate(self.eval_results, 1)][::-1]
 counts = [tp_counts, fn_counts, fp_counts]
 names = ["TP", "FN", "FP"]
 colors = ["#8ACAA1", "#dd3f3f", "#F7ADAA"]
@@ -123,7 +123,7 @@ class OutcomeCounts(BaseVisMetric):
 fig = go.Figure()
 
 colors = ["#8ACAA1", "#dd3f3f", "#F7ADAA"]
-model_names = [f"Model {idx}" for idx in range(1, len(self.eval_results) + 1)][::-1]
+model_names = [f"[{i}] {e.model_name}" for i, e in enumerate(self.eval_results, 1)][::-1]
 model_names.append("Common")
 
 diff_tps, common_tps = self.common_and_diff_tp
@@ -263,7 +263,7 @@ class OutcomeCounts(BaseVisMetric):
 res["layoutTemplate"] = [None, None, None]
 res["clickData"] = {}
 for i, eval_result in enumerate(self.eval_results, 1):
-    model_name = f"Model {i}"
+    model_name = f"[{i}] {eval_result.model_name}"
     for outcome, matches_data in eval_result.click_data.outcome_counts.items():
         key = f"{model_name}_{outcome}"
         outcome_dict = res["clickData"].setdefault(key, {})
@@ -327,7 +327,7 @@ class OutcomeCounts(BaseVisMetric):
 _update_outcome_dict("Common", outcome, outcome_dict, common_ids)
 
 for i, diff_ids in enumerate(diff_ids, 1):
-    name = f"Model {i}"
+    name = f"[{i}] {self.eval_results[i - 1].model_name}"
     key = f"{name}_{outcome}"
     outcome_dict = res["clickData"].setdefault(key, {})
 
--- a/supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/overview.py
+++ b/supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/overview.py
@@ -1,10 +1,10 @@
 from typing import List
 
 from supervisely._utils import abs_url
-from supervisely.nn.benchmark.visualization.evaluation_result import EvalResult
 from supervisely.nn.benchmark.comparison.detection_visualization.vis_metrics.vis_metric import (
     BaseVisMetric,
 )
+from supervisely.nn.benchmark.visualization.evaluation_result import EvalResult
 from supervisely.nn.benchmark.visualization.widgets import (
     ChartWidget,
     MarkdownWidget,
@@ -162,10 +162,11 @@ class Overview(BaseVisMetric):
 train_session, images_str = "", ""
 gt_project_id = eval_result.gt_project_info.id
 gt_dataset_ids = eval_result.gt_dataset_ids
-gt_images_ids = eval_result.gt_images_ids
+gt_images_cnt = eval_result.val_images_cnt
 train_info = eval_result.train_info
-if gt_images_ids is not None:
-    val_imgs_cnt = len(gt_images_ids)
+total_imgs_cnt = eval_result.gt_project_info.items_count
+if gt_images_cnt is not None:
+    val_imgs_cnt = gt_images_cnt
 elif gt_dataset_ids is not None:
     datasets = eval_result.gt_dataset_infos
     val_imgs_cnt = sum(ds.items_count for ds in datasets)
@@ -182,18 +183,18 @@ class Overview(BaseVisMetric):
     train_imgs_cnt = train_info.get("images_count")
     images_str = f", {train_imgs_cnt} images in train, {val_imgs_cnt} images in validation"
 
-if gt_images_ids is not None:
-    images_str += f". Evaluated using subset - {val_imgs_cnt} images"
+if gt_images_cnt is not None:
+    images_str += (
+        f", total {total_imgs_cnt} images. Evaluated using subset - {val_imgs_cnt} images"
+    )
 elif gt_dataset_ids is not None:
     links = [
         f'<a href="/projects/{gt_project_id}/datasets/{ds.id}" target="_blank">{ds.name}</a>'
         for ds in datasets
     ]
-    images_str += (
-        f". Evaluated on the dataset{'s' if len(links) > 1 else ''}: {', '.join(links)}"
-    )
+    images_str += f", total {total_imgs_cnt} images. Evaluated on the dataset{'s' if len(links) > 1 else ''}: {', '.join(links)}"
 else:
-    images_str += f". Evaluated on the whole project ({val_imgs_cnt} images)"
+    images_str += f", total {total_imgs_cnt} images. Evaluated on the whole project ({val_imgs_cnt} images)"
 
 return classes_str, images_str, train_session
 
@@ -203,7 +204,7 @@ class Overview(BaseVisMetric):
 # Overall Metrics
 fig = go.Figure()
 for i, eval_result in enumerate(self.eval_results):
-    name = f"[{i + 1}] {eval_result.name}"
+    name = f"[{i + 1}] {eval_result.model_name}"
     base_metrics = eval_result.mp.base_metrics()
     r = list(base_metrics.values())
     theta = [eval_result.mp.metric_names[k] for k in base_metrics.keys()]
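A recurring change across these comparison charts (calibration score, IoU distribution, outcome counts, overview, PR curve, precision/recall/F1, speed test) is that Plotly traces are now labeled "[index] model name" via eval_result.model_name instead of a generic "Model N" or metric name. An illustrative, self-contained sketch of that convention (the model names and values below are made up):

import plotly.graph_objects as go


class _Result:
    """Stand-in for an eval_result; only the two fields used below."""

    def __init__(self, model_name, color):
        self.model_name = model_name
        self.color = color


eval_results = [_Result("model-a", "#1f77b4"), _Result("model-b", "#ff7f0e")]

fig = go.Figure()
for i, eval_result in enumerate(eval_results):
    fig.add_trace(
        go.Scatter(
            x=[0.1, 0.5, 0.9],
            y=[0.2, 0.6, 0.7],
            mode="lines",
            name=f"[{i + 1}] {eval_result.model_name}",  # labeling convention used across the updated vis_metrics
            line=dict(color=eval_result.color),
        )
    )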
--- a/supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/pr_curve.py
+++ b/supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/pr_curve.py
@@ -98,7 +98,7 @@ class PrCurve(BaseVisMetric):
 pr_curve[pr_curve == -1] = np.nan
 pr_curve = np.nanmean(pr_curve, axis=-1)
 
-name = f"[{i}] {eval_result.name}"
+name = f"[{i}] {eval_result.model_name}"
 color = ",".join(map(str, hex2rgb(eval_result.color))) + ",0.1"
 line = go.Scatter(
     x=eval_result.mp.recThrs,
--- a/supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/precision_recal_f1.py
+++ b/supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/precision_recal_f1.py
@@ -136,7 +136,7 @@ class PrecisionRecallF1(BaseVisMetric):
 precision = eval_result.mp.json_metrics()["precision"]
 recall = eval_result.mp.json_metrics()["recall"]
 f1 = eval_result.mp.json_metrics()["f1"]
-model_name = f"[{i}] {eval_result.name}"
+model_name = f"[{i}] {eval_result.model_name}"
 fig.add_trace(
     go.Bar(
         x=["Precision", "Recall", "F1-score"],
@@ -163,7 +163,7 @@ class PrecisionRecallF1(BaseVisMetric):
 fig = go.Figure()
 classes_cnt = len(self.eval_results[0].mp.cat_names)
 for i, eval_result in enumerate(self.eval_results, 1):
-    model_name = f"[{i}] {eval_result.name}"
+    model_name = f"[{i}] {eval_result.model_name}"
     sorted_by_f1 = eval_result.mp.per_class_metrics().sort_values(by="f1")
 
     fig.add_trace(
@@ -191,7 +191,7 @@ class PrecisionRecallF1(BaseVisMetric):
 res["layoutTemplate"] = [None, None, None]
 res["clickData"] = {}
 for i, eval_result in enumerate(self.eval_results):
-    model_name = f"Model [{i + 1}] {eval_result.name}"
+    model_name = f"Model [{i + 1}] {eval_result.model_name}"
     for key, v in eval_result.click_data.objects_by_class.items():
         click_data = res["clickData"].setdefault(f"{i}_{key}", {})
         img_ids, obj_ids = set(), set()
@@ -220,7 +220,7 @@ class PrecisionRecallF1(BaseVisMetric):
 fig = go.Figure()
 classes_cnt = len(self.eval_results[0].mp.cat_names)
 for i, eval_result in enumerate(self.eval_results, 1):
-    model_name = f"[{i}] {eval_result.name}"
+    model_name = f"[{i}] {eval_result.model_name}"
     sorted_by_f1 = eval_result.mp.per_class_metrics().sort_values(by="f1")
 
     fig.add_trace(
@@ -249,7 +249,7 @@ class PrecisionRecallF1(BaseVisMetric):
 fig = go.Figure()
 classes_cnt = len(self.eval_results[0].mp.cat_names)
 for i, eval_result in enumerate(self.eval_results, 1):
-    model_name = f"[{i}] {eval_result.name}"
+    model_name = f"[{i}] {eval_result.model_name}"
     sorted_by_f1 = eval_result.mp.per_class_metrics().sort_values(by="f1")
 
     fig.add_trace(
@@ -278,7 +278,7 @@ class PrecisionRecallF1(BaseVisMetric):
 res["clickData"] = {}
 
 for i, eval_result in enumerate(self.eval_results):
-    model_name = f"Model [{i + 1}] {eval_result.name}"
+    model_name = f"Model [{i + 1}] {eval_result.model_name}"
     click_data = res["clickData"].setdefault(i, {})
     img_ids, obj_ids = set(), set()
     objects_cnt = 0
--- a/supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/speedtest.py
+++ b/supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/speedtest.py
@@ -248,7 +248,7 @@ class Speedtest(BaseVisMetric):
 
 fig = make_subplots(cols=2)
 
-for eval_result in self.eval_results:
+for idx, eval_result in enumerate(self.eval_results, 1):
     if eval_result.speedtest_info is None:
         continue
     temp_res = {}
@@ -272,7 +272,7 @@ class Speedtest(BaseVisMetric):
 go.Scatter(
     x=list(temp_res["ms"].keys()),
     y=list(temp_res["ms"].values()),
-    name="Infrence time (ms)",
+    name=f"[{idx}] {eval_result.model_name} (ms)",
     line=dict(color=eval_result.color),
     customdata=list(temp_res["ms_std"].values()),
     error_y=dict(
@@ -290,7 +290,7 @@ class Speedtest(BaseVisMetric):
 go.Scatter(
     x=list(temp_res["fps"].keys()),
     y=list(temp_res["fps"].values()),
-    name="FPS",
+    name=f"[{idx}] {eval_result.model_name} (fps)",
     line=dict(color=eval_result.color),
     hovertemplate="Batch Size: %{x}<br>FPS: %{y:.2f}<extra></extra>", # <br> Standard deviation: %{customdata:.2f}<extra></extra>",
 ),
--- a/supervisely/nn/benchmark/instance_segmentation_benchmark.py
+++ b/supervisely/nn/benchmark/instance_segmentation/benchmark.py
@@ -1,12 +1,18 @@
-from supervisely.nn.benchmark.base_benchmark import BaseBenchmark
 from supervisely.nn.benchmark.cv_tasks import CVTask
-from supervisely.nn.benchmark.evaluation import InstanceSegmentationEvaluator
+from supervisely.nn.benchmark.instance_segmentation.evaluator import (
+    InstanceSegmentationEvaluator,
+)
+from supervisely.nn.benchmark.instance_segmentation.visualizer import (
+    InstanceSegmentationVisualizer,
+)
+from supervisely.nn.benchmark.object_detection.benchmark import ObjectDetectionBenchmark
 from supervisely.nn.benchmark.utils import try_set_conf_auto
 
 CONF_THRES = 0.05
 
 
-class InstanceSegmentationBenchmark(BaseBenchmark):
+class InstanceSegmentationBenchmark(ObjectDetectionBenchmark):
+    visualizer_cls = InstanceSegmentationVisualizer
 
     @property
     def cv_task(self) -> str:
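InstanceSegmentationBenchmark now derives from ObjectDetectionBenchmark and selects its report implementation through the class-level visualizer_cls hook rather than a hard-coded visualizer. A hedged sketch of how that hook could be used to plug in a customized visualizer; CustomSegVisualizer and CustomSegBenchmark are hypothetical, and the sketch assumes InstanceSegmentationVisualizer forwards its constructor arguments to BaseVisualizer, where ann_opacity is defined:

from supervisely.nn.benchmark.instance_segmentation.benchmark import InstanceSegmentationBenchmark
from supervisely.nn.benchmark.instance_segmentation.visualizer import InstanceSegmentationVisualizer


class CustomSegVisualizer(InstanceSegmentationVisualizer):
    """Hypothetical tweak: render report galleries with a higher annotation opacity."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.ann_opacity = 0.6  # BaseVisualizer.__init__ sets this to 0.4 by default


class CustomSegBenchmark(InstanceSegmentationBenchmark):
    # assumption: the benchmark base class instantiates visualizer_cls when building the report
    visualizer_cls = CustomSegVisualizer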