supervisely 6.73.294-py3-none-any.whl → 6.73.295-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)
  1. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/avg_precision_by_class.py +1 -1
  2. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/calibration_score.py +10 -0
  3. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/explore_predictions.py +2 -2
  4. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/outcome_counts.py +1 -1
  5. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/overview.py +14 -8
  6. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/pr_curve.py +1 -1
  7. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/precision_recal_f1.py +2 -2
  8. supervisely/nn/benchmark/instance_segmentation/evaluation_params.yaml +6 -1
  9. supervisely/nn/benchmark/instance_segmentation/text_templates.py +4 -4
  10. supervisely/nn/benchmark/object_detection/base_vis_metric.py +1 -1
  11. supervisely/nn/benchmark/object_detection/evaluation_params.yaml +6 -1
  12. supervisely/nn/benchmark/object_detection/evaluator.py +1 -3
  13. supervisely/nn/benchmark/object_detection/metric_provider.py +59 -46
  14. supervisely/nn/benchmark/object_detection/text_templates.py +4 -4
  15. supervisely/nn/benchmark/object_detection/vis_metrics/confidence_distribution.py +20 -2
  16. supervisely/nn/benchmark/object_detection/vis_metrics/confidence_score.py +16 -0
  17. supervisely/nn/benchmark/object_detection/vis_metrics/explore_predictions.py +10 -5
  18. supervisely/nn/benchmark/object_detection/vis_metrics/key_metrics.py +1 -0
  19. supervisely/nn/benchmark/object_detection/vis_metrics/model_predictions.py +1 -1
  20. supervisely/nn/benchmark/object_detection/vis_metrics/outcome_counts.py +2 -57
  21. supervisely/nn/benchmark/object_detection/vis_metrics/outcome_counts_per_class.py +1 -1
  22. supervisely/nn/benchmark/object_detection/vis_metrics/overview.py +11 -3
  23. supervisely/nn/benchmark/object_detection/vis_metrics/pr_curve.py +1 -1
  24. supervisely/nn/benchmark/object_detection/vis_metrics/precision.py +18 -8
  25. supervisely/nn/benchmark/object_detection/vis_metrics/recall.py +13 -3
  26. supervisely/nn/benchmark/object_detection/visualizer.py +1 -1
  27. supervisely/nn/benchmark/utils/__init__.py +0 -1
  28. supervisely/nn/benchmark/utils/detection/__init__.py +1 -2
  29. supervisely/nn/benchmark/utils/detection/calculate_metrics.py +31 -37
  30. supervisely/nn/benchmark/visualization/evaluation_result.py +2 -4
  31. supervisely/nn/benchmark/visualization/vis_click_data.py +1 -3
  32. {supervisely-6.73.294.dist-info → supervisely-6.73.295.dist-info}/METADATA +1 -1
  33. {supervisely-6.73.294.dist-info → supervisely-6.73.295.dist-info}/RECORD +37 -38
  34. supervisely/nn/benchmark/utils/detection/metric_provider.py +0 -533
  35. {supervisely-6.73.294.dist-info → supervisely-6.73.295.dist-info}/LICENSE +0 -0
  36. {supervisely-6.73.294.dist-info → supervisely-6.73.295.dist-info}/WHEEL +0 -0
  37. {supervisely-6.73.294.dist-info → supervisely-6.73.295.dist-info}/entry_points.txt +0 -0
  38. {supervisely-6.73.294.dist-info → supervisely-6.73.295.dist-info}/top_level.txt +0 -0
@@ -114,7 +114,7 @@ class AveragePrecisionByClass(BaseVisMetrics):
  {
  "type": "tag",
  "tagId": "confidence",
- "value": [eval_result.mp.f1_optimal_conf, 1],
+ "value": [eval_result.mp.conf_threshold, 1],
  },
  {"type": "tag", "tagId": "outcome", "value": "TP"},
  {"type": "specific_objects", "tagId": None, "value": list(obj_ids)},
@@ -154,6 +154,16 @@ class CalibrationScore(BaseVisMetrics):
  line=dict(color="gray", width=2, dash="dash"),
  name=f"F1-optimal threshold ({eval_result.name})",
  )
+ if eval_result.mp.custom_conf_threshold is not None:
+ fig.add_shape(
+ type="line",
+ x0=eval_result.mp.custom_conf_threshold,
+ x1=eval_result.mp.custom_conf_threshold,
+ y0=0,
+ y1=eval_result.mp.custom_f1,
+ line=dict(color="black", width=2, dash="dash"),
+ name=f"Confidence threshold ({eval_result.name})",
+ )

  # Update the layout
  fig.update_layout(
@@ -76,7 +76,7 @@ class ExplorePredictions(BaseVisMetrics):
  anns = eval_res.api.annotation.download_batch(dataset_info.id, images_ids, force_metadata_for_links=False)
  annotations.append(anns)
  skip_tags_filtering.append(False)
- min_conf = min(min_conf, eval_res.mp.f1_optimal_conf)
+ min_conf = min(min_conf, eval_res.mp.conf_threshold)

  images = list(i for x in zip(*images) for i in x)
  annotations = list(i for x in zip(*annotations) for i in x)
@@ -127,7 +127,7 @@ class ExplorePredictions(BaseVisMetrics):
  current_images_infos = sorted(current_images_infos, key=lambda x: names.index(x.name))
  images_ids.append([image_info.id for image_info in current_images_infos])

- min_conf = min(min_conf, eval_res.mp.f1_optimal_conf)
+ min_conf = min(min_conf, eval_res.mp.conf_threshold)

  explore["imagesIds"] = list(i for x in zip(*images_ids) for i in x)
  explore["filters"] = [{"type": "tag", "tagId": "confidence", "value": [min_conf, 1]}]
@@ -276,7 +276,7 @@ class OutcomeCounts(BaseVisMetrics):
  title = f"{model_name}. {outcome}: {len(obj_ids)} object{'s' if len(obj_ids) > 1 else ''}"
  outcome_dict["title"] = title
  outcome_dict["imagesIds"] = list(img_ids)
- thr = eval_result.mp.f1_optimal_conf
+ thr = eval_result.mp.conf_threshold
  if outcome == "FN":
  outcome_dict["filters"] = [
  {"type": "specific_objects", "tagId": None, "value": list(obj_ids)},
@@ -27,6 +27,7 @@ class Overview(BaseVisMetrics):
  evaluation result metrics displayed
  """
  super().__init__(vis_texts, eval_results)
+ self.team_id = None # will be set in the visualizer

  @property
  def overview_md(self) -> List[MarkdownWidget]:
@@ -120,8 +121,7 @@ class Overview(BaseVisMetrics):
  if idx == 3 and not same_iou_thr:
  continue
  metric_name = metric_renames_map.get(metric, metric)
- values = [m[metric] for m in all_metrics]
- values = [v if v is not None else "―" for v in values]
+ values = [m.get(metric, "―") for m in all_metrics]
  values = [round(v, 2) if isinstance(v, float) else v for v in values]
  row = [metric_name] + values
  dct = {"row": row, "id": metric, "items": row}
@@ -247,12 +247,18 @@ class Overview(BaseVisMetrics):

  iou_thrs_map = defaultdict(set)
  matched = True
- for eval_result in self.eval_results:
- for cat_id, iou_thr in eval_result.mp.iou_threshold_per_class.items():
- iou_thrs_map[cat_id].add(iou_thr)
- if len(iou_thrs_map[cat_id]) > 1:
- matched = False
- break
+
+ if not all([not r.different_iou_thresholds_per_class for r in self.eval_results]):
+ matched = False
+ else:
+ for eval_result in self.eval_results:
+ iou_thrs_per_class = eval_result.mp.iou_threshold_per_class
+ if iou_thrs_per_class is not None:
+ for cat_id, iou_thr in eval_result.mp.iou_threshold_per_class.items():
+ iou_thrs_map[cat_id].add(iou_thr)
+ if len(iou_thrs_map[cat_id]) > 1:
+ matched = False
+ break

  if matched:
  return None
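
The rewritten check above reads as a double negative. For clarity, here is a standalone sketch of the same logic, assuming `different_iou_thresholds_per_class` is a boolean flag on each evaluation result and `mp.iou_threshold_per_class` maps category id to IoU threshold; the helper name `iou_thresholds_match` is hypothetical:

from collections import defaultdict

def iou_thresholds_match(eval_results) -> bool:
    # Equivalent to `not all([not r.different_iou_thresholds_per_class ...])`:
    # if any result already uses differing per-class IoU thresholds, there is no single match.
    if any(r.different_iou_thresholds_per_class for r in eval_results):
        return False
    iou_thrs_map = defaultdict(set)
    for eval_result in eval_results:
        iou_thrs_per_class = eval_result.mp.iou_threshold_per_class
        if iou_thrs_per_class is None:
            continue
        for cat_id, iou_thr in iou_thrs_per_class.items():
            iou_thrs_map[cat_id].add(iou_thr)
            if len(iou_thrs_map[cat_id]) > 1:
                return False  # two models disagree on this class's IoU threshold
    return True
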
@@ -19,7 +19,7 @@ class PrCurve(BaseVisMetrics):
  @property
  def markdown_widget(self) -> MarkdownWidget:
  text: str = getattr(self.vis_texts, self.MARKDOWN_PR_CURVE).format(
- self.vis_texts.definitions.f1_score
+ self.vis_texts.definitions.about_pr_tradeoffs
  )
  return MarkdownWidget(
  name=self.MARKDOWN_PR_CURVE, title="mAP & Precision-Recall Curve", text=text
@@ -205,7 +205,7 @@ class PrecisionRecallF1(BaseVisMetrics):
  {
  "type": "tag",
  "tagId": "confidence",
- "value": [eval_result.mp.f1_optimal_conf, 1],
+ "value": [eval_result.mp.conf_threshold, 1],
  },
  {"type": "tag", "tagId": "outcome", "value": "TP"},
  {"type": "specific_objects", "tagId": None, "value": list(obj_ids)},
@@ -293,7 +293,7 @@ class PrecisionRecallF1(BaseVisMetrics):
  {
  "type": "tag",
  "tagId": "confidence",
- "value": [eval_result.mp.f1_optimal_conf, 1],
+ "value": [eval_result.mp.conf_threshold, 1],
  },
  {"type": "tag", "tagId": "outcome", "value": "TP"},
  {"type": "specific_objects", "tagId": None, "value": list(obj_ids)},
@@ -1,2 +1,7 @@
- # Intersection over Union threshold that will be used for objects mathcing
+ # Intersection over Union threshold that will be used for object matching.
+ # It mostly affects visualizations, such as Outcome Counts, Confusion Matrix, and image previews.
  iou_threshold: 0.5
+
+ # Confidence threshold.
+ # Set 'auto' to calculate the optimal confidence threshold.
+ confidence_threshold: auto
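
For context, a hedged sketch of how the loaded config above might be interpreted on the Python side; the keys mirror the YAML shown in the hunk, the value 0.35 is purely illustrative, and PyYAML is assumed to be available:

import yaml

evaluation_params = yaml.safe_load("""
iou_threshold: 0.5
confidence_threshold: auto  # or a fixed float such as 0.35
""")

conf = evaluation_params["confidence_threshold"]
# 'auto' (or a missing key) keeps the F1-optimal threshold;
# any explicit float is treated as a custom confidence threshold.
use_custom_threshold = conf is not None and conf != "auto"
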
@@ -43,7 +43,7 @@ markdown_overview = """
  - **Ground Truth project**: <a href="/projects/{}/datasets" target="_blank">{}</a>, {}{}
  {}
  - **IoU threshold**: {}
- - **Optimal confidence threshold**: {} (calculated automatically), <a href="{}" target="_blank">learn more</a>.
+ {}
  - **Averaging across IoU thresholds:** {}, <a href="{}" target="_blank">learn more</a>.

  Learn more about Model Benchmark, implementation details, and how to use the charts in our <a href="{}" target="_blank">Technical Report</a>.
@@ -73,7 +73,7 @@ In this section you can visually assess the model performance through examples.

  > Click on the image to view the **Ground Truth**, **Prediction**, and **Difference** annotations side-by-side.

- > Filtering options allow you to adjust the confidence threshold (only for predictions) and the model's false outcomes (only for differences). Differences are calculated only for the optimal confidence threshold, allowing you to focus on the most accurate predictions made by the model.
+ > Filtering options allow you to adjust the confidence threshold (only for predictions) and the model's false outcomes (only for differences). {}
  """

  markdown_predictions_table = """### Prediction details for every image
@@ -100,7 +100,7 @@ To measure this, we calculate **Recall**. Recall counts errors, when the model d
  """

  notification_recall = {
- "title": "Recall = {}",
+ "title": "{}",
  "description": "The model correctly found <b>{} of {}</b> total instances in the dataset.",
  }

@@ -123,7 +123,7 @@ To measure this, we calculate **Precision**. Precision counts errors, when the m
  """

  notification_precision = {
- "title": "Precision = {}",
+ "title": "{}",
  "description": "The model correctly predicted <b>{} of {}</b> predictions made by the model in total.",
  }

@@ -42,7 +42,7 @@ class DetectionVisMetric(BaseVisMetric):
  {
  "type": "tag",
  "tagId": "confidence",
- "value": [self.eval_result.mp.f1_optimal_conf, 1],
+ "value": [self.eval_result.mp.conf_threshold, 1],
  },
  {"type": "tag", "tagId": "outcome", "value": "TP"},
  {"type": "specific_objects", "tagId": None, "value": list(obj_ids)},
@@ -1,2 +1,7 @@
- # Intersection over Union threshold that will be used for objects mathcing
+ # Intersection over Union threshold that will be used for object matching.
+ # It mostly affects visualizations, such as Outcome Counts, Confusion Matrix, and image previews.
  iou_threshold: 0.5
+
+ # Confidence threshold.
+ # Set 'auto' to calculate the optimal confidence threshold.
+ confidence_threshold: auto
@@ -55,9 +55,7 @@ class ObjectDetectionEvalResult(BaseEvalResult):
  self.coco_gt, self.coco_dt = read_coco_datasets(self.coco_gt, self.coco_dt)

  self.mp = MetricProvider(
- self.eval_data["matches"],
- self.eval_data["coco_metrics"],
- self.eval_data["params"],
+ self.eval_data,
  self.coco_gt,
  self.coco_dt,
  )
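
MetricProvider now takes the whole eval_data dict rather than its unpacked pieces. A rough sketch of the shape it appears to expect, inferred only from the keys read elsewhere in this diff; the values are placeholders, not real evaluation output:

# Hypothetical illustration of the eval_data layout consumed by MetricProvider.
eval_data = {
    "matches": [],  # match dicts with "type", "score", "iou", "category_id", ...
    "coco_metrics": {"mAP": 0.0, "precision": None, "AP50": None, "AP75": None},
    "params": {
        "iouThrs": [0.5, 0.55, 0.6],  # IoU thresholds used by the evaluator
        "recThrs": [],                # recall thresholds
        "iou_idx_per_class": {},      # {cat id: iou_idx}
        "evaluation_params": {"iou_threshold": 0.5, "confidence_threshold": "auto"},
    },
}
# Old call: MetricProvider(matches, coco_metrics, params, cocoGt, cocoDt)
# New call: MetricProvider(eval_data, coco_gt, coco_dt)
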
@@ -56,7 +56,7 @@ def filter_by_conf(matches: list, conf: float):


  class MetricProvider:
- def __init__(self, matches: list, coco_metrics: dict, params: dict, cocoGt, cocoDt):
+ def __init__(self, eval_data: dict, cocoGt, cocoDt):
  """
  Main class for calculating prediction metrics.

@@ -71,11 +71,16 @@ class MetricProvider:
  :param cocoDt: COCO object with predicted annotations
  :type cocoDt: COCO
  """
- self.matches = matches
- self.coco_metrics = coco_metrics
- self.params = params
+ self.eval_data = eval_data
+ self.matches = eval_data["matches"]
+ self.coco_metrics = eval_data["coco_metrics"]
+ self.params = eval_data["params"]
  self.cocoGt = cocoGt
  self.cocoDt = cocoDt
+ self.coco_mAP = self.coco_metrics["mAP"]
+ self.coco_precision = self.coco_metrics["precision"]
+ self.iouThrs = self.params["iouThrs"]
+ self.recThrs = self.params["recThrs"]

  self.metric_names = METRIC_NAMES

@@ -83,41 +88,31 @@ class MetricProvider:
  self.cat_ids = cocoGt.getCatIds()
  self.cat_names = [cocoGt.cats[cat_id]["name"] for cat_id in self.cat_ids]

- # eval_data
- self.matches = matches
- self.coco_mAP = coco_metrics["mAP"]
- self.coco_precision = coco_metrics["precision"]
- self.iouThrs = params["iouThrs"]
- self.recThrs = params["recThrs"]
-
  # Evaluation params
- eval_params = params.get("evaluation_params", {})
+ eval_params = self.params.get("evaluation_params", {})
  self.iou_threshold = eval_params.get("iou_threshold", 0.5)
  self.iou_threshold_idx = np.where(np.isclose(self.iouThrs, self.iou_threshold))[0][0]
  self.iou_threshold_per_class = eval_params.get("iou_threshold_per_class")
- self.iou_idx_per_class = params.get("iou_idx_per_class") # {cat id: iou_idx}
- if self.iou_threshold_per_class is not None:
- # TODO: temporary solution
- eval_params["average_across_iou_thresholds"] = False
+ self.iou_idx_per_class = self.params.get("iou_idx_per_class") # {cat id: iou_idx}
  self.average_across_iou_thresholds = eval_params.get("average_across_iou_thresholds", True)

  def calculate(self):
- self.m_full = _MetricProvider(
- self.matches, self.coco_metrics, self.params, self.cocoGt, self.cocoDt
- )
+ self.m_full = _MetricProvider(self.matches, self.eval_data, self.cocoGt, self.cocoDt)
  self.m_full._calculate_score_profile()

  # Find optimal confidence threshold
  self.f1_optimal_conf, self.best_f1 = self.m_full.get_f1_optimal_conf()
+ self.custom_conf_threshold, self.custom_f1 = self.m_full.get_custom_conf_threshold()
+
+ # Confidence threshold that will be used in visualizations
+ self.conf_threshold = self.custom_conf_threshold or self.f1_optimal_conf

  # Filter by optimal confidence threshold
- if self.f1_optimal_conf is not None:
- matches_filtered = filter_by_conf(self.matches, self.f1_optimal_conf)
+ if self.conf_threshold is not None:
+ matches_filtered = filter_by_conf(self.matches, self.conf_threshold)
  else:
  matches_filtered = self.matches
- self.m = _MetricProvider(
- matches_filtered, self.coco_metrics, self.params, self.cocoGt, self.cocoDt
- )
+ self.m = _MetricProvider(matches_filtered, self.eval_data, self.cocoGt, self.cocoDt)
  self.matches_filtered = matches_filtered
  self.m._init_counts()
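
A small standalone illustration of how the visualization threshold is now chosen in calculate(); note that the `or` fallback treats any falsy custom value, including 0.0, the same as "not set":

def select_conf_threshold(custom_conf_threshold, f1_optimal_conf):
    # Mirrors `self.conf_threshold = self.custom_conf_threshold or self.f1_optimal_conf`
    return custom_conf_threshold or f1_optimal_conf

assert select_conf_threshold(0.35, 0.52) == 0.35   # custom threshold wins
assert select_conf_threshold(None, 0.52) == 0.52   # fall back to F1-optimal
assert select_conf_threshold(0.0, 0.52) == 0.52    # 0.0 also falls back (falsy)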
 
@@ -155,7 +150,7 @@ class MetricProvider:
  ap_by_class = dict(zip(self.cat_names, ap_by_class))
  ap_custom_by_class = self.AP_custom_per_class().tolist()
  ap_custom_by_class = dict(zip(self.cat_names, ap_custom_by_class))
- return {
+ data = {
  "mAP": base["mAP"],
  "AP50": self.coco_metrics.get("AP50"),
  "AP75": self.coco_metrics.get("AP75"),
@@ -172,6 +167,9 @@ class MetricProvider:
  "AP_by_class": ap_by_class,
  f"AP{iou_name}_by_class": ap_custom_by_class,
  }
+ if self.custom_conf_threshold is not None:
+ data["custom_confidence_threshold"] = self.custom_conf_threshold
+ return data

  def key_metrics(self):
  iou_name = int(self.iou_threshold * 100)
@@ -187,7 +185,7 @@ class MetricProvider:
  iou_name = int(self.iou_threshold * 100)
  if self.iou_threshold_per_class is not None:
  iou_name = "_custom"
- return {
+ data = {
  "mAP": table["mAP"],
  "AP50": table["AP50"],
  "AP75": table["AP75"],
@@ -198,8 +196,11 @@ class MetricProvider:
  "Avg. IoU": table["iou"],
  "Classification Acc.": table["classification_accuracy"],
  "Calibration Score": table["calibration_score"],
- "optimal confidence threshold": table["f1_optimal_conf"],
+ "Optimal confidence threshold": table["f1_optimal_conf"],
  }
+ if self.custom_conf_threshold is not None:
+ data["Custom confidence threshold"] = table["custom_confidence_threshold"]
+ return data

  def AP_per_class(self):
  s = self.coco_precision[:, :, :, 0, 2].copy()
@@ -262,25 +263,27 @@ class MetricProvider:


  class _MetricProvider:
- def __init__(self, matches: list, coco_metrics: dict, params: dict, cocoGt, cocoDt):
+ def __init__(self, matches: list, eval_data: dict, cocoGt, cocoDt):
  """
  type cocoGt: COCO
  type cocoDt: COCO
  """

+ self.matches = matches
+ self.eval_data = eval_data
+ self.coco_metrics = eval_data["coco_metrics"]
+ self.params = eval_data["params"]
  self.cocoGt = cocoGt
+ self.cocoDt = cocoDt
+ self.coco_mAP = self.coco_metrics["mAP"]
+ self.coco_precision = self.coco_metrics["precision"]
+ self.iouThrs = self.params["iouThrs"]
+ self.recThrs = self.params["recThrs"]

  # metainfo
  self.cat_ids = cocoGt.getCatIds()
  self.cat_names = [cocoGt.cats[cat_id]["name"] for cat_id in self.cat_ids]

- # eval_data
- self.matches = matches
- self.coco_mAP = coco_metrics["mAP"]
- self.coco_precision = coco_metrics["precision"]
- self.iouThrs = params["iouThrs"]
- self.recThrs = params["recThrs"]
-

  # Matches
  self.tp_matches = [m for m in self.matches if m["type"] == "TP"]
@@ -290,13 +293,12 @@ class _MetricProvider:
  self.ious = np.array([m["iou"] for m in self.tp_matches])

  # Evaluation params
- self.params = params
  self.iou_idx_per_class = np.array(
- [params["iou_idx_per_class"][cat_id] for cat_id in self.cat_ids]
+ [self.params["iou_idx_per_class"][cat_id] for cat_id in self.cat_ids]
  )[:, None]
- eval_params = params.get("evaluation_params", {})
+ eval_params = self.params.get("evaluation_params", {})
  self.average_across_iou_thresholds = eval_params.get("average_across_iou_thresholds", True)
-
+
  def _init_counts(self):
  cat_ids = self.cat_ids
  iouThrs = self.iouThrs
@@ -307,9 +309,6 @@ class _MetricProvider:
  ious.append(match["iou"])
  cats.append(cat_id_to_idx[match["category_id"]])
  ious = np.array(ious) + np.spacing(1)
- if 0.8999999999999999 in iouThrs:
- iouThrs = iouThrs.copy()
- iouThrs[iouThrs == 0.8999999999999999] = 0.9
  iou_idxs = np.searchsorted(iouThrs, ious) - 1
  cats = np.array(cats)
  # TP
@@ -345,9 +344,16 @@ class _MetricProvider:
  self.FP_count = int(self._take_iou_thresholds(false_positives).sum())
  self.FN_count = int(self._take_iou_thresholds(false_negatives).sum())

+ # self.true_positives = self.eval_data["true_positives"]
+ # self.false_negatives = self.eval_data["false_negatives"]
+ # self.false_positives = self.eval_data["false_positives"]
+ # self.TP_count = int(self._take_iou_thresholds(self.true_positives).sum())
+ # self.FP_count = int(self._take_iou_thresholds(self.false_positives).sum())
+ # self.FN_count = int(self._take_iou_thresholds(self.false_negatives).sum())
+

  def _take_iou_thresholds(self, x):
  return np.take_along_axis(x, self.iou_idx_per_class, axis=1)
-
+
  def base_metrics(self):
  if self.average_across_iou_thresholds:
  tp = self.true_positives
@@ -495,9 +501,6 @@ class _MetricProvider:
  )
  scores = np.array([m["score"] for m in matches_sorted])
  ious = np.array([m["iou"] if m["type"] == "TP" else 0.0 for m in matches_sorted])
- if 0.8999999999999999 in iouThrs:
- iouThrs = iouThrs.copy()
- iouThrs[iouThrs == 0.8999999999999999] = 0.9
  iou_idxs = np.searchsorted(iouThrs, ious + np.spacing(1))

  # Check
@@ -565,6 +568,16 @@ class _MetricProvider:
  best_f1 = self.score_profile["f1"][argmax]
  return f1_optimal_conf, best_f1

+ def get_custom_conf_threshold(self):
+ if (~np.isnan(self.score_profile["f1"])).sum() == 0:
+ return None, None
+ conf_threshold = self.params.get("evaluation_params", {}).get("confidence_threshold")
+ if conf_threshold is not None and conf_threshold != "auto":
+ idx = np.argmin(np.abs(self.score_profile["scores"] - conf_threshold))
+ custom_f1 = self.score_profile["f1"][idx]
+ return conf_threshold, custom_f1
+ return None, None
+
  def calibration_curve(self):
  from sklearn.calibration import ( # pylint: disable=import-error
  calibration_curve,
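
get_custom_conf_threshold snaps the user-provided threshold to the nearest entry of the score profile to look up its F1 value. A self-contained numpy sketch of that lookup with made-up profile data:

import numpy as np

# Illustrative score profile; in the library it comes from _calculate_score_profile().
score_profile = {
    "scores": np.array([0.1, 0.2, 0.3, 0.4, 0.5]),
    "f1": np.array([0.40, 0.55, 0.62, 0.58, 0.50]),
}
conf_threshold = 0.33  # from evaluation_params["confidence_threshold"]

idx = np.argmin(np.abs(score_profile["scores"] - conf_threshold))
custom_f1 = score_profile["f1"][idx]
print(idx, custom_f1)  # 2 0.62 -> nearest profiled score is 0.3
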
@@ -43,7 +43,7 @@ markdown_overview = """
  - **Ground Truth project**: <a href="/projects/{}/datasets" target="_blank">{}</a>, {}{}
  {}
  - **IoU threshold**: {}
- - **Optimal confidence threshold**: {} (calculated automatically), <a href="{}" target="_blank">learn more</a>.
+ {}
  - **Averaging across IoU thresholds:** {}, <a href="{}" target="_blank">learn more</a>.

  Learn more about Model Benchmark, implementation details, and how to use the charts in our <a href="{}" target="_blank">Technical Report</a>.
@@ -78,7 +78,7 @@ In this section you can visually assess the model performance through examples.

  > Click on the image to view the **Ground Truth**, **Prediction**, and **Difference** annotations side-by-side.

- > Filtering options allow you to adjust the confidence threshold (only for predictions) and the model's false outcomes (only for differences). Differences are calculated only for the optimal confidence threshold, allowing you to focus on the most accurate predictions made by the model.
+ > Filtering options allow you to adjust the confidence threshold (only for predictions) and the model's false outcomes (only for differences). {}
  """

  markdown_predictions_gallery = """
@@ -125,7 +125,7 @@ To measure this, we calculate **Recall**. Recall counts errors, when the model d
  """

  notification_recall = {
- "title": "Recall = {}",
+ "title": "{}",
  "description": "The model correctly found <b>{} of {}</b> total instances in the dataset.",
  }

@@ -148,7 +148,7 @@ To measure this, we calculate **Precision**. Precision counts errors, when the m
  """

  notification_precision = {
- "title": "Precision = {}",
+ "title": "{}",
  "description": "The model correctly predicted <b>{} of {}</b> predictions made by the model in total.",
  }

@@ -28,7 +28,8 @@ class ConfidenceDistribution(DetectionVisMetric):
  def _get_figure(self): # -> go.Figure:
  import plotly.graph_objects as go # pylint: disable=import-error

- f1_optimal_conf, best_f1 = self.eval_result.mp.m_full.get_f1_optimal_conf()
+ f1_optimal_conf = self.eval_result.mp.f1_optimal_conf
+ custom_conf_threshold = self.eval_result.mp.custom_conf_threshold

  # Histogram of confidence scores (TP vs FP)
  scores_tp, scores_fp = self.eval_result.mp.m_full.scores_tp_and_fp()
@@ -88,7 +89,7 @@ class ConfidenceDistribution(DetectionVisMetric):
  x1=f1_optimal_conf,
  y0=0,
  y1=tp_y.max() * 1.3,
- line=dict(color="orange", width=1, dash="dash"),
+ line=dict(color="orange", width=2, dash="dash"),
  )
  fig.add_annotation(
  x=f1_optimal_conf,
@@ -104,4 +105,21 @@ class ConfidenceDistribution(DetectionVisMetric):
  )
  fig.update_xaxes(title_text="Confidence Score", range=[0, 1])
  fig.update_yaxes(title_text="Count", range=[0, tp_y.max() * 1.3])
+
+ if custom_conf_threshold is not None:
+ # Custom threshold
+ fig.add_shape(
+ type="line",
+ x0=custom_conf_threshold,
+ x1=custom_conf_threshold,
+ y0=0,
+ y1=tp_y.max() * 1.3,
+ line=dict(color="orange", width=2, dash="dash"),
+ )
+ fig.add_annotation(
+ x=custom_conf_threshold,
+ y=tp_y.max() * 1.3,
+ text=f"Confidence threshold: {custom_conf_threshold:.2f}",
+ showarrow=False,
+ )
  return fig
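
The threshold markers above are plain plotly shapes and annotations. A minimal, self-contained sketch of the same pattern on a dummy histogram (the data and threshold values are made up, not taken from the library):

import numpy as np
import plotly.graph_objects as go

scores = np.random.rand(500)  # dummy confidence scores
fig = go.Figure(go.Histogram(x=scores, nbinsx=40, name="predictions"))

thresholds = [(0.52, "F1-optimal threshold", "orange"), (0.35, "Confidence threshold", "black")]
for thr, label, color in thresholds:
    fig.add_shape(type="line", x0=thr, x1=thr, y0=0, y1=1, yref="paper",
                  line=dict(color=color, width=2, dash="dash"))
    fig.add_annotation(x=thr, y=1, yref="paper", text=f"{label}: {thr:.2f}", showarrow=False)

fig.update_xaxes(title_text="Confidence Score", range=[0, 1])
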
@@ -101,6 +101,22 @@ class ConfidenceScore(DetectionVisMetric):
  text=f"F1-optimal threshold: {self.eval_result.mp.f1_optimal_conf:.2f}",
  showarrow=False,
  )
+ if self.eval_result.mp.custom_conf_threshold is not None:
+ # Add vertical line for the custom threshold
+ fig.add_shape(
+ type="line",
+ x0=self.eval_result.mp.custom_conf_threshold,
+ x1=self.eval_result.mp.custom_conf_threshold,
+ y0=0,
+ y1=self.eval_result.mp.custom_f1,
+ line=dict(color="black", width=2, dash="dash"),
+ )
+ fig.add_annotation(
+ x=self.eval_result.mp.custom_conf_threshold,
+ y=self.eval_result.mp.custom_f1 + 0.04,
+ text=f"Confidence threshold: {self.eval_result.mp.custom_conf_threshold:.2f}",
+ showarrow=False,
+ )
  fig.update_layout(
  dragmode=False,
  modebar=dict(
@@ -14,12 +14,17 @@ class ExplorePredictions(DetectionVisMetric):

  @property
  def md(self) -> MarkdownWidget:
- text = self.vis_texts.markdown_explorer
+ conf_threshold_info = "Differences are calculated only for the optimal confidence threshold, allowing you to focus on the most accurate predictions made by the model."
+ if self.eval_result.mp.custom_conf_threshold is not None:
+ conf_threshold_info = (
+ "Differences are calculated for the custom confidence threshold (set manually)."
+ )
+ text = self.vis_texts.markdown_explorer.format(conf_threshold_info)
+
  return MarkdownWidget(self.MARKDOWN, "Explore Predictions", text)

  def gallery(self, opacity) -> GalleryWidget:
- optimal_conf = self.eval_result.mp.f1_optimal_conf
- default_filters = [{"confidence": [optimal_conf, 1]}]
+ default_filters = [{"confidence": [self.eval_result.mp.conf_threshold, 1]}]
  gallery = GalleryWidget(
  self.GALLERY, columns_number=3, filters=default_filters, opacity=opacity
  )
@@ -62,7 +67,7 @@ class ExplorePredictions(DetectionVisMetric):
  {
  "type": "tag",
  "tagId": "confidence",
- "value": [self.eval_result.mp.f1_optimal_conf, 1],
+ "value": [self.eval_result.mp.conf_threshold, 1],
  }
  ]
  explore["title"] = "Explore all predictions"
@@ -89,7 +94,7 @@ class ExplorePredictions(DetectionVisMetric):
  {
  "type": "tag",
  "tagId": "confidence",
- "value": [self.eval_result.mp.f1_optimal_conf, 1],
+ "value": [self.eval_result.mp.conf_threshold, 1],
  },
  ]
  for pairs_data in self.eval_result.matched_pair_data.values():
@@ -56,6 +56,7 @@ class KeyMetrics(DetectionVisMetric):
  width="60%",
  show_header_controls=False,
  main_column=columns[0],
+ page_size=15,
  )
  return table

@@ -99,7 +99,7 @@ class ModelPredictions(DetectionVisMetric):
  {
  "type": "tag",
  "tagId": "confidence",
- "value": [self.eval_result.mp.f1_optimal_conf, 1],
+ "value": [self.eval_result.mp.conf_threshold, 1],
  },
  # {"type": "tag", "tagId": "outcome", "value": "FP"},
  ]
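
Across these widgets the confidence filter now references mp.conf_threshold instead of the F1-optimal value. A sketch of the resulting filter payload, with a placeholder threshold:

conf_threshold = 0.35  # placeholder for eval_result.mp.conf_threshold
filters = [
    {"type": "tag", "tagId": "confidence", "value": [conf_threshold, 1]},
    # {"type": "tag", "tagId": "outcome", "value": "FP"},  # optional outcome filter, as in the diff
]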