supervisely 6.73.238-py3-none-any.whl → 6.73.240-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. supervisely/annotation/annotation.py +2 -2
  2. supervisely/api/entity_annotation/tag_api.py +11 -4
  3. supervisely/api/file_api.py +17 -3
  4. supervisely/nn/__init__.py +1 -0
  5. supervisely/nn/benchmark/__init__.py +14 -2
  6. supervisely/nn/benchmark/base_benchmark.py +84 -37
  7. supervisely/nn/benchmark/base_evaluator.py +120 -0
  8. supervisely/nn/benchmark/base_visualizer.py +265 -0
  9. supervisely/nn/benchmark/comparison/detection_visualization/text_templates.py +5 -5
  10. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/calibration_score.py +2 -2
  11. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/explore_predicttions.py +39 -16
  12. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/localization_accuracy.py +1 -1
  13. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/outcome_counts.py +4 -4
  14. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/overview.py +12 -11
  15. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/pr_curve.py +1 -1
  16. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/precision_recal_f1.py +6 -6
  17. supervisely/nn/benchmark/comparison/detection_visualization/vis_metrics/speedtest.py +3 -3
  18. supervisely/nn/benchmark/{instance_segmentation_benchmark.py → instance_segmentation/benchmark.py} +9 -3
  19. supervisely/nn/benchmark/instance_segmentation/evaluator.py +58 -0
  20. supervisely/nn/benchmark/{visualization/text_templates/instance_segmentation_text.py → instance_segmentation/text_templates.py} +53 -69
  21. supervisely/nn/benchmark/instance_segmentation/visualizer.py +18 -0
  22. supervisely/nn/benchmark/object_detection/__init__.py +0 -0
  23. supervisely/nn/benchmark/object_detection/base_vis_metric.py +51 -0
  24. supervisely/nn/benchmark/{object_detection_benchmark.py → object_detection/benchmark.py} +4 -2
  25. supervisely/nn/benchmark/object_detection/evaluation_params.yaml +2 -0
  26. supervisely/nn/benchmark/{evaluation/object_detection_evaluator.py → object_detection/evaluator.py} +67 -9
  27. supervisely/nn/benchmark/{evaluation/coco → object_detection}/metric_provider.py +13 -14
  28. supervisely/nn/benchmark/{visualization/text_templates/object_detection_text.py → object_detection/text_templates.py} +49 -41
  29. supervisely/nn/benchmark/object_detection/vis_metrics/__init__.py +48 -0
  30. supervisely/nn/benchmark/{visualization → object_detection}/vis_metrics/confidence_distribution.py +20 -24
  31. supervisely/nn/benchmark/object_detection/vis_metrics/confidence_score.py +119 -0
  32. supervisely/nn/benchmark/{visualization → object_detection}/vis_metrics/confusion_matrix.py +34 -22
  33. supervisely/nn/benchmark/object_detection/vis_metrics/explore_predictions.py +129 -0
  34. supervisely/nn/benchmark/{visualization → object_detection}/vis_metrics/f1_score_at_different_iou.py +21 -26
  35. supervisely/nn/benchmark/object_detection/vis_metrics/frequently_confused.py +137 -0
  36. supervisely/nn/benchmark/object_detection/vis_metrics/iou_distribution.py +106 -0
  37. supervisely/nn/benchmark/object_detection/vis_metrics/key_metrics.py +136 -0
  38. supervisely/nn/benchmark/{visualization → object_detection}/vis_metrics/model_predictions.py +53 -49
  39. supervisely/nn/benchmark/object_detection/vis_metrics/outcome_counts.py +188 -0
  40. supervisely/nn/benchmark/object_detection/vis_metrics/outcome_counts_per_class.py +191 -0
  41. supervisely/nn/benchmark/object_detection/vis_metrics/overview.py +116 -0
  42. supervisely/nn/benchmark/object_detection/vis_metrics/pr_curve.py +106 -0
  43. supervisely/nn/benchmark/object_detection/vis_metrics/pr_curve_by_class.py +49 -0
  44. supervisely/nn/benchmark/object_detection/vis_metrics/precision.py +72 -0
  45. supervisely/nn/benchmark/object_detection/vis_metrics/precision_avg_per_class.py +59 -0
  46. supervisely/nn/benchmark/object_detection/vis_metrics/recall.py +71 -0
  47. supervisely/nn/benchmark/object_detection/vis_metrics/recall_vs_precision.py +56 -0
  48. supervisely/nn/benchmark/object_detection/vis_metrics/reliability_diagram.py +110 -0
  49. supervisely/nn/benchmark/object_detection/vis_metrics/speedtest.py +151 -0
  50. supervisely/nn/benchmark/object_detection/visualizer.py +697 -0
  51. supervisely/nn/benchmark/semantic_segmentation/__init__.py +9 -0
  52. supervisely/nn/benchmark/semantic_segmentation/base_vis_metric.py +55 -0
  53. supervisely/nn/benchmark/semantic_segmentation/benchmark.py +32 -0
  54. supervisely/nn/benchmark/semantic_segmentation/evaluation_params.yaml +0 -0
  55. supervisely/nn/benchmark/semantic_segmentation/evaluator.py +162 -0
  56. supervisely/nn/benchmark/semantic_segmentation/metric_provider.py +153 -0
  57. supervisely/nn/benchmark/semantic_segmentation/text_templates.py +130 -0
  58. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/__init__.py +0 -0
  59. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/acknowledgement.py +15 -0
  60. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/classwise_error_analysis.py +57 -0
  61. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/confusion_matrix.py +92 -0
  62. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/explore_predictions.py +84 -0
  63. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/frequently_confused.py +101 -0
  64. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/iou_eou.py +45 -0
  65. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/key_metrics.py +60 -0
  66. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/model_predictions.py +107 -0
  67. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/overview.py +112 -0
  68. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/renormalized_error_ou.py +48 -0
  69. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/speedtest.py +178 -0
  70. supervisely/nn/benchmark/semantic_segmentation/vis_metrics/vis_texts.py +21 -0
  71. supervisely/nn/benchmark/semantic_segmentation/visualizer.py +304 -0
  72. supervisely/nn/benchmark/utils/__init__.py +12 -0
  73. supervisely/nn/benchmark/utils/detection/__init__.py +2 -0
  74. supervisely/nn/benchmark/{evaluation/coco → utils/detection}/calculate_metrics.py +6 -4
  75. supervisely/nn/benchmark/utils/detection/metric_provider.py +533 -0
  76. supervisely/nn/benchmark/{coco_utils → utils/detection}/sly2coco.py +4 -4
  77. supervisely/nn/benchmark/{coco_utils/utils.py → utils/detection/utlis.py} +11 -0
  78. supervisely/nn/benchmark/utils/semantic_segmentation/__init__.py +0 -0
  79. supervisely/nn/benchmark/utils/semantic_segmentation/calculate_metrics.py +35 -0
  80. supervisely/nn/benchmark/utils/semantic_segmentation/evaluator.py +804 -0
  81. supervisely/nn/benchmark/utils/semantic_segmentation/loader.py +65 -0
  82. supervisely/nn/benchmark/utils/semantic_segmentation/utils.py +109 -0
  83. supervisely/nn/benchmark/visualization/evaluation_result.py +17 -3
  84. supervisely/nn/benchmark/visualization/vis_click_data.py +1 -1
  85. supervisely/nn/benchmark/visualization/widgets/__init__.py +3 -0
  86. supervisely/nn/benchmark/visualization/widgets/chart/chart.py +12 -4
  87. supervisely/nn/benchmark/visualization/widgets/gallery/gallery.py +35 -8
  88. supervisely/nn/benchmark/visualization/widgets/gallery/template.html +8 -4
  89. supervisely/nn/benchmark/visualization/widgets/markdown/markdown.py +1 -1
  90. supervisely/nn/benchmark/visualization/widgets/notification/notification.py +11 -7
  91. supervisely/nn/benchmark/visualization/widgets/radio_group/__init__.py +0 -0
  92. supervisely/nn/benchmark/visualization/widgets/radio_group/radio_group.py +34 -0
  93. supervisely/nn/benchmark/visualization/widgets/table/table.py +9 -3
  94. supervisely/nn/benchmark/visualization/widgets/widget.py +4 -0
  95. supervisely/project/project.py +18 -6
  96. {supervisely-6.73.238.dist-info → supervisely-6.73.240.dist-info}/METADATA +3 -1
  97. {supervisely-6.73.238.dist-info → supervisely-6.73.240.dist-info}/RECORD +104 -82
  98. supervisely/nn/benchmark/coco_utils/__init__.py +0 -2
  99. supervisely/nn/benchmark/evaluation/__init__.py +0 -3
  100. supervisely/nn/benchmark/evaluation/base_evaluator.py +0 -64
  101. supervisely/nn/benchmark/evaluation/coco/__init__.py +0 -2
  102. supervisely/nn/benchmark/evaluation/instance_segmentation_evaluator.py +0 -88
  103. supervisely/nn/benchmark/utils.py +0 -13
  104. supervisely/nn/benchmark/visualization/inference_speed/__init__.py +0 -19
  105. supervisely/nn/benchmark/visualization/inference_speed/speedtest_batch.py +0 -161
  106. supervisely/nn/benchmark/visualization/inference_speed/speedtest_intro.py +0 -28
  107. supervisely/nn/benchmark/visualization/inference_speed/speedtest_overview.py +0 -141
  108. supervisely/nn/benchmark/visualization/inference_speed/speedtest_real_time.py +0 -63
  109. supervisely/nn/benchmark/visualization/text_templates/inference_speed_text.py +0 -23
  110. supervisely/nn/benchmark/visualization/vis_metric_base.py +0 -337
  111. supervisely/nn/benchmark/visualization/vis_metrics/__init__.py +0 -67
  112. supervisely/nn/benchmark/visualization/vis_metrics/classwise_error_analysis.py +0 -55
  113. supervisely/nn/benchmark/visualization/vis_metrics/confidence_score.py +0 -93
  114. supervisely/nn/benchmark/visualization/vis_metrics/explorer_grid.py +0 -144
  115. supervisely/nn/benchmark/visualization/vis_metrics/frequently_confused.py +0 -115
  116. supervisely/nn/benchmark/visualization/vis_metrics/iou_distribution.py +0 -86
  117. supervisely/nn/benchmark/visualization/vis_metrics/outcome_counts.py +0 -119
  118. supervisely/nn/benchmark/visualization/vis_metrics/outcome_counts_per_class.py +0 -148
  119. supervisely/nn/benchmark/visualization/vis_metrics/overall_error_analysis.py +0 -109
  120. supervisely/nn/benchmark/visualization/vis_metrics/overview.py +0 -189
  121. supervisely/nn/benchmark/visualization/vis_metrics/percision_avg_per_class.py +0 -57
  122. supervisely/nn/benchmark/visualization/vis_metrics/pr_curve.py +0 -101
  123. supervisely/nn/benchmark/visualization/vis_metrics/pr_curve_by_class.py +0 -46
  124. supervisely/nn/benchmark/visualization/vis_metrics/precision.py +0 -56
  125. supervisely/nn/benchmark/visualization/vis_metrics/recall.py +0 -54
  126. supervisely/nn/benchmark/visualization/vis_metrics/recall_vs_precision.py +0 -57
  127. supervisely/nn/benchmark/visualization/vis_metrics/reliability_diagram.py +0 -88
  128. supervisely/nn/benchmark/visualization/vis_metrics/what_is.py +0 -23
  129. supervisely/nn/benchmark/visualization/vis_templates.py +0 -241
  130. supervisely/nn/benchmark/visualization/vis_widgets.py +0 -128
  131. supervisely/nn/benchmark/visualization/visualizer.py +0 -729
  132. /supervisely/nn/benchmark/{visualization/text_templates → instance_segmentation}/__init__.py +0 -0
  133. /supervisely/nn/benchmark/{evaluation/coco → instance_segmentation}/evaluation_params.yaml +0 -0
  134. /supervisely/nn/benchmark/{evaluation/coco → utils/detection}/metrics.py +0 -0
  135. {supervisely-6.73.238.dist-info → supervisely-6.73.240.dist-info}/LICENSE +0 -0
  136. {supervisely-6.73.238.dist-info → supervisely-6.73.240.dist-info}/WHEEL +0 -0
  137. {supervisely-6.73.238.dist-info → supervisely-6.73.240.dist-info}/entry_points.txt +0 -0
  138. {supervisely-6.73.238.dist-info → supervisely-6.73.240.dist-info}/top_level.txt +0 -0
supervisely/nn/benchmark/{visualization/text_templates/object_detection_text.py → object_detection/text_templates.py}
@@ -13,6 +13,10 @@ definitions = SimpleNamespace(
  iou_threshold="The IoU threshold is a predefined value (set to 0.5 in many benchmarks) that determines the minimum acceptable IoU score for a predicted bounding box to be considered a correct prediction. When the IoU of a predicted box and actual box is higher than this IoU threshold, the prediction is considered correct. Some metrics will evaluate the model with different IoU thresholds to provide more insights about the model's performance.",
  )

+ docs_url = (
+ "https://docs.supervisely.com/neural-networks/model-evaluation-benchmark/object-detection"
+ )
+
  # <i class="zmdi zmdi-check-circle" style="color: #13ce66; margin-right: 5px"></i>
  clickable_label = """
  > <span style="color: #5a6772">
@@ -35,9 +39,11 @@ markdown_overview = """
  - **Architecture**: {}
  - **Task type**: {}
  - **Runtime**: {}
- - **Checkpoint file**: <a href="{}" target="_blank">{}</a>
+ - **Checkpoint file**: <a class="checkpoint-url" href="{}" target="_blank">{}</a>
  - **Ground Truth project**: <a href="/projects/{}/datasets" target="_blank">{}</a>, {}{}
  {}
+ - **IoU threshold**: {}
+ - **Optimal confidence threshold**: {} (calculated automatically), <a href="{}" target="_blank">learn more</a>.

  Learn more about Model Benchmark, implementation details, and how to use the charts in our <a href="{}" target="_blank">Technical Report</a>.
  """
@@ -97,18 +103,15 @@ markdown_experts = """
  markdown_how_to_use = """
  """

- markdown_outcome_counts = (
- """## Outcome Counts
+ markdown_outcome_counts = """## Outcome Counts

  This chart is used to evaluate the overall model performance by breaking down all predictions into <abbr title="{}">True Positives</abbr> (TP), <abbr title="{}">False Positives</abbr> (FP), and <abbr title="{}">False Negatives</abbr> (FN). This helps to visually assess the type of errors the model often encounters.

  """
- + clickable_label
- )

  markdown_R = """## Recall

- This section measures the ability of the model to detect **all relevant instances in the dataset**. In other words, it answers the question: Of all instances in the dataset, how many of them is the model managed to find out?”
+ This section measures the ability of the model to detect **all relevant instances in the dataset**. In other words, it answers the question: "Of all instances in the dataset, how many of them is the model managed to find out?"

  To measure this, we calculate **Recall**. Recall counts errors, when the model does not detect an object that actually is present in a dataset and should be detected. Recall is calculated as the portion of correct predictions (true positives) over all instances in the dataset (true positives + false negatives).
  """
@@ -118,8 +121,7 @@ notification_recall = {
  "description": "The model correctly found <b>{} of {}</b> total instances in the dataset.",
  }

- markdown_R_perclass = (
- """### Per-class Recall
+ markdown_R_perclass = """### Per-class Recall

  This chart further analyzes Recall, breaking it down to each class in separate.

@@ -128,15 +130,13 @@ Since the overall recall is calculated as an average across all classes, we prov
  _Bars in the chart are sorted by <abbr title="{}">F1-score</abbr> to keep a unified order of classes between different charts._

  """
- + clickable_label
- )


  markdown_P = """## Precision

- This section measures the accuracy of all predictions made by the model. In other words, it answers the question: Of all predictions made by the model, how many of them are actually correct?”.
+ This section measures the accuracy of all predictions made by the model. In other words, it answers the question: "Of all predictions made by the model, how many of them are actually correct?".

- To measure this, we calculate **Precision**. Precision counts errors, when the model predicts an object (bounding box), but the image has no objects of the predicted class in this place. Precision is calculated as a portion of correct predictions (true positives) over all models predictions (true positives + false positives).
+ To measure this, we calculate **Precision**. Precision counts errors, when the model predicts an object (bounding box), but the image has no objects of the predicted class in this place. Precision is calculated as a portion of correct predictions (true positives) over all model's predictions (true positives + false positives).
  """

  notification_precision = {
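For reference, the Recall and Precision described in these templates reduce to simple ratios over the TP/FP/FN outcome counts introduced above; a minimal sketch, not taken from the package:

```python
def recall(tp: int, fn: int) -> float:
    # Share of ground-truth instances the model managed to find.
    return tp / (tp + fn) if (tp + fn) else 0.0

def precision(tp: int, fp: int) -> float:
    # Share of the model's predictions that are actually correct.
    return tp / (tp + fp) if (tp + fp) else 0.0

# Example outcome counts: 80 correct boxes, 20 spurious boxes, 10 missed objects.
tp, fp, fn = 80, 20, 10
print(f"recall={recall(tp, fn):.3f}, precision={precision(tp, fp):.3f}")
# recall=0.889, precision=0.800
```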
@@ -144,8 +144,7 @@ notification_precision = {
  "description": "The model correctly predicted <b>{} of {}</b> predictions made by the model in total.",
  }

- markdown_P_perclass = (
- """### Per-class Precision
+ markdown_P_perclass = """### Per-class Precision

  This chart further analyzes Precision, breaking it down to each class in separate.

@@ -154,20 +153,15 @@ Since the overall precision is computed as an average across all classes, we pro
  _Bars in the chart are sorted by <abbr title="{}">F1-score</abbr> to keep a unified order of classes between different charts._

  """
- + clickable_label
- )


- markdown_PR = (
- """## Recall vs. Precision
+ markdown_PR = """## Recall vs. Precision

  This section compares Precision and Recall in one graph, identifying **imbalance** between these two.

  _Bars in the chart are sorted by <abbr title="{}">F1-score</abbr> to keep a unified order of classes between different charts._

  """
- + clickable_label
- )


  markdown_pr_curve = """## Precision-Recall Curve
@@ -199,39 +193,34 @@ notification_ap = {
  "description": "",
  }

- markdown_pr_by_class = (
- """### Precision-Recall Curve by Class
+ markdown_pr_by_class = """### Precision-Recall Curve by Class

  In this plot, you can evaluate PR curve for each class individually.

  """
- + clickable_label
- )

- markdown_confusion_matrix = (
- """## Confusion Matrix
+ markdown_confusion_matrix = """## Confusion Matrix

  Confusion matrix helps to find the number of confusions between different classes made by the model.
  Each row of the matrix represents the instances in a ground truth class, while each column represents the instances in a predicted class.
  The diagonal elements represent the number of correct predictions for each class (True Positives), and the off-diagonal elements show misclassifications.

  """
- + clickable_label
- )

+ markdown_frequently_confused_empty = """### Frequently Confused Classes
+
+ No frequently confused class pairs found
+ """

- markdown_frequently_confused = (
- """### Frequently Confused Classes
+ markdown_frequently_confused = """### Frequently Confused Classes

  This chart displays the most frequently confused pairs of classes. In general, it finds out which classes visually seem very similar to the model.

- The chart calculates the **probability of confusion** between different pairs of classes. For instance, if the probability of confusion for the pair {} - {} is {}, this means that when the model predicts either {} or {}”, there is a {}% chance that the model might mistakenly predict one instead of the other.
+ The chart calculates the **probability of confusion** between different pairs of classes. For instance, if the probability of confusion for the pair "{} - {}" is {}, this means that when the model predicts either "{}" or "{}", there is a {}% chance that the model might mistakenly predict one instead of the other.

  The measure is class-symmetric, meaning that the probability of confusing a {} with a {} is equal to the probability of confusing a {} with a {}.

  """
- + clickable_label
- )


  markdown_localization_accuracy = """## Localization Accuracy (IoU)
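The row/column convention spelled out in markdown_confusion_matrix (rows follow the ground-truth class, columns the predicted class, the diagonal holds the correct predictions) amounts to a small counting loop; a sketch with made-up labels, independent of the package's own mp.confusion_matrix() seen later in this diff:

```python
import numpy as np

def confusion_matrix(gt_labels, pred_labels, classes):
    # Rows follow the ground-truth class, columns the predicted class,
    # so the diagonal holds the correctly matched (TP) counts.
    index = {name: i for i, name in enumerate(classes)}
    cm = np.zeros((len(classes), len(classes)), dtype=int)
    for gt, pred in zip(gt_labels, pred_labels):
        cm[index[gt], index[pred]] += 1
    return cm

classes = ["cat", "dog"]
gt = ["cat", "cat", "dog", "dog", "dog"]
pred = ["cat", "dog", "dog", "dog", "cat"]
print(confusion_matrix(gt, pred, classes))
# [[1 1]
#  [1 2]]
```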
@@ -317,14 +306,11 @@ Additionally, it provides a view of how predicted probabilities are distributed.
  Ideally, the green histogram (TP predictions) should have higher confidence scores and be shifted to the right, indicating that the model is sure about its correct predictions, and the red histogram (FP predictions) should have lower confidence scores and be shifted to the left.
  """

- markdown_class_ap = (
- """## Average Precision by Class
+ markdown_class_ap = """## Average Precision by Class

  A quick visual comparison of the model performance across all classes. Each axis in the chart represents a different class, and the distance to the center indicates the <abbr title="{}">Average Precision</abbr> (AP) for that class.

  """
- + clickable_label
- )


  markdown_class_outcome_counts_1 = """### Outcome Counts by Class
@@ -338,16 +324,38 @@ markdown_normalization = """Normalization is used for better interclass comparis
  If normalization is off, the chart will display the total count of instances that correspond to outcome type (one of TP, FP or FN). This mode is identical to the main Outcome Counts graph on the top of the page. However, when normalization is off, you may encounter a class imbalance problem. Visually, bars that correspond to classes with many instances in the dataset will be much larger than others. This complicates the visual analysis.
  """

- markdown_class_outcome_counts_2 = (
- """You can switch the plot view between normalized and absolute values.
+ markdown_class_outcome_counts_2 = """You can switch the plot view between normalized and absolute values.

  _Bars in the chart are sorted by <abbr title="{}">F1-score</abbr> to keep a unified order of classes between different charts._

  """
- + clickable_label
- )

  empty = """### {}

  > {}
  """
+
+
+ markdown_speedtest_intro = """## Inference Speed
+
+ This is a speed test benchmark for this model. The model was tested with the following configuration:
+
+ - **Device**: {}
+ - **Hardware**: {}
+ - **Runtime**: {}
+ """
+
+ markdown_speedtest_table = """
+ The table below shows the speed test results. For each test, the time taken to process one batch of images is shown, as well as the model's throughput (i.e, the number of images processed per second, or FPS). Results are averaged across **{}** iterations.
+ """
+
+ markdown_real_time_inference = """## Real-time Inference
+
+ This chart compares different runtimes and devices (CPU or GPU)."""
+
+ # We additionally divide **predict** procedure into three stages: pre-process, inference, and post-process. Each bar in this chart consists of these three stages. For example, in the chart you can find how long the post-process phase lasts in a CPU device with an ONNXRuntime environment."""
+
+
+ markdown_speedtest_chart = """
+ This chart shows how the model's speed changes with different batch sizes . As the batch size increases, you can observe an increase in FPS (images per second).
+ """
supervisely/nn/benchmark/object_detection/vis_metrics/__init__.py (new file)
@@ -0,0 +1,48 @@
+ from supervisely.nn.benchmark.object_detection.vis_metrics.confidence_distribution import (
+ ConfidenceDistribution,
+ )
+ from supervisely.nn.benchmark.object_detection.vis_metrics.confidence_score import (
+ ConfidenceScore,
+ )
+ from supervisely.nn.benchmark.object_detection.vis_metrics.confusion_matrix import (
+ ConfusionMatrix,
+ )
+ from supervisely.nn.benchmark.object_detection.vis_metrics.explore_predictions import (
+ ExplorePredictions,
+ )
+ from supervisely.nn.benchmark.object_detection.vis_metrics.f1_score_at_different_iou import (
+ F1ScoreAtDifferentIOU,
+ )
+ from supervisely.nn.benchmark.object_detection.vis_metrics.frequently_confused import (
+ FrequentlyConfused,
+ )
+ from supervisely.nn.benchmark.object_detection.vis_metrics.iou_distribution import (
+ IOUDistribution,
+ )
+ from supervisely.nn.benchmark.object_detection.vis_metrics.key_metrics import KeyMetrics
+ from supervisely.nn.benchmark.object_detection.vis_metrics.model_predictions import (
+ ModelPredictions,
+ )
+ from supervisely.nn.benchmark.object_detection.vis_metrics.outcome_counts import (
+ OutcomeCounts,
+ )
+ from supervisely.nn.benchmark.object_detection.vis_metrics.outcome_counts_per_class import (
+ PerClassOutcomeCounts,
+ )
+ from supervisely.nn.benchmark.object_detection.vis_metrics.overview import Overview
+ from supervisely.nn.benchmark.object_detection.vis_metrics.pr_curve import PRCurve
+ from supervisely.nn.benchmark.object_detection.vis_metrics.pr_curve_by_class import (
+ PRCurveByClass,
+ )
+ from supervisely.nn.benchmark.object_detection.vis_metrics.precision import Precision
+ from supervisely.nn.benchmark.object_detection.vis_metrics.precision_avg_per_class import (
+ PerClassAvgPrecision,
+ )
+ from supervisely.nn.benchmark.object_detection.vis_metrics.recall import Recall
+ from supervisely.nn.benchmark.object_detection.vis_metrics.recall_vs_precision import (
+ RecallVsPrecision,
+ )
+ from supervisely.nn.benchmark.object_detection.vis_metrics.reliability_diagram import (
+ ReliabilityDiagram,
+ )
+ from supervisely.nn.benchmark.object_detection.vis_metrics.speedtest import Speedtest
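Since this new __init__.py re-exports all of the detection vis-metric classes, downstream code can presumably import them from the new object_detection package path rather than the removed supervisely.nn.benchmark.visualization.vis_metrics module; a hypothetical usage snippet (class selection is arbitrary):

```python
# Hypothetical import against the new layout re-exported above; the old
# supervisely.nn.benchmark.visualization.vis_metrics module is removed in 6.73.240.
from supervisely.nn.benchmark.object_detection.vis_metrics import (
    ConfusionMatrix,
    Overview,
    Speedtest,
)
```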
supervisely/nn/benchmark/{visualization → object_detection}/vis_metrics/confidence_distribution.py
@@ -1,40 +1,37 @@
  from __future__ import annotations

- from typing import TYPE_CHECKING
-
  import numpy as np

- from supervisely.nn.benchmark.visualization.vis_metric_base import MetricVis
- from supervisely.nn.benchmark.visualization.vis_widgets import Schema, Widget
-
- if TYPE_CHECKING:
- from supervisely.nn.benchmark.visualization.visualizer import Visualizer
+ from supervisely.nn.benchmark.object_detection.base_vis_metric import DetectionVisMetric
+ from supervisely.nn.benchmark.visualization.widgets import ChartWidget, MarkdownWidget


- class ConfidenceDistribution(MetricVis):
+ class ConfidenceDistribution(DetectionVisMetric):
+ MARKDOWN = "confidence_distribution"
+ CHART = "confidence_distribution"

- def __init__(self, loader: Visualizer) -> None:
- super().__init__(loader)
- self.schema = Schema(
- self._loader.vis_texts,
- markdown_confidence_distribution=Widget.Markdown(
- title="Confidence Distribution",
- is_header=True,
- formats=[
- self._loader.vis_texts.definitions.true_positives,
- self._loader.vis_texts.definitions.false_positives,
- ],
+ @property
+ def md(self) -> MarkdownWidget:
+ return MarkdownWidget(
+ self.MARKDOWN,
+ "Confidence Distribution",
+ self.vis_texts.markdown_confidence_distribution.format(
+ self.vis_texts.definitions.true_positives,
+ self.vis_texts.definitions.false_positives,
  ),
- chart=Widget.Chart(),
  )

- def get_figure(self, widget: Widget): # -> Optional[go.Figure]:
+ @property
+ def chart(self) -> ChartWidget:
+ return ChartWidget(self.CHART, self._get_figure())
+
+ def _get_figure(self): # -> go.Figure:
  import plotly.graph_objects as go # pylint: disable=import-error

- f1_optimal_conf, best_f1 = self._loader.mp.m_full.get_f1_optimal_conf()
+ f1_optimal_conf, best_f1 = self.eval_result.mp.m_full.get_f1_optimal_conf()

  # Histogram of confidence scores (TP vs FP)
- scores_tp, scores_fp = self._loader.mp.m_full.scores_tp_and_fp()
+ scores_tp, scores_fp = self.eval_result.mp.m_full.scores_tp_and_fp()

  tp_y, tp_x = np.histogram(scores_tp, bins=40, range=[0, 1])
  fp_y, fp_x = np.histogram(scores_fp, bins=40, range=[0, 1])
@@ -102,7 +99,6 @@ class ConfidenceDistribution(MetricVis):

  fig.update_layout(
  barmode="overlay",
- # title="Histogram of Confidence Scores (TP vs FP)",
  width=800,
  height=500,
  )
supervisely/nn/benchmark/object_detection/vis_metrics/confidence_score.py (new file)
@@ -0,0 +1,119 @@
+ from __future__ import annotations
+
+ from supervisely.nn.benchmark.object_detection.base_vis_metric import DetectionVisMetric
+ from supervisely.nn.benchmark.visualization.widgets import (
+ ChartWidget,
+ CollapseWidget,
+ MarkdownWidget,
+ NotificationWidget,
+ )
+
+
+ class ConfidenceScore(DetectionVisMetric):
+ MARKDOWN_CONFIDENCE_SCORE = "confidence_score"
+ NOTIFICATION = "confidence_score"
+ MARKDOWN_CONFIDENCE_SCORE_2 = "confidence_score_2"
+ MARKDOWN_CONFIDENCE_SCORE_3 = "calibration_score_3"
+ COLLAPSE_TITLE = "confidence_score_collapse"
+ CHART = "confidence_score"
+
+ @property
+ def md_confidence_score(self) -> MarkdownWidget:
+ text = self.vis_texts.markdown_confidence_score_1
+ text = text.format(self.vis_texts.definitions.confidence_threshold)
+ return MarkdownWidget(self.MARKDOWN_CONFIDENCE_SCORE, "Confidence Score Profile", text)
+
+ @property
+ def notification(self) -> NotificationWidget:
+ title, _ = self.vis_texts.notification_f1.values()
+ return NotificationWidget(
+ self.NOTIFICATION,
+ title.format(self.eval_result.mp.f1_optimal_conf.round(4)),
+ )
+
+ @property
+ def chart(self) -> ChartWidget:
+ return ChartWidget(name=self.CHART, figure=self._get_figure())
+
+ @property
+ def md_confidence_score_2(self) -> MarkdownWidget:
+ return MarkdownWidget(
+ self.MARKDOWN_CONFIDENCE_SCORE_2,
+ "",
+ self.vis_texts.markdown_confidence_score_2,
+ )
+
+ @property
+ def collapse_conf_score(self) -> CollapseWidget:
+ md = MarkdownWidget(
+ self.COLLAPSE_TITLE,
+ "How to plot Confidence Profile?",
+ self.vis_texts.markdown_plot_confidence_profile,
+ )
+ return CollapseWidget([md])
+
+ @property
+ def md_confidence_score_3(self) -> MarkdownWidget:
+ return MarkdownWidget(
+ self.MARKDOWN_CONFIDENCE_SCORE_3,
+ "",
+ self.vis_texts.markdown_calibration_score_3,
+ )
+
+ def _get_figure(self): # -> go.Figure:
+ import plotly.express as px # pylint: disable=import-error
+
+ color_map = {
+ "Precision": "#1f77b4",
+ "Recall": "orange",
+ }
+
+ fig = px.line(
+ self.eval_result.dfsp_down,
+ x="scores",
+ y=["precision", "recall", "f1"],
+ labels={"value": "Value", "variable": "Metric", "scores": "Confidence Score"},
+ width=None,
+ height=500,
+ color_discrete_map=color_map,
+ )
+ fig.update_traces(
+ hovertemplate="Confidence Score: %{x:.2f}<br>Value: %{y:.2f}<extra></extra>"
+ )
+ fig.update_layout(yaxis=dict(range=[0, 1]), xaxis=dict(range=[0, 1], tick0=0, dtick=0.1))
+
+ if (
+ self.eval_result.mp.f1_optimal_conf is not None
+ and self.eval_result.mp.best_f1 is not None
+ ):
+ # Add vertical line for the best threshold
+ fig.add_shape(
+ type="line",
+ x0=self.eval_result.mp.f1_optimal_conf,
+ x1=self.eval_result.mp.f1_optimal_conf,
+ y0=0,
+ y1=self.eval_result.mp.best_f1,
+ line=dict(color="gray", width=2, dash="dash"),
+ )
+ fig.add_annotation(
+ x=self.eval_result.mp.f1_optimal_conf,
+ y=self.eval_result.mp.best_f1 + 0.04,
+ text=f"F1-optimal threshold: {self.eval_result.mp.f1_optimal_conf:.2f}",
+ showarrow=False,
+ )
+ fig.update_layout(
+ dragmode=False,
+ modebar=dict(
+ remove=[
+ "zoom2d",
+ "pan2d",
+ "select2d",
+ "lasso2d",
+ "zoomIn2d",
+ "zoomOut2d",
+ "autoScale2d",
+ "resetScale2d",
+ ]
+ ),
+ )
+ return fig
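The F1-optimal threshold annotated on this chart is the confidence value at which the F1 curve (precision and recall swept over score thresholds, as in dfsp_down above) peaks. The package obtains it from get_f1_optimal_conf(), whose internals are not part of this diff; a rough, independent sketch of the idea:

```python
import numpy as np

def f1_optimal_conf(scores, precision, recall):
    # scores, precision, recall: equal-length arrays describing the model profile
    # swept over confidence thresholds (illustrative stand-in for dfsp_down).
    f1 = 2 * precision * recall / np.clip(precision + recall, 1e-9, None)
    best = int(np.argmax(f1))
    return float(scores[best]), float(f1[best])

scores = np.array([0.1, 0.3, 0.5, 0.7, 0.9])
precision = np.array([0.55, 0.70, 0.82, 0.90, 0.95])
recall = np.array([0.95, 0.90, 0.80, 0.60, 0.30])
print(f1_optimal_conf(scores, precision, recall))  # -> (0.5, ~0.81)
```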
supervisely/nn/benchmark/{visualization → object_detection}/vis_metrics/confusion_matrix.py
@@ -1,36 +1,44 @@
  from __future__ import annotations

- from typing import TYPE_CHECKING, Optional
+ from typing import Dict

  import numpy as np
  import pandas as pd

- from supervisely.nn.benchmark.visualization.vis_metric_base import MetricVis
- from supervisely.nn.benchmark.visualization.vis_widgets import Schema, Widget
+ from supervisely.nn.benchmark.object_detection.base_vis_metric import DetectionVisMetric
+ from supervisely.nn.benchmark.visualization.widgets import ChartWidget, MarkdownWidget

- if TYPE_CHECKING:
- from supervisely.nn.benchmark.visualization.visualizer import Visualizer

+ class ConfusionMatrix(DetectionVisMetric):
+ MARKDOWN = "confusion_matrix"
+ CHART = "confusion_matrix"

- class ConfusionMatrix(MetricVis):
-
- def __init__(self, loader: Visualizer) -> None:
- super().__init__(loader)
-
+ def __init__(self, *args, **kwargs) -> None:
+ super().__init__(*args, **kwargs)
  self.clickable = True
- self.schema = Schema(
- self._loader.vis_texts,
- markdown_confusion_matrix=Widget.Markdown(title="Confusion Matrix", is_header=True),
- chart=Widget.Chart(),
+ self._keypair_sep: str = "-"
+
+ @property
+ def md(self) -> MarkdownWidget:
+ text = self.vis_texts.markdown_confusion_matrix
+ return MarkdownWidget(self.MARKDOWN, "Confusion Matrix", text)
+
+ @property
+ def chart(self) -> ChartWidget:
+ chart = ChartWidget(self.CHART, self._get_figure())
+ chart.set_click_data(
+ self.explore_modal_table.id,
+ self.get_click_data(),
+ chart_click_extra="'getKey': (payload) => `${payload.points[0].x}${'-'}${payload.points[0].y}`, 'keySeparator': '-',",
  )
+ return chart

- def get_figure(self, widget: Widget.Chart): # -> Optional[go.Figure]:
+ def _get_figure(self): # -> go.Figure:
  import plotly.express as px # pylint: disable=import-error

- confusion_matrix = self._loader.mp.confusion_matrix()
- # Confusion Matrix
+ confusion_matrix = self.eval_result.mp.confusion_matrix()
  # TODO: Green-red
- cat_names = self._loader.mp.cat_names
+ cat_names = self.eval_result.mp.cat_names
  none_name = "(None)"

  with np.errstate(divide="ignore"):
@@ -53,6 +61,7 @@ class ConfusionMatrix(MetricVis):
  fig.update_traces(
  customdata=confusion_matrix,
  hovertemplate="Objects Count: %{customdata}<br>Predicted: %{y}<br>Ground Truth: %{x}",
+ colorscale="Viridis",
  )

  # Text on cells
@@ -62,18 +71,21 @@ class ConfusionMatrix(MetricVis):
  # fig.show()
  return fig

- def get_click_data(self, widget: Widget.Chart) -> Optional[dict]:
- res = dict(projectMeta=self._loader.dt_project_meta.to_json())
+ def get_click_data(self) -> Dict:
+ res = dict(projectMeta=self.eval_result.pred_project_meta.to_json())
  res["layoutTemplate"] = [None, None, None]
  res["clickData"] = {}

- for (pred_key, gt_key), matches_data in self._loader.click_data.confusion_matrix.items():
+ for (
+ pred_key,
+ gt_key,
+ ), matches_data in self.eval_result.click_data.confusion_matrix.items():
  key = gt_key + self._keypair_sep + pred_key
  res["clickData"][key] = {}
  res["clickData"][key]["imagesIds"] = []
  gt_title = f"GT: '{gt_key}'" if gt_key != "(None)" else "No GT Objects"
  pred_title = f"Predicted: '{pred_key}'" if pred_key != "(None)" else "No Predictions"
- res["clickData"][key]["title"] = f"Confusion Matrix. {gt_title} {pred_title}"
+ res["clickData"][key]["title"] = f"Confusion Matrix. {gt_title} {pred_title}"

  img_ids = set()
  obj_ids = set()
supervisely/nn/benchmark/object_detection/vis_metrics/explore_predictions.py (new file)
@@ -0,0 +1,129 @@
+ from typing import Dict
+
+ from supervisely.nn.benchmark.object_detection.base_vis_metric import DetectionVisMetric
+ from supervisely.nn.benchmark.visualization.widgets import GalleryWidget, MarkdownWidget
+
+
+ class ExplorePredictions(DetectionVisMetric):
+ MARKDOWN = "explore_predictions"
+ GALLERY = "explore_predictions"
+
+ def __init__(self, *args, **kwargs) -> None:
+ super().__init__(*args, **kwargs)
+ self.clickable = True
+
+ @property
+ def md(self) -> MarkdownWidget:
+ text = self.vis_texts.markdown_explorer
+ return MarkdownWidget(self.MARKDOWN, "Explore Predictions", text)
+
+ def gallery(self, opacity) -> GalleryWidget:
+ optimal_conf = self.eval_result.mp.f1_optimal_conf
+ default_filters = [{"confidence": [optimal_conf, 1]}]
+ gallery = GalleryWidget(
+ self.GALLERY, columns_number=3, filters=default_filters, opacity=opacity
+ )
+ gallery.add_image_left_header("Compare with GT")
+ gallery.set_project_meta(self.eval_result.filtered_project_meta)
+
+ gallery.set_images(
+ image_infos=self.eval_result.sample_images,
+ ann_infos=self.eval_result.sample_anns,
+ )
+ gallery._gallery._update_filters()
+
+ # set click data for diff gallery
+ self.explore_modal_table.set_click_data(
+ self.diff_modal_table.id,
+ self.get_diff_data(),
+ get_key="(payload) => `${payload.annotation.imageId}`",
+ )
+
+ # set click data for explore gallery
+ gallery.set_click_data(
+ self.diff_modal_table.id,
+ self.get_diff_data(),
+ get_key="(payload) => `${payload.annotation.image_id}`",
+ )
+
+ gallery.set_show_all_data(
+ self.explore_modal_table.id,
+ self.get_click_data(),
+ )
+ return gallery
+
+ def get_click_data(self) -> dict:
+ res = {}
+
+ res["layoutTemplate"] = [{"skipObjectTagsFiltering": ["outcome"]}] * 3
+ click_data = res.setdefault("clickData", {})
+ explore = click_data.setdefault("explore", {})
+ explore["filters"] = [
+ {
+ "type": "tag",
+ "tagId": "confidence",
+ "value": [self.eval_result.mp.f1_optimal_conf, 1],
+ }
+ ]
+ explore["title"] = "Explore all predictions"
+ images_ids = explore.setdefault("imagesIds", [])
+
+ images_ids.extend(
+ [d.pred_image_info.id for d in self.eval_result.matched_pair_data.values()]
+ )
+
+ return res
+
+ def get_diff_data(self) -> Dict:
+ res = {}
+
+ res["layoutTemplate"] = [
+ {"skipObjectTagsFiltering": True, "columnTitle": "Ground Truth"},
+ {"skipObjectTagsFiltering": ["outcome"], "columnTitle": "Prediction"},
+ {"skipObjectTagsFiltering": ["confidence"], "columnTitle": "Difference"},
+ ]
+
+ click_data = res.setdefault("clickData", {})
+
+ default_filters = [
+ {
+ "type": "tag",
+ "tagId": "confidence",
+ "value": [self.eval_result.mp.f1_optimal_conf, 1],
+ },
+ ]
+ for pairs_data in self.eval_result.matched_pair_data.values():
+ gt = pairs_data.gt_image_info
+ pred = pairs_data.pred_image_info
+ diff = pairs_data.diff_image_info
+ assert gt.name == pred.name == diff.name
+ for img_id in [gt.id, pred.id, diff.id]:
+ key = click_data.setdefault(str(img_id), {})
+ key["imagesIds"] = [gt.id, pred.id, diff.id]
+ key["filters"] = default_filters
+ key["title"] = f"Image: {gt.name}"
+
+ object_bindings = []
+ for img in [pred, diff]:
+ if img == pred:
+ ann_json = pairs_data.pred_annotation.to_json()
+ else:
+ ann_json = pairs_data.diff_annotation.to_json()
+ for obj in ann_json["objects"]:
+ for tag in obj["tags"]:
+ if tag["name"] == "matched_gt_id":
+ object_bindings.append(
+ [
+ {
+ "id": obj["id"],
+ "annotationKey": img.id,
+ },
+ {
+ "id": int(tag["value"]),
+ "annotationKey": gt.id if img == pred else pred.id,
+ },
+ ]
+ )
+ key["objectsBindings"] = object_bindings
+
+ return res