valor-lite 0.37.1 (valor_lite-0.37.1-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of valor-lite might be problematic.

Files changed (49)
  1. valor_lite/LICENSE +21 -0
  2. valor_lite/__init__.py +0 -0
  3. valor_lite/cache/__init__.py +11 -0
  4. valor_lite/cache/compute.py +154 -0
  5. valor_lite/cache/ephemeral.py +302 -0
  6. valor_lite/cache/persistent.py +529 -0
  7. valor_lite/classification/__init__.py +14 -0
  8. valor_lite/classification/annotation.py +45 -0
  9. valor_lite/classification/computation.py +378 -0
  10. valor_lite/classification/evaluator.py +879 -0
  11. valor_lite/classification/loader.py +97 -0
  12. valor_lite/classification/metric.py +535 -0
  13. valor_lite/classification/numpy_compatibility.py +13 -0
  14. valor_lite/classification/shared.py +184 -0
  15. valor_lite/classification/utilities.py +314 -0
  16. valor_lite/exceptions.py +20 -0
  17. valor_lite/object_detection/__init__.py +17 -0
  18. valor_lite/object_detection/annotation.py +238 -0
  19. valor_lite/object_detection/computation.py +841 -0
  20. valor_lite/object_detection/evaluator.py +805 -0
  21. valor_lite/object_detection/loader.py +292 -0
  22. valor_lite/object_detection/metric.py +850 -0
  23. valor_lite/object_detection/shared.py +185 -0
  24. valor_lite/object_detection/utilities.py +396 -0
  25. valor_lite/schemas.py +11 -0
  26. valor_lite/semantic_segmentation/__init__.py +15 -0
  27. valor_lite/semantic_segmentation/annotation.py +123 -0
  28. valor_lite/semantic_segmentation/computation.py +165 -0
  29. valor_lite/semantic_segmentation/evaluator.py +414 -0
  30. valor_lite/semantic_segmentation/loader.py +205 -0
  31. valor_lite/semantic_segmentation/metric.py +275 -0
  32. valor_lite/semantic_segmentation/shared.py +149 -0
  33. valor_lite/semantic_segmentation/utilities.py +88 -0
  34. valor_lite/text_generation/__init__.py +15 -0
  35. valor_lite/text_generation/annotation.py +56 -0
  36. valor_lite/text_generation/computation.py +611 -0
  37. valor_lite/text_generation/llm/__init__.py +0 -0
  38. valor_lite/text_generation/llm/exceptions.py +14 -0
  39. valor_lite/text_generation/llm/generation.py +903 -0
  40. valor_lite/text_generation/llm/instructions.py +814 -0
  41. valor_lite/text_generation/llm/integrations.py +226 -0
  42. valor_lite/text_generation/llm/utilities.py +43 -0
  43. valor_lite/text_generation/llm/validators.py +68 -0
  44. valor_lite/text_generation/manager.py +697 -0
  45. valor_lite/text_generation/metric.py +381 -0
  46. valor_lite-0.37.1.dist-info/METADATA +174 -0
  47. valor_lite-0.37.1.dist-info/RECORD +49 -0
  48. valor_lite-0.37.1.dist-info/WHEEL +5 -0
  49. valor_lite-0.37.1.dist-info/top_level.txt +1 -0
valor_lite/object_detection/shared.py ADDED
@@ -0,0 +1,185 @@
+ from dataclasses import dataclass
+ from pathlib import Path
+
+ import numpy as np
+ import pyarrow as pa
+ import pyarrow.compute as pc
+ from numpy.typing import NDArray
+
+ from valor_lite.cache import FileCacheReader, MemoryCacheReader
+
+
+ @dataclass
+ class EvaluatorInfo:
+     number_of_datums: int = 0
+     number_of_groundtruth_annotations: int = 0
+     number_of_prediction_annotations: int = 0
+     number_of_labels: int = 0
+     number_of_rows: int = 0
+     metadata_fields: list[tuple[str, str | pa.DataType]] | None = None
+
+
+ def generate_detailed_cache_path(path: str | Path) -> Path:
+     return Path(path) / "detailed"
+
+
+ def generate_ranked_cache_path(path: str | Path) -> Path:
+     return Path(path) / "ranked"
+
+
+ def generate_temporary_cache_path(path: str | Path) -> Path:
+     return Path(path) / "tmp"
+
+
+ def generate_metadata_path(path: str | Path) -> Path:
+     return Path(path) / "metadata.json"
+
+
+ def generate_detailed_schema(
+     metadata_fields: list[tuple[str, str | pa.DataType]] | None
+ ) -> pa.Schema:
+     metadata_fields = metadata_fields if metadata_fields else []
+     reserved_fields = [
+         ("datum_uid", pa.string()),
+         ("datum_id", pa.int64()),
+         # groundtruth
+         ("gt_uid", pa.string()),
+         ("gt_id", pa.int64()),
+         ("gt_label", pa.string()),
+         ("gt_label_id", pa.int64()),
+         # prediction
+         ("pd_uid", pa.string()),
+         ("pd_id", pa.int64()),
+         ("pd_label", pa.string()),
+         ("pd_label_id", pa.int64()),
+         ("pd_score", pa.float64()),
+         # pair
+         ("iou", pa.float64()),
+     ]
+
+     # validate
+     reserved_field_names = {f[0] for f in reserved_fields}
+     metadata_field_names = {f[0] for f in metadata_fields}
+     if conflicting := reserved_field_names & metadata_field_names:
+         raise ValueError(
+             f"metadata fields {conflicting} conflict with reserved fields"
+         )
+
+     return pa.schema(reserved_fields + metadata_fields)
+
+
+ def generate_ranked_schema() -> pa.Schema:
+     reserved_fields = [
+         ("datum_uid", pa.string()),
+         ("datum_id", pa.int64()),
+         # groundtruth
+         ("gt_id", pa.int64()),
+         ("gt_label_id", pa.int64()),
+         # prediction
+         ("pd_id", pa.int64()),
+         ("pd_label_id", pa.int64()),
+         ("pd_score", pa.float64()),
+         # pair
+         ("iou", pa.float64()),
+         ("high_score", pa.bool_()),
+         ("iou_prev", pa.float64()),
+     ]
+     return pa.schema(reserved_fields)
+
+
+ def encode_metadata_fields(
+     metadata_fields: list[tuple[str, str | pa.DataType]] | None
+ ) -> dict[str, str]:
+     metadata_fields = metadata_fields if metadata_fields else []
+     return {k: str(v) for k, v in metadata_fields}
+
+
+ def decode_metadata_fields(
+     encoded_metadata_fields: dict[str, str]
+ ) -> list[tuple[str, str]]:
+     return [(k, v) for k, v in encoded_metadata_fields.items()]
+
+
+ def extract_labels(
+     reader: MemoryCacheReader | FileCacheReader,
+     index_to_label_override: dict[int, str] | None = None,
+ ) -> dict[int, str]:
+     if index_to_label_override is not None:
+         return index_to_label_override
+
+     index_to_label = {}
+     for tbl in reader.iterate_tables(
+         columns=[
+             "gt_label_id",
+             "gt_label",
+             "pd_label_id",
+             "pd_label",
+         ]
+     ):
+
+         # get gt labels
+         gt_label_ids = tbl["gt_label_id"].to_numpy()
+         gt_label_ids, gt_indices = np.unique(gt_label_ids, return_index=True)
+         gt_labels = tbl["gt_label"].take(gt_indices).to_pylist()
+         gt_labels = dict(zip(gt_label_ids.astype(int).tolist(), gt_labels))
+         gt_labels.pop(-1, None)
+         index_to_label.update(gt_labels)
+
+         # get pd labels
+         pd_label_ids = tbl["pd_label_id"].to_numpy()
+         pd_label_ids, pd_indices = np.unique(pd_label_ids, return_index=True)
+         pd_labels = tbl["pd_label"].take(pd_indices).to_pylist()
+         pd_labels = dict(zip(pd_label_ids.astype(int).tolist(), pd_labels))
+         pd_labels.pop(-1, None)
+         index_to_label.update(pd_labels)
+
+     return index_to_label
+
+
+ def extract_counts(
+     reader: MemoryCacheReader | FileCacheReader,
+     datums: pc.Expression | None = None,
+     groundtruths: pc.Expression | None = None,
+     predictions: pc.Expression | None = None,
+ ):
+     n_dts, n_gts, n_pds = 0, 0, 0
+     for tbl in reader.iterate_tables(filter=datums):
+         # count datums
+         n_dts += int(np.unique(tbl["datum_id"].to_numpy()).shape[0])
+
+         # count groundtruths
+         if groundtruths is not None:
+             gts = tbl.filter(groundtruths)["gt_id"].to_numpy()
+         else:
+             gts = tbl["gt_id"].to_numpy()
+         n_gts += int(np.unique(gts[gts >= 0]).shape[0])
+
+         # count predictions
+         if predictions is not None:
+             pds = tbl.filter(predictions)["pd_id"].to_numpy()
+         else:
+             pds = tbl["pd_id"].to_numpy()
+         n_pds += int(np.unique(pds[pds >= 0]).shape[0])
+
+     return n_dts, n_gts, n_pds
+
+
+ def extract_groundtruth_count_per_label(
+     reader: MemoryCacheReader | FileCacheReader,
+     number_of_labels: int,
+     datums: pc.Expression | None = None,
+ ) -> NDArray[np.uint64]:
+     gt_counts_per_lbl = np.zeros(number_of_labels, dtype=np.uint64)
+     for gts in reader.iterate_arrays(
+         numeric_columns=["gt_id", "gt_label_id"],
+         filter=datums,
+     ):
+         # count gts per label
+         unique_ann = np.unique(gts[gts[:, 0] >= 0], axis=0)
+         unique_labels, label_counts = np.unique(
+             unique_ann[:, 1], return_counts=True
+         )
+         for label_id, count in zip(unique_labels, label_counts):
+             gt_counts_per_lbl[int(label_id)] += int(count)
+
+     return gt_counts_per_lbl
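As a quick illustration of how the schema helpers above fit together, here is a minimal usage sketch; it is not part of the packaged code, and the "camera" and "frame" metadata columns are hypothetical.

import pyarrow as pa

# Hypothetical metadata columns appended after the reserved detection fields.
metadata_fields = [("camera", pa.string()), ("frame", pa.int64())]

schema = generate_detailed_schema(metadata_fields)
assert "iou" in schema.names and "camera" in schema.names

# Reusing a reserved column name trips the validation in generate_detailed_schema.
try:
    generate_detailed_schema([("datum_uid", pa.string())])
except ValueError as err:
    print(err)  # metadata fields {'datum_uid'} conflict with reserved fields

# encode/decode give a JSON-friendly round trip, though the decoded types come
# back as strings (e.g. "string") rather than pyarrow DataType objects.
encoded = encode_metadata_fields(metadata_fields)
decoded = decode_metadata_fields(encoded)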
valor_lite/object_detection/utilities.py ADDED
@@ -0,0 +1,396 @@
+ from collections import defaultdict
+
+ import numpy as np
+ import pyarrow as pa
+ from numpy.typing import NDArray
+
+ from valor_lite.object_detection.metric import Metric, MetricType
+
+
+ def unpack_precision_recall_into_metric_lists(
+     counts: NDArray[np.uint64],
+     precision_recall_f1: NDArray[np.float64],
+     average_precision: NDArray[np.float64],
+     mean_average_precision: NDArray[np.float64],
+     average_recall: NDArray[np.float64],
+     mean_average_recall: NDArray[np.float64],
+     pr_curve: NDArray[np.float64],
+     iou_thresholds: list[float],
+     score_thresholds: list[float],
+     index_to_label: dict[int, str],
+ ):
+     metrics = defaultdict(list)
+
+     metrics[MetricType.AP] = [
+         Metric.average_precision(
+             value=float(average_precision[iou_idx][label_idx]),
+             iou_threshold=iou_threshold,
+             label=label,
+         )
+         for iou_idx, iou_threshold in enumerate(iou_thresholds)
+         for label_idx, label in index_to_label.items()
+     ]
+
+     metrics[MetricType.mAP] = [
+         Metric.mean_average_precision(
+             value=float(mean_average_precision[iou_idx]),
+             iou_threshold=iou_threshold,
+         )
+         for iou_idx, iou_threshold in enumerate(iou_thresholds)
+     ]
+
+     # TODO - (c.zaloom) will be removed in the future
+     metrics[MetricType.APAveragedOverIOUs] = [
+         Metric.average_precision_averaged_over_IOUs(
+             value=float(average_precision.mean(axis=0)[label_idx]),
+             iou_thresholds=iou_thresholds,
+             label=label,
+         )
+         for label_idx, label in index_to_label.items()
+     ]
+
+     # TODO - (c.zaloom) will be removed in the future
+     metrics[MetricType.mAPAveragedOverIOUs] = [
+         Metric.mean_average_precision_averaged_over_IOUs(
+             value=float(mean_average_precision.mean()),
+             iou_thresholds=iou_thresholds,
+         )
+     ]
+
+     metrics[MetricType.AR] = [
+         Metric.average_recall(
+             value=float(average_recall[score_idx, label_idx]),
+             iou_thresholds=iou_thresholds,
+             score_threshold=score_threshold,
+             label=label,
+         )
+         for score_idx, score_threshold in enumerate(score_thresholds)
+         for label_idx, label in index_to_label.items()
+     ]
+
+     metrics[MetricType.mAR] = [
+         Metric.mean_average_recall(
+             value=float(mean_average_recall[score_idx]),
+             iou_thresholds=iou_thresholds,
+             score_threshold=score_threshold,
+         )
+         for score_idx, score_threshold in enumerate(score_thresholds)
+     ]
+
+     # TODO - (c.zaloom) will be removed in the future
+     metrics[MetricType.ARAveragedOverScores] = [
+         Metric.average_recall_averaged_over_scores(
+             value=float(average_recall.mean(axis=0)[label_idx]),
+             score_thresholds=score_thresholds,
+             iou_thresholds=iou_thresholds,
+             label=label,
+         )
+         for label_idx, label in index_to_label.items()
+     ]
+
+     # TODO - (c.zaloom) will be removed in the future
+     metrics[MetricType.mARAveragedOverScores] = [
+         Metric.mean_average_recall_averaged_over_scores(
+             value=float(mean_average_recall.mean()),
+             score_thresholds=score_thresholds,
+             iou_thresholds=iou_thresholds,
+         )
+     ]
+
+     metrics[MetricType.PrecisionRecallCurve] = [
+         Metric.precision_recall_curve(
+             precisions=pr_curve[iou_idx, label_idx, :, 0].tolist(),
+             scores=pr_curve[iou_idx, label_idx, :, 1].tolist(),
+             iou_threshold=iou_threshold,
+             label=label,
+         )
+         for iou_idx, iou_threshold in enumerate(iou_thresholds)
+         for label_idx, label in index_to_label.items()
+     ]
+
+     for label_idx, label in index_to_label.items():
+         for score_idx, score_threshold in enumerate(score_thresholds):
+             for iou_idx, iou_threshold in enumerate(iou_thresholds):
+
+                 row = counts[iou_idx, score_idx, :, label_idx]
+                 kwargs = {
+                     "label": label,
+                     "iou_threshold": iou_threshold,
+                     "score_threshold": score_threshold,
+                 }
+                 metrics[MetricType.Counts].append(
+                     Metric.counts(
+                         tp=int(row[0]),
+                         fp=int(row[1]),
+                         fn=int(row[2]),
+                         **kwargs,
+                     )
+                 )
+
+                 row = precision_recall_f1[iou_idx, score_idx, :, label_idx]
+                 metrics[MetricType.Precision].append(
+                     Metric.precision(
+                         value=float(row[0]),
+                         **kwargs,
+                     )
+                 )
+                 metrics[MetricType.Recall].append(
+                     Metric.recall(
+                         value=float(row[1]),
+                         **kwargs,
+                     )
+                 )
+                 metrics[MetricType.F1].append(
+                     Metric.f1_score(
+                         value=float(row[2]),
+                         **kwargs,
+                     )
+                 )
+
+     return metrics
+
+
+ def unpack_confusion_matrix(
+     confusion_matrices: NDArray[np.uint64],
+     unmatched_groundtruths: NDArray[np.uint64],
+     unmatched_predictions: NDArray[np.uint64],
+     index_to_label: dict[int, str],
+     iou_thresholds: list[float],
+     score_thresholds: list[float],
+ ) -> list[Metric]:
+     metrics = []
+     for iou_idx, iou_thresh in enumerate(iou_thresholds):
+         for score_idx, score_thresh in enumerate(score_thresholds):
+             cm_dict = {}
+             ugt_dict = {}
+             upd_dict = {}
+             for idx, label in index_to_label.items():
+                 ugt_dict[label] = int(
+                     unmatched_groundtruths[iou_idx, score_idx, idx]
+                 )
+                 upd_dict[label] = int(
+                     unmatched_predictions[iou_idx, score_idx, idx]
+                 )
+                 for pidx, plabel in index_to_label.items():
+                     if label not in cm_dict:
+                         cm_dict[label] = {}
+                     cm_dict[label][plabel] = int(
+                         confusion_matrices[iou_idx, score_idx, idx, pidx]
+                     )
+             metrics.append(
+                 Metric.confusion_matrix(
+                     confusion_matrix=cm_dict,
+                     unmatched_ground_truths=ugt_dict,
+                     unmatched_predictions=upd_dict,
+                     iou_threshold=iou_thresh,
+                     score_threshold=score_thresh,
+                 )
+             )
+     return metrics
+
+
+ def create_mapping(
+     tbl: pa.Table,
+     pairs: NDArray[np.float64],
+     index: int,
+     id_col: str,
+     uid_col: str,
+ ) -> dict[int, str]:
+     col = pairs[:, index].astype(np.int64)
+     values, indices = np.unique(col, return_index=True)
+     indices = indices[values >= 0]
+     return {
+         tbl[id_col][idx].as_py(): tbl[uid_col][idx].as_py() for idx in indices
+     }
+
+
+ def unpack_examples(
+     detailed_pairs: NDArray[np.float64],
+     mask_tp: NDArray[np.bool_],
+     mask_fn: NDArray[np.bool_],
+     mask_fp: NDArray[np.bool_],
+     iou_thresholds: list[float],
+     score_thresholds: list[float],
+     index_to_datum_id: dict[int, str],
+     index_to_groundtruth_id: dict[int, str],
+     index_to_prediction_id: dict[int, str],
+ ) -> list[Metric]:
+     metrics = []
+     ids = detailed_pairs[:, :5].astype(np.int64)
+     unique_datums = np.unique(detailed_pairs[:, 0].astype(np.int64))
+     for datum_index in unique_datums:
+         mask_datum = detailed_pairs[:, 0] == datum_index
+         mask_datum_tp = mask_tp & mask_datum
+         mask_datum_fp = mask_fp & mask_datum
+         mask_datum_fn = mask_fn & mask_datum
+
+         datum_id = index_to_datum_id[datum_index]
+         for iou_idx, iou_thresh in enumerate(iou_thresholds):
+             for score_idx, score_thresh in enumerate(score_thresholds):
+
+                 unique_tp = np.unique(
+                     ids[np.ix_(mask_datum_tp[iou_idx, score_idx], (0, 1, 2, 3, 4))], axis=0  # type: ignore - numpy ix_ typing
+                 )
+                 unique_fp = np.unique(
+                     ids[np.ix_(mask_datum_fp[iou_idx, score_idx], (0, 2, 4))], axis=0  # type: ignore - numpy ix_ typing
+                 )
+                 unique_fn = np.unique(
+                     ids[np.ix_(mask_datum_fn[iou_idx, score_idx], (0, 1, 3))], axis=0  # type: ignore - numpy ix_ typing
+                 )
+
+                 tp = [
+                     (
+                         index_to_groundtruth_id[row[1]],
+                         index_to_prediction_id[row[2]],
+                     )
+                     for row in unique_tp
+                 ]
+                 fp = [index_to_prediction_id[row[1]] for row in unique_fp]
+                 fn = [index_to_groundtruth_id[row[1]] for row in unique_fn]
+                 metrics.append(
+                     Metric.examples(
+                         datum_id=datum_id,
+                         true_positives=tp,
+                         false_negatives=fn,
+                         false_positives=fp,
+                         iou_threshold=iou_thresh,
+                         score_threshold=score_thresh,
+                     )
+                 )
+     return metrics
+
+
+ def create_empty_confusion_matrix_with_examples(
+     iou_threhsold: float,
+     score_threshold: float,
+     index_to_label: dict[int, str],
+ ) -> Metric:
+     unmatched_groundtruths = dict()
+     unmatched_predictions = dict()
+     confusion_matrix = dict()
+     for label in index_to_label.values():
+         unmatched_groundtruths[label] = {"count": 0, "examples": []}
+         unmatched_predictions[label] = {"count": 0, "examples": []}
+         confusion_matrix[label] = {}
+         for plabel in index_to_label.values():
+             confusion_matrix[label][plabel] = {"count": 0, "examples": []}
+
+     return Metric.confusion_matrix_with_examples(
+         confusion_matrix=confusion_matrix,
+         unmatched_ground_truths=unmatched_groundtruths,
+         unmatched_predictions=unmatched_predictions,
+         iou_threshold=iou_threhsold,
+         score_threshold=score_threshold,
+     )
+
+
+ def _unpack_confusion_matrix_with_examples(
+     metric: Metric,
+     ids: NDArray[np.int32],
+     mask_matched: NDArray[np.bool_],
+     mask_fp_unmatched: NDArray[np.bool_],
+     mask_fn_unmatched: NDArray[np.bool_],
+     index_to_datum_id: dict[int, str],
+     index_to_groundtruth_id: dict[int, str],
+     index_to_prediction_id: dict[int, str],
+     index_to_label: dict[int, str],
+ ):
+     if not isinstance(metric.value, dict):
+         raise TypeError("expected metric to contain a dictionary value")
+
+     unique_matches = np.unique(
+         ids[np.ix_(mask_matched, (0, 1, 2, 3, 4))], axis=0  # type: ignore - numpy ix_ typing
+     )
+     unique_unmatched_predictions = np.unique(
+         ids[np.ix_(mask_fp_unmatched, (0, 2, 4))], axis=0  # type: ignore - numpy ix_ typing
+     )
+     unique_unmatched_groundtruths = np.unique(
+         ids[np.ix_(mask_fn_unmatched, (0, 1, 3))], axis=0  # type: ignore - numpy ix_ typing
+     )
+
+     n_matched = unique_matches.shape[0]
+     n_unmatched_predictions = unique_unmatched_predictions.shape[0]
+     n_unmatched_groundtruths = unique_unmatched_groundtruths.shape[0]
+     n_max = max(n_matched, n_unmatched_groundtruths, n_unmatched_predictions)
+
+     for idx in range(n_max):
+         if idx < n_unmatched_groundtruths:
+             label = index_to_label[unique_unmatched_groundtruths[idx, 2]]
+             metric.value["unmatched_ground_truths"][label]["count"] += 1
+             metric.value["unmatched_ground_truths"][label]["examples"].append(
+                 {
+                     "datum_id": index_to_datum_id[
+                         unique_unmatched_groundtruths[idx, 0]
+                     ],
+                     "ground_truth_id": index_to_groundtruth_id[
+                         unique_unmatched_groundtruths[idx, 1]
+                     ],
+                 }
+             )
+         if idx < n_unmatched_predictions:
+             label_id = unique_unmatched_predictions[idx, 2]
+             label = index_to_label[label_id]
+             metric.value["unmatched_predictions"][label]["count"] += 1
+             metric.value["unmatched_predictions"][label]["examples"].append(
+                 {
+                     "datum_id": index_to_datum_id[
+                         unique_unmatched_predictions[idx, 0]
+                     ],
+                     "prediction_id": index_to_prediction_id[
+                         unique_unmatched_predictions[idx, 1]
+                     ],
+                 }
+             )
+         if idx < n_matched:
+             glabel = index_to_label[unique_matches[idx, 3]]
+             plabel = index_to_label[unique_matches[idx, 4]]
+             metric.value["confusion_matrix"][glabel][plabel]["count"] += 1
+             metric.value["confusion_matrix"][glabel][plabel][
+                 "examples"
+             ].append(
+                 {
+                     "datum_id": index_to_datum_id[unique_matches[idx, 0]],
+                     "ground_truth_id": index_to_groundtruth_id[
+                         unique_matches[idx, 1]
+                     ],
+                     "prediction_id": index_to_prediction_id[
+                         unique_matches[idx, 2]
+                     ],
+                 }
+             )
+
+     return metric
+
+
+ def unpack_confusion_matrix_with_examples(
+     metrics: dict[int, dict[int, Metric]],
+     detailed_pairs: NDArray[np.float64],
+     mask_tp: NDArray[np.bool_],
+     mask_fp_fn_misclf: NDArray[np.bool_],
+     mask_fp_unmatched: NDArray[np.bool_],
+     mask_fn_unmatched: NDArray[np.bool_],
+     index_to_datum_id: dict[int, str],
+     index_to_groundtruth_id: dict[int, str],
+     index_to_prediction_id: dict[int, str],
+     index_to_label: dict[int, str],
+ ) -> list[Metric]:
+
+     ids = detailed_pairs[:, :5].astype(np.int32)
+
+     mask_matched = mask_tp | mask_fp_fn_misclf
+
+     return [
+         _unpack_confusion_matrix_with_examples(
+             metric=metric,
+             ids=ids,
+             mask_matched=mask_matched[iou_idx, score_idx],
+             mask_fp_unmatched=mask_fp_unmatched[iou_idx, score_idx],
+             mask_fn_unmatched=mask_fn_unmatched[iou_idx, score_idx],
+             index_to_datum_id=index_to_datum_id,
+             index_to_groundtruth_id=index_to_groundtruth_id,
+             index_to_prediction_id=index_to_prediction_id,
+             index_to_label=index_to_label,
+         )
+         for iou_idx, inner in metrics.items()
+         for score_idx, metric in inner.items()
+     ]
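A hypothetical usage sketch, not part of the diff, for the confusion-matrix scaffolding above; the label mapping is invented, and the thresholds are passed positionally because the first parameter is spelled iou_threhsold in the source.

index_to_label = {0: "cat", 1: "dog"}  # assumed label mapping

# Seed an empty confusion-matrix-with-examples metric at one IoU/score setting.
empty = create_empty_confusion_matrix_with_examples(0.5, 0.5, index_to_label)

# Judging by how _unpack_confusion_matrix_with_examples indexes metric.value,
# every ground-truth/prediction label pair starts as {"count": 0, "examples": []}.
print(empty.value["confusion_matrix"]["cat"]["dog"])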
valor_lite/schemas.py ADDED
@@ -0,0 +1,11 @@
+ from dataclasses import asdict, dataclass
+
+
+ @dataclass
+ class BaseMetric:
+     type: str
+     value: int | float | dict
+     parameters: dict
+
+     def to_dict(self) -> dict:
+         return asdict(self)
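For context, a minimal sketch, not from the package, of how BaseMetric serializes; the metric type and parameter names are illustrative only.

metric = BaseMetric(
    type="Precision",
    value=0.5,
    parameters={"iou_threshold": 0.5, "label": "cat"},
)

# dataclasses.asdict recursively converts the instance into plain dictionaries.
print(metric.to_dict())
# {'type': 'Precision', 'value': 0.5, 'parameters': {'iou_threshold': 0.5, 'label': 'cat'}}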
valor_lite/semantic_segmentation/__init__.py ADDED
@@ -0,0 +1,15 @@
+ from .annotation import Bitmask, Segmentation
+ from .evaluator import Builder, Evaluator, EvaluatorInfo
+ from .loader import Loader
+ from .metric import Metric, MetricType
+
+ __all__ = [
+     "Builder",
+     "Loader",
+     "Evaluator",
+     "Segmentation",
+     "Bitmask",
+     "Metric",
+     "MetricType",
+     "EvaluatorInfo",
+ ]