valor_lite-0.37.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of valor-lite might be problematic.
- valor_lite/LICENSE +21 -0
- valor_lite/__init__.py +0 -0
- valor_lite/cache/__init__.py +11 -0
- valor_lite/cache/compute.py +154 -0
- valor_lite/cache/ephemeral.py +302 -0
- valor_lite/cache/persistent.py +529 -0
- valor_lite/classification/__init__.py +14 -0
- valor_lite/classification/annotation.py +45 -0
- valor_lite/classification/computation.py +378 -0
- valor_lite/classification/evaluator.py +879 -0
- valor_lite/classification/loader.py +97 -0
- valor_lite/classification/metric.py +535 -0
- valor_lite/classification/numpy_compatibility.py +13 -0
- valor_lite/classification/shared.py +184 -0
- valor_lite/classification/utilities.py +314 -0
- valor_lite/exceptions.py +20 -0
- valor_lite/object_detection/__init__.py +17 -0
- valor_lite/object_detection/annotation.py +238 -0
- valor_lite/object_detection/computation.py +841 -0
- valor_lite/object_detection/evaluator.py +805 -0
- valor_lite/object_detection/loader.py +292 -0
- valor_lite/object_detection/metric.py +850 -0
- valor_lite/object_detection/shared.py +185 -0
- valor_lite/object_detection/utilities.py +396 -0
- valor_lite/schemas.py +11 -0
- valor_lite/semantic_segmentation/__init__.py +15 -0
- valor_lite/semantic_segmentation/annotation.py +123 -0
- valor_lite/semantic_segmentation/computation.py +165 -0
- valor_lite/semantic_segmentation/evaluator.py +414 -0
- valor_lite/semantic_segmentation/loader.py +205 -0
- valor_lite/semantic_segmentation/metric.py +275 -0
- valor_lite/semantic_segmentation/shared.py +149 -0
- valor_lite/semantic_segmentation/utilities.py +88 -0
- valor_lite/text_generation/__init__.py +15 -0
- valor_lite/text_generation/annotation.py +56 -0
- valor_lite/text_generation/computation.py +611 -0
- valor_lite/text_generation/llm/__init__.py +0 -0
- valor_lite/text_generation/llm/exceptions.py +14 -0
- valor_lite/text_generation/llm/generation.py +903 -0
- valor_lite/text_generation/llm/instructions.py +814 -0
- valor_lite/text_generation/llm/integrations.py +226 -0
- valor_lite/text_generation/llm/utilities.py +43 -0
- valor_lite/text_generation/llm/validators.py +68 -0
- valor_lite/text_generation/manager.py +697 -0
- valor_lite/text_generation/metric.py +381 -0
- valor_lite-0.37.1.dist-info/METADATA +174 -0
- valor_lite-0.37.1.dist-info/RECORD +49 -0
- valor_lite-0.37.1.dist-info/WHEEL +5 -0
- valor_lite-0.37.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,185 @@
+from dataclasses import dataclass
+from pathlib import Path
+
+import numpy as np
+import pyarrow as pa
+import pyarrow.compute as pc
+from numpy.typing import NDArray
+
+from valor_lite.cache import FileCacheReader, MemoryCacheReader
+
+
+@dataclass
+class EvaluatorInfo:
+    number_of_datums: int = 0
+    number_of_groundtruth_annotations: int = 0
+    number_of_prediction_annotations: int = 0
+    number_of_labels: int = 0
+    number_of_rows: int = 0
+    metadata_fields: list[tuple[str, str | pa.DataType]] | None = None
+
+
+def generate_detailed_cache_path(path: str | Path) -> Path:
+    return Path(path) / "detailed"
+
+
+def generate_ranked_cache_path(path: str | Path) -> Path:
+    return Path(path) / "ranked"
+
+
+def generate_temporary_cache_path(path: str | Path) -> Path:
+    return Path(path) / "tmp"
+
+
+def generate_metadata_path(path: str | Path) -> Path:
+    return Path(path) / "metadata.json"
+
+
+def generate_detailed_schema(
+    metadata_fields: list[tuple[str, str | pa.DataType]] | None
+) -> pa.Schema:
+    metadata_fields = metadata_fields if metadata_fields else []
+    reserved_fields = [
+        ("datum_uid", pa.string()),
+        ("datum_id", pa.int64()),
+        # groundtruth
+        ("gt_uid", pa.string()),
+        ("gt_id", pa.int64()),
+        ("gt_label", pa.string()),
+        ("gt_label_id", pa.int64()),
+        # prediction
+        ("pd_uid", pa.string()),
+        ("pd_id", pa.int64()),
+        ("pd_label", pa.string()),
+        ("pd_label_id", pa.int64()),
+        ("pd_score", pa.float64()),
+        # pair
+        ("iou", pa.float64()),
+    ]
+
+    # validate
+    reserved_field_names = {f[0] for f in reserved_fields}
+    metadata_field_names = {f[0] for f in metadata_fields}
+    if conflicting := reserved_field_names & metadata_field_names:
+        raise ValueError(
+            f"metadata fields {conflicting} conflict with reserved fields"
+        )
+
+    return pa.schema(reserved_fields + metadata_fields)
+
+
+def generate_ranked_schema() -> pa.Schema:
+    reserved_fields = [
+        ("datum_uid", pa.string()),
+        ("datum_id", pa.int64()),
+        # groundtruth
+        ("gt_id", pa.int64()),
+        ("gt_label_id", pa.int64()),
+        # prediction
+        ("pd_id", pa.int64()),
+        ("pd_label_id", pa.int64()),
+        ("pd_score", pa.float64()),
+        # pair
+        ("iou", pa.float64()),
+        ("high_score", pa.bool_()),
+        ("iou_prev", pa.float64()),
+    ]
+    return pa.schema(reserved_fields)
+
+
+def encode_metadata_fields(
+    metadata_fields: list[tuple[str, str | pa.DataType]] | None
+) -> dict[str, str]:
+    metadata_fields = metadata_fields if metadata_fields else []
+    return {k: str(v) for k, v in metadata_fields}
+
+
+def decode_metadata_fields(
+    encoded_metadata_fields: dict[str, str]
+) -> list[tuple[str, str]]:
+    return [(k, v) for k, v in encoded_metadata_fields.items()]
+
+
+def extract_labels(
+    reader: MemoryCacheReader | FileCacheReader,
+    index_to_label_override: dict[int, str] | None = None,
+) -> dict[int, str]:
+    if index_to_label_override is not None:
+        return index_to_label_override
+
+    index_to_label = {}
+    for tbl in reader.iterate_tables(
+        columns=[
+            "gt_label_id",
+            "gt_label",
+            "pd_label_id",
+            "pd_label",
+        ]
+    ):
+
+        # get gt labels
+        gt_label_ids = tbl["gt_label_id"].to_numpy()
+        gt_label_ids, gt_indices = np.unique(gt_label_ids, return_index=True)
+        gt_labels = tbl["gt_label"].take(gt_indices).to_pylist()
+        gt_labels = dict(zip(gt_label_ids.astype(int).tolist(), gt_labels))
+        gt_labels.pop(-1, None)
+        index_to_label.update(gt_labels)
+
+        # get pd labels
+        pd_label_ids = tbl["pd_label_id"].to_numpy()
+        pd_label_ids, pd_indices = np.unique(pd_label_ids, return_index=True)
+        pd_labels = tbl["pd_label"].take(pd_indices).to_pylist()
+        pd_labels = dict(zip(pd_label_ids.astype(int).tolist(), pd_labels))
+        pd_labels.pop(-1, None)
+        index_to_label.update(pd_labels)
+
+    return index_to_label
+
+
+def extract_counts(
+    reader: MemoryCacheReader | FileCacheReader,
+    datums: pc.Expression | None = None,
+    groundtruths: pc.Expression | None = None,
+    predictions: pc.Expression | None = None,
+):
+    n_dts, n_gts, n_pds = 0, 0, 0
+    for tbl in reader.iterate_tables(filter=datums):
+        # count datums
+        n_dts += int(np.unique(tbl["datum_id"].to_numpy()).shape[0])
+
+        # count groundtruths
+        if groundtruths is not None:
+            gts = tbl.filter(groundtruths)["gt_id"].to_numpy()
+        else:
+            gts = tbl["gt_id"].to_numpy()
+        n_gts += int(np.unique(gts[gts >= 0]).shape[0])
+
+        # count predictions
+        if predictions is not None:
+            pds = tbl.filter(predictions)["pd_id"].to_numpy()
+        else:
+            pds = tbl["pd_id"].to_numpy()
+        n_pds += int(np.unique(pds[pds >= 0]).shape[0])
+
+    return n_dts, n_gts, n_pds
+
+
+def extract_groundtruth_count_per_label(
+    reader: MemoryCacheReader | FileCacheReader,
+    number_of_labels: int,
+    datums: pc.Expression | None = None,
+) -> NDArray[np.uint64]:
+    gt_counts_per_lbl = np.zeros(number_of_labels, dtype=np.uint64)
+    for gts in reader.iterate_arrays(
+        numeric_columns=["gt_id", "gt_label_id"],
+        filter=datums,
+    ):
+        # count gts per label
+        unique_ann = np.unique(gts[gts[:, 0] >= 0], axis=0)
+        unique_labels, label_counts = np.unique(
+            unique_ann[:, 1], return_counts=True
+        )
+        for label_id, count in zip(unique_labels, label_counts):
+            gt_counts_per_lbl[int(label_id)] += int(count)
+
+    return gt_counts_per_lbl
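The schema helpers in the hunk above append caller-supplied metadata columns to a fixed set of reserved pair columns and reject any name collision. A minimal sketch of that behavior follows; it assumes the hunk is valor_lite/object_detection/shared.py (the only file in the listing with 185 added lines) and that valor-lite 0.37.1 and pyarrow are installed. The metadata field names are illustrative only.

```python
import pyarrow as pa

from valor_lite.object_detection.shared import (
    encode_metadata_fields,
    generate_detailed_schema,
)

# hypothetical extra columns supplied by the caller
metadata_fields = [("capture_time", pa.string()), ("sensor", pa.string())]

# metadata columns are appended after the reserved pair columns
schema = generate_detailed_schema(metadata_fields)
print(schema.names[-2:])  # ['capture_time', 'sensor']

# field types are stringified, e.g. for storage in metadata.json
print(encode_metadata_fields(metadata_fields))
# {'capture_time': 'string', 'sensor': 'string'}

# reusing a reserved column name raises, naming the conflicting fields
try:
    generate_detailed_schema([("datum_uid", pa.string())])
except ValueError as error:
    print(error)
```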
@@ -0,0 +1,396 @@
+from collections import defaultdict
+
+import numpy as np
+import pyarrow as pa
+from numpy.typing import NDArray
+
+from valor_lite.object_detection.metric import Metric, MetricType
+
+
+def unpack_precision_recall_into_metric_lists(
+    counts: NDArray[np.uint64],
+    precision_recall_f1: NDArray[np.float64],
+    average_precision: NDArray[np.float64],
+    mean_average_precision: NDArray[np.float64],
+    average_recall: NDArray[np.float64],
+    mean_average_recall: NDArray[np.float64],
+    pr_curve: NDArray[np.float64],
+    iou_thresholds: list[float],
+    score_thresholds: list[float],
+    index_to_label: dict[int, str],
+):
+    metrics = defaultdict(list)
+
+    metrics[MetricType.AP] = [
+        Metric.average_precision(
+            value=float(average_precision[iou_idx][label_idx]),
+            iou_threshold=iou_threshold,
+            label=label,
+        )
+        for iou_idx, iou_threshold in enumerate(iou_thresholds)
+        for label_idx, label in index_to_label.items()
+    ]
+
+    metrics[MetricType.mAP] = [
+        Metric.mean_average_precision(
+            value=float(mean_average_precision[iou_idx]),
+            iou_threshold=iou_threshold,
+        )
+        for iou_idx, iou_threshold in enumerate(iou_thresholds)
+    ]
+
+    # TODO - (c.zaloom) will be removed in the future
+    metrics[MetricType.APAveragedOverIOUs] = [
+        Metric.average_precision_averaged_over_IOUs(
+            value=float(average_precision.mean(axis=0)[label_idx]),
+            iou_thresholds=iou_thresholds,
+            label=label,
+        )
+        for label_idx, label in index_to_label.items()
+    ]
+
+    # TODO - (c.zaloom) will be removed in the future
+    metrics[MetricType.mAPAveragedOverIOUs] = [
+        Metric.mean_average_precision_averaged_over_IOUs(
+            value=float(mean_average_precision.mean()),
+            iou_thresholds=iou_thresholds,
+        )
+    ]
+
+    metrics[MetricType.AR] = [
+        Metric.average_recall(
+            value=float(average_recall[score_idx, label_idx]),
+            iou_thresholds=iou_thresholds,
+            score_threshold=score_threshold,
+            label=label,
+        )
+        for score_idx, score_threshold in enumerate(score_thresholds)
+        for label_idx, label in index_to_label.items()
+    ]
+
+    metrics[MetricType.mAR] = [
+        Metric.mean_average_recall(
+            value=float(mean_average_recall[score_idx]),
+            iou_thresholds=iou_thresholds,
+            score_threshold=score_threshold,
+        )
+        for score_idx, score_threshold in enumerate(score_thresholds)
+    ]
+
+    # TODO - (c.zaloom) will be removed in the future
+    metrics[MetricType.ARAveragedOverScores] = [
+        Metric.average_recall_averaged_over_scores(
+            value=float(average_recall.mean(axis=0)[label_idx]),
+            score_thresholds=score_thresholds,
+            iou_thresholds=iou_thresholds,
+            label=label,
+        )
+        for label_idx, label in index_to_label.items()
+    ]
+
+    # TODO - (c.zaloom) will be removed in the future
+    metrics[MetricType.mARAveragedOverScores] = [
+        Metric.mean_average_recall_averaged_over_scores(
+            value=float(mean_average_recall.mean()),
+            score_thresholds=score_thresholds,
+            iou_thresholds=iou_thresholds,
+        )
+    ]
+
+    metrics[MetricType.PrecisionRecallCurve] = [
+        Metric.precision_recall_curve(
+            precisions=pr_curve[iou_idx, label_idx, :, 0].tolist(),
+            scores=pr_curve[iou_idx, label_idx, :, 1].tolist(),
+            iou_threshold=iou_threshold,
+            label=label,
+        )
+        for iou_idx, iou_threshold in enumerate(iou_thresholds)
+        for label_idx, label in index_to_label.items()
+    ]
+
+    for label_idx, label in index_to_label.items():
+        for score_idx, score_threshold in enumerate(score_thresholds):
+            for iou_idx, iou_threshold in enumerate(iou_thresholds):
+
+                row = counts[iou_idx, score_idx, :, label_idx]
+                kwargs = {
+                    "label": label,
+                    "iou_threshold": iou_threshold,
+                    "score_threshold": score_threshold,
+                }
+                metrics[MetricType.Counts].append(
+                    Metric.counts(
+                        tp=int(row[0]),
+                        fp=int(row[1]),
+                        fn=int(row[2]),
+                        **kwargs,
+                    )
+                )
+
+                row = precision_recall_f1[iou_idx, score_idx, :, label_idx]
+                metrics[MetricType.Precision].append(
+                    Metric.precision(
+                        value=float(row[0]),
+                        **kwargs,
+                    )
+                )
+                metrics[MetricType.Recall].append(
+                    Metric.recall(
+                        value=float(row[1]),
+                        **kwargs,
+                    )
+                )
+                metrics[MetricType.F1].append(
+                    Metric.f1_score(
+                        value=float(row[2]),
+                        **kwargs,
+                    )
+                )
+
+    return metrics
+
+
+def unpack_confusion_matrix(
+    confusion_matrices: NDArray[np.uint64],
+    unmatched_groundtruths: NDArray[np.uint64],
+    unmatched_predictions: NDArray[np.uint64],
+    index_to_label: dict[int, str],
+    iou_thresholds: list[float],
+    score_thresholds: list[float],
+) -> list[Metric]:
+    metrics = []
+    for iou_idx, iou_thresh in enumerate(iou_thresholds):
+        for score_idx, score_thresh in enumerate(score_thresholds):
+            cm_dict = {}
+            ugt_dict = {}
+            upd_dict = {}
+            for idx, label in index_to_label.items():
+                ugt_dict[label] = int(
+                    unmatched_groundtruths[iou_idx, score_idx, idx]
+                )
+                upd_dict[label] = int(
+                    unmatched_predictions[iou_idx, score_idx, idx]
+                )
+                for pidx, plabel in index_to_label.items():
+                    if label not in cm_dict:
+                        cm_dict[label] = {}
+                    cm_dict[label][plabel] = int(
+                        confusion_matrices[iou_idx, score_idx, idx, pidx]
+                    )
+            metrics.append(
+                Metric.confusion_matrix(
+                    confusion_matrix=cm_dict,
+                    unmatched_ground_truths=ugt_dict,
+                    unmatched_predictions=upd_dict,
+                    iou_threshold=iou_thresh,
+                    score_threshold=score_thresh,
+                )
+            )
+    return metrics
+
+
+def create_mapping(
+    tbl: pa.Table,
+    pairs: NDArray[np.float64],
+    index: int,
+    id_col: str,
+    uid_col: str,
+) -> dict[int, str]:
+    col = pairs[:, index].astype(np.int64)
+    values, indices = np.unique(col, return_index=True)
+    indices = indices[values >= 0]
+    return {
+        tbl[id_col][idx].as_py(): tbl[uid_col][idx].as_py() for idx in indices
+    }
+
+
+def unpack_examples(
+    detailed_pairs: NDArray[np.float64],
+    mask_tp: NDArray[np.bool_],
+    mask_fn: NDArray[np.bool_],
+    mask_fp: NDArray[np.bool_],
+    iou_thresholds: list[float],
+    score_thresholds: list[float],
+    index_to_datum_id: dict[int, str],
+    index_to_groundtruth_id: dict[int, str],
+    index_to_prediction_id: dict[int, str],
+) -> list[Metric]:
+    metrics = []
+    ids = detailed_pairs[:, :5].astype(np.int64)
+    unique_datums = np.unique(detailed_pairs[:, 0].astype(np.int64))
+    for datum_index in unique_datums:
+        mask_datum = detailed_pairs[:, 0] == datum_index
+        mask_datum_tp = mask_tp & mask_datum
+        mask_datum_fp = mask_fp & mask_datum
+        mask_datum_fn = mask_fn & mask_datum
+
+        datum_id = index_to_datum_id[datum_index]
+        for iou_idx, iou_thresh in enumerate(iou_thresholds):
+            for score_idx, score_thresh in enumerate(score_thresholds):
+
+                unique_tp = np.unique(
+                    ids[np.ix_(mask_datum_tp[iou_idx, score_idx], (0, 1, 2, 3, 4))], axis=0  # type: ignore - numpy ix_ typing
+                )
+                unique_fp = np.unique(
+                    ids[np.ix_(mask_datum_fp[iou_idx, score_idx], (0, 2, 4))], axis=0  # type: ignore - numpy ix_ typing
+                )
+                unique_fn = np.unique(
+                    ids[np.ix_(mask_datum_fn[iou_idx, score_idx], (0, 1, 3))], axis=0  # type: ignore - numpy ix_ typing
+                )
+
+                tp = [
+                    (
+                        index_to_groundtruth_id[row[1]],
+                        index_to_prediction_id[row[2]],
+                    )
+                    for row in unique_tp
+                ]
+                fp = [index_to_prediction_id[row[1]] for row in unique_fp]
+                fn = [index_to_groundtruth_id[row[1]] for row in unique_fn]
+                metrics.append(
+                    Metric.examples(
+                        datum_id=datum_id,
+                        true_positives=tp,
+                        false_negatives=fn,
+                        false_positives=fp,
+                        iou_threshold=iou_thresh,
+                        score_threshold=score_thresh,
+                    )
+                )
+    return metrics
+
+
+def create_empty_confusion_matrix_with_examples(
+    iou_threhsold: float,
+    score_threshold: float,
+    index_to_label: dict[int, str],
+) -> Metric:
+    unmatched_groundtruths = dict()
+    unmatched_predictions = dict()
+    confusion_matrix = dict()
+    for label in index_to_label.values():
+        unmatched_groundtruths[label] = {"count": 0, "examples": []}
+        unmatched_predictions[label] = {"count": 0, "examples": []}
+        confusion_matrix[label] = {}
+        for plabel in index_to_label.values():
+            confusion_matrix[label][plabel] = {"count": 0, "examples": []}
+
+    return Metric.confusion_matrix_with_examples(
+        confusion_matrix=confusion_matrix,
+        unmatched_ground_truths=unmatched_groundtruths,
+        unmatched_predictions=unmatched_predictions,
+        iou_threshold=iou_threhsold,
+        score_threshold=score_threshold,
+    )
+
+
+def _unpack_confusion_matrix_with_examples(
+    metric: Metric,
+    ids: NDArray[np.int32],
+    mask_matched: NDArray[np.bool_],
+    mask_fp_unmatched: NDArray[np.bool_],
+    mask_fn_unmatched: NDArray[np.bool_],
+    index_to_datum_id: dict[int, str],
+    index_to_groundtruth_id: dict[int, str],
+    index_to_prediction_id: dict[int, str],
+    index_to_label: dict[int, str],
+):
+    if not isinstance(metric.value, dict):
+        raise TypeError("expected metric to contain a dictionary value")
+
+    unique_matches = np.unique(
+        ids[np.ix_(mask_matched, (0, 1, 2, 3, 4))], axis=0  # type: ignore - numpy ix_ typing
+    )
+    unique_unmatched_predictions = np.unique(
+        ids[np.ix_(mask_fp_unmatched, (0, 2, 4))], axis=0  # type: ignore - numpy ix_ typing
+    )
+    unique_unmatched_groundtruths = np.unique(
+        ids[np.ix_(mask_fn_unmatched, (0, 1, 3))], axis=0  # type: ignore - numpy ix_ typing
+    )
+
+    n_matched = unique_matches.shape[0]
+    n_unmatched_predictions = unique_unmatched_predictions.shape[0]
+    n_unmatched_groundtruths = unique_unmatched_groundtruths.shape[0]
+    n_max = max(n_matched, n_unmatched_groundtruths, n_unmatched_predictions)
+
+    for idx in range(n_max):
+        if idx < n_unmatched_groundtruths:
+            label = index_to_label[unique_unmatched_groundtruths[idx, 2]]
+            metric.value["unmatched_ground_truths"][label]["count"] += 1
+            metric.value["unmatched_ground_truths"][label]["examples"].append(
+                {
+                    "datum_id": index_to_datum_id[
+                        unique_unmatched_groundtruths[idx, 0]
+                    ],
+                    "ground_truth_id": index_to_groundtruth_id[
+                        unique_unmatched_groundtruths[idx, 1]
+                    ],
+                }
+            )
+        if idx < n_unmatched_predictions:
+            label_id = unique_unmatched_predictions[idx, 2]
+            label = index_to_label[label_id]
+            metric.value["unmatched_predictions"][label]["count"] += 1
+            metric.value["unmatched_predictions"][label]["examples"].append(
+                {
+                    "datum_id": index_to_datum_id[
+                        unique_unmatched_predictions[idx, 0]
+                    ],
+                    "prediction_id": index_to_prediction_id[
+                        unique_unmatched_predictions[idx, 1]
+                    ],
+                }
+            )
+        if idx < n_matched:
+            glabel = index_to_label[unique_matches[idx, 3]]
+            plabel = index_to_label[unique_matches[idx, 4]]
+            metric.value["confusion_matrix"][glabel][plabel]["count"] += 1
+            metric.value["confusion_matrix"][glabel][plabel][
+                "examples"
+            ].append(
+                {
+                    "datum_id": index_to_datum_id[unique_matches[idx, 0]],
+                    "ground_truth_id": index_to_groundtruth_id[
+                        unique_matches[idx, 1]
+                    ],
+                    "prediction_id": index_to_prediction_id[
+                        unique_matches[idx, 2]
+                    ],
+                }
+            )
+
+    return metric
+
+
+def unpack_confusion_matrix_with_examples(
+    metrics: dict[int, dict[int, Metric]],
+    detailed_pairs: NDArray[np.float64],
+    mask_tp: NDArray[np.bool_],
+    mask_fp_fn_misclf: NDArray[np.bool_],
+    mask_fp_unmatched: NDArray[np.bool_],
+    mask_fn_unmatched: NDArray[np.bool_],
+    index_to_datum_id: dict[int, str],
+    index_to_groundtruth_id: dict[int, str],
+    index_to_prediction_id: dict[int, str],
+    index_to_label: dict[int, str],
+) -> list[Metric]:
+
+    ids = detailed_pairs[:, :5].astype(np.int32)
+
+    mask_matched = mask_tp | mask_fp_fn_misclf
+
+    return [
+        _unpack_confusion_matrix_with_examples(
+            metric=metric,
+            ids=ids,
+            mask_matched=mask_matched[iou_idx, score_idx],
+            mask_fp_unmatched=mask_fp_unmatched[iou_idx, score_idx],
+            mask_fn_unmatched=mask_fn_unmatched[iou_idx, score_idx],
+            index_to_datum_id=index_to_datum_id,
+            index_to_groundtruth_id=index_to_groundtruth_id,
+            index_to_prediction_id=index_to_prediction_id,
+            index_to_label=index_to_label,
+        )
+        for iou_idx, inner in metrics.items()
+        for score_idx, metric in inner.items()
+    ]
valor_lite/semantic_segmentation/__init__.py
ADDED
@@ -0,0 +1,15 @@
+from .annotation import Bitmask, Segmentation
+from .evaluator import Builder, Evaluator, EvaluatorInfo
+from .loader import Loader
+from .metric import Metric, MetricType
+
+__all__ = [
+    "Builder",
+    "Loader",
+    "Evaluator",
+    "Segmentation",
+    "Bitmask",
+    "Metric",
+    "MetricType",
+    "EvaluatorInfo",
+]