valor-lite 0.36.6__py3-none-any.whl → 0.37.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. valor_lite/cache/__init__.py +11 -0
  2. valor_lite/cache/compute.py +211 -0
  3. valor_lite/cache/ephemeral.py +302 -0
  4. valor_lite/cache/persistent.py +536 -0
  5. valor_lite/classification/__init__.py +5 -10
  6. valor_lite/classification/annotation.py +4 -0
  7. valor_lite/classification/computation.py +233 -251
  8. valor_lite/classification/evaluator.py +882 -0
  9. valor_lite/classification/loader.py +97 -0
  10. valor_lite/classification/metric.py +141 -4
  11. valor_lite/classification/shared.py +184 -0
  12. valor_lite/classification/utilities.py +221 -118
  13. valor_lite/exceptions.py +5 -0
  14. valor_lite/object_detection/__init__.py +5 -4
  15. valor_lite/object_detection/annotation.py +13 -1
  16. valor_lite/object_detection/computation.py +368 -299
  17. valor_lite/object_detection/evaluator.py +804 -0
  18. valor_lite/object_detection/loader.py +292 -0
  19. valor_lite/object_detection/metric.py +152 -3
  20. valor_lite/object_detection/shared.py +206 -0
  21. valor_lite/object_detection/utilities.py +182 -100
  22. valor_lite/semantic_segmentation/__init__.py +5 -4
  23. valor_lite/semantic_segmentation/annotation.py +7 -0
  24. valor_lite/semantic_segmentation/computation.py +20 -110
  25. valor_lite/semantic_segmentation/evaluator.py +414 -0
  26. valor_lite/semantic_segmentation/loader.py +205 -0
  27. valor_lite/semantic_segmentation/shared.py +149 -0
  28. valor_lite/semantic_segmentation/utilities.py +6 -23
  29. {valor_lite-0.36.6.dist-info → valor_lite-0.37.5.dist-info}/METADATA +3 -1
  30. valor_lite-0.37.5.dist-info/RECORD +49 -0
  31. {valor_lite-0.36.6.dist-info → valor_lite-0.37.5.dist-info}/WHEEL +1 -1
  32. valor_lite/classification/manager.py +0 -545
  33. valor_lite/object_detection/manager.py +0 -864
  34. valor_lite/profiling.py +0 -374
  35. valor_lite/semantic_segmentation/benchmark.py +0 -237
  36. valor_lite/semantic_segmentation/manager.py +0 -446
  37. valor_lite-0.36.6.dist-info/RECORD +0 -41
  38. {valor_lite-0.36.6.dist-info → valor_lite-0.37.5.dist-info}/top_level.txt +0 -0
valor_lite/classification/utilities.py CHANGED
@@ -1,54 +1,25 @@
  from collections import defaultdict

  import numpy as np
+ import pyarrow as pa
  from numpy.typing import NDArray

- from valor_lite.classification.computation import PairClassification
  from valor_lite.classification.metric import Metric, MetricType


- def unpack_precision_recall_rocauc_into_metric_lists(
-     results: tuple[
-         NDArray[np.int32],
-         NDArray[np.float64],
-         NDArray[np.float64],
-         NDArray[np.float64],
-         NDArray[np.float64],
-         NDArray[np.float64],
-         float,
-     ],
+ def unpack_precision_recall(
+     counts: NDArray[np.uint64],
+     precision: NDArray[np.float64],
+     recall: NDArray[np.float64],
+     accuracy: NDArray[np.float64],
+     f1_score: NDArray[np.float64],
      score_thresholds: list[float],
      hardmax: bool,
-     label_metadata: NDArray[np.int32],
-     index_to_label: list[str],
+     index_to_label: dict[int, str],
  ) -> dict[MetricType, list[Metric]]:
-     (
-         counts,
-         precision,
-         recall,
-         accuracy,
-         f1_score,
-         rocauc,
-         mean_rocauc,
-     ) = results

      metrics = defaultdict(list)

-     metrics[MetricType.ROCAUC] = [
-         Metric.roc_auc(
-             value=float(rocauc[label_idx]),
-             label=label,
-         )
-         for label_idx, label in enumerate(index_to_label)
-         if label_metadata[label_idx, 0] > 0
-     ]
-
-     metrics[MetricType.mROCAUC] = [
-         Metric.mean_roc_auc(
-             value=float(mean_rocauc),
-         )
-     ]
-
      metrics[MetricType.Accuracy] = [
          Metric.accuracy(
              value=float(accuracy[score_idx]),
@@ -58,9 +29,8 @@ def unpack_precision_recall_rocauc_into_metric_lists(
          for score_idx, score_threshold in enumerate(score_thresholds)
      ]

-     for label_idx, label in enumerate(index_to_label):
+     for label_idx, label in index_to_label.items():
          for score_idx, score_threshold in enumerate(score_thresholds):
-
              kwargs = {
                  "label": label,
                  "hardmax": hardmax,
@@ -77,10 +47,6 @@ def unpack_precision_recall_rocauc_into_metric_lists(
              )
          )

-         # if no groundtruths exists for a label, skip it.
-         if label_metadata[label_idx, 0] == 0:
-             continue
-
          metrics[MetricType.Precision].append(
              Metric.precision(
                  value=float(precision[score_idx, label_idx]),
@@ -102,110 +68,247 @@ def unpack_precision_recall_rocauc_into_metric_lists(
      return metrics


- def _create_empty_confusion_matrix(index_to_labels: list[str]):
-     unmatched_ground_truths = dict()
+ def unpack_rocauc(
+     rocauc: NDArray[np.float64],
+     mean_rocauc: float,
+     index_to_label: dict[int, str],
+ ) -> dict[MetricType, list[Metric]]:
+     metrics = {}
+     metrics[MetricType.ROCAUC] = [
+         Metric.roc_auc(
+             value=float(rocauc[label_idx]),
+             label=label,
+         )
+         for label_idx, label in index_to_label.items()
+     ]
+     metrics[MetricType.mROCAUC] = [
+         Metric.mean_roc_auc(
+             value=float(mean_rocauc),
+         )
+     ]
+     return metrics
+
+
+ def unpack_confusion_matrix(
+     confusion_matrices: NDArray[np.uint64],
+     unmatched_groundtruths: NDArray[np.uint64],
+     index_to_label: dict[int, str],
+     score_thresholds: list[float],
+     hardmax: bool,
+ ) -> list[Metric]:
+     metrics = []
+     for score_idx, score_thresh in enumerate(score_thresholds):
+         cm_dict = {}
+         ugt_dict = {}
+         for idx, label in index_to_label.items():
+             ugt_dict[label] = int(unmatched_groundtruths[score_idx, idx])
+             for pidx, plabel in index_to_label.items():
+                 if label not in cm_dict:
+                     cm_dict[label] = {}
+                 cm_dict[label][plabel] = int(
+                     confusion_matrices[score_idx, idx, pidx]
+                 )
+         metrics.append(
+             Metric.confusion_matrix(
+                 confusion_matrix=cm_dict,
+                 unmatched_ground_truths=ugt_dict,
+                 score_threshold=score_thresh,
+                 hardmax=hardmax,
+             )
+         )
+     return metrics
+
+
+ def create_mapping(
+     tbl: pa.Table,
+     pairs: NDArray[np.float64],
+     index: int,
+     id_col: str,
+     uid_col: str,
+ ) -> dict[int, str]:
+     col = pairs[:, index].astype(np.int64)
+     values, indices = np.unique(col, return_index=True)
+     indices = indices[values >= 0]
+     return {
+         tbl[id_col][idx].as_py(): tbl[uid_col][idx].as_py() for idx in indices
+     }
+
+
+ def unpack_examples(
+     ids: NDArray[np.int64],
+     mask_tp: NDArray[np.bool_],
+     mask_fn: NDArray[np.bool_],
+     mask_fp: NDArray[np.bool_],
+     score_thresholds: list[float],
+     hardmax: bool,
+     index_to_datum_id: dict[int, str],
+     index_to_label: dict[int, str],
+ ) -> list[Metric]:
+     metrics = []
+     unique_datums = np.unique(ids[:, 0])
+     for datum_index in unique_datums:
+         mask_datum = ids[:, 0] == datum_index
+         mask_datum_tp = mask_tp & mask_datum
+         mask_datum_fp = mask_fp & mask_datum
+         mask_datum_fn = mask_fn & mask_datum
+
+         datum_id = index_to_datum_id[datum_index]
+         for score_idx, score_thresh in enumerate(score_thresholds):
+
+             unique_tp = np.unique(
+                 # extract true-positive (datum_id, gt_id, pd_id) pairs
+                 ids[np.ix_(mask_datum_tp[score_idx], (0, 1, 2))],
+                 axis=0,
+             )
+             unique_fp = np.unique(
+                 # extract false-positive (datum_id, pd_id) pairs
+                 ids[np.ix_(mask_datum_fp[score_idx], (0, 2))],
+                 axis=0,
+             )
+             unique_fn = np.unique(
+                 # extract false-negative (datum_id, gt_id)
+                 ids[np.ix_(mask_datum_fn[score_idx], (0, 1))],
+                 axis=0,
+             )
+
+             tp = [index_to_label[row[1]] for row in unique_tp]
+             fp = [
+                 index_to_label[row[1]]
+                 for row in unique_fp
+                 if index_to_label[row[1]] not in tp
+             ]
+             fn = [
+                 index_to_label[row[1]]
+                 for row in unique_fn
+                 if index_to_label[row[1]] not in tp
+             ]
+             metrics.append(
+                 Metric.examples(
+                     datum_id=datum_id,
+                     true_positives=tp,
+                     false_negatives=fn,
+                     false_positives=fp,
+                     score_threshold=score_thresh,
+                     hardmax=hardmax,
+                 )
+             )
+     return metrics
+
+
+ def create_empty_confusion_matrix_with_examples(
+     score_threshold: float,
+     hardmax: bool,
+     index_to_label: dict[int, str],
+ ) -> Metric:
+     unmatched_groundtruths = dict()
      confusion_matrix = dict()
-     for label in index_to_labels:
-         unmatched_ground_truths[label] = {"count": 0, "examples": []}
+     for label in index_to_label.values():
+         unmatched_groundtruths[label] = {"count": 0, "examples": []}
          confusion_matrix[label] = {}
-         for plabel in index_to_labels:
+         for plabel in index_to_label.values():
              confusion_matrix[label][plabel] = {"count": 0, "examples": []}
-     return (
-         confusion_matrix,
-         unmatched_ground_truths,
+
+     return Metric.confusion_matrix_with_examples(
+         confusion_matrix=confusion_matrix,
+         unmatched_ground_truths=unmatched_groundtruths,
+         score_threshold=score_threshold,
+         hardmax=hardmax,
      )


- def _unpack_confusion_matrix(
-     ids: NDArray[np.int32],
+ def _unpack_confusion_matrix_with_examples(
+     metric: Metric,
+     ids: NDArray[np.int64],
      scores: NDArray[np.float64],
+     winners: NDArray[np.bool_],
      mask_matched: NDArray[np.bool_],
-     mask_fn_unmatched: NDArray[np.bool_],
-     index_to_datum_id: list[str],
-     index_to_label: list[str],
-     score_threshold: float,
+     mask_unmatched_fn: NDArray[np.bool_],
+     index_to_datum_id: dict[int, str],
+     index_to_label: dict[int, str],
  ):
-     (
-         confusion_matrix,
-         unmatched_ground_truths,
-     ) = _create_empty_confusion_matrix(index_to_label)
-
-     unique_matches, unique_match_indices = np.unique(
-         ids[np.ix_(mask_matched, (0, 1, 2))],  # type: ignore - numpy ix_ typing
-         axis=0,
-         return_index=True,
-     )
-     (
-         unique_unmatched_groundtruths,
-         unique_unmatched_groundtruth_indices,
-     ) = np.unique(
-         ids[np.ix_(mask_fn_unmatched, (0, 1))],  # type: ignore - numpy ix_ typing
-         axis=0,
-         return_index=True,
-     )
+     if not isinstance(metric.value, dict):
+         raise TypeError("expected metric to contain a dictionary value")

-     n_matched = unique_matches.shape[0]
-     n_unmatched_groundtruths = unique_unmatched_groundtruths.shape[0]
-     n_max = max(n_matched, n_unmatched_groundtruths)
+     mask_valid_gts = ids[:, 1] >= 0
+     mask_valid_pds = ids[:, 2] >= 0
+
+     valid_matches = ids[mask_valid_gts & mask_valid_pds]
+     valid_gts = ids[mask_valid_gts]
+
+     n_matched = 0
+     unique_matches = np.empty((1, 3))
+     if valid_matches.size > 0:
+         unique_matches, unique_match_indices = np.unique(
+             # extract matched (datum_id, gt_id, pd_id) pairs
+             valid_matches[np.ix_(mask_matched, (0, 1, 2))],  # type: ignore[reportArgumentType]
+             axis=0,
+             return_index=True,
+         )
+         scores = scores[mask_matched][unique_match_indices]
+         n_matched = unique_matches.shape[0]
+
+     n_unmatched_groundtruths = 0
+     unique_unmatched_groundtruths = np.empty((1, 2))
+     if valid_gts.size > 0:
+         unique_unmatched_groundtruths = np.unique(
+             # extract unmatched false-negative (datum_id, gt_id) pairs
+             valid_gts[np.ix_(mask_unmatched_fn, (0, 1))],  # type: ignore[reportArgumentType]
+             axis=0,
+         )
+         unique_unmatched_groundtruths = unique_unmatched_groundtruths[
+             unique_unmatched_groundtruths[:, 1] >= 0
+         ]
+         n_unmatched_groundtruths = unique_unmatched_groundtruths.shape[0]

+     n_max = max(n_matched, n_unmatched_groundtruths)
      for idx in range(n_max):
          if idx < n_matched:
+             datum_id = index_to_datum_id[unique_matches[idx, 0]]
              glabel = index_to_label[unique_matches[idx, 1]]
              plabel = index_to_label[unique_matches[idx, 2]]
-             confusion_matrix[glabel][plabel]["count"] += 1
-             confusion_matrix[glabel][plabel]["examples"].append(
+             score = float(scores[idx])
+
+             metric.value["confusion_matrix"][glabel][plabel]["count"] += 1
+             metric.value["confusion_matrix"][glabel][plabel][
+                 "examples"
+             ].append(
                  {
-                     "datum_id": index_to_datum_id[unique_matches[idx, 0]],
-                     "score": float(scores[unique_match_indices[idx]]),
+                     "datum_id": datum_id,
+                     "score": score,
                  }
              )
          if idx < n_unmatched_groundtruths:
+             datum_id = index_to_datum_id[unique_unmatched_groundtruths[idx, 0]]
              label = index_to_label[unique_unmatched_groundtruths[idx, 1]]
-             unmatched_ground_truths[label]["count"] += 1
-             unmatched_ground_truths[label]["examples"].append(
-                 {
-                     "datum_id": index_to_datum_id[
-                         unique_unmatched_groundtruths[idx, 0]
-                     ],
-                 }
+
+             metric.value["unmatched_ground_truths"][label]["count"] += 1
+             metric.value["unmatched_ground_truths"][label]["examples"].append(
+                 {"datum_id": datum_id}
              )

-     return Metric.confusion_matrix(
-         confusion_matrix=confusion_matrix,
-         unmatched_ground_truths=unmatched_ground_truths,
-         score_threshold=score_threshold,
-     )
+     return metric


- def unpack_confusion_matrix_into_metric_list(
-     result: NDArray[np.uint8],
-     detailed_pairs: NDArray[np.float64],
-     score_thresholds: list[float],
-     index_to_datum_id: list[str],
-     index_to_label: list[str],
+ def unpack_confusion_matrix_with_examples(
+     metrics: dict[int, Metric],
+     ids: NDArray[np.int64],
+     scores: NDArray[np.float64],
+     winners: NDArray[np.bool_],
+     mask_matched: NDArray[np.bool_],
+     mask_unmatched_fn: NDArray[np.bool_],
+     index_to_datum_id: dict[int, str],
+     index_to_label: dict[int, str],
  ) -> list[Metric]:
-
-     ids = detailed_pairs[:, :3].astype(np.int32)
-
-     mask_matched = (
-         np.bitwise_and(
-             result, PairClassification.TP | PairClassification.FP_FN_MISCLF
-         )
-         > 0
-     )
-     mask_fn_unmatched = (
-         np.bitwise_and(result, PairClassification.FN_UNMATCHED) > 0
-     )
-
      return [
-         _unpack_confusion_matrix(
+         _unpack_confusion_matrix_with_examples(
+             metric,
              ids=ids,
-             scores=detailed_pairs[:, 3],
+             scores=scores,
+             winners=winners,
              mask_matched=mask_matched[score_idx, :],
-             mask_fn_unmatched=mask_fn_unmatched[score_idx, :],
+             mask_unmatched_fn=mask_unmatched_fn[score_idx, :],
              index_to_datum_id=index_to_datum_id,
              index_to_label=index_to_label,
-             score_threshold=score_threshold,
          )
-         for score_idx, score_threshold in enumerate(score_thresholds)
+         for score_idx, metric in metrics.items()
      ]
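As a reading aid, a minimal sketch of how the new unpack_rocauc helper above might be called. The module path and the Metric factory methods are taken from this hunk; the arrays, labels, and mean value are invented for illustration.

import numpy as np

from valor_lite.classification.metric import MetricType
from valor_lite.classification.utilities import unpack_rocauc  # module path assumed from this diff

# one ROC AUC value per label index, plus a pre-computed mean
metrics = unpack_rocauc(
    rocauc=np.array([0.91, 0.78], dtype=np.float64),
    mean_rocauc=0.845,
    index_to_label={0: "cat", 1: "dog"},
)
print(metrics[MetricType.ROCAUC])   # one Metric per label
print(metrics[MetricType.mROCAUC])  # a single mean ROC AUC Metric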
valor_lite/exceptions.py CHANGED
@@ -5,6 +5,11 @@ class EmptyEvaluatorError(Exception):
          )


+ class EmptyCacheError(Exception):
+     def __init__(self):
+         super().__init__("cache contains no data")
+
+
  class EmptyFilterError(Exception):
      def __init__(self, message: str):
          super().__init__(message)
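For illustration, a small snippet exercising the new EmptyCacheError shown above. It only uses the constructor visible in this hunk; the conditions under which valor_lite raises it are not part of this diff.

from valor_lite.exceptions import EmptyCacheError

try:
    raise EmptyCacheError()
except EmptyCacheError as err:
    print(err)  # prints "cache contains no data"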
valor_lite/object_detection/__init__.py CHANGED
@@ -1,6 +1,8 @@
  from .annotation import Bitmask, BoundingBox, Detection, Polygon
- from .manager import DataLoader, Evaluator, Filter, Metadata
+ from .evaluator import Evaluator
+ from .loader import Loader
  from .metric import Metric, MetricType
+ from .shared import EvaluatorInfo

  __all__ = [
      "Bitmask",
@@ -9,8 +11,7 @@ __all__ = [
      "Polygon",
      "Metric",
      "MetricType",
-     "DataLoader",
+     "Loader",
      "Evaluator",
-     "Filter",
-     "Metadata",
+     "EvaluatorInfo",
  ]
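The export change above replaces the manager module with separate evaluator, loader, and shared modules. A sketch of the resulting import surface, using only the names listed in the hunk (construction and usage of these classes are not shown in this diff):

from valor_lite.object_detection import (
    BoundingBox,
    Detection,
    Evaluator,
    EvaluatorInfo,
    Loader,
    Metric,
    MetricType,
)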
valor_lite/object_detection/annotation.py CHANGED
@@ -1,5 +1,5 @@
  from dataclasses import dataclass, field
- from typing import Generic, TypeVar
+ from typing import Any, Generic, TypeVar

  import numpy as np
  from numpy.typing import NDArray
@@ -27,6 +27,8 @@ class BoundingBox:
          List of labels associated with the bounding box.
      scores : list of float, optional
          Confidence scores corresponding to each label. Defaults to an empty list.
+     metadata : dict[str, Any], optional
+         A dictionary containing any metadata to be used within filtering operations.

      Examples
      --------
@@ -50,6 +52,7 @@ class BoundingBox:
      ymax: float
      labels: list[str]
      scores: list[float] = field(default_factory=list)
+     metadata: dict[str, Any] | None = None

      def __post_init__(self):
          if len(self.scores) == 0 and len(self.labels) != 1:
@@ -89,6 +92,8 @@ class Polygon:
          List of labels associated with the polygon.
      scores : list of float, optional
          Confidence scores corresponding to each label. Defaults to an empty list.
+     metadata : dict[str, Any], optional
+         A dictionary containing any metadata to be used within filtering operations.

      Examples
      --------
@@ -109,6 +114,7 @@ class Polygon:
      shape: ShapelyPolygon
      labels: list[str]
      scores: list[float] = field(default_factory=list)
+     metadata: dict[str, Any] | None = None

      def __post_init__(self):
          if not isinstance(self.shape, ShapelyPolygon):
@@ -141,6 +147,8 @@ class Bitmask:
          List of labels associated with the mask.
      scores : list of float, optional
          Confidence scores corresponding to each label. Defaults to an empty list.
+     metadata : dict[str, Any], optional
+         A dictionary containing any metadata to be used within filtering operations.

      Examples
      --------
@@ -161,6 +169,7 @@ class Bitmask:
      mask: NDArray[np.bool_]
      labels: list[str]
      scores: list[float] = field(default_factory=list)
+     metadata: dict[str, Any] | None = None

      def __post_init__(self):

@@ -200,6 +209,8 @@ class Detection(Generic[AnnotationType]):
          List of ground truth annotations.
      predictions : list[BoundingBox] | list[Polygon] | list[Bitmask]
          List of predicted annotations.
+     metadata : dict[str, Any], optional
+         A dictionary containing any metadata to be used within filtering operations.

      Examples
      --------
@@ -217,6 +228,7 @@ class Detection(Generic[AnnotationType]):
      uid: str
      groundtruths: list[AnnotationType]
      predictions: list[AnnotationType]
+     metadata: dict[str, Any] | None = None

      def __post_init__(self):
          for prediction in self.predictions:
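To illustrate the new optional metadata fields added above, a hedged construction example. The coordinate field names other than ymax (xmin, ymin, xmax) and all metadata keys are assumptions for illustration, not taken from this diff.

from valor_lite.object_detection import BoundingBox, Detection

groundtruth = BoundingBox(
    xmin=10.0, ymin=10.0, xmax=50.0, ymax=60.0,
    labels=["dog"],
    metadata={"camera": "front"},  # new optional per-annotation metadata
)
prediction = BoundingBox(
    xmin=12.0, ymin=11.0, xmax=48.0, ymax=58.0,
    labels=["dog"],
    scores=[0.9],
    metadata={"model": "v2"},
)
detection = Detection(
    uid="img-001",
    groundtruths=[groundtruth],
    predictions=[prediction],
    metadata={"split": "val"},  # new optional per-datum metadata
)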