valor-lite 0.33.7__py3-none-any.whl → 0.33.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,31 +14,49 @@ class MetricType(Enum):
     F1 = "F1"
     ConfusionMatrix = "ConfusionMatrix"
 
-    @classmethod
-    def base(cls):
-        return [
-            cls.Counts,
-            cls.ROCAUC,
-            cls.mROCAUC,
-            cls.Precision,
-            cls.Recall,
-            cls.Accuracy,
-            cls.F1,
-        ]
-
 
 @dataclass
 class Counts:
+    """
+    Confusion matrix counts at specified score thresholds for binary classification.
+
+    This class stores the true positive (`tp`), false positive (`fp`), false negative (`fn`),
+    and true negative (`tn`) counts computed at various score thresholds for a binary
+    classification task.
+
+    Attributes
+    ----------
+    tp : list[int]
+        True positive counts at each score threshold.
+    fp : list[int]
+        False positive counts at each score threshold.
+    fn : list[int]
+        False negative counts at each score threshold.
+    tn : list[int]
+        True negative counts at each score threshold.
+    score_thresholds : list[float]
+        Score thresholds at which the counts are computed.
+    hardmax : bool
+        Indicates whether hardmax thresholding was used.
+    label : str
+        The class label for which the counts are computed.
+
+    Methods
+    -------
+    to_metric()
+        Converts the instance to a generic `Metric` object.
+    to_dict()
+        Converts the instance to a dictionary representation.
+    """
+
     tp: list[int]
     fp: list[int]
     fn: list[int]
     tn: list[int]
     score_thresholds: list[float]
     hardmax: bool
-    label: tuple[str, str]
+    label: str
 
-    @property
-    def metric(self) -> Metric:
+    def to_metric(self) -> Metric:
         return Metric(
             type=type(self).__name__,
             value={
@@ -50,15 +68,12 @@ class Counts:
             parameters={
                 "score_thresholds": self.score_thresholds,
                 "hardmax": self.hardmax,
-                "label": {
-                    "key": self.label[0],
-                    "value": self.label[1],
-                },
+                "label": self.label,
             },
         )
 
     def to_dict(self) -> dict:
-        return self.metric.to_dict()
+        return self.to_metric().to_dict()
 
 
 @dataclass
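
The serialization pattern changes in two ways here: the `metric` property becomes an explicit `to_metric()` method, and the old `(key, value)` label tuple flattens to a plain string. A minimal sketch of the new call pattern (the field values are illustrative, and the exact `Metric.to_dict()` output shape is an assumption, not shown in this diff):

    counts = Counts(
        tp=[90, 70],
        fp=[10, 5],
        fn=[5, 25],
        tn=[95, 100],
        score_thresholds=[0.25, 0.75],
        hardmax=True,
        label="cat",  # 0.33.7 expected a tuple such as ("class", "cat")
    )
    metric = counts.to_metric()   # 0.33.7: counts.metric (a property)
    payload = counts.to_dict()    # parameters now carry "label": "cat" directly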
@@ -66,86 +81,297 @@ class _ThresholdValue:
     value: list[float]
     score_thresholds: list[float]
     hardmax: bool
-    label: tuple[str, str]
+    label: str
 
-    @property
-    def metric(self) -> Metric:
+    def to_metric(self) -> Metric:
         return Metric(
             type=type(self).__name__,
             value=self.value,
             parameters={
                 "score_thresholds": self.score_thresholds,
                 "hardmax": self.hardmax,
-                "label": {
-                    "key": self.label[0],
-                    "value": self.label[1],
-                },
+                "label": self.label,
             },
         )
 
     def to_dict(self) -> dict:
-        return self.metric.to_dict()
+        return self.to_metric().to_dict()
 
 
 class Precision(_ThresholdValue):
+    """
+    Precision metric for a specific class label.
+
+    This class calculates the precision at various score thresholds for a binary
+    classification task. Precision is defined as the ratio of true positives to the
+    sum of true positives and false positives.
+
+    Attributes
+    ----------
+    value : list[float]
+        Precision values computed at each score threshold.
+    score_thresholds : list[float]
+        Score thresholds at which the precision values are computed.
+    hardmax : bool
+        Indicates whether hardmax thresholding was used.
+    label : str
+        The class label for which the precision is computed.
+
+    Methods
+    -------
+    to_metric()
+        Converts the instance to a generic `Metric` object.
+    to_dict()
+        Converts the instance to a dictionary representation.
+    """
+
     pass
 
 
 class Recall(_ThresholdValue):
+    """
+    Recall metric for a specific class label.
+
+    This class calculates the recall at various score thresholds for a binary
+    classification task. Recall is defined as the ratio of true positives to the
+    sum of true positives and false negatives.
+
+    Attributes
+    ----------
+    value : list[float]
+        Recall values computed at each score threshold.
+    score_thresholds : list[float]
+        Score thresholds at which the recall values are computed.
+    hardmax : bool
+        Indicates whether hardmax thresholding was used.
+    label : str
+        The class label for which the recall is computed.
+
+    Methods
+    -------
+    to_metric()
+        Converts the instance to a generic `Metric` object.
+    to_dict()
+        Converts the instance to a dictionary representation.
+    """
+
     pass
 
 
 class Accuracy(_ThresholdValue):
+    """
+    Accuracy metric for a specific class label.
+
+    This class calculates the accuracy at various score thresholds for a binary
+    classification task. Accuracy is defined as the ratio of the sum of true positives
+    and true negatives over all predictions.
+
+    Attributes
+    ----------
+    value : list[float]
+        Accuracy values computed at each score threshold.
+    score_thresholds : list[float]
+        Score thresholds at which the accuracy values are computed.
+    hardmax : bool
+        Indicates whether hardmax thresholding was used.
+    label : str
+        The class label for which the accuracy is computed.
+
+    Methods
+    -------
+    to_metric()
+        Converts the instance to a generic `Metric` object.
+    to_dict()
+        Converts the instance to a dictionary representation.
+    """
+
     pass
 
 
 class F1(_ThresholdValue):
+    """
+    F1 score for a specific class label.
+
+    This class calculates the F1 score at various score thresholds for a binary
+    classification task.
+
+    Attributes
+    ----------
+    value : list[float]
+        F1 scores computed at each score threshold.
+    score_thresholds : list[float]
+        Score thresholds at which the F1 scores are computed.
+    hardmax : bool
+        Indicates whether hardmax thresholding was used.
+    label : str
+        The class label for which the F1 score is computed.
+
+    Methods
+    -------
+    to_metric()
+        Converts the instance to a generic `Metric` object.
+    to_dict()
+        Converts the instance to a dictionary representation.
+    """
+
     pass
 
 
 @dataclass
 class ROCAUC:
+    """
+    Receiver Operating Characteristic Area Under the Curve (ROC AUC).
+
+    This class calculates the ROC AUC score for a specific class label in a multiclass
+    classification task. ROC AUC is a performance measurement for classification problems
+    at various threshold settings. It reflects the ability of the classifier to
+    distinguish between the positive and negative classes.
+
+    Parameters
+    ----------
+    value : float
+        The computed ROC AUC score.
+    label : str
+        The class label for which the ROC AUC is computed.
+
+    Methods
+    -------
+    to_metric()
+        Converts the instance to a generic `Metric` object.
+    to_dict()
+        Converts the instance to a dictionary representation.
+    """
+
     value: float
-    label: tuple[str, str]
+    label: str
 
-    @property
-    def metric(self) -> Metric:
+    def to_metric(self) -> Metric:
         return Metric(
             type=type(self).__name__,
             value=self.value,
-            parameters={
-                "label": {
-                    "key": self.label[0],
-                    "value": self.label[1],
-                },
-            },
+            parameters={"label": self.label},
         )
 
     def to_dict(self) -> dict:
-        return self.metric.to_dict()
+        return self.to_metric().to_dict()
 
 
 @dataclass
 class mROCAUC:
+    """
+    Mean Receiver Operating Characteristic Area Under the Curve (mROC AUC).
+
+    This class calculates the mean ROC AUC score over all classes in a multiclass
+    classification task. It provides an aggregate measure of the model's ability to
+    distinguish between classes.
+
+    Parameters
+    ----------
+    value : float
+        The computed mean ROC AUC score.
+
+    Methods
+    -------
+    to_metric()
+        Converts the instance to a generic `Metric` object.
+    to_dict()
+        Converts the instance to a dictionary representation.
+    """
+
     value: float
-    label_key: str
 
-    @property
-    def metric(self) -> Metric:
+    def to_metric(self) -> Metric:
         return Metric(
             type=type(self).__name__,
             value=self.value,
-            parameters={
-                "label_key": self.label_key,
-            },
+            parameters={},
         )
 
     def to_dict(self) -> dict:
-        return self.metric.to_dict()
+        return self.to_metric().to_dict()
 
 
 @dataclass
 class ConfusionMatrix:
+    """
+    The confusion matrix and related metrics for the classification task.
+
+    This class encapsulates detailed information about the model's performance, including
+    correct predictions, misclassifications, hallucinations (false positives), and missing
+    predictions (false negatives). It provides counts and examples for each category to
+    facilitate in-depth analysis.
+
+    Confusion Matrix Structure:
+    {
+        ground_truth_label: {
+            predicted_label: {
+                'count': int,
+                'examples': [
+                    {
+                        'datum': str,
+                        'groundtruth': dict,  # {'xmin': float, 'xmax': float, 'ymin': float, 'ymax': float}
+                        'prediction': dict,  # {'xmin': float, 'xmax': float, 'ymin': float, 'ymax': float}
+                        'score': float,
+                    },
+                    ...
+                ],
+            },
+            ...
+        },
+        ...
+    }
+
+    Hallucinations Structure:
+    {
+        prediction_label: {
+            'count': int,
+            'examples': [
+                {
+                    'datum': str,
+                    'prediction': dict,  # {'xmin': float, 'xmax': float, 'ymin': float, 'ymax': float}
+                    'score': float,
+                },
+                ...
+            ],
+        },
+        ...
+    }
+
+    Missing Prediction Structure:
+    {
+        ground_truth_label: {
+            'count': int,
+            'examples': [
+                {
+                    'datum': str,
+                    'groundtruth': dict,  # {'xmin': float, 'xmax': float, 'ymin': float, 'ymax': float}
+                },
+                ...
+            ],
+        },
+        ...
+    }
+
+    Attributes
+    ----------
+    confusion_matrix : dict
+        A nested dictionary where the first key is the ground truth label value, the second
+        key is the prediction label value, and the innermost dictionary contains either a
+        `count` or a list of `examples`. Each example includes the datum UID and prediction
+        score.
+    missing_predictions : dict
+        A dictionary where each key is a ground truth label value for which the model failed
+        to predict (false negatives). The value is a dictionary containing either a `count`
+        or a list of `examples`. Each example includes the datum UID.
+    score_threshold : float
+        The confidence score threshold used to filter predictions.
+    number_of_examples : int
+        The maximum number of examples per element.
+
+    Methods
+    -------
+    to_metric()
+        Converts the instance to a generic `Metric` object.
+    to_dict()
+        Converts the instance to a dictionary representation.
+    """
+
     confusion_matrix: dict[
         str,  # ground truth label value
         dict[
@@ -170,11 +396,9 @@ class ConfusionMatrix:
         ],
     ]
     score_threshold: float
-    label_key: str
     number_of_examples: int
 
-    @property
-    def metric(self) -> Metric:
+    def to_metric(self) -> Metric:
         return Metric(
             type=type(self).__name__,
             value={
@@ -183,9 +407,8 @@ class ConfusionMatrix:
             },
             parameters={
                 "score_threshold": self.score_threshold,
-                "label_key": self.label_key,
             },
         )
 
     def to_dict(self) -> dict:
-        return self.metric.to_dict()
+        return self.to_metric().to_dict()
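
Taken together with the docstring above, a `ConfusionMatrix` in 0.33.9 carries no `label_key`; its value is keyed by label strings alone. A sketch of constructing one and reading a count back out (illustrative data; the `missing_predictions` field name is taken from the docstring, since its definition falls in a part of the file this diff elides):

    cm = ConfusionMatrix(
        confusion_matrix={
            "cat": {"dog": {"count": 3, "examples": []}},
        },
        missing_predictions={
            "cat": {"count": 1, "examples": [{"datum": "uid1"}]},
        },
        score_threshold=0.5,
        number_of_examples=1,
    )
    n_cat_predicted_as_dog = cm.confusion_matrix["cat"]["dog"]["count"]  # 3
    payload = cm.to_dict()  # parameters hold only "score_threshold" in 0.33.9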
@@ -0,0 +1,274 @@
+from dataclasses import dataclass, field
+
+import numpy as np
+from numpy.typing import NDArray
+from shapely.geometry import Polygon as ShapelyPolygon
+
+
+@dataclass
+class BoundingBox:
+    """
+    Represents a bounding box with associated labels and optional scores.
+
+    Parameters
+    ----------
+    xmin : float
+        The minimum x-coordinate of the bounding box.
+    xmax : float
+        The maximum x-coordinate of the bounding box.
+    ymin : float
+        The minimum y-coordinate of the bounding box.
+    ymax : float
+        The maximum y-coordinate of the bounding box.
+    labels : list of str
+        List of labels associated with the bounding box.
+    scores : list of float, optional
+        Confidence scores corresponding to each label. Defaults to an empty list.
+
+    Examples
+    --------
+    Ground Truth Example:
+
+    >>> bbox = BoundingBox(xmin=10.0, xmax=50.0, ymin=20.0, ymax=60.0, labels=['cat'])
+
+    Prediction Example:
+
+    >>> bbox = BoundingBox(
+    ...     xmin=10.0, xmax=50.0, ymin=20.0, ymax=60.0,
+    ...     labels=['cat', 'dog'], scores=[0.9, 0.1]
+    ... )
+    """
+
+    xmin: float
+    xmax: float
+    ymin: float
+    ymax: float
+    labels: list[str]
+    scores: list[float] = field(default_factory=list)
+
+    def __post_init__(self):
+        if len(self.scores) == 0 and len(self.labels) != 1:
+            raise ValueError(
+                "Ground truths must be defined with no scores and a single label. If you meant to define a prediction, then please include one score for every label provided."
+            )
+        if len(self.scores) > 0 and len(self.labels) != len(self.scores):
+            raise ValueError(
+                "If scores are defined, there must be a 1:1 pairing with labels."
+            )
+
+    @property
+    def extrema(self) -> tuple[float, float, float, float]:
+        """
+        Returns the bounding box extrema.
+
+        Returns
+        -------
+        tuple[float, float, float, float]
+            A tuple in the form (xmin, xmax, ymin, ymax).
+        """
+        return (self.xmin, self.xmax, self.ymin, self.ymax)
+
+    @property
+    def annotation(self) -> tuple[float, float, float, float]:
+        """
+        Returns the annotation's data representation.
+
+        Returns
+        -------
+        tuple[float, float, float, float]
+            A tuple in the form (xmin, xmax, ymin, ymax).
+        """
+        return self.extrema
+
+
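The `__post_init__` above encodes the ground-truth/prediction contract: an annotation with no scores must carry exactly one label, and an annotation with scores must pair them 1:1 with labels. A quick sketch of the two failure modes (illustrative):

    # No scores but two labels: rejected as an ambiguous ground truth.
    BoundingBox(xmin=0, xmax=1, ymin=0, ymax=1, labels=["cat", "dog"])  # raises ValueError
    # Two labels but only one score: rejected as a malformed prediction.
    BoundingBox(
        xmin=0, xmax=1, ymin=0, ymax=1, labels=["cat", "dog"], scores=[0.9]
    )  # raises ValueError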
85
+ class Polygon:
86
+ """
87
+ Represents a polygon shape with associated labels and optional scores.
88
+
89
+ Parameters
90
+ ----------
91
+ shape : ShapelyPolygon
92
+ A Shapely polygon object representing the shape.
93
+ labels : list of str
94
+ List of labels associated with the polygon.
95
+ scores : list of float, optional
96
+ Confidence scores corresponding to each label. Defaults to an empty list.
97
+
98
+ Examples
99
+ --------
100
+ Ground Truth Example:
101
+
102
+ >>> from shapely.geometry import Polygon as ShapelyPolygon
103
+ >>> shape = ShapelyPolygon([(0, 0), (1, 0), (1, 1), (0, 1)])
104
+ >>> polygon = Polygon(shape=shape, labels=['building'])
105
+
106
+ Prediction Example:
107
+
108
+ >>> polygon = Polygon(
109
+ ... shape=shape, labels=['building'], scores=[0.95]
110
+ ... )
111
+ """
112
+
113
+ shape: ShapelyPolygon
114
+ labels: list[str]
115
+ scores: list[float] = field(default_factory=list)
116
+
117
+ def __post_init__(self):
118
+ if not isinstance(self.shape, ShapelyPolygon):
119
+ raise TypeError("shape must be of type shapely.geometry.Polygon.")
120
+ if self.shape.is_empty:
121
+ raise ValueError("Polygon is empty.")
122
+
123
+ if len(self.scores) == 0 and len(self.labels) != 1:
124
+ raise ValueError(
125
+ "Ground truths must be defined with no scores and a single label. If you meant to define a prediction, then please include one score for every label provided."
126
+ )
127
+ if len(self.scores) > 0 and len(self.labels) != len(self.scores):
128
+ raise ValueError(
129
+ "If scores are defined, there must be a 1:1 pairing with labels."
130
+ )
131
+
132
+ @property
133
+ def extrema(self) -> tuple[float, float, float, float]:
134
+ """
135
+ Returns the polygon's bounding box extrema.
136
+
137
+ Returns
138
+ -------
139
+ tuple[float, float, float, float]
140
+ A tuple in the form (xmin, xmax, ymin, ymax).
141
+ """
142
+ xmin, ymin, xmax, ymax = self.shape.bounds
143
+ return (xmin, xmax, ymin, ymax)
144
+
145
+ @property
146
+ def annotation(self) -> ShapelyPolygon:
147
+ """
148
+ Returns the annotation's data representation.
149
+
150
+ Returns
151
+ -------
152
+ shapely.geometry.Polygon
153
+ The polygon shape.
154
+ """
155
+ return self.shape
156
+
157
+
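Note that `Polygon.extrema` reorders Shapely's `bounds`, which returns `(xmin, ymin, xmax, ymax)`, into the `(xmin, xmax, ymin, ymax)` convention that `BoundingBox` uses. A small illustrative check:

    from shapely.geometry import Polygon as ShapelyPolygon

    shape = ShapelyPolygon([(0, 0), (4, 0), (4, 2), (0, 2)])
    polygon = Polygon(shape=shape, labels=["building"])
    assert shape.bounds == (0.0, 0.0, 4.0, 2.0)     # Shapely: (xmin, ymin, xmax, ymax)
    assert polygon.extrema == (0.0, 4.0, 0.0, 2.0)  # valor-lite: (xmin, xmax, ymin, ymax)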
+@dataclass
+class Bitmask:
+    """
+    Represents a binary mask with associated labels and optional scores.
+
+    Parameters
+    ----------
+    mask : NDArray[np.bool_]
+        A NumPy array of boolean values representing the mask.
+    labels : list of str
+        List of labels associated with the mask.
+    scores : list of float, optional
+        Confidence scores corresponding to each label. Defaults to an empty list.
+
+    Examples
+    --------
+    Ground Truth Example:
+
+    >>> import numpy as np
+    >>> mask = np.array([[True, False], [False, True]], dtype=np.bool_)
+    >>> bitmask = Bitmask(mask=mask, labels=['tree'])
+
+    Prediction Example:
+
+    >>> bitmask = Bitmask(
+    ...     mask=mask, labels=['tree'], scores=[0.85]
+    ... )
+    """
+
+    mask: NDArray[np.bool_]
+    labels: list[str]
+    scores: list[float] = field(default_factory=list)
+
+    def __post_init__(self):
+
+        if (
+            not isinstance(self.mask, np.ndarray)
+            or self.mask.dtype != np.bool_
+        ):
+            raise ValueError(
+                "Expected mask to be of type `NDArray[np.bool_]`."
+            )
+        elif not self.mask.any():
+            raise ValueError("Mask does not define any object instances.")
+
+        if len(self.scores) == 0 and len(self.labels) != 1:
+            raise ValueError(
+                "Ground truths must be defined with no scores and a single label. If you meant to define a prediction, then please include one score for every label provided."
+            )
+        if len(self.scores) > 0 and len(self.labels) != len(self.scores):
+            raise ValueError(
+                "If scores are defined, there must be a 1:1 pairing with labels."
+            )
+
+    @property
+    def extrema(self) -> tuple[float, float, float, float]:
+        """
+        Returns the bounding box extrema of the mask.
+
+        Returns
+        -------
+        tuple[float, float, float, float]
+            A tuple in the form (xmin, xmax, ymin, ymax).
+        """
+        rows, cols = np.nonzero(self.mask)
+        return (cols.min(), cols.max(), rows.min(), rows.max())
+
+    @property
+    def annotation(self) -> NDArray[np.bool_]:
+        """
+        Returns the annotation's data representation.
+
+        Returns
+        -------
+        NDArray[np.bool_]
+            The binary mask array.
+        """
+        return self.mask
+
+
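`Bitmask.extrema` derives a box from the mask with `np.nonzero`: row indices map to y and column indices to x. A worked example (illustrative; note the tuple elements come back as NumPy integers rather than floats):

    import numpy as np

    mask = np.zeros((4, 6), dtype=np.bool_)
    mask[1:3, 2:5] = True  # True over rows 1-2, columns 2-4
    bitmask = Bitmask(mask=mask, labels=["tree"])
    assert bitmask.extrema == (2, 4, 1, 2)  # (xmin, xmax, ymin, ymax)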
+@dataclass
+class Detection:
+    """
+    Detection data structure holding ground truths and predictions for object detection tasks.
+
+    Parameters
+    ----------
+    uid : str
+        Unique identifier for the image or sample.
+    groundtruths : list of BoundingBox or Bitmask or Polygon
+        List of ground truth annotations.
+    predictions : list of BoundingBox or Bitmask or Polygon
+        List of predicted annotations.
+
+    Examples
+    --------
+    >>> bbox_gt = BoundingBox(xmin=10, xmax=50, ymin=20, ymax=60, labels=['cat'])
+    >>> bbox_pred = BoundingBox(
+    ...     xmin=12, xmax=48, ymin=22, ymax=58, labels=['cat'], scores=[0.9]
+    ... )
+    >>> detection = Detection(
+    ...     uid='image_001',
+    ...     groundtruths=[bbox_gt],
+    ...     predictions=[bbox_pred]
+    ... )
+    """
+
+    uid: str
+    groundtruths: list[BoundingBox] | list[Bitmask] | list[Polygon]
+    predictions: list[BoundingBox] | list[Bitmask] | list[Polygon]
+
+    def __post_init__(self):
+        for prediction in self.predictions:
+            if len(prediction.scores) != len(prediction.labels):
+                raise ValueError(
+                    "Predictions must provide a score for every label."
+                )