valor-lite 0.37.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of valor-lite might be problematic. Click here for more details.

Files changed (49) hide show
  1. valor_lite/LICENSE +21 -0
  2. valor_lite/__init__.py +0 -0
  3. valor_lite/cache/__init__.py +11 -0
  4. valor_lite/cache/compute.py +154 -0
  5. valor_lite/cache/ephemeral.py +302 -0
  6. valor_lite/cache/persistent.py +529 -0
  7. valor_lite/classification/__init__.py +14 -0
  8. valor_lite/classification/annotation.py +45 -0
  9. valor_lite/classification/computation.py +378 -0
  10. valor_lite/classification/evaluator.py +879 -0
  11. valor_lite/classification/loader.py +97 -0
  12. valor_lite/classification/metric.py +535 -0
  13. valor_lite/classification/numpy_compatibility.py +13 -0
  14. valor_lite/classification/shared.py +184 -0
  15. valor_lite/classification/utilities.py +314 -0
  16. valor_lite/exceptions.py +20 -0
  17. valor_lite/object_detection/__init__.py +17 -0
  18. valor_lite/object_detection/annotation.py +238 -0
  19. valor_lite/object_detection/computation.py +841 -0
  20. valor_lite/object_detection/evaluator.py +805 -0
  21. valor_lite/object_detection/loader.py +292 -0
  22. valor_lite/object_detection/metric.py +850 -0
  23. valor_lite/object_detection/shared.py +185 -0
  24. valor_lite/object_detection/utilities.py +396 -0
  25. valor_lite/schemas.py +11 -0
  26. valor_lite/semantic_segmentation/__init__.py +15 -0
  27. valor_lite/semantic_segmentation/annotation.py +123 -0
  28. valor_lite/semantic_segmentation/computation.py +165 -0
  29. valor_lite/semantic_segmentation/evaluator.py +414 -0
  30. valor_lite/semantic_segmentation/loader.py +205 -0
  31. valor_lite/semantic_segmentation/metric.py +275 -0
  32. valor_lite/semantic_segmentation/shared.py +149 -0
  33. valor_lite/semantic_segmentation/utilities.py +88 -0
  34. valor_lite/text_generation/__init__.py +15 -0
  35. valor_lite/text_generation/annotation.py +56 -0
  36. valor_lite/text_generation/computation.py +611 -0
  37. valor_lite/text_generation/llm/__init__.py +0 -0
  38. valor_lite/text_generation/llm/exceptions.py +14 -0
  39. valor_lite/text_generation/llm/generation.py +903 -0
  40. valor_lite/text_generation/llm/instructions.py +814 -0
  41. valor_lite/text_generation/llm/integrations.py +226 -0
  42. valor_lite/text_generation/llm/utilities.py +43 -0
  43. valor_lite/text_generation/llm/validators.py +68 -0
  44. valor_lite/text_generation/manager.py +697 -0
  45. valor_lite/text_generation/metric.py +381 -0
  46. valor_lite-0.37.1.dist-info/METADATA +174 -0
  47. valor_lite-0.37.1.dist-info/RECORD +49 -0
  48. valor_lite-0.37.1.dist-info/WHEEL +5 -0
  49. valor_lite-0.37.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,378 @@
1
+ from enum import IntFlag
2
+
3
+ import numpy as np
4
+ from numpy.typing import NDArray
5
+
6
+ import valor_lite.classification.numpy_compatibility as npc
7
+
8
+
9
def compute_rocauc(
    rocauc: NDArray[np.float64],
    array: NDArray[np.float64],
    gt_count_per_label: NDArray[np.uint64],
    pd_count_per_label: NDArray[np.uint64],
    n_labels: int,
    prev: NDArray[np.uint64],
) -> tuple[NDArray[np.float64], NDArray[np.uint64]]:
    """
    Compute ROCAUC, optionally over chunks of a larger computation.

    Accumulates the trapezoidal area under the ROC curve into `rocauc` and
    updates `prev` in-place so that a subsequent call can continue from the
    last cumulative FP/TP values of this chunk.

    Parameters
    ----------
    rocauc : NDArray[np.float64]
        The running ROCAUC per label with shape (n_labels,). Mutated in-place.
    array : NDArray[np.float64]
        A sorted array of ROCAUC intermediate values with shape (n_pairs, 3).
            Index 0 - Prediction Label Index
            Index 1 - Cumulative FP
            Index 2 - Cumulative TP
    gt_count_per_label : NDArray[np.uint64]
        The number of ground truth occurrences per label.
    pd_count_per_label : NDArray[np.uint64]
        The number of prediction occurrences per label.
    n_labels : int
        The total number of unique labels.
    prev : NDArray[np.uint64]
        The previous cumulative sums with shape (n_labels, 2); column 0 holds
        FP and column 1 holds TP. Used as an intermediate in chunking
        operations. Mutated in-place.

    Returns
    -------
    NDArray[np.float64]
        The updated running ROCAUC (the same array passed as `rocauc`).
    NDArray[np.uint64]
        The updated cumulative FP/TP sums (the same array passed as `prev`).
    """
    pd_labels = array[:, 0]
    cumulative_fp = array[:, 1]
    cumulative_tp = array[:, 2]

    positive_count = gt_count_per_label
    # NOTE(review): assumes pd_count >= gt_count for every label; both are
    # uint64, so the subtraction would wrap around otherwise — confirm.
    negative_count = pd_count_per_label - gt_count_per_label

    for label_idx in range(n_labels):
        mask_pds = pd_labels == label_idx
        n_masked_pds = mask_pds.sum()
        if pd_count_per_label[label_idx] == 0 or n_masked_pds == 0:
            continue

        fps = cumulative_fp[mask_pds]
        tps = cumulative_tp[mask_pds]
        # prepend the carry-over from the previous chunk so the trapezoid
        # area spans the boundary between chunks
        if prev[label_idx, 0] > 0 or prev[label_idx, 1] > 0:
            fps = np.r_[prev[label_idx, 0], fps]
            tps = np.r_[prev[label_idx, 1], tps]

        prev[label_idx, 0] = fps[-1]
        prev[label_idx, 1] = tps[-1]

        # a single point contributes no area
        if fps.size == 1:
            continue

        # false-positive rate; stays 0 where there are no negatives
        fpr = np.zeros_like(fps, dtype=np.float64)
        np.divide(
            fps,
            negative_count[label_idx],
            where=negative_count[label_idx] > 0,
            out=fpr,
        )
        # true-positive rate; stays 0 where there are no positives
        tpr = np.zeros_like(tps, dtype=np.float64)
        np.divide(
            tps,
            positive_count[label_idx],
            where=positive_count[label_idx] > 0,
            out=tpr,
        )

        # compute rocauc
        rocauc[label_idx] += npc.trapezoid(x=fpr, y=tpr, axis=0)

    return rocauc, prev
91
+
92
+
93
def compute_counts(
    ids: NDArray[np.int64],
    scores: NDArray[np.float64],
    winners: NDArray[np.bool_],
    score_thresholds: NDArray[np.float64],
    hardmax: bool,
    n_labels: int,
) -> NDArray[np.uint64]:
    """
    Computes counts of TP, FP, FN and TN per label at each score threshold.

    Parameters
    ----------
    ids : NDArray[np.int64]
        A sorted array of classification pairs with shape (n_pairs, 3).
            Index 0 - Datum Index
            Index 1 - GroundTruth Label Index
            Index 2 - Prediction Label Index
        Negative label indices are treated as missing annotations.
    scores : NDArray[np.float64]
        A sorted array of classification scores with shape (n_pairs,).
    winners : NDArray[np.bool_]
        Marks predictions with highest score over a datum.
    score_thresholds : NDArray[np.float64]
        A 1-D array containing score thresholds to compute metrics over.
    hardmax : bool
        Option to only allow a single positive prediction.
    n_labels : int
        The total number of unique labels.

    Returns
    -------
    NDArray[np.uint64]
        TP, FP, FN, TN counts with shape (n_score_thresholds, n_labels, 4).
    """
    n_scores = score_thresholds.shape[0]
    counts = np.zeros((n_scores, n_labels, 4), dtype=np.uint64)
    if ids.size == 0:
        return counts

    gt_labels = ids[:, 1]
    pd_labels = ids[:, 2]

    # Exact integer comparison. The previous `np.isclose` call would
    # spuriously equate large adjacent label indices under its default
    # relative tolerance (e.g. 100000 vs 100001).
    mask_matching_labels = gt_labels == pd_labels
    mask_score_nonzero = ~np.isclose(scores, 0.0)
    mask_hardmax = winners > 0.5
    mask_valid_gts = gt_labels >= 0
    mask_valid_pds = pd_labels >= 0

    # calculate metrics at various score thresholds
    for score_idx in range(n_scores):
        mask_score_threshold = scores >= score_thresholds[score_idx]
        mask_score = mask_score_nonzero & mask_score_threshold

        if hardmax:
            mask_score &= mask_hardmax

        mask_tp = mask_matching_labels & mask_score
        mask_fp = ~mask_matching_labels & mask_score
        # a misclassification (FP) also counts as an FN for its ground truth
        mask_fn = (mask_matching_labels & ~mask_score) | mask_fp
        mask_tn = ~mask_matching_labels & ~mask_score

        mask_fn &= mask_valid_gts
        mask_fp &= mask_valid_pds

        # deduplicate (datum, label) pairs so each annotation counts once
        fn = np.unique(ids[mask_fn][:, [0, 1]].astype(int), axis=0)
        tn = np.unique(ids[mask_tn][:, [0, 2]].astype(int), axis=0)

        counts[score_idx, :, 0] = np.bincount(
            pd_labels[mask_tp], minlength=n_labels
        )
        counts[score_idx, :, 1] = np.bincount(
            pd_labels[mask_fp], minlength=n_labels
        )
        counts[score_idx, :, 2] = np.bincount(fn[:, 1], minlength=n_labels)
        counts[score_idx, :, 3] = np.bincount(tn[:, 1], minlength=n_labels)

    return counts
170
+
171
+
172
def compute_precision(counts: NDArray[np.uint64]) -> NDArray[np.float64]:
    """
    Compute per-label precision (TP / (TP + FP)) from compute_counts output.

    Cells with no positive predictions (TP + FP == 0) are reported as 0.0.
    """
    true_positives = counts[:, :, 0]
    predicted_positives = true_positives + counts[:, :, 1]
    result = np.zeros(predicted_positives.shape, dtype=np.float64)
    np.divide(
        true_positives,
        predicted_positives,
        where=predicted_positives > 0,
        out=result,
    )
    return result
185
+
186
+
187
def compute_recall(counts: NDArray[np.uint64]) -> NDArray[np.float64]:
    """
    Compute per-label recall (TP / (TP + FN)) from compute_counts output.

    Cells with no ground truths (TP + FN == 0) are reported as 0.0.
    """
    true_positives = counts[:, :, 0]
    actual_positives = true_positives + counts[:, :, 2]
    result = np.zeros(actual_positives.shape, dtype=np.float64)
    np.divide(
        true_positives,
        actual_positives,
        where=actual_positives > 0,
        out=result,
    )
    return result
200
+
201
+
202
def compute_f1_score(
    precision: NDArray[np.float64], recall: NDArray[np.float64]
) -> NDArray[np.float64]:
    """
    Compute the harmonic mean of precision and recall, element-wise.

    Cells where precision + recall is (near) zero are reported as 0.0.
    """
    numerator = 2 * precision * recall
    denominator = precision + recall
    result = np.zeros_like(recall)
    # the 1e-9 floor guards against division by a near-zero denominator
    np.divide(numerator, denominator, where=denominator > 1e-9, out=result)
    return result
216
+
217
+
218
def compute_accuracy(
    counts: NDArray[np.uint64], n_datums: int
) -> NDArray[np.float64]:
    """
    Compute accuracy per score threshold from compute_counts output.

    Accuracy is the total true-positive count across labels divided by the
    number of datums; all-zero when there are no datums.
    """
    n_scores = counts.shape[0]
    accuracy = np.zeros(n_scores, dtype=np.float64)
    if n_datums == 0:
        return accuracy
    true_positive_totals = counts[:, :, 0].sum(axis=1)
    np.divide(true_positive_totals, n_datums, out=accuracy)
    return accuracy
234
+
235
+
236
class PairClassification(IntFlag):
    """Bit flags classifying a (ground truth, prediction) pair."""

    TP = 1 << 0  # true positive
    FP_FN_MISCLF = 1 << 1  # misclassification: counts as both FP and FN
    FN_UNMATCHED = 1 << 2  # ground truth with no qualifying prediction


def compute_pair_classifications(
    ids: NDArray[np.int64],
    scores: NDArray[np.float64],
    winners: NDArray[np.bool_],
    score_thresholds: NDArray[np.float64],
    hardmax: bool,
) -> tuple[NDArray[np.bool_], NDArray[np.bool_], NDArray[np.bool_]]:
    """
    Classify ID pairs as TP, misclassification (FP/FN) or unmatched FN.

    Parameters
    ----------
    ids : NDArray[np.int64]
        A sorted array of classification pairs with shape (n_pairs, 3).
            Index 0 - Datum Index
            Index 1 - GroundTruth Label Index
            Index 2 - Prediction Label Index
    scores : NDArray[np.float64]
        A sorted array of classification scores with shape (n_pairs,).
    winners : NDArray[np.bool_]
        Marks predictions with highest score over a datum.
    score_thresholds : NDArray[np.float64]
        A 1-D array containing score thresholds.
    hardmax : bool
        Option to only allow a single positive prediction.

    Returns
    -------
    NDArray[np.bool_]
        True-positive mask with shape (n_score_thresholds, n_pairs).
    NDArray[np.bool_]
        Misclassification FP, FN mask with shape (n_score_thresholds, n_pairs).
    NDArray[np.bool_]
        Unmatched FN mask with shape (n_score_thresholds, n_pairs).
    """
    n_pairs = ids.shape[0]
    n_scores = score_thresholds.shape[0]

    gt_labels = ids[:, 1]
    pd_labels = ids[:, 2]
    groundtruths = ids[:, [0, 1]]

    pair_classifications = np.zeros(
        (n_scores, n_pairs),
        dtype=np.uint8,
    )

    # Exact integer comparison. The previous `np.isclose` call would
    # spuriously equate large adjacent label indices under its default
    # relative tolerance.
    mask_label_match = gt_labels == pd_labels
    mask_score_nonzero = scores > 1e-9
    for score_idx in range(n_scores):
        # Recompute the score mask from scratch each iteration. Previously a
        # single mask was mutated with `&=` across iterations, so thresholds
        # accumulated and results were wrong whenever `score_thresholds` was
        # not sorted in ascending order.
        mask_score = mask_score_nonzero & (
            scores >= score_thresholds[score_idx]
        )
        if hardmax:
            mask_score &= winners

        mask_true_positives = mask_label_match & mask_score
        mask_misclassifications = ~mask_label_match & mask_score
        # a ground truth is unmatched if no scoring pair shares its
        # (datum, label) identity
        mask_unmatched_groundtruths = ~(
            (
                groundtruths.reshape(-1, 1, 2)
                == groundtruths[mask_score].reshape(1, -1, 2)
            )
            .all(axis=2)
            .any(axis=1)
        )

        # classify pairings
        pair_classifications[score_idx, mask_true_positives] |= np.uint8(
            PairClassification.TP
        )
        pair_classifications[score_idx, mask_misclassifications] |= np.uint8(
            PairClassification.FP_FN_MISCLF
        )
        pair_classifications[
            score_idx, mask_unmatched_groundtruths
        ] |= np.uint8(PairClassification.FN_UNMATCHED)

    mask_tp = np.bitwise_and(pair_classifications, PairClassification.TP) > 0
    mask_fp_fn_misclf = (
        np.bitwise_and(pair_classifications, PairClassification.FP_FN_MISCLF)
        > 0
    )
    mask_fn_unmatched = (
        np.bitwise_and(pair_classifications, PairClassification.FN_UNMATCHED)
        > 0
    )

    return (
        mask_tp,
        mask_fp_fn_misclf,
        mask_fn_unmatched,
    )
333
+
334
+
335
def compute_confusion_matrix(
    ids: NDArray[np.int64],
    mask_tp: NDArray[np.bool_],
    mask_fp_fn_misclf: NDArray[np.bool_],
    mask_fn_unmatched: NDArray[np.bool_],
    score_thresholds: NDArray[np.float64],
    n_labels: int,
):
    """
    Build confusion matrices from compute_pair_classifications output.

    Returns
    -------
    NDArray[np.uint64]
        Confusion matrices with shape (n_scores, n_labels, n_labels); entry
        [s, g, p] counts unique (datum, gt, pd) triples with ground-truth
        label g and prediction label p at score threshold index s.
    NDArray[np.uint64]
        Unmatched ground-truth counts with shape (n_scores, n_labels).
    """
    n_scores = score_thresholds.size

    confusion_matrices = np.zeros(
        (n_scores, n_labels, n_labels), dtype=np.uint64
    )
    unmatched_groundtruths = np.zeros((n_scores, n_labels), dtype=np.uint64)

    mask_matched = mask_tp | mask_fp_fn_misclf
    for score_idx in range(n_scores):
        # matched annotations: deduplicate (datum, gt, pd) triples, then
        # count occurrences of each (gt label, pd label) combination
        matched_triples = np.unique(
            ids[mask_matched[score_idx]][:, [0, 1, 2]], axis=0
        )
        label_pairs, pair_counts = np.unique(
            matched_triples[:, [1, 2]], axis=0, return_counts=True
        )
        confusion_matrices[
            score_idx, label_pairs[:, 0], label_pairs[:, 1]
        ] = pair_counts

        # unmatched ground truths: deduplicate (datum, gt) pairs, then
        # count occurrences of each gt label
        unmatched_pairs = np.unique(
            ids[mask_fn_unmatched[score_idx]][:, [0, 1]], axis=0
        )
        gt_label_values, gt_label_counts = np.unique(
            unmatched_pairs[:, 1], return_counts=True
        )
        unmatched_groundtruths[score_idx, gt_label_values] = gt_label_counts

    return confusion_matrices, unmatched_groundtruths