valor-lite 0.36.6__py3-none-any.whl → 0.37.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. valor_lite/cache/__init__.py +11 -0
  2. valor_lite/cache/compute.py +211 -0
  3. valor_lite/cache/ephemeral.py +302 -0
  4. valor_lite/cache/persistent.py +536 -0
  5. valor_lite/classification/__init__.py +5 -10
  6. valor_lite/classification/annotation.py +4 -0
  7. valor_lite/classification/computation.py +233 -251
  8. valor_lite/classification/evaluator.py +882 -0
  9. valor_lite/classification/loader.py +97 -0
  10. valor_lite/classification/metric.py +141 -4
  11. valor_lite/classification/shared.py +184 -0
  12. valor_lite/classification/utilities.py +221 -118
  13. valor_lite/exceptions.py +5 -0
  14. valor_lite/object_detection/__init__.py +5 -4
  15. valor_lite/object_detection/annotation.py +13 -1
  16. valor_lite/object_detection/computation.py +368 -299
  17. valor_lite/object_detection/evaluator.py +804 -0
  18. valor_lite/object_detection/loader.py +292 -0
  19. valor_lite/object_detection/metric.py +152 -3
  20. valor_lite/object_detection/shared.py +206 -0
  21. valor_lite/object_detection/utilities.py +182 -100
  22. valor_lite/semantic_segmentation/__init__.py +5 -4
  23. valor_lite/semantic_segmentation/annotation.py +7 -0
  24. valor_lite/semantic_segmentation/computation.py +20 -110
  25. valor_lite/semantic_segmentation/evaluator.py +414 -0
  26. valor_lite/semantic_segmentation/loader.py +205 -0
  27. valor_lite/semantic_segmentation/shared.py +149 -0
  28. valor_lite/semantic_segmentation/utilities.py +6 -23
  29. {valor_lite-0.36.6.dist-info → valor_lite-0.37.5.dist-info}/METADATA +3 -1
  30. valor_lite-0.37.5.dist-info/RECORD +49 -0
  31. {valor_lite-0.36.6.dist-info → valor_lite-0.37.5.dist-info}/WHEEL +1 -1
  32. valor_lite/classification/manager.py +0 -545
  33. valor_lite/object_detection/manager.py +0 -864
  34. valor_lite/profiling.py +0 -374
  35. valor_lite/semantic_segmentation/benchmark.py +0 -237
  36. valor_lite/semantic_segmentation/manager.py +0 -446
  37. valor_lite-0.36.6.dist-info/RECORD +0 -41
  38. {valor_lite-0.36.6.dist-info → valor_lite-0.37.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,149 @@
1
+ from dataclasses import dataclass
2
+ from pathlib import Path
3
+
4
+ import numpy as np
5
+ import pyarrow as pa
6
+ import pyarrow.compute as pc
7
+
8
+ from valor_lite.cache import FileCacheReader, MemoryCacheReader
9
+
10
+
11
@dataclass
class EvaluatorInfo:
    """Summary counters plus the optional metadata field listing.

    All counters start at zero and ``metadata_fields`` starts as ``None``,
    so an instance can be created without arguments and filled in later.
    """

    # NOTE(review): presumably the number of rows held in the cache - confirm.
    number_of_rows: int = 0
    # NOTE(review): presumably the number of distinct datums - confirm.
    number_of_datums: int = 0
    # NOTE(review): presumably the number of distinct labels - confirm.
    number_of_labels: int = 0
    # NOTE(review): presumably the total pixel count over all rows - confirm.
    number_of_pixels: int = 0
    # NOTE(review): presumably pixels that carry a groundtruth label - confirm.
    number_of_groundtruth_pixels: int = 0
    # NOTE(review): presumably pixels that carry a prediction label - confirm.
    number_of_prediction_pixels: int = 0
    # Optional (name, type) pairs; the type is either a string or a
    # PyArrow data type.
    metadata_fields: list[tuple[str, str | pa.DataType]] | None = None
20
+
21
+
22
def generate_cache_path(path: str | Path) -> Path:
    """Return the ``counts`` entry located directly under ``path``."""
    parent_directory = Path(path)
    return parent_directory.joinpath("counts")
25
+
26
+
27
def generate_metadata_path(path: str | Path) -> Path:
    """Return the ``metadata.json`` file located directly under ``path``."""
    parent_directory = Path(path)
    return parent_directory.joinpath("metadata.json")
30
+
31
+
32
def generate_schema(
    metadata_fields: list[tuple[str, str | pa.DataType]] | None
) -> pa.Schema:
    """Build the PyArrow schema: reserved columns followed by metadata fields.

    Parameters
    ----------
    metadata_fields
        Extra ``(name, type)`` columns appended after the reserved ones.
        ``None`` or an empty list adds no extra columns.

    Raises
    ------
    ValueError
        If a metadata field reuses the name of a reserved column.
    """
    extra_fields = metadata_fields or []

    builtin_fields = [
        ("datum_uid", pa.string()),
        ("datum_id", pa.int64()),
        # groundtruth
        ("gt_label", pa.string()),
        ("gt_label_id", pa.int64()),
        # prediction
        ("pd_label", pa.string()),
        ("pd_label_id", pa.int64()),
        # pair
        ("count", pa.uint64()),
    ]

    # A metadata column must never shadow one of the reserved columns.
    builtin_names = {entry[0] for entry in builtin_fields}
    extra_names = {entry[0] for entry in extra_fields}
    conflicting = builtin_names & extra_names
    if conflicting:
        raise ValueError(
            f"metadata fields {conflicting} conflict with reserved fields"
        )

    return pa.schema(builtin_fields + list(extra_fields))
65
+
66
+
67
def encode_metadata_fields(
    metadata_fields: list[tuple[str, str | pa.DataType]] | None
) -> dict[str, str]:
    """Map each metadata field name to the string form of its type.

    ``None`` or an empty list yields an empty dictionary. The resulting
    mapping contains only strings, so it can be written out as JSON.
    """
    if not metadata_fields:
        return {}

    encoded: dict[str, str] = {}
    for field_name, field_type in metadata_fields:
        encoded[field_name] = str(field_type)
    return encoded
73
+
74
+
75
def decode_metadata_fields(
    encoded_metadata_fields: dict[str, str]
) -> list[tuple[str, str | pa.DataType]]:
    """Turn an encoded name-to-type mapping back into ``(name, type)`` pairs.

    The type values are passed through unchanged (they stay strings) and
    the pairs follow the mapping's iteration order.
    """
    return list(encoded_metadata_fields.items())
80
+
81
+
82
def extract_labels(
    reader: MemoryCacheReader | FileCacheReader,
    index_to_label_override: dict[int, str] | None = None,
) -> dict[int, str]:
    """Collect the label-id to label-name mapping stored in the cache.

    Parameters
    ----------
    reader
        Cache reader whose tables expose the ``gt_label_id``, ``gt_label``,
        ``pd_label_id`` and ``pd_label`` columns.
    index_to_label_override
        When given, it is returned as-is and the cache is never read.

    Returns
    -------
    dict[int, str]
        Label names keyed by label id. The id ``-1`` is never included.
    """
    if index_to_label_override is not None:
        return index_to_label_override

    # Groundtruth columns are processed before prediction columns so that,
    # within a table, prediction entries are the last ones written.
    id_and_name_columns = (
        ("gt_label_id", "gt_label"),
        ("pd_label_id", "pd_label"),
    )

    labels_by_index = {}
    for table in reader.iterate_tables(
        columns=[
            "gt_label_id",
            "gt_label",
            "pd_label_id",
            "pd_label",
        ]
    ):
        for id_column, name_column in id_and_name_columns:
            # One representative row per distinct id is enough to look up
            # the matching label name.
            unique_ids, first_rows = np.unique(
                table[id_column].to_numpy(), return_index=True
            )
            names = table[name_column].take(first_rows).to_pylist()
            found = dict(zip(unique_ids.astype(int).tolist(), names))
            # NOTE(review): -1 presumably marks "no label" - confirm.
            found.pop(-1, None)
            labels_by_index.update(found)

    return labels_by_index
116
+
117
+
118
def extract_counts(
    reader: MemoryCacheReader | FileCacheReader,
    datums: pc.Expression | None = None,
    groundtruths: pc.Expression | None = None,
    predictions: pc.Expression | None = None,
) -> tuple[int, int, int, int]:
    """Count datums and pixels in the cache, optionally filtered.

    Parameters
    ----------
    reader
        Cache reader whose tables expose the ``datum_id``, ``count``,
        ``gt_label_id`` and ``pd_label_id`` columns.
    datums
        Optional expression handed to ``reader.iterate_tables`` as its
        ``filter`` argument.
    groundtruths
        Optional expression AND-ed with ``gt_label_id >= 0`` when summing
        groundtruth pixels.
    predictions
        Optional expression AND-ed with ``pd_label_id >= 0`` when summing
        prediction pixels.

    Returns
    -------
    tuple[int, int, int, int]
        ``(datum count, total pixels, groundtruth pixels, prediction
        pixels)``. A datum whose rows are spread over several tables is
        counted once.
    """
    # The filter expressions do not depend on the table, so build them once.
    # NOTE(review): negative label ids presumably mark "no label" - confirm.
    gt_expr = pc.field("gt_label_id") >= 0
    if groundtruths is not None:
        gt_expr &= groundtruths

    pd_expr = pc.field("pd_label_id") >= 0
    if predictions is not None:
        pd_expr &= predictions

    # Track ids across tables instead of summing per-table unique counts;
    # otherwise a datum split over two tables would be counted twice.
    seen_datum_ids: set = set()
    n_total, n_gts, n_pds = 0, 0, 0
    for tbl in reader.iterate_tables(filter=datums):

        # count datums
        seen_datum_ids.update(np.unique(tbl["datum_id"].to_numpy()).tolist())

        # count pixels
        n_total += int(tbl["count"].to_numpy().sum())

        # count groundtruth pixels
        n_gts += int(tbl.filter(gt_expr)["count"].to_numpy().sum())

        # count prediction pixels
        n_pds += int(tbl.filter(pd_expr)["count"].to_numpy().sum())

    return len(seen_datum_ids), n_total, n_gts, n_pds
@@ -1,15 +1,11 @@
1
1
  from collections import defaultdict
2
2
 
3
- import numpy as np
4
- from numpy.typing import NDArray
5
-
6
3
  from valor_lite.semantic_segmentation.metric import Metric, MetricType
7
4
 
8
5
 
9
6
  def unpack_precision_recall_iou_into_metric_lists(
10
7
  results: tuple,
11
- label_metadata: NDArray[np.int64],
12
- index_to_label: list[str],
8
+ index_to_label: dict[int, str],
13
9
  ) -> dict[MetricType, list[Metric]]:
14
10
 
15
11
  n_labels = len(index_to_label)
@@ -39,24 +35,20 @@ def unpack_precision_recall_iou_into_metric_lists(
39
35
  "iou": float(ious[gt_label_idx, pd_label_idx])
40
36
  }
41
37
  for pd_label_idx in range(n_labels)
42
- if label_metadata[pd_label_idx, 0] > 0
43
38
  }
44
39
  for gt_label_idx in range(n_labels)
45
- if label_metadata[gt_label_idx, 0] > 0
46
40
  },
47
41
  unmatched_predictions={
48
42
  index_to_label[pd_label_idx]: {
49
43
  "ratio": float(unmatched_prediction_ratios[pd_label_idx])
50
44
  }
51
45
  for pd_label_idx in range(n_labels)
52
- if label_metadata[pd_label_idx, 0] > 0
53
46
  },
54
47
  unmatched_ground_truths={
55
48
  index_to_label[gt_label_idx]: {
56
49
  "ratio": float(unmatched_ground_truth_ratios[gt_label_idx])
57
50
  }
58
51
  for gt_label_idx in range(n_labels)
59
- if label_metadata[gt_label_idx, 0] > 0
60
52
  },
61
53
  )
62
54
  ]
@@ -67,38 +59,29 @@ def unpack_precision_recall_iou_into_metric_lists(
67
59
  )
68
60
  ]
69
61
 
70
- for label_idx, label in enumerate(index_to_label):
71
-
72
- kwargs = {
73
- "label": label,
74
- }
75
-
76
- # if no groundtruths exists for a label, skip it.
77
- if label_metadata[label_idx, 0] == 0:
78
- continue
79
-
62
+ for label_idx, label in index_to_label.items():
80
63
  metrics[MetricType.Precision].append(
81
64
  Metric.precision(
82
65
  value=float(precision[label_idx]),
83
- **kwargs,
66
+ label=label,
84
67
  )
85
68
  )
86
69
  metrics[MetricType.Recall].append(
87
70
  Metric.recall(
88
71
  value=float(recall[label_idx]),
89
- **kwargs,
72
+ label=label,
90
73
  )
91
74
  )
92
75
  metrics[MetricType.F1].append(
93
76
  Metric.f1_score(
94
77
  value=float(f1_score[label_idx]),
95
- **kwargs,
78
+ label=label,
96
79
  )
97
80
  )
98
81
  metrics[MetricType.IOU].append(
99
82
  Metric.iou(
100
83
  value=float(ious[label_idx, label_idx]),
101
- **kwargs,
84
+ label=label,
102
85
  )
103
86
  )
104
87
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: valor-lite
3
- Version: 0.36.6
3
+ Version: 0.37.5
4
4
  Summary: Evaluate machine learning models.
5
5
  Project-URL: homepage, https://www.striveworks.com
6
6
  Requires-Python: >=3.10
@@ -8,6 +8,7 @@ Description-Content-Type: text/markdown
8
8
  Requires-Dist: numpy
9
9
  Requires-Dist: tqdm
10
10
  Requires-Dist: shapely
11
+ Requires-Dist: pyarrow
11
12
  Provides-Extra: nlp
12
13
  Requires-Dist: evaluate; extra == "nlp"
13
14
  Requires-Dist: nltk; extra == "nlp"
@@ -31,6 +32,7 @@ Provides-Extra: benchmark
31
32
  Requires-Dist: requests; extra == "benchmark"
32
33
  Provides-Extra: dev
33
34
  Requires-Dist: valor-lite[benchmark,docs,mistral,nlp,openai,test]; extra == "dev"
35
+ Requires-Dist: pyarrow-stubs; extra == "dev"
34
36
 
35
37
  # valor-lite: Fast, local machine learning evaluation.
36
38
 
@@ -0,0 +1,49 @@
1
+ valor_lite/LICENSE,sha256=M0L53VuwfEEqezhHb7NPeYcO_glw7-k4DMLZQ3eRN64,1068
2
+ valor_lite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ valor_lite/exceptions.py,sha256=Odg4m7VfTYtGunzA-JpNWGoDwvXFvCamkQu8WLncE-A,496
4
+ valor_lite/schemas.py,sha256=pB0MrPx5qFLbwBWDiOUUm-vmXdWvbJLFCBmKgbcbI5g,198
5
+ valor_lite/cache/__init__.py,sha256=OzVJq6WMXOmhJPQLOVu9y0xEui41PDpwGnkrU6A3-dA,266
6
+ valor_lite/cache/compute.py,sha256=1OP7h_00asTydA3ZSPZeMgqiwlvxu6RfzsOyvqDAwEg,6906
7
+ valor_lite/cache/ephemeral.py,sha256=BPUtVVE3RuU0_gB87CPZUBTbnUbcdz4MMsUhs_YLzDE,8164
8
+ valor_lite/cache/persistent.py,sha256=3XkMvGtUdU5ZK1Q4p2ipU97p-MYd3KS9ZTDfV46SxG0,15319
9
+ valor_lite/classification/__init__.py,sha256=5cAK1exDJGGhZWFcswj977-qVtEafUTsNIg7stJV2oc,293
10
+ valor_lite/classification/annotation.py,sha256=93NzpfpBhkyLq33gLtAqkCl0TTmraDGy03cSjAxMWec,1304
11
+ valor_lite/classification/computation.py,sha256=x2IINIDHP_52GTCdkTgu8ujx3o3HGosdWE0_oQvNZuU,11498
12
+ valor_lite/classification/evaluator.py,sha256=h0B3NTR746mVIraICPLDw_xls1RBAeFyc39cBm0vcpw,29150
13
+ valor_lite/classification/loader.py,sha256=t976u0XGBuQGrG6VgxPViu-Ipbj5sUm_ShW8FV0YGxg,3355
14
+ valor_lite/classification/metric.py,sha256=4ZoP9f36DqKnDOif40kjwznJc0fw93F1yx3gvX_lnz8,16104
15
+ valor_lite/classification/numpy_compatibility.py,sha256=roqtTetsm1_HxuaejrthQdydjsRIy-FpXpGb86cLh_E,365
16
+ valor_lite/classification/shared.py,sha256=J3lrcJQrSSEXc1qv7s9CIU3UF_XEZxd6flki1I0nhfA,5653
17
+ valor_lite/classification/utilities.py,sha256=NZP830x0noKSZCC6-Hc_K34JzxLsHgU8Zcq0YjTlH90,10218
18
+ valor_lite/object_detection/__init__.py,sha256=GARztCi3rYlq3gltJ0lDBQK0bVYr5Upwc_M3_Cl_RMg,363
19
+ valor_lite/object_detection/annotation.py,sha256=--uvHOS7vPIz3cqArp8dZM4Ax9_lzW0HCaz626ZULDM,7684
20
+ valor_lite/object_detection/computation.py,sha256=8A9p4uG2lf0S7SIR3paF8jBHJJ6f7z_CQ0ZmOioT1nM,26528
21
+ valor_lite/object_detection/evaluator.py,sha256=LIL2a5jyXvCx7BuPmBEz1B63PPanIJeUcSp3QJTmaCE,27821
22
+ valor_lite/object_detection/loader.py,sha256=kPGG8UXCBDZl10qfDv3YqailgdHk3O4DyZQ8pqkVhJc,11428
23
+ valor_lite/object_detection/metric.py,sha256=-bcupJvS_BSWNPN2trkO8EWD-z7TDFGGim7fIHpzPzw,27924
24
+ valor_lite/object_detection/shared.py,sha256=X1lE3GxOzmS_3PZt2L9Su1Z-jSvz7cQ0IInGFSxu_fY,6292
25
+ valor_lite/object_detection/utilities.py,sha256=zoez6MaBx0IwxJf-zDQMkCh80lQf0zB4Fl1xluFTtnY,14526
26
+ valor_lite/semantic_segmentation/__init__.py,sha256=OeAKuANM2mvw3JX4pi-eudc82YMqsXJwK1DIjgl2oeI,318
27
+ valor_lite/semantic_segmentation/annotation.py,sha256=XB54BcBu_soQvbP3DrbXCruw-sypJBC6KhLqRWX1Vmw,4384
28
+ valor_lite/semantic_segmentation/computation.py,sha256=dhbwybDe5kQGXzUSOjm15UGjDoGGt5zPtvPyvp8kFc4,4690
29
+ valor_lite/semantic_segmentation/evaluator.py,sha256=jXFBPv_pmIOj85wJFFikbNNWDq8wPG2rcLiaWrgDcUs,13569
30
+ valor_lite/semantic_segmentation/loader.py,sha256=rDtbeGhQEDhGjcjx9RHiSATQM25uftF13c1pm09axUM,7582
31
+ valor_lite/semantic_segmentation/metric.py,sha256=T9RfPJf4WgqGQTXYvSy08vJG5bjXXJnyYZeW0mlxMa8,7132
32
+ valor_lite/semantic_segmentation/shared.py,sha256=i9F7nAoH9Yhabqj-SVtnQfm2ST0xkVFUcQVBVLabPeE,4539
33
+ valor_lite/semantic_segmentation/utilities.py,sha256=MjqSlS1wy0RwJGiuUpZrT3AUMXEahO7-lh2oeNeqGV8,2470
34
+ valor_lite/text_generation/__init__.py,sha256=pGhpWCSZjLM0pPHCtPykAfos55B8ie3mi9EzbNxfj-U,356
35
+ valor_lite/text_generation/annotation.py,sha256=O5aXiwCS4WjA-fqn4ly-O0MsTHoIOmqxqCaAp9IeI3M,1270
36
+ valor_lite/text_generation/computation.py,sha256=hGDkPfzWY9SDTdozd-nArexJ3ZSNlCIWqHGoD8vO2Cc,18652
37
+ valor_lite/text_generation/manager.py,sha256=C4QwvronGHXmYSkaRmUGy7TN0C0aeyDx9Hb-ClNYXK4,24810
38
+ valor_lite/text_generation/metric.py,sha256=C9gbWejjOJ23JVLecuUhYW5rkx30NUCfRtgsM46uMds,10409
39
+ valor_lite/text_generation/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
+ valor_lite/text_generation/llm/exceptions.py,sha256=w4eYSJIJQ_jWuCmquCB6ETr_st_LzbDRlhnlPeqwmfo,349
41
+ valor_lite/text_generation/llm/generation.py,sha256=XKPjCxPUZHiWInQSO7wLOb0YtMFLu50s8rHZe1Yz0s0,28954
42
+ valor_lite/text_generation/llm/instructions.py,sha256=fz2onBZZWcl5W8iy7zEWkPGU9N07ez6O7SxZA5M2xe4,34056
43
+ valor_lite/text_generation/llm/integrations.py,sha256=-rTfdAjq1zH-4ixwYuMQEOQ80pIFzMTe0BYfroVx3Pg,6974
44
+ valor_lite/text_generation/llm/utilities.py,sha256=bjqatGgtVTcl1PrMwiDKTYPGJXKrBrx7PDtzIblGSys,1178
45
+ valor_lite/text_generation/llm/validators.py,sha256=Wzr5RlfF58_2wOU-uTw7C8skan_fYdhy4Gfn0jSJ8HM,2700
46
+ valor_lite-0.37.5.dist-info/METADATA,sha256=nHuvYmvGYZAWh9Wc5LQqI6vRWlSd9gGhW3PUk5FgBoM,5139
47
+ valor_lite-0.37.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
48
+ valor_lite-0.37.5.dist-info/top_level.txt,sha256=9ujykxSwpl2Hu0_R95UQTR_l07k9UUTSdrpiqmq6zc4,11
49
+ valor_lite-0.37.5.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5