valor_lite-0.32.2a2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. valor_lite-0.32.2a2/LICENSE +21 -0
  2. valor_lite-0.32.2a2/PKG-INFO +40 -0
  3. valor_lite-0.32.2a2/README.md +1 -0
  4. valor_lite-0.32.2a2/benchmarks/.gitignore +2 -0
  5. valor_lite-0.32.2a2/benchmarks/benchmark_objdet.py +330 -0
  6. valor_lite-0.32.2a2/examples/.gitignore +1 -0
  7. valor_lite-0.32.2a2/examples/coco-yolo.ipynb +442 -0
  8. valor_lite-0.32.2a2/pyproject.toml +38 -0
  9. valor_lite-0.32.2a2/setup.cfg +4 -0
  10. valor_lite-0.32.2a2/tests/detection/__init__.py +0 -0
  11. valor_lite-0.32.2a2/tests/detection/conftest.py +504 -0
  12. valor_lite-0.32.2a2/tests/detection/test_average_precision.py +623 -0
  13. valor_lite-0.32.2a2/tests/detection/test_average_recall.py +246 -0
  14. valor_lite-0.32.2a2/tests/detection/test_counts.py +457 -0
  15. valor_lite-0.32.2a2/tests/detection/test_dataloader.py +34 -0
  16. valor_lite-0.32.2a2/tests/detection/test_detailed_pr_curve.py +882 -0
  17. valor_lite-0.32.2a2/tests/detection/test_evaluator.py +31 -0
  18. valor_lite-0.32.2a2/tests/detection/test_filtering.py +401 -0
  19. valor_lite-0.32.2a2/tests/detection/test_iou.py +30 -0
  20. valor_lite-0.32.2a2/tests/detection/test_pr_curve.py +177 -0
  21. valor_lite-0.32.2a2/tests/detection/test_precision.py +389 -0
  22. valor_lite-0.32.2a2/tests/detection/test_recall.py +389 -0
  23. valor_lite-0.32.2a2/tests/detection/test_schemas.py +105 -0
  24. valor_lite-0.32.2a2/tests/detection/test_stability.py +87 -0
  25. valor_lite-0.32.2a2/valor_lite/__init__.py +0 -0
  26. valor_lite-0.32.2a2/valor_lite/detection/__init__.py +56 -0
  27. valor_lite-0.32.2a2/valor_lite/detection/annotation.py +54 -0
  28. valor_lite-0.32.2a2/valor_lite/detection/computation.py +506 -0
  29. valor_lite-0.32.2a2/valor_lite/detection/manager.py +845 -0
  30. valor_lite-0.32.2a2/valor_lite/detection/metric.py +357 -0
  31. valor_lite-0.32.2a2/valor_lite/schemas.py +15 -0
  32. valor_lite-0.32.2a2/valor_lite.egg-info/PKG-INFO +40 -0
  33. valor_lite-0.32.2a2/valor_lite.egg-info/SOURCES.txt +34 -0
  34. valor_lite-0.32.2a2/valor_lite.egg-info/dependency_links.txt +1 -0
  35. valor_lite-0.32.2a2/valor_lite.egg-info/requires.txt +11 -0
  36. valor_lite-0.32.2a2/valor_lite.egg-info/top_level.txt +1 -0
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2023 Striveworks
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
@@ -0,0 +1,40 @@
+ Metadata-Version: 2.1
+ Name: valor-lite
+ Version: 0.32.2a2
+ Summary: Compute valor metrics directly in your client.
+ License: MIT License
+
+ Copyright (c) 2023 Striveworks
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
+ Project-URL: homepage, https://www.striveworks.com
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: Pillow>=9.1.0
+ Requires-Dist: importlib_metadata; python_version < "3.8"
+ Requires-Dist: tqdm
+ Requires-Dist: requests
+ Requires-Dist: numpy
+ Provides-Extra: test
+ Requires-Dist: pytest; extra == "test"
+ Requires-Dist: coverage; extra == "test"
+
+ # valor-lite: Compute classification, object detection, and segmentation metrics locally.
@@ -0,0 +1 @@
+ # valor-lite: Compute classification, object detection, and segmentation metrics locally.
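The README is a single tagline. For orientation, the sketch below reconstructs the core workflow from benchmarks/benchmark_objdet.py further down in this diff; every identifier (DataLoader, add_data_from_valor_dict, finalize, evaluate, compute_detailed_pr_curve, n_datums, n_labels) appears in that script, but the meaning of the boolean argument and the full JSONL schema are assumptions rather than documented API.

import json

from valor_lite.detection import DataLoader

# Load Valor-format groundtruth and prediction dictionaries; only the
# "datum.uid" key is visible in this diff, so the full schema is not shown.
loader = DataLoader()
with open("gt_objdet_coco_bbox.jsonl") as gf, open("pd_objdet_yolo_bbox.jsonl") as pf:
    groundtruths = [json.loads(line) for line in gf]
    predictions = [json.loads(line) for line in pf]

# The benchmark passes a second positional argument of True; its meaning is
# not documented in this diff (assumed to be a progress/validation flag).
loader.add_data_from_valor_dict(zip(groundtruths, predictions), True)

evaluator = loader.finalize()  # numpy conversion, IoU, and pair ranking
metrics = evaluator.evaluate()  # base metrics (AP/AR/counts, per the tests)
curves = evaluator.compute_detailed_pr_curve(n_samples=3)
print(evaluator.n_datums, evaluator.n_labels)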
@@ -0,0 +1,2 @@
+ *.json
+ *.jsonl
@@ -0,0 +1,330 @@
+ import json
+ import os
+ from dataclasses import dataclass
+ from datetime import datetime
+ from enum import Enum
+ from pathlib import Path
+ from time import time
+
+ import requests
+ from tqdm import tqdm
+ from valor_lite.detection import DataLoader
+
+
+ class AnnotationType(str, Enum):
+     NONE = "none"
+     BOX = "box"
+     POLYGON = "polygon"
+     MULTIPOLYGON = "multipolygon"
+     RASTER = "raster"
+
+
+ def time_it(fn):
+     def wrapper(*args, **kwargs):
+         start = time()
+         results = fn(*args, **kwargs)
+         return (time() - start, results)
+
+     return wrapper
+
+
+ def download_data_if_not_exists(
+     file_name: str,
+     file_path: Path,
+     url: str,
+ ):
+     """Download the data from a public bucket if it doesn't exist locally."""
+
+     if not os.path.exists(file_path):
+         response = requests.get(url, stream=True)
+         if response.status_code == 200:
+             total_size = int(response.headers.get("content-length", 0))
+             with open(file_path, "wb") as f:
+                 with tqdm(
+                     total=total_size,
+                     unit="B",
+                     unit_scale=True,
+                     unit_divisor=1024,
+                     desc=file_name,
+                 ) as pbar:
+                     for chunk in response.iter_content(chunk_size=1024):
+                         if chunk:
+                             f.write(chunk)
+                             pbar.update(len(chunk))
+         else:
+             raise RuntimeError(
+                 f"Failed to download {file_name}: HTTP {response.status_code}"
+             )
+     else:
+         print(f"{file_name} already exists locally.")
+
+     # sort the file by datum uid so groundtruth and prediction lines align
+     with open(file_path, "r") as f:
+         lines = [x for x in f]
+     with open(file_path, "w") as f:
+         for line in sorted(
+             lines, key=lambda x: int(json.loads(x)["datum"]["uid"])
+         ):
+             f.write(line)
+
+
+ def write_results_to_file(write_path: Path, results: list[dict]):
+     """Append benchmark results to the results file, keyed by timestamp."""
+     current_datetime = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
+     if os.path.isfile(write_path):
+         with open(write_path, "r") as file:
+             data = json.load(file)
+     else:
+         data = {}
+
+     data[current_datetime] = results
+
+     with open(write_path, "w") as file:
+         json.dump(data, file, indent=4)
+
+
+ @time_it
+ def ingest(
+     manager: DataLoader,
+     gt_path: Path,
+     pd_path: Path,
+     limit: int,
+     chunk_size: int,
+ ):
+     """Stream groundtruth/prediction pairs into the DataLoader in chunks.
+
+     Returns the time accumulated inside the DataLoader; the @time_it wrapper
+     adds the total wall time, which also covers reading the JSONL files.
+     """
+     accumulated_time = 0.0
+     with open(gt_path, "r") as gf:
+         with open(pd_path, "r") as pf:
+
+             count = 0
+             groundtruths = []
+             predictions = []
+             for gline, pline in zip(gf, pf):
+
+                 # groundtruth
+                 gt_dict = json.loads(gline)
+                 groundtruths.append(gt_dict)
+
+                 # prediction
+                 pd_dict = json.loads(pline)
+                 predictions.append(pd_dict)
+
+                 count += 1
+                 if count >= limit and limit > 0:
+                     break
+                 elif len(groundtruths) < chunk_size or chunk_size == -1:
+                     continue
+
+                 timer, _ = time_it(manager.add_data_from_valor_dict)(
+                     zip(groundtruths, predictions), True
+                 )
+                 accumulated_time += timer
+                 groundtruths = []
+                 predictions = []
+
+             # flush any remaining partial chunk
+             if groundtruths:
+                 timer, _ = time_it(manager.add_data_from_valor_dict)(
+                     zip(groundtruths, predictions), True
+                 )
+                 accumulated_time += timer
+
+     return accumulated_time
+
+
+ @dataclass
+ class Benchmark:
+     limit: int
+     n_datums: int
+     n_groundtruths: int
+     n_predictions: int
+     n_labels: int
+     gt_type: AnnotationType
+     pd_type: AnnotationType
+     chunk_size: int
+     ingestion: float
+     preprocessing: float
+     precomputation: float
+     evaluation: float
+     detailed_curves: list[tuple[int, float]]
+
+     def result(self) -> dict:
+         return {
+             "limit": self.limit,
+             "n_datums": self.n_datums,
+             "n_groundtruths": self.n_groundtruths,
+             "n_predictions": self.n_predictions,
+             "n_labels": self.n_labels,
+             "dtype": {
+                 "groundtruth": self.gt_type.value,
+                 "prediction": self.pd_type.value,
+             },
+             "chunk_size": self.chunk_size,
+             "ingestion": {
+                 "loading_from_file": f"{round(self.ingestion - self.preprocessing, 2)} seconds",
+                 "numpy_conversion + IoU": f"{round(self.preprocessing, 2)} seconds",
+                 "ranking_pairs": f"{round(self.precomputation, 2)} seconds",
+                 "total": f"{round(self.ingestion + self.precomputation, 2)} seconds",
+             },
+             "base_evaluation": f"{round(self.evaluation, 2)} seconds",
+             "detailed_pr_curve": [
+                 {
+                     "n_points": 10,
+                     "n_examples": curve[0],
+                     "computation": f"{round(curve[1], 2)} seconds",
+                 }
+                 for curve in self.detailed_curves
+             ],
+         }
+
+
+ def run_benchmarking_analysis(
+     limits_to_test: list[int],
+     combinations: list[tuple[AnnotationType, AnnotationType]] | None = None,
+     results_file: str = "manager_results.json",
+     chunk_size: int = -1,
+     compute_pr: bool = True,  # currently unused
+     compute_detailed: bool = True,
+     ingestion_timeout: int = 30,
+     evaluation_timeout: int = 30,
+ ):
+     """Time various function calls and export the results."""
+     current_directory = Path(__file__).parent
+     write_path = current_directory / Path(results_file)
+
+     gt_box_filename = "gt_objdet_coco_bbox.jsonl"
+     gt_polygon_filename = "gt_objdet_coco_polygon.jsonl"
+     gt_multipolygon_filename = "gt_objdet_coco_raster_multipolygon.jsonl"
+     gt_raster_filename = "gt_objdet_coco_raster_bitmask.jsonl"
+     pd_box_filename = "pd_objdet_yolo_bbox.jsonl"
+     pd_polygon_filename = "pd_objdet_yolo_polygon.jsonl"
+     pd_multipolygon_filename = "pd_objdet_yolo_multipolygon.jsonl"
+     pd_raster_filename = "pd_objdet_yolo_raster.jsonl"
+
+     groundtruth_caches = {
+         AnnotationType.BOX: gt_box_filename,
+         AnnotationType.POLYGON: gt_polygon_filename,
+         AnnotationType.MULTIPOLYGON: gt_multipolygon_filename,
+         AnnotationType.RASTER: gt_raster_filename,
+     }
+     prediction_caches = {
+         AnnotationType.BOX: pd_box_filename,
+         AnnotationType.POLYGON: pd_polygon_filename,
+         AnnotationType.MULTIPOLYGON: pd_multipolygon_filename,
+         AnnotationType.RASTER: pd_raster_filename,
+     }
+
+     # default is to perform all combinations
+     if combinations is None:
+         combinations = [
+             (gt_type, pd_type)
+             for gt_type in groundtruth_caches
+             for pd_type in prediction_caches
+         ]
+
+     # cache data locally
+     filenames = [
+         *list(groundtruth_caches.values()),
+         *list(prediction_caches.values()),
+     ]
+     for filename in filenames:
+         file_path = current_directory / Path(filename)
+         url = f"https://pub-fae71003f78140bdaedf32a7c8d331d2.r2.dev/{filename}"
+         download_data_if_not_exists(
+             file_name=filename, file_path=file_path, url=url
+         )
+
+     # iterate through datum limits
+     results = []
+     for limit in limits_to_test:
+         for gt_type, pd_type in combinations:
+
+             gt_filename = groundtruth_caches[gt_type]
+             pd_filename = prediction_caches[pd_type]
+
+             # === Base Evaluation ===
+             manager = DataLoader()
+
+             # ingest + preprocess
+             ingest_time, preprocessing_time = ingest(
+                 manager=manager,
+                 gt_path=current_directory / Path(gt_filename),
+                 pd_path=current_directory / Path(pd_filename),
+                 limit=limit,
+                 chunk_size=chunk_size,
+             )  # type: ignore - time_it wrapper
+
+             finalization_time, evaluator = time_it(manager.finalize)()
+
+             if ingest_time > ingestion_timeout and ingestion_timeout != -1:
+                 raise TimeoutError(
+                     f"Ingestion timed out with limit of {limit}."
+                 )
+
+             # optionally time the detailed pr curve with and without examples
+             detailed_curves = []
+             if compute_detailed:
+                 detailed_pr_curve_time_no_samples, _ = time_it(
+                     evaluator.compute_detailed_pr_curve
+                 )()
+                 detailed_curves.append((0, detailed_pr_curve_time_no_samples))
+
+                 detailed_pr_curve_time_three_samples, _ = time_it(
+                     evaluator.compute_detailed_pr_curve
+                 )(n_samples=3)
+                 detailed_curves.append((3, detailed_pr_curve_time_three_samples))
+
+             # evaluate
+             eval_time, _ = time_it(evaluator.evaluate)()
+             if eval_time > evaluation_timeout and evaluation_timeout != -1:
+                 raise TimeoutError(
+                     f"Base evaluation timed out with {evaluator.n_datums} datums."
+                 )
+
+             results.append(
+                 Benchmark(
+                     limit=limit,
+                     n_datums=evaluator.n_datums,
+                     n_groundtruths=evaluator.n_groundtruths,
+                     n_predictions=evaluator.n_predictions,
+                     n_labels=evaluator.n_labels,
+                     gt_type=gt_type,
+                     pd_type=pd_type,
+                     chunk_size=chunk_size,
+                     ingestion=ingest_time,
+                     preprocessing=preprocessing_time,
+                     precomputation=finalization_time,
+                     evaluation=eval_time,
+                     detailed_curves=detailed_curves,
+                 ).result()
+             )
+
+     write_results_to_file(write_path=write_path, results=results)
+
+
+ if __name__ == "__main__":
+
+     # run bounding box benchmark
+     run_benchmarking_analysis(
+         combinations=[
+             (AnnotationType.BOX, AnnotationType.BOX),
+         ],
+         limits_to_test=[5000, 5000],
+         compute_detailed=False,
+     )
+
+     # # run polygon benchmark
+     # run_benchmarking_analysis(
+     #     combinations=[
+     #         (AnnotationType.POLYGON, AnnotationType.POLYGON),
+     #     ],
+     #     limits_to_test=[5000, 5000],
+     #     compute_detailed=False,
+     # )
+
+     # # run raster benchmark
+     # run_benchmarking_analysis(
+     #     combinations=[
+     #         (AnnotationType.RASTER, AnnotationType.RASTER),
+     #     ],
+     #     limits_to_test=[500, 500],
+     #     compute_detailed=False,
+     # )
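Only the bounding-box run is active in __main__ above; the polygon and raster runs are left commented out. As a hypothetical illustration of the remaining knobs, a chunked polygon run might look like the following (parameter names come from the signature of run_benchmarking_analysis; the limit and chunk size are illustrative, and -1 disables a timeout per the guards above):

# Hypothetical invocation: time a polygon-vs-polygon benchmark, ingesting in
# chunks of 500 pairs, skipping detailed PR-curve timings, with timeouts off.
run_benchmarking_analysis(
    combinations=[(AnnotationType.POLYGON, AnnotationType.POLYGON)],
    limits_to_test=[1000],
    chunk_size=500,
    compute_detailed=False,
    ingestion_timeout=-1,
    evaluation_timeout=-1,
)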
@@ -0,0 +1 @@
+ !*.ipynb