valor-lite 0.33.4__tar.gz → 0.33.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (58)
  1. {valor_lite-0.33.4 → valor_lite-0.33.6}/PKG-INFO +1 -1
  2. valor_lite-0.33.6/benchmarks/benchmark_classification.py +254 -0
  3. {valor_lite-0.33.4 → valor_lite-0.33.6}/benchmarks/benchmark_objdet.py +4 -4
  4. valor_lite-0.33.6/examples/object-detection.ipynb +1088 -0
  5. valor_lite-0.33.6/examples/tabular_classification.ipynb +618 -0
  6. valor_lite-0.33.6/tests/classification/conftest.py +378 -0
  7. valor_lite-0.33.6/tests/classification/test_accuracy.py +316 -0
  8. valor_lite-0.33.6/tests/classification/test_confusion_matrix.py +651 -0
  9. valor_lite-0.33.6/tests/classification/test_counts.py +870 -0
  10. valor_lite-0.33.6/tests/classification/test_dataloader.py +49 -0
  11. valor_lite-0.33.6/tests/classification/test_evaluator.py +25 -0
  12. valor_lite-0.33.6/tests/classification/test_f1.py +322 -0
  13. valor_lite-0.33.6/tests/classification/test_filtering.py +560 -0
  14. valor_lite-0.33.6/tests/classification/test_precision.py +321 -0
  15. valor_lite-0.33.6/tests/classification/test_recall.py +322 -0
  16. valor_lite-0.33.6/tests/classification/test_rocauc.py +348 -0
  17. valor_lite-0.33.6/tests/classification/test_schemas.py +21 -0
  18. valor_lite-0.33.6/tests/classification/test_stability.py +75 -0
  19. valor_lite-0.33.6/tests/detection/test_confusion_matrix.py +1706 -0
  20. {valor_lite-0.33.4 → valor_lite-0.33.6}/tests/detection/test_evaluator.py +1 -1
  21. {valor_lite-0.33.4 → valor_lite-0.33.6}/tests/detection/test_filtering.py +70 -35
  22. {valor_lite-0.33.4 → valor_lite-0.33.6}/tests/detection/test_iou.py +2 -1
  23. valor_lite-0.33.6/valor_lite/classification/__init__.py +30 -0
  24. valor_lite-0.33.6/valor_lite/classification/annotation.py +13 -0
  25. valor_lite-0.33.6/valor_lite/classification/computation.py +411 -0
  26. valor_lite-0.33.6/valor_lite/classification/manager.py +842 -0
  27. valor_lite-0.33.6/valor_lite/classification/metric.py +191 -0
  28. {valor_lite-0.33.4 → valor_lite-0.33.6}/valor_lite/detection/__init__.py +11 -6
  29. {valor_lite-0.33.4 → valor_lite-0.33.6}/valor_lite/detection/computation.py +208 -152
  30. {valor_lite-0.33.4 → valor_lite-0.33.6}/valor_lite/detection/manager.py +354 -133
  31. {valor_lite-0.33.4 → valor_lite-0.33.6}/valor_lite/detection/metric.py +60 -34
  32. {valor_lite-0.33.4 → valor_lite-0.33.6}/valor_lite.egg-info/PKG-INFO +1 -1
  33. {valor_lite-0.33.4 → valor_lite-0.33.6}/valor_lite.egg-info/SOURCES.txt +22 -2
  34. valor_lite-0.33.4/examples/coco-yolo.ipynb +0 -442
  35. valor_lite-0.33.4/tests/detection/test_detailed_counts.py +0 -1807
  36. {valor_lite-0.33.4 → valor_lite-0.33.6}/LICENSE +0 -0
  37. {valor_lite-0.33.4 → valor_lite-0.33.6}/README.md +0 -0
  38. {valor_lite-0.33.4 → valor_lite-0.33.6}/benchmarks/.gitignore +0 -0
  39. {valor_lite-0.33.4 → valor_lite-0.33.6}/examples/.gitignore +0 -0
  40. {valor_lite-0.33.4 → valor_lite-0.33.6}/pyproject.toml +0 -0
  41. {valor_lite-0.33.4 → valor_lite-0.33.6}/setup.cfg +0 -0
  42. {valor_lite-0.33.4 → valor_lite-0.33.6}/tests/detection/__init__.py +0 -0
  43. {valor_lite-0.33.4 → valor_lite-0.33.6}/tests/detection/conftest.py +0 -0
  44. {valor_lite-0.33.4 → valor_lite-0.33.6}/tests/detection/test_average_precision.py +0 -0
  45. {valor_lite-0.33.4 → valor_lite-0.33.6}/tests/detection/test_average_recall.py +0 -0
  46. {valor_lite-0.33.4 → valor_lite-0.33.6}/tests/detection/test_counts.py +0 -0
  47. {valor_lite-0.33.4 → valor_lite-0.33.6}/tests/detection/test_dataloader.py +0 -0
  48. {valor_lite-0.33.4 → valor_lite-0.33.6}/tests/detection/test_pr_curve.py +0 -0
  49. {valor_lite-0.33.4 → valor_lite-0.33.6}/tests/detection/test_precision.py +0 -0
  50. {valor_lite-0.33.4 → valor_lite-0.33.6}/tests/detection/test_recall.py +0 -0
  51. {valor_lite-0.33.4 → valor_lite-0.33.6}/tests/detection/test_schemas.py +0 -0
  52. {valor_lite-0.33.4 → valor_lite-0.33.6}/tests/detection/test_stability.py +0 -0
  53. {valor_lite-0.33.4 → valor_lite-0.33.6}/valor_lite/__init__.py +0 -0
  54. {valor_lite-0.33.4 → valor_lite-0.33.6}/valor_lite/detection/annotation.py +0 -0
  55. {valor_lite-0.33.4 → valor_lite-0.33.6}/valor_lite/schemas.py +0 -0
  56. {valor_lite-0.33.4 → valor_lite-0.33.6}/valor_lite.egg-info/dependency_links.txt +0 -0
  57. {valor_lite-0.33.4 → valor_lite-0.33.6}/valor_lite.egg-info/requires.txt +0 -0
  58. {valor_lite-0.33.4 → valor_lite-0.33.6}/valor_lite.egg-info/top_level.txt +0 -0
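
The headline change in 0.33.6 is a new valor_lite.classification subpackage (annotation, computation, manager, and metric modules) with a full test suite, a classification benchmark, and two example notebooks; on the detection side, the DetailedCounts metric and its tests are removed in favor of ConfusionMatrix (see the benchmark_objdet.py hunks at the end of this diff). A minimal usage sketch of the new classification API, pieced together from benchmarks/benchmark_classification.py below — the groundtruths/predictions variables are placeholders, and only DataLoader, MetricType, add_data_from_valor_dict, finalize, and evaluate are confirmed by this diff:

    from valor_lite.classification import DataLoader, MetricType

    loader = DataLoader()
    # ground truths and predictions are Valor-format dicts, paired per datum;
    # the exact dict schema is not shown in this diff
    loader.add_data_from_valor_dict(zip(groundtruths, predictions), True)

    evaluator = loader.finalize()  # precomputes internal state, returns an evaluator
    base_metrics = evaluator.evaluate()
    detailed = evaluator.evaluate(
        metrics_to_return=[*MetricType.base(), MetricType.ConfusionMatrix],
        number_of_examples=3,
    )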
--- valor_lite-0.33.4/PKG-INFO
+++ valor_lite-0.33.6/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: valor-lite
-Version: 0.33.4
+Version: 0.33.6
 Summary: Compute valor metrics directly in your client.
 License: MIT License
 
--- /dev/null
+++ valor_lite-0.33.6/benchmarks/benchmark_classification.py
@@ -0,0 +1,254 @@
+import json
+import os
+from dataclasses import dataclass
+from datetime import datetime
+from pathlib import Path
+from time import time
+
+import requests
+from tqdm import tqdm
+from valor_lite.classification import DataLoader, MetricType
+
+
+def time_it(fn):
+    def wrapper(*args, **kwargs):
+        start = time()
+        results = fn(*args, **kwargs)
+        return (time() - start, results)
+
+    return wrapper
+
+
+def download_data_if_not_exists(
+    file_name: str,
+    file_path: Path,
+    url: str,
+):
+    """Download the data from a public bucket if it doesn't exist locally."""
+
+    if not os.path.exists(file_path):
+        response = requests.get(url, stream=True)
+        if response.status_code == 200:
+            total_size = int(response.headers.get("content-length", 0))
+            with open(file_path, "wb") as f:
+                with tqdm(
+                    total=total_size,
+                    unit="B",
+                    unit_scale=True,
+                    unit_divisor=1024,
+                    desc=file_name,
+                ) as pbar:
+                    for chunk in response.iter_content(chunk_size=1024):
+                        if chunk:
+                            f.write(chunk)
+                            pbar.update(1024)
+        else:
+            raise RuntimeError(response)
+    else:
+        print(f"{file_name} already exists locally.")
+
+
+def write_results_to_file(write_path: Path, results: list[dict]):
+    """Write results to results.json"""
+    current_datetime = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
+    if os.path.isfile(write_path):
+        with open(write_path, "r") as file:
+            file.seek(0)
+            data = json.load(file)
+    else:
+        data = {}
+
+    data[current_datetime] = results
+
+    with open(write_path, "w+") as file:
+        json.dump(data, file, indent=4)
+
+
+@time_it
+def ingest(
+    loader: DataLoader,
+    gt_path: Path,
+    pd_path: Path,
+    limit: int,
+    chunk_size: int,
+):
+    accumulated_time = 0.0
+    with open(gt_path, "r") as gf:
+        with open(pd_path, "r") as pf:
+            count = 0
+            groundtruths = []
+            predictions = []
+            for gline, pline in zip(gf, pf):
+
+                # groundtruth
+                gt_dict = json.loads(gline)
+                groundtruths.append(gt_dict)
+
+                # prediction
+                pd_dict = json.loads(pline)
+                predictions.append(pd_dict)
+
+                count += 1
+                if count >= limit and limit > 0:
+                    break
+                elif len(groundtruths) < chunk_size or chunk_size == -1:
+                    continue
+
+                timer, _ = time_it(loader.add_data_from_valor_dict)(
+                    zip(groundtruths, predictions), True
+                )
+                accumulated_time += timer
+                groundtruths = []
+                predictions = []
+
+            if groundtruths:
+                timer, _ = time_it(loader.add_data_from_valor_dict)(
+                    zip(groundtruths, predictions), True
+                )
+                accumulated_time += timer
+
+    return accumulated_time
+
+
+@dataclass
+class Benchmark:
+    limit: int
+    n_datums: int
+    n_groundtruths: int
+    n_predictions: int
+    n_labels: int
+    chunk_size: int
+    ingestion: float
+    preprocessing: float
+    precomputation: float
+    evaluation: float
+    detailed_evaluation: list[tuple[int, float]]
+
+    def result(self) -> dict:
+        return {
+            "limit": self.limit,
+            "n_datums": self.n_datums,
+            "n_groundtruths": self.n_groundtruths,
+            "n_predictions": self.n_predictions,
+            "n_labels": self.n_labels,
+            "chunk_size": self.chunk_size,
+            "ingestion": {
+                "loading_from_file": f"{round(self.ingestion - self.preprocessing, 2)} seconds",
+                "numpy_conversion": f"{round(self.preprocessing, 2)} seconds",
+                "finalization": f"{round(self.precomputation, 2)} seconds",
+                "total": f"{round(self.ingestion + self.precomputation, 2)} seconds",
+            },
+            "base_evaluation": f"{round(self.evaluation, 2)} seconds",
+            "detailed_evaluation": [
+                {
+                    "n_points": 10,
+                    "n_examples": curve[0],
+                    "computation": f"{round(curve[1], 2)} seconds",
+                }
+                for curve in self.detailed_evaluation
+            ],
+        }
+
+
+def run_benchmarking_analysis(
+    limits_to_test: list[int],
+    results_file: str = "clf_results.json",
+    chunk_size: int = -1,
+    ingestion_timeout=30,
+    evaluation_timeout=30,
+):
+    """Time various function calls and export the results."""
+    current_directory = Path(__file__).parent
+    write_path = current_directory / Path(results_file)
+
+    gt_filename = "gt_classification.jsonl"
+    pd_filename = "pd_classification.jsonl"
+
+    # cache data locally
+    for filename in [gt_filename, pd_filename]:
+        file_path = current_directory / Path(filename)
+        url = f"https://pub-fae71003f78140bdaedf32a7c8d331d2.r2.dev/{filename}"
+        download_data_if_not_exists(
+            file_name=filename, file_path=file_path, url=url
+        )
+
+    # iterate through datum limits
+    results = list()
+    for limit in limits_to_test:
+
+        # === Base Evaluation ===
+        loader = DataLoader()
+
+        # ingest + preprocess
+        (ingest_time, preprocessing_time,) = ingest(
+            loader=loader,
+            gt_path=current_directory / Path(gt_filename),
+            pd_path=current_directory / Path(pd_filename),
+            limit=limit,
+            chunk_size=chunk_size,
+        )  # type: ignore - time_it wrapper
+
+        finalization_time, evaluator = time_it(loader.finalize)()
+
+        if ingest_time > ingestion_timeout and ingestion_timeout != -1:
+            raise TimeoutError(
+                f"Base precomputation timed out with limit of {limit}."
+            )
+
+        # evaluate
+        eval_time, _ = time_it(evaluator.evaluate)()
+        if eval_time > evaluation_timeout and evaluation_timeout != -1:
+            raise TimeoutError(
+                f"Base evaluation timed out with {evaluator.n_datums} datums."
+            )
+
+        detail_no_examples_time, _ = time_it(evaluator.evaluate)(
+            metrics_to_return=[*MetricType.base(), MetricType.ConfusionMatrix],
+        )
+        if (
+            detail_no_examples_time > evaluation_timeout
+            and evaluation_timeout != -1
+        ):
+            raise TimeoutError(
+                f"Base evaluation timed out with {evaluator.n_datums} datums."
+            )
+
+        detail_three_examples_time, _ = time_it(evaluator.evaluate)(
+            metrics_to_return=[*MetricType.base(), MetricType.ConfusionMatrix],
+            number_of_examples=3,
+        )
+        if (
+            detail_three_examples_time > evaluation_timeout
+            and evaluation_timeout != -1
+        ):
+            raise TimeoutError(
+                f"Base evaluation timed out with {evaluator.n_datums} datums."
+            )
+
+        results.append(
+            Benchmark(
+                limit=limit,
+                n_datums=evaluator.n_datums,
+                n_groundtruths=evaluator.n_groundtruths,
+                n_predictions=evaluator.n_predictions,
+                n_labels=evaluator.n_labels,
+                chunk_size=chunk_size,
+                ingestion=ingest_time,
+                preprocessing=preprocessing_time,
+                precomputation=finalization_time,
+                evaluation=eval_time,
+                detailed_evaluation=[
+                    (0, detail_no_examples_time),
+                    (3, detail_three_examples_time),
+                ],
+            ).result()
+        )
+
+    write_results_to_file(write_path=write_path, results=results)
+
+
+if __name__ == "__main__":
+
+    run_benchmarking_analysis(
+        limits_to_test=[5000, 5000, 5000],
+    )
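
Two details of the new benchmark worth noting. First, time_it is used both as a decorator (@time_it on ingest) and inline (time_it(loader.finalize)()); either way, the wrapped call returns an (elapsed_seconds, result) tuple, which is why the caller unpacks ingest(...) into (ingest_time, preprocessing_time) and silences the type checker with # type: ignore. A self-contained sketch of the pattern, where work is a hypothetical stand-in:

    from time import sleep, time

    def time_it(fn):
        def wrapper(*args, **kwargs):
            start = time()
            results = fn(*args, **kwargs)
            return (time() - start, results)
        return wrapper

    def work(n):
        # hypothetical stand-in for loader.finalize or evaluator.evaluate
        sleep(0.1)
        return n * 2

    elapsed, value = time_it(work)(21)  # inline use, as in the benchmark
    print(f"{elapsed:.2f}s -> {value}")

Second, because ingest is itself decorated, the accumulated_time it returns counts only the add_data_from_valor_dict calls, while the outer timer counts file reading and JSON parsing as well; Benchmark.result() reports these as numpy_conversion and loading_from_file respectively.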
--- valor_lite-0.33.4/benchmarks/benchmark_objdet.py
+++ valor_lite-0.33.6/benchmarks/benchmark_objdet.py
@@ -67,7 +67,7 @@ def download_data_if_not_exists(
 
 
 def write_results_to_file(write_path: Path, results: list[dict]):
-    """Write results to manager_results.json"""
+    """Write results to json"""
    current_datetime = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
    if os.path.isfile(write_path):
        with open(write_path, "r") as file:
@@ -178,7 +178,7 @@ class Benchmark:
 def run_benchmarking_analysis(
     limits_to_test: list[int],
     combinations: list[tuple[AnnotationType, AnnotationType]] | None = None,
-    results_file: str = "manager_results.json",
+    results_file: str = "objdet_results.json",
     chunk_size: int = -1,
     compute_pr: bool = True,
     compute_detailed: bool = True,
@@ -270,7 +270,7 @@ def run_benchmarking_analysis(
             evaluator.evaluate
         )(
             [
-                MetricType.DetailedCounts,
+                MetricType.ConfusionMatrix,
                 *MetricType.base_metrics(),
             ]
         )
@@ -287,7 +287,7 @@ def run_benchmarking_analysis(
             evaluator.evaluate
         )(
             [
-                MetricType.DetailedCounts,
+                MetricType.ConfusionMatrix,
                 *MetricType.base_metrics(),
             ],
             number_of_examples=3,
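
The last two hunks are the object-detection benchmark catching up with the metric rename: MetricType.DetailedCounts is replaced by MetricType.ConfusionMatrix, consistent with tests/detection/test_detailed_counts.py being deleted and tests/detection/test_confusion_matrix.py being added in the file list above. The updated call site, sketched with a hypothetical detection evaluator and assuming MetricType is importable from valor_lite.detection as in these benchmarks:

    from valor_lite.detection import MetricType

    # `evaluator` would come from a finalized detection DataLoader (not shown here)
    detailed_time, metrics = time_it(evaluator.evaluate)(
        [
            MetricType.ConfusionMatrix,
            *MetricType.base_metrics(),
        ],
        number_of_examples=3,
    )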