valor-lite 0.33.5__tar.gz → 0.33.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. {valor_lite-0.33.5 → valor_lite-0.33.7}/PKG-INFO +1 -1
  2. valor_lite-0.33.7/benchmarks/benchmark_classification.py +254 -0
  3. {valor_lite-0.33.5 → valor_lite-0.33.7}/benchmarks/benchmark_objdet.py +2 -2
  4. valor_lite-0.33.5/examples/coco-yolo.ipynb → valor_lite-0.33.7/examples/object-detection.ipynb +1 -16
  5. valor_lite-0.33.7/examples/tabular_classification.ipynb +618 -0
  6. valor_lite-0.33.7/tests/classification/conftest.py +378 -0
  7. valor_lite-0.33.7/tests/classification/test_accuracy.py +316 -0
  8. valor_lite-0.33.7/tests/classification/test_confusion_matrix.py +651 -0
  9. valor_lite-0.33.7/tests/classification/test_counts.py +870 -0
  10. valor_lite-0.33.7/tests/classification/test_dataloader.py +49 -0
  11. valor_lite-0.33.7/tests/classification/test_evaluator.py +25 -0
  12. valor_lite-0.33.7/tests/classification/test_f1.py +322 -0
  13. valor_lite-0.33.7/tests/classification/test_filtering.py +560 -0
  14. valor_lite-0.33.7/tests/classification/test_precision.py +321 -0
  15. valor_lite-0.33.7/tests/classification/test_recall.py +322 -0
  16. valor_lite-0.33.7/tests/classification/test_rocauc.py +348 -0
  17. valor_lite-0.33.7/tests/classification/test_schemas.py +21 -0
  18. valor_lite-0.33.7/tests/classification/test_stability.py +75 -0
  19. {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_average_precision.py +48 -32
  20. {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_average_recall.py +17 -20
  21. {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_confusion_matrix.py +15 -8
  22. {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_counts.py +40 -15
  23. {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_pr_curve.py +2 -3
  24. {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_precision.py +36 -13
  25. {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_recall.py +36 -13
  26. valor_lite-0.33.7/valor_lite/classification/__init__.py +30 -0
  27. valor_lite-0.33.7/valor_lite/classification/annotation.py +13 -0
  28. valor_lite-0.33.7/valor_lite/classification/computation.py +411 -0
  29. valor_lite-0.33.7/valor_lite/classification/manager.py +844 -0
  30. valor_lite-0.33.7/valor_lite/classification/metric.py +191 -0
  31. {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite/detection/manager.py +19 -8
  32. {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite.egg-info/PKG-INFO +1 -1
  33. {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite.egg-info/SOURCES.txt +21 -1
  34. {valor_lite-0.33.5 → valor_lite-0.33.7}/LICENSE +0 -0
  35. {valor_lite-0.33.5 → valor_lite-0.33.7}/README.md +0 -0
  36. {valor_lite-0.33.5 → valor_lite-0.33.7}/benchmarks/.gitignore +0 -0
  37. {valor_lite-0.33.5 → valor_lite-0.33.7}/examples/.gitignore +0 -0
  38. {valor_lite-0.33.5 → valor_lite-0.33.7}/pyproject.toml +0 -0
  39. {valor_lite-0.33.5 → valor_lite-0.33.7}/setup.cfg +0 -0
  40. {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/__init__.py +0 -0
  41. {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/conftest.py +0 -0
  42. {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_dataloader.py +0 -0
  43. {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_evaluator.py +0 -0
  44. {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_filtering.py +0 -0
  45. {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_iou.py +0 -0
  46. {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_schemas.py +0 -0
  47. {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_stability.py +0 -0
  48. {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite/__init__.py +0 -0
  49. {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite/detection/__init__.py +0 -0
  50. {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite/detection/annotation.py +0 -0
  51. {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite/detection/computation.py +0 -0
  52. {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite/detection/metric.py +0 -0
  53. {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite/schemas.py +0 -0
  54. {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite.egg-info/dependency_links.txt +0 -0
  55. {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite.egg-info/requires.txt +0 -0
  56. {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: valor-lite
3
- Version: 0.33.5
3
+ Version: 0.33.7
4
4
  Summary: Compute valor metrics directly in your client.
5
5
  License: MIT License
6
6
 
@@ -0,0 +1,254 @@
1
+ import json
2
+ import os
3
+ from dataclasses import dataclass
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from time import time
7
+
8
+ import requests
9
+ from tqdm import tqdm
10
+ from valor_lite.classification import DataLoader, MetricType
11
+
12
+
13
+ def time_it(fn):
14
+ def wrapper(*args, **kwargs):
15
+ start = time()
16
+ results = fn(*args, **kwargs)
17
+ return (time() - start, results)
18
+
19
+ return wrapper
20
+
21
+
22
+ def download_data_if_not_exists(
23
+ file_name: str,
24
+ file_path: Path,
25
+ url: str,
26
+ ):
27
+ """Download the data from a public bucket if it doesn't exist locally."""
28
+
29
+ if not os.path.exists(file_path):
30
+ response = requests.get(url, stream=True)
31
+ if response.status_code == 200:
32
+ total_size = int(response.headers.get("content-length", 0))
33
+ with open(file_path, "wb") as f:
34
+ with tqdm(
35
+ total=total_size,
36
+ unit="B",
37
+ unit_scale=True,
38
+ unit_divisor=1024,
39
+ desc=file_name,
40
+ ) as pbar:
41
+ for chunk in response.iter_content(chunk_size=1024):
42
+ if chunk:
43
+ f.write(chunk)
44
+ pbar.update(1024)
45
+ else:
46
+ raise RuntimeError(response)
47
+ else:
48
+ print(f"{file_name} already exists locally.")
49
+
50
+
51
+ def write_results_to_file(write_path: Path, results: list[dict]):
52
+ """Write results to results.json"""
53
+ current_datetime = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
54
+ if os.path.isfile(write_path):
55
+ with open(write_path, "r") as file:
56
+ file.seek(0)
57
+ data = json.load(file)
58
+ else:
59
+ data = {}
60
+
61
+ data[current_datetime] = results
62
+
63
+ with open(write_path, "w+") as file:
64
+ json.dump(data, file, indent=4)
65
+
66
+
67
+ @time_it
68
+ def ingest(
69
+ loader: DataLoader,
70
+ gt_path: Path,
71
+ pd_path: Path,
72
+ limit: int,
73
+ chunk_size: int,
74
+ ):
75
+ accumulated_time = 0.0
76
+ with open(gt_path, "r") as gf:
77
+ with open(pd_path, "r") as pf:
78
+ count = 0
79
+ groundtruths = []
80
+ predictions = []
81
+ for gline, pline in zip(gf, pf):
82
+
83
+ # groundtruth
84
+ gt_dict = json.loads(gline)
85
+ groundtruths.append(gt_dict)
86
+
87
+ # prediction
88
+ pd_dict = json.loads(pline)
89
+ predictions.append(pd_dict)
90
+
91
+ count += 1
92
+ if count >= limit and limit > 0:
93
+ break
94
+ elif len(groundtruths) < chunk_size or chunk_size == -1:
95
+ continue
96
+
97
+ timer, _ = time_it(loader.add_data_from_valor_dict)(
98
+ zip(groundtruths, predictions), True
99
+ )
100
+ accumulated_time += timer
101
+ groundtruths = []
102
+ predictions = []
103
+
104
+ if groundtruths:
105
+ timer, _ = time_it(loader.add_data_from_valor_dict)(
106
+ zip(groundtruths, predictions), True
107
+ )
108
+ accumulated_time += timer
109
+
110
+ return accumulated_time
111
+
112
+
113
+ @dataclass
114
+ class Benchmark:
115
+ limit: int
116
+ n_datums: int
117
+ n_groundtruths: int
118
+ n_predictions: int
119
+ n_labels: int
120
+ chunk_size: int
121
+ ingestion: float
122
+ preprocessing: float
123
+ precomputation: float
124
+ evaluation: float
125
+ detailed_evaluation: list[tuple[int, float]]
126
+
127
+ def result(self) -> dict:
128
+ return {
129
+ "limit": self.limit,
130
+ "n_datums": self.n_datums,
131
+ "n_groundtruths": self.n_groundtruths,
132
+ "n_predictions": self.n_predictions,
133
+ "n_labels": self.n_labels,
134
+ "chunk_size": self.chunk_size,
135
+ "ingestion": {
136
+ "loading_from_file": f"{round(self.ingestion - self.preprocessing, 2)} seconds",
137
+ "numpy_conversion": f"{round(self.preprocessing, 2)} seconds",
138
+ "finalization": f"{round(self.precomputation, 2)} seconds",
139
+ "total": f"{round(self.ingestion + self.precomputation, 2)} seconds",
140
+ },
141
+ "base_evaluation": f"{round(self.evaluation, 2)} seconds",
142
+ "detailed_evaluation": [
143
+ {
144
+ "n_points": 10,
145
+ "n_examples": curve[0],
146
+ "computation": f"{round(curve[1], 2)} seconds",
147
+ }
148
+ for curve in self.detailed_evaluation
149
+ ],
150
+ }
151
+
152
+
153
+ def run_benchmarking_analysis(
154
+ limits_to_test: list[int],
155
+ results_file: str = "clf_results.json",
156
+ chunk_size: int = -1,
157
+ ingestion_timeout=30,
158
+ evaluation_timeout=30,
159
+ ):
160
+ """Time various function calls and export the results."""
161
+ current_directory = Path(__file__).parent
162
+ write_path = current_directory / Path(results_file)
163
+
164
+ gt_filename = "gt_classification.jsonl"
165
+ pd_filename = "pd_classification.jsonl"
166
+
167
+ # cache data locally
168
+ for filename in [gt_filename, pd_filename]:
169
+ file_path = current_directory / Path(filename)
170
+ url = f"https://pub-fae71003f78140bdaedf32a7c8d331d2.r2.dev/{filename}"
171
+ download_data_if_not_exists(
172
+ file_name=filename, file_path=file_path, url=url
173
+ )
174
+
175
+ # iterate through datum limits
176
+ results = list()
177
+ for limit in limits_to_test:
178
+
179
+ # === Base Evaluation ===
180
+ loader = DataLoader()
181
+
182
+ # ingest + preprocess
183
+ (ingest_time, preprocessing_time,) = ingest(
184
+ loader=loader,
185
+ gt_path=current_directory / Path(gt_filename),
186
+ pd_path=current_directory / Path(pd_filename),
187
+ limit=limit,
188
+ chunk_size=chunk_size,
189
+ ) # type: ignore - time_it wrapper
190
+
191
+ finalization_time, evaluator = time_it(loader.finalize)()
192
+
193
+ if ingest_time > ingestion_timeout and ingestion_timeout != -1:
194
+ raise TimeoutError(
195
+ f"Base precomputation timed out with limit of {limit}."
196
+ )
197
+
198
+ # evaluate
199
+ eval_time, _ = time_it(evaluator.evaluate)()
200
+ if eval_time > evaluation_timeout and evaluation_timeout != -1:
201
+ raise TimeoutError(
202
+ f"Base evaluation timed out with {evaluator.n_datums} datums."
203
+ )
204
+
205
+ detail_no_examples_time, _ = time_it(evaluator.evaluate)(
206
+ metrics_to_return=[*MetricType.base(), MetricType.ConfusionMatrix],
207
+ )
208
+ if (
209
+ detail_no_examples_time > evaluation_timeout
210
+ and evaluation_timeout != -1
211
+ ):
212
+ raise TimeoutError(
213
+ f"Base evaluation timed out with {evaluator.n_datums} datums."
214
+ )
215
+
216
+ detail_three_examples_time, _ = time_it(evaluator.evaluate)(
217
+ metrics_to_return=[*MetricType.base(), MetricType.ConfusionMatrix],
218
+ number_of_examples=3,
219
+ )
220
+ if (
221
+ detail_three_examples_time > evaluation_timeout
222
+ and evaluation_timeout != -1
223
+ ):
224
+ raise TimeoutError(
225
+ f"Base evaluation timed out with {evaluator.n_datums} datums."
226
+ )
227
+
228
+ results.append(
229
+ Benchmark(
230
+ limit=limit,
231
+ n_datums=evaluator.n_datums,
232
+ n_groundtruths=evaluator.n_groundtruths,
233
+ n_predictions=evaluator.n_predictions,
234
+ n_labels=evaluator.n_labels,
235
+ chunk_size=chunk_size,
236
+ ingestion=ingest_time,
237
+ preprocessing=preprocessing_time,
238
+ precomputation=finalization_time,
239
+ evaluation=eval_time,
240
+ detailed_evaluation=[
241
+ (0, detail_no_examples_time),
242
+ (3, detail_three_examples_time),
243
+ ],
244
+ ).result()
245
+ )
246
+
247
+ write_results_to_file(write_path=write_path, results=results)
248
+
249
+
250
+ if __name__ == "__main__":
251
+
252
+ run_benchmarking_analysis(
253
+ limits_to_test=[5000, 5000, 5000],
254
+ )
@@ -67,7 +67,7 @@ def download_data_if_not_exists(
67
67
 
68
68
 
69
69
  def write_results_to_file(write_path: Path, results: list[dict]):
70
- """Write results to manager_results.json"""
70
+ """Write results to json"""
71
71
  current_datetime = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
72
72
  if os.path.isfile(write_path):
73
73
  with open(write_path, "r") as file:
@@ -178,7 +178,7 @@ class Benchmark:
178
178
  def run_benchmarking_analysis(
179
179
  limits_to_test: list[int],
180
180
  combinations: list[tuple[AnnotationType, AnnotationType]] | None = None,
181
- results_file: str = "manager_results.json",
181
+ results_file: str = "objdet_results.json",
182
182
  chunk_size: int = -1,
183
183
  compute_pr: bool = True,
184
184
  compute_detailed: bool = True,
@@ -12,9 +12,7 @@
12
12
  "\n",
13
13
  "In this notebook, we'll walk through a detailed example of how you can use Valor to evaluate object detections made on [the COCO Panoptic dataset](https://cocodataset.org/#home). We'll use Ultralytics' `YOLOv8` model to predict what objects exist in various COCO photographs and compare performance between bounding box and image segmentation results.\n",
14
14
  "\n",
15
- "For a conceptual introduction to Valor, [check out our project overview](https://striveworks.github.io/valor/). For a higher-level example notebook, [check out our \"Getting Started\" notebook](https://github.com/Striveworks/valor/blob/main/examples/getting_started.ipynb).\n",
16
- "\n",
17
- "Before using this notebook, please ensure that the Valor service is running on your machine (for start-up instructions, [click here](https://striveworks.github.io/valor/getting_started/)). To connect to a non-local instance of Valor, update `client = Client(\"http://0.0.0.0:8000\")` in the first code block to point to the correct URL."
15
+ "For a conceptual introduction to Valor, [check out our project overview](https://striveworks.github.io/valor/). For a higher-level example notebook, [check out our \"Getting Started\" notebook](https://github.com/Striveworks/valor/blob/main/examples/getting_started.ipynb)."
18
16
  ]
19
17
  },
20
18
  {
@@ -49,19 +47,6 @@
49
47
  "from valor_lite.detection import DataLoader, MetricType"
50
48
  ]
51
49
  },
52
- {
53
- "attachments": {},
54
- "cell_type": "markdown",
55
- "id": "a28f5e66",
56
- "metadata": {},
57
- "source": [
58
- "The modules included in `./integrations` are helper modules that demonstrate how to ingest datasets and model inferences into Valor. The depth of each integration varies depending on the use case. \n",
59
- "\n",
60
- "The `coco_integration` is designed to download, extract, and upload all in one command as you are starting off with all the the data. \n",
61
- "\n",
62
- "The `yolo_integration` is much simpler; it is a collection of parser functions that convert YOLO model results into Valor types."
63
- ]
64
- },
65
50
  {
66
51
  "cell_type": "code",
67
52
  "execution_count": 2,