valor-lite 0.36.5__py3-none-any.whl → 0.37.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. valor_lite/cache/__init__.py +11 -0
  2. valor_lite/cache/compute.py +211 -0
  3. valor_lite/cache/ephemeral.py +302 -0
  4. valor_lite/cache/persistent.py +536 -0
  5. valor_lite/classification/__init__.py +5 -10
  6. valor_lite/classification/annotation.py +4 -0
  7. valor_lite/classification/computation.py +233 -251
  8. valor_lite/classification/evaluator.py +882 -0
  9. valor_lite/classification/loader.py +97 -0
  10. valor_lite/classification/metric.py +141 -4
  11. valor_lite/classification/shared.py +184 -0
  12. valor_lite/classification/utilities.py +221 -118
  13. valor_lite/exceptions.py +5 -0
  14. valor_lite/object_detection/__init__.py +5 -4
  15. valor_lite/object_detection/annotation.py +13 -1
  16. valor_lite/object_detection/computation.py +367 -304
  17. valor_lite/object_detection/evaluator.py +804 -0
  18. valor_lite/object_detection/loader.py +292 -0
  19. valor_lite/object_detection/metric.py +152 -3
  20. valor_lite/object_detection/shared.py +206 -0
  21. valor_lite/object_detection/utilities.py +182 -109
  22. valor_lite/semantic_segmentation/__init__.py +5 -4
  23. valor_lite/semantic_segmentation/annotation.py +7 -0
  24. valor_lite/semantic_segmentation/computation.py +20 -110
  25. valor_lite/semantic_segmentation/evaluator.py +414 -0
  26. valor_lite/semantic_segmentation/loader.py +205 -0
  27. valor_lite/semantic_segmentation/shared.py +149 -0
  28. valor_lite/semantic_segmentation/utilities.py +6 -23
  29. {valor_lite-0.36.5.dist-info → valor_lite-0.37.5.dist-info}/METADATA +3 -1
  30. valor_lite-0.37.5.dist-info/RECORD +49 -0
  31. {valor_lite-0.36.5.dist-info → valor_lite-0.37.5.dist-info}/WHEEL +1 -1
  32. valor_lite/classification/manager.py +0 -545
  33. valor_lite/object_detection/manager.py +0 -865
  34. valor_lite/profiling.py +0 -374
  35. valor_lite/semantic_segmentation/benchmark.py +0 -237
  36. valor_lite/semantic_segmentation/manager.py +0 -446
  37. valor_lite-0.36.5.dist-info/RECORD +0 -41
  38. {valor_lite-0.36.5.dist-info → valor_lite-0.37.5.dist-info}/top_level.txt +0 -0
valor_lite/profiling.py DELETED
@@ -1,374 +0,0 @@
- import json
- import math
- import multiprocessing as mp
- import resource
- import time
- from collections import deque
- from multiprocessing import Queue
- from typing import Any
-
- from tqdm import tqdm
-
-
- class BenchmarkError(Exception):
-     def __init__(
-         self, benchmark: str, error_type: str, error_message: str
-     ) -> None:
-         super().__init__(
-             f"'{benchmark}' raised '{error_type}' with the following message: {error_message}"
-         )
-
-
- def _timeit_subprocess(*args, __fn, __queue: Queue, **kwargs):
-     """
-     Multiprocessing subprocess that reports either runtime or errors.
-
-     This is handled within a subprocess to protect the benchmark against OOM errors.
-     """
-     try:
-         timer_start = time.perf_counter()
-         __fn(*args, **kwargs)
-         timer_end = time.perf_counter()
-         __queue.put(timer_end - timer_start)
-     except Exception as e:
-         __queue.put(e)
-
-
- def create_runtime_profiler(
-     time_limit: float | None,
-     repeat: int = 1,
- ):
-     """
-     Creates a runtime profiler as a decorating function.
-
-     The profiler reports runtime of the wrapped function from a subprocess to protect against OOM errors.
-
-     Parameters
-     ----------
-     time_limit : float, optional
-         An optional time limit to constrain the benchmark.
-     repeat : int, default=1
-         The number of times to repeat the benchmark to produce an average runtime.
-     """
-     ctx = mp.get_context("spawn")
-
-     def decorator(fn):
-         def wrapper(*args, **kwargs):
-             # Record average runtime over repeated runs.
-             elapsed = 0
-             for _ in range(repeat):
-                 q = ctx.Queue()
-                 p = ctx.Process(
-                     target=_timeit_subprocess,
-                     args=args,
-                     kwargs={"__fn": fn, "__queue": q, **kwargs},
-                 )
-                 p.start()
-                 p.join(timeout=time_limit)
-
-                 # Check if computation finishes within the timeout
-                 if p.is_alive():
-                     p.terminate()
-                     p.join()
-                     q.close()
-                     q.join_thread()
-                     raise TimeoutError(
-                         f"Function '{fn.__name__}' did not complete within {time_limit} seconds."
-                     )
-
-                 # Retrieve the result
-                 result = q.get(timeout=1)
-                 if isinstance(result, Exception):
-                     raise result
-                 elif isinstance(result, float):
-                     elapsed += result
-                 else:
-                     raise TypeError(type(result).__name__)
-
-             return elapsed / repeat
-
-         return wrapper
-
-     return decorator
-
-
- def pretty_print_results(results: tuple):
-     valid, invalid, permutations = results
-
-     print(
-         "====================================================================="
-     )
-     print("Details")
-     print(json.dumps(permutations, indent=4))
-
-     if len(valid) > 0:
-         print()
-         print("Passed")
-         keys = ["complexity", "runtime", *valid[0]["details"].keys()]
-         header = " | ".join(f"{header:^15}" for header in keys)
-         print(header)
-         print("-" * len(header))
-         for entry in valid:
-             values = [
-                 entry["complexity"],
-                 round(entry["runtime"], 4),
-                 *entry["details"].values(),
-             ]
-             row = " | ".join(f"{str(value):^15}" for value in values)
-             print(row)
-
-     if len(invalid) > 0:
-         print()
-         print("Failed")
-         keys = ["complexity", "error", *invalid[0]["details"].keys(), "msg"]
-         header = " | ".join(f"{header:^15}" for header in keys)
-         print(header)
-         print("-" * len(header))
-         for entry in invalid:
-             values = [
-                 entry["complexity"],
-                 entry["error"],
-                 *entry["details"].values(),
-                 entry["msg"],
-             ]
-             row = " | ".join(f"{str(value):^15}" for value in values)
-             print(row)
-
-
- def _calculate_complexity(params: list[int | tuple[int]]) -> int:
-     """
-     Basic metric of benchmark complexity.
-     """
-     flattened_params = [
-         math.prod(p) if isinstance(p, tuple) else p for p in params
-     ]
-     return math.prod(flattened_params)
-
-
- class Benchmark:
-     def __init__(
-         self,
-         time_limit: float | None,
-         memory_limit: int | None,
-         *_,
-         repeat: int | None = 1,
-         verbose: bool = False,
-     ):
-         self.time_limit = time_limit
-         self.memory_limit = memory_limit
-         self.repeat = repeat
-         self.verbose = verbose
-
-     def get_limits(
-         self,
-         *_,
-         readable: bool = True,
-         memory_unit: str = "GB",
-         time_unit: str = "seconds",
-     ) -> dict[str, str | int | float | None]:
-         """
-         Returns a dictionary of benchmark limits.
-
-         Parameters
-         ----------
-         readable : bool, default=True
-             Toggles whether the output should be human readable.
-         memory_unit : str, default="GB"
-             Toggles what unit to display the memory limit with when 'readable=True'.
-         time_unit : str, default="seconds"
-             Toggles what unit to display the time limit with when 'readable=True'.
-
-         Returns
-         -------
-         dict[str, str | int | float | None]
-             The benchmark limits.
-         """
-
-         memory_value = self.memory_limit
-         if readable and memory_value is not None:
-             match memory_unit:
-                 case "TB":
-                     memory_value /= 1024**4
-                 case "GB":
-                     memory_value /= 1024**3
-                 case "MB":
-                     memory_value /= 1024**2
-                 case "KB":
-                     memory_value /= 1024
-                 case "B":
-                     pass
-                 case _:
-                     valid_set = {"TB", "GB", "MB", "KB", "B"}
-                     raise ValueError(
-                         f"Expected memory unit to be in the set {valid_set}, received '{memory_unit}'."
-                     )
-             memory_value = f"{memory_value} {memory_unit}"
-
-         time_value = self.time_limit
-         if readable and time_value is not None:
-             match time_unit:
-                 case "minutes":
-                     time_value /= 60
-                 case "seconds":
-                     pass
-                 case "milliseconds":
-                     time_value *= 1000
-                 case _:
-                     valid_set = {"minutes", "seconds", "milliseconds"}
-                     raise ValueError(
-                         f"Expected time unit to be in the set {valid_set}, received '{time_unit}'."
-                     )
-             time_value = f"{time_value} {time_unit}"
-
-         return {
-             "memory_limit": memory_value,
-             "time_limit": time_value,
-             "repeat": self.repeat,
-         }
-
-     @property
-     def memory_limit(self) -> int | None:
-         """
-         The memory limit in bytes (B).
-         """
-         return self._memory_limit
-
-     @memory_limit.setter
-     def memory_limit(self, limit: int | None):
-         """
-         Stores the memory limit and restricts resources.
-         """
-         self._memory_limit = limit
-         if limit is not None:
-             _, hard = resource.getrlimit(resource.RLIMIT_AS)
-             resource.setrlimit(resource.RLIMIT_AS, (limit, hard))
-
-     def run(
-         self,
-         benchmark,
-         **kwargs: list[Any],
-     ):
-         """
-         Runs a benchmark with ranges of parameters.
-
-         Parameters
-         ----------
-         benchmark : Callable
-             The benchmark function.
-         **kwargs : list[Any]
-             Keyword arguments passing lists of parameters to benchmark. The values should be sorted in
-             decreasing complexity. For example, if the number of labels is a parameter then a higher
-             number of unique labels would be considered "more" complex.
-
-         Example
-         -------
-         >>> b = Benchmark(
-         ...     time_limit=10.0,
-         ...     memory_limit=8 * (1024**3),
-         ...     repeat=1,
-         ...     verbose=False,
-         ... )
-         >>> results = b.run(
-         ...     benchmark=semseg_add_data,
-         ...     n_labels=[
-         ...         100,
-         ...         10,
-         ...     ],
-         ...     shape=[
-         ...         (1000, 1000),
-         ...         (100, 100),
-         ...     ],
-         ... )
-         """
-
-         nvars = len(kwargs)
-         keys = tuple(kwargs.keys())
-         vars = tuple(kwargs[key] for key in keys)
-
-         initial_indices = tuple(0 for _ in range(nvars))
-         max_indices = tuple(len(v) for v in vars)
-         permutations = math.prod(max_indices)
-
-         # Initialize queue with the starting index (0, ...)
-         queue = deque()
-         queue.append(initial_indices)
-
-         # Keep track of explored combinations to avoid duplicates
-         explored = set()
-         explored.add(initial_indices)
-
-         # Store valid combinations that finish within the time limit
-         valid_combinations = []
-         invalid_combinations = []
-
-         pbar = tqdm(total=math.prod(max_indices), disable=(not self.verbose))
-         prev_count = 0
-         while queue:
-
-             current_indices = queue.popleft()
-             parameters = {
-                 k: v[current_indices[idx]]
-                 for idx, (k, v) in enumerate(zip(keys, vars))
-             }
-             complexity = _calculate_complexity(list(parameters.values()))
-
-             details: dict = {k: str(v) for k, v in parameters.items()}
-
-             # update terminal with status
-             count = len(valid_combinations) + len(invalid_combinations)
-             pbar.update(count - prev_count)
-             prev_count = count
-
-             try:
-                 runtime = benchmark(
-                     time_limit=self.time_limit,
-                     repeat=self.repeat,
-                     **parameters,
-                 )
-                 valid_combinations.append(
-                     {
-                         "complexity": complexity,
-                         "runtime": runtime,
-                         "details": details,
-                     }
-                 )
-                 continue
-             except Exception as e:
-                 invalid_combinations.append(
-                     {
-                         "complexity": complexity,
-                         "error": type(e).__name__,
-                         "msg": str(e),
-                         "details": details,
-                     }
-                 )
-
-             for idx in range(nvars):
-                 new_indices = list(current_indices)
-                 if new_indices[idx] + 1 < max_indices[idx]:
-                     new_indices[idx] += 1
-                     new_indices_tuple = tuple(new_indices)
-                     if new_indices_tuple not in explored:
-                         queue.append(new_indices_tuple)
-                         explored.add(new_indices_tuple)
-
-         valid_combinations.sort(key=lambda x: -x["complexity"])
-         invalid_combinations.sort(key=lambda x: -x["complexity"])
-
-         # clear terminal and display results
-         results = (
-             valid_combinations,
-             invalid_combinations,
-             {
-                 "benchmark": benchmark.__name__,
-                 "limits": self.get_limits(readable=True),
-                 "passed": permutations - len(invalid_combinations),
-                 "failed": len(invalid_combinations),
-                 "total": permutations,
-             },
-         )
-         pbar.close()
-         if self.verbose:
-             pretty_print_results(results)
-
-         return results
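For reference, the removed profiler was used by wrapping a callable and then invoking the wrapped version with the benchmark's arguments; the call runs the workload in a spawned subprocess, returns the average runtime in seconds, and raises TimeoutError if any run exceeds the limit. The sketch below is hypothetical (expensive_step and its workload are invented for illustration) and assumes a valor-lite 0.36.x install where the module above still exists.

# usage sketch (not part of the diff); assumes valor-lite <= 0.36.x
import numpy as np

from valor_lite.profiling import create_runtime_profiler


def expensive_step(n: int) -> None:
    # hypothetical workload standing in for any function under test
    np.linalg.inv(np.random.rand(n, n) + n * np.eye(n))


if __name__ == "__main__":
    # each timed run executes in a fresh "spawn" subprocess, so an OOM kill
    # or a hang cannot take down the parent benchmark process
    profile = create_runtime_profiler(time_limit=10.0, repeat=3)
    average_seconds = profile(expensive_step)(1_000)
    print(f"average runtime: {average_seconds:.4f}s")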
valor_lite/semantic_segmentation/benchmark.py DELETED
@@ -1,237 +0,0 @@
- import numpy as np
-
- from valor_lite.profiling import create_runtime_profiler
- from valor_lite.semantic_segmentation import Bitmask, DataLoader, Segmentation
-
-
- def generate_segmentation(
-     datum_uid: str,
-     number_of_unique_labels: int,
-     mask_height: int,
-     mask_width: int,
- ) -> Segmentation:
-     """
-     Generates a semantic segmentation annotation.
-
-     Parameters
-     ----------
-     datum_uid : str
-         The datum UID for the generated segmentation.
-     number_of_unique_labels : int
-         The number of unique labels.
-     mask_height : int
-         The height of the mask in pixels.
-     mask_width : int
-         The width of the mask in pixels.
-
-     Returns
-     -------
-     Segmentation
-         A generated semantic segmentation annotation.
-     """
-
-     if number_of_unique_labels > 1:
-         common_proba = 0.4 / (number_of_unique_labels - 1)
-         min_proba = min(common_proba, 0.1)
-         labels = [str(i) for i in range(number_of_unique_labels)] + [None]
-         proba = (
-             [0.5]
-             + [common_proba for _ in range(number_of_unique_labels - 1)]
-             + [0.1]
-         )
-     elif number_of_unique_labels == 1:
-         labels = ["0", None]
-         proba = [0.9, 0.1]
-         min_proba = 0.1
-     else:
-         raise ValueError(
-             "The number of unique labels should be greater than zero."
-         )
-
-     probabilities = np.array(proba, dtype=np.float64)
-     weights = (probabilities / min_proba).astype(np.int32)
-
-     indices = np.random.choice(
-         np.arange(len(weights)),
-         size=(mask_height * 2, mask_width),
-         p=probabilities,
-     )
-
-     N = len(labels)
-
-     masks = np.arange(N)[:, None, None] == indices
-
-     gts = []
-     pds = []
-     for lidx in range(N):
-         label = labels[lidx]
-         if label is None:
-             continue
-         gts.append(
-             Bitmask(
-                 mask=masks[lidx, :mask_height, :],
-                 label=label,
-             )
-         )
-         pds.append(
-             Bitmask(
-                 mask=masks[lidx, mask_height:, :],
-                 label=label,
-             )
-         )
-
-     return Segmentation(
-         uid=datum_uid,
-         groundtruths=gts,
-         predictions=pds,
-         shape=(mask_height, mask_width),
-     )
-
-
- def benchmark_add_data(
-     n_labels: int,
-     shape: tuple[int, int],
-     time_limit: float | None,
-     repeat: int = 1,
- ) -> float:
-     """
-     Benchmarks 'DataLoader.add_data' for semantic segmentation.
-
-     Parameters
-     ----------
-     n_labels : int
-         The number of unique labels to generate.
-     shape : tuple[int, int]
-         The size (h,w) of the mask to generate.
-     time_limit : float, optional
-         An optional time limit to constrain the benchmark.
-     repeat : int
-         The number of times to run the benchmark to produce a runtime average.
-
-     Returns
-     -------
-     float
-         The average runtime.
-     """
-
-     profile = create_runtime_profiler(
-         time_limit=time_limit,
-         repeat=repeat,
-     )
-
-     elapsed = 0
-     for _ in range(repeat):
-         data = generate_segmentation(
-             datum_uid="uid",
-             number_of_unique_labels=n_labels,
-             mask_height=shape[0],
-             mask_width=shape[1],
-         )
-         loader = DataLoader()
-         elapsed += profile(loader.add_data)([data])
-     return elapsed / repeat
-
-
- def benchmark_finalize(
-     n_datums: int,
-     n_labels: int,
-     time_limit: float | None,
-     repeat: int = 1,
- ):
-     """
-     Benchmarks 'DataLoader.finalize' for semantic segmentation.
-
-     Parameters
-     ----------
-     n_datums : int
-         The number of datums to generate.
-     n_labels : int
-         The number of unique labels to generate.
-     time_limit : float, optional
-         An optional time limit to constrain the benchmark.
-     repeat : int
-         The number of times to run the benchmark to produce a runtime average.
-
-     Returns
-     -------
-     float
-         The average runtime.
-     """
-
-     profile = create_runtime_profiler(
-         time_limit=time_limit,
-         repeat=repeat,
-     )
-
-     elapsed = 0
-     for _ in range(repeat):
-
-         data = [
-             generate_segmentation(
-                 datum_uid=str(i),
-                 number_of_unique_labels=n_labels,
-                 mask_height=5,
-                 mask_width=5,
-             )
-             for i in range(10)
-         ]
-         loader = DataLoader()
-         for datum_idx in range(n_datums):
-             segmentation = data[datum_idx % 10]
-             segmentation.uid = str(datum_idx)
-             loader.add_data([segmentation])
-         elapsed += profile(loader.finalize)()
-     return elapsed / repeat
-
-
- def benchmark_evaluate(
-     n_datums: int,
-     n_labels: int,
-     time_limit: float | None,
-     repeat: int = 1,
- ):
-     """
-     Benchmarks 'Evaluator.evaluate' for semantic segmentation.
-
-     Parameters
-     ----------
-     n_datums : int
-         The number of datums to generate.
-     n_labels : int
-         The number of unique labels to generate.
-     time_limit : float, optional
-         An optional time limit to constrain the benchmark.
-     repeat : int
-         The number of times to run the benchmark to produce a runtime average.
-
-     Returns
-     -------
-     float
-         The average runtime.
-     """
-
-     profile = create_runtime_profiler(
-         time_limit=time_limit,
-         repeat=repeat,
-     )
-
-     elapsed = 0
-     for _ in range(repeat):
-
-         data = [
-             generate_segmentation(
-                 datum_uid=str(i),
-                 number_of_unique_labels=n_labels,
-                 mask_height=5,
-                 mask_width=5,
-             )
-             for i in range(10)
-         ]
-         loader = DataLoader()
-         for datum_idx in range(n_datums):
-             segmentation = data[datum_idx % 10]
-             segmentation.uid = str(datum_idx)
-             loader.add_data([segmentation])
-         evaluator = loader.finalize()
-         elapsed += profile(evaluator.evaluate)()
-     return elapsed / repeat
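These removed functions were the semantic segmentation entry points for the Benchmark runner deleted above (its docstring example refers to a semseg_add_data helper, which appears to correspond to benchmark_add_data here). Below is a minimal, hypothetical sketch of how the two deleted modules composed on a 0.36.x install; the script name and parameter values are illustrative only.

# run_benchmarks.py (hypothetical, not part of the diff); assumes valor-lite <= 0.36.x
from valor_lite.profiling import Benchmark, pretty_print_results
from valor_lite.semantic_segmentation.benchmark import (
    benchmark_add_data,
    benchmark_evaluate,
)

if __name__ == "__main__":
    b = Benchmark(
        time_limit=10.0,
        memory_limit=8 * (1024**3),  # 8 GB, applied via resource.setrlimit
        repeat=1,
        verbose=False,
    )
    # Parameter lists run from most to least complex; Benchmark.run starts at the
    # most complex combination and only relaxes one parameter at a time after a failure.
    results = b.run(
        benchmark=benchmark_add_data,
        n_labels=[100, 10],
        shape=[(1000, 1000), (100, 100)],
    )
    pretty_print_results(results)

    # benchmark_evaluate takes datum/label counts rather than a mask shape.
    results = b.run(
        benchmark=benchmark_evaluate,
        n_datums=[1000, 100],
        n_labels=[10, 2],
    )
    pretty_print_results(results)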