valor-lite 0.33.17__tar.gz → 0.33.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {valor_lite-0.33.17/valor_lite.egg-info → valor_lite-0.33.19}/PKG-INFO +1 -1
- valor_lite-0.33.19/benchmarks/synthetic/benchmark_semantic_segmentation.py +94 -0
- valor_lite-0.33.19/examples/benchmarking.ipynb +279 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/examples/object-detection.ipynb +4 -4
- {valor_lite-0.33.17 → valor_lite-0.33.19}/examples/tabular_classification.ipynb +1 -1
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_confusion_matrix.py +21 -21
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_dataloader.py +1 -1
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/conftest.py +2 -2
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_accuracy.py +3 -3
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_average_precision.py +2 -2
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_average_recall.py +2 -2
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_confusion_matrix.py +145 -135
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_counts.py +2 -2
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_f1.py +3 -3
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_filtering.py +6 -6
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_precision.py +3 -3
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_recall.py +2 -2
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_annotation.py +57 -1
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_confusion_matrix.py +4 -4
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/classification/computation.py +6 -6
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/classification/metric.py +6 -6
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/classification/utilities.py +10 -8
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/object_detection/computation.py +14 -14
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/object_detection/manager.py +6 -2
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/object_detection/metric.py +12 -12
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/object_detection/utilities.py +21 -19
- valor_lite-0.33.19/valor_lite/profiling.py +374 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/semantic_segmentation/__init__.py +2 -1
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/semantic_segmentation/annotation.py +84 -1
- valor_lite-0.33.19/valor_lite/semantic_segmentation/benchmark.py +151 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/semantic_segmentation/computation.py +20 -33
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/semantic_segmentation/manager.py +6 -2
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/semantic_segmentation/metric.py +10 -10
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/semantic_segmentation/utilities.py +6 -6
- {valor_lite-0.33.17 → valor_lite-0.33.19/valor_lite.egg-info}/PKG-INFO +1 -1
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite.egg-info/SOURCES.txt +4 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/LICENSE +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/README.md +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/benchmarks/.gitignore +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/benchmarks/benchmark_classification.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/benchmarks/benchmark_objdet.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/examples/.gitignore +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/examples/text_generation.ipynb +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/pyproject.toml +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/setup.cfg +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/__init__.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/__init__.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/conftest.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_accuracy.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_counts.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_evaluator.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_f1.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_filtering.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_metric.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_precision.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_recall.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_rocauc.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_schemas.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_stability.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/__init__.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_dataloader.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_evaluator.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_iou.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_metric.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_pr_curve.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_schemas.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_stability.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/__init__.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/conftest.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_accuracy.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_dataloader.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_evaluator.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_f1.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_filtering.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_iou.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_metric.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_precision.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_recall.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_stability.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/__init__.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/conftest.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/llm/__init__.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/llm/test_generation.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/llm/test_integrations.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/llm/test_utilities.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/llm/test_validators.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_answer_correctness.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_answer_relevance.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_bias.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_context_precision.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_context_recall.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_context_relevance.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_faithfulness.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_hallucination.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_metric.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_rouge.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_sentence_bleu.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_summary_coherence.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_toxicity.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/test_evaluator.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/test_manager.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/LICENSE +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/__init__.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/classification/__init__.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/classification/annotation.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/classification/manager.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/object_detection/__init__.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/object_detection/annotation.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/schemas.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/__init__.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/annotation.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/computation.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/llm/__init__.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/llm/exceptions.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/llm/generation.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/llm/instructions.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/llm/integrations.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/llm/utilities.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/llm/validators.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/manager.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/metric.py +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite.egg-info/dependency_links.txt +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite.egg-info/requires.txt +0 -0
- {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
from valor_lite.profiling import Benchmark, BenchmarkError
|
|
2
|
+
from valor_lite.semantic_segmentation.benchmark import (
|
|
3
|
+
benchmark_add_data,
|
|
4
|
+
benchmark_evaluate,
|
|
5
|
+
benchmark_finalize,
|
|
6
|
+
)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def benchmark(
    bitmask_shape: tuple[int, int],
    number_of_unique_labels: int,
    number_of_images: int,
    *_,
    memory_limit: float = 4.0,
    time_limit: float = 10.0,
    repeat: int = 1,
    verbose: bool = False,
) -> None:
    """
    Runs a single semantic segmentation benchmark.

    Three stages are executed in order (add data, finalize, evaluate), each
    through the same `Benchmark` runner. The run stops at the first stage
    that reports a failure.

    Parameters
    ----------
    bitmask_shape : tuple[int, int]
        The size (h, w) of the bitmask array.
    number_of_unique_labels : int
        The number of unique labels used in the synthetic example.
    number_of_images : int
        The number of distinct datums that are created. Only the finalize
        and evaluate stages receive this value.
    *_
        Any additional positional arguments are accepted and ignored; the
        remaining options must therefore be passed by keyword.
    memory_limit : float, default=4.0
        The maximum amount of system memory allowed in gigabytes (GB).
        Converted to bytes (1 GB = 1024**3 bytes) before being handed to
        the runner.
    time_limit : float, default=10.0
        The maximum amount of time, in seconds, permitted before killing
        the benchmark.
    repeat : int, default=1
        The number of times to run a benchmark to produce an average runtime.
    verbose : bool, default=False
        Toggles terminal output of benchmark results.

    Raises
    ------
    BenchmarkError
        If any stage reports at least one failed run. The error carries the
        stage name together with the type and message of the first failure.
    """

    runner = Benchmark(
        time_limit=time_limit,
        memory_limit=int(memory_limit * (1024**3)),
        repeat=repeat,
        verbose=verbose,
    )

    # Each stage is paired with the single-valued parameter lists it is run
    # with. Keyword order is kept exactly as in the original calls.
    stages = (
        (
            benchmark_add_data,
            {
                "n_labels": [number_of_unique_labels],
                "shape": [bitmask_shape],
            },
        ),
        (
            benchmark_finalize,
            {
                "n_datums": [number_of_images],
                "n_labels": [number_of_unique_labels],
            },
        ),
        (
            benchmark_evaluate,
            {
                "n_datums": [number_of_images],
                "n_labels": [number_of_unique_labels],
            },
        ),
    )

    for stage, parameters in stages:
        _, failed, details = runner.run(benchmark=stage, **parameters)
        if failed:
            # Only the first recorded failure is surfaced to the caller.
            first_failure = failed[0]
            raise BenchmarkError(
                benchmark=details["benchmark"],
                error_type=first_failure["error"],
                error_message=first_failure["msg"],
            )
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
if __name__ == "__main__":

    # Default synthetic scenario used when the module is run as a script.
    example_settings = {
        "bitmask_shape": (4000, 4000),
        "number_of_images": 1000,
        "number_of_unique_labels": 10,
        "memory_limit": 4.0,
        "time_limit": 10.0,
        "repeat": 1,
        "verbose": True,
    }
    benchmark(**example_settings)
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
{
|
|
2
|
+
"cells": [
|
|
3
|
+
{
|
|
4
|
+
"cell_type": "code",
|
|
5
|
+
"execution_count": 1,
|
|
6
|
+
"metadata": {},
|
|
7
|
+
"outputs": [],
|
|
8
|
+
"source": [
|
|
9
|
+
"from valor_lite.profiling import Benchmark\n",
|
|
10
|
+
"\n",
|
|
11
|
+
"b = Benchmark(\n",
|
|
12
|
+
" time_limit=5.0, # 5s\n",
|
|
13
|
+
" memory_limit=8 * (1024 ** 3), # 8 GB\n",
|
|
14
|
+
" repeat=1,\n",
|
|
15
|
+
" verbose=True,\n",
|
|
16
|
+
")"
|
|
17
|
+
]
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"attachments": {},
|
|
21
|
+
"cell_type": "markdown",
|
|
22
|
+
"metadata": {},
|
|
23
|
+
"source": [
|
|
24
|
+
"# Semantic Segmentation"
|
|
25
|
+
]
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"cell_type": "code",
|
|
29
|
+
"execution_count": 2,
|
|
30
|
+
"metadata": {},
|
|
31
|
+
"outputs": [],
|
|
32
|
+
"source": [
|
|
33
|
+
"from valor_lite.semantic_segmentation.benchmark import (\n",
|
|
34
|
+
" benchmark_add_data as semseg_add_data,\n",
|
|
35
|
+
" benchmark_finalize as semseg_finalize,\n",
|
|
36
|
+
" benchmark_evaluate as semseg_evaluate,\n",
|
|
37
|
+
")"
|
|
38
|
+
]
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"cell_type": "code",
|
|
42
|
+
"execution_count": 3,
|
|
43
|
+
"metadata": {},
|
|
44
|
+
"outputs": [],
|
|
45
|
+
"source": [
|
|
46
|
+
"n_datums = [\n",
|
|
47
|
+
" 10000,\n",
|
|
48
|
+
" 1000,\n",
|
|
49
|
+
" 100,\n",
|
|
50
|
+
" 10,\n",
|
|
51
|
+
" 1,\n",
|
|
52
|
+
"]\n",
|
|
53
|
+
"\n",
|
|
54
|
+
"n_labels = [\n",
|
|
55
|
+
" 1000,\n",
|
|
56
|
+
" 100,\n",
|
|
57
|
+
" 10,\n",
|
|
58
|
+
" 3,\n",
|
|
59
|
+
"]\n",
|
|
60
|
+
"\n",
|
|
61
|
+
"shapes = [\n",
|
|
62
|
+
" (10000, 10000),\n",
|
|
63
|
+
" (2500, 2500),\n",
|
|
64
|
+
" (1000, 1000),\n",
|
|
65
|
+
" (100, 100),\n",
|
|
66
|
+
"]"
|
|
67
|
+
]
|
|
68
|
+
},
|
|
69
|
+
{
|
|
70
|
+
"cell_type": "code",
|
|
71
|
+
"execution_count": 4,
|
|
72
|
+
"metadata": {},
|
|
73
|
+
"outputs": [
|
|
74
|
+
{
|
|
75
|
+
"name": "stderr",
|
|
76
|
+
"output_type": "stream",
|
|
77
|
+
"text": [
|
|
78
|
+
" 69%|██████▉ | 11/16 [00:46<00:21, 4.26s/it]"
|
|
79
|
+
]
|
|
80
|
+
},
|
|
81
|
+
{
|
|
82
|
+
"name": "stdout",
|
|
83
|
+
"output_type": "stream",
|
|
84
|
+
"text": [
|
|
85
|
+
"=====================================================================\n",
|
|
86
|
+
"Details\n",
|
|
87
|
+
"{\n",
|
|
88
|
+
" \"benchmark\": \"benchmark_add_data\",\n",
|
|
89
|
+
" \"limits\": {\n",
|
|
90
|
+
" \"memory_limit\": \"8.0 GB\",\n",
|
|
91
|
+
" \"time_limit\": \"5.0 seconds\",\n",
|
|
92
|
+
" \"repeat\": 1\n",
|
|
93
|
+
" },\n",
|
|
94
|
+
" \"passed\": 8,\n",
|
|
95
|
+
" \"failed\": 8,\n",
|
|
96
|
+
" \"total\": 16\n",
|
|
97
|
+
"}\n",
|
|
98
|
+
"\n",
|
|
99
|
+
"Passed\n",
|
|
100
|
+
" complexity | runtime | n_labels | shape \n",
|
|
101
|
+
"---------------------------------------------------------------------\n",
|
|
102
|
+
" 300000000 | 1.5151 | 3 | (10000, 10000) \n",
|
|
103
|
+
" 62500000 | 0.5952 | 10 | (2500, 2500) \n",
|
|
104
|
+
" 10000000 | 0.0911 | 10 | (1000, 1000) \n",
|
|
105
|
+
" 1000000 | 0.0582 | 100 | (100, 100) \n",
|
|
106
|
+
"\n",
|
|
107
|
+
"Failed\n",
|
|
108
|
+
" complexity | error | n_labels | shape | msg \n",
|
|
109
|
+
"---------------------------------------------------------------------------------------\n",
|
|
110
|
+
" 100000000000 | MemoryError | 1000 | (10000, 10000) | Unable to allocate 186. GiB for an array with shape (1001, 20000, 10000) and data type bool\n",
|
|
111
|
+
" 10000000000 | MemoryError | 100 | (10000, 10000) | Unable to allocate 18.8 GiB for an array with shape (101, 20000, 10000) and data type bool\n",
|
|
112
|
+
" 6250000000 | MemoryError | 1000 | (2500, 2500) | Unable to allocate 11.7 GiB for an array with shape (1001, 5000, 2500) and data type bool\n",
|
|
113
|
+
" 1000000000 | MemoryError | 10 | (10000, 10000) | Unable to allocate 9.31 GiB for an array with shape (10, 10, 100000000) and data type bool\n",
|
|
114
|
+
" 1000000000 | MemoryError | 1000 | (1000, 1000) | Unable to allocate 931. GiB for an array with shape (1000, 1000, 1000000) and data type bool\n",
|
|
115
|
+
" 625000000 | MemoryError | 100 | (2500, 2500) | Unable to allocate 58.2 GiB for an array with shape (100, 100, 6250000) and data type bool\n",
|
|
116
|
+
" 100000000 | MemoryError | 100 | (1000, 1000) | Unable to allocate 9.31 GiB for an array with shape (100, 100, 1000000) and data type bool\n",
|
|
117
|
+
" 10000000 | MemoryError | 1000 | (100, 100) | Unable to allocate 9.31 GiB for an array with shape (1000, 1000, 10000) and data type bool\n"
|
|
118
|
+
]
|
|
119
|
+
},
|
|
120
|
+
{
|
|
121
|
+
"name": "stderr",
|
|
122
|
+
"output_type": "stream",
|
|
123
|
+
"text": [
|
|
124
|
+
"\n"
|
|
125
|
+
]
|
|
126
|
+
}
|
|
127
|
+
],
|
|
128
|
+
"source": [
|
|
129
|
+
"_ = b.run(\n",
|
|
130
|
+
" benchmark=semseg_add_data,\n",
|
|
131
|
+
" n_labels=n_labels,\n",
|
|
132
|
+
" shape=shapes,\n",
|
|
133
|
+
")"
|
|
134
|
+
]
|
|
135
|
+
},
|
|
136
|
+
{
|
|
137
|
+
"cell_type": "code",
|
|
138
|
+
"execution_count": 5,
|
|
139
|
+
"metadata": {},
|
|
140
|
+
"outputs": [
|
|
141
|
+
{
|
|
142
|
+
"name": "stderr",
|
|
143
|
+
"output_type": "stream",
|
|
144
|
+
"text": [
|
|
145
|
+
" 20%|██ | 4/20 [02:35<10:22, 38.92s/it]"
|
|
146
|
+
]
|
|
147
|
+
},
|
|
148
|
+
{
|
|
149
|
+
"name": "stdout",
|
|
150
|
+
"output_type": "stream",
|
|
151
|
+
"text": [
|
|
152
|
+
"=====================================================================\n",
|
|
153
|
+
"Details\n",
|
|
154
|
+
"{\n",
|
|
155
|
+
" \"benchmark\": \"benchmark_finalize\",\n",
|
|
156
|
+
" \"limits\": {\n",
|
|
157
|
+
" \"memory_limit\": \"8.0 GB\",\n",
|
|
158
|
+
" \"time_limit\": \"5.0 seconds\",\n",
|
|
159
|
+
" \"repeat\": 1\n",
|
|
160
|
+
" },\n",
|
|
161
|
+
" \"passed\": 18,\n",
|
|
162
|
+
" \"failed\": 2,\n",
|
|
163
|
+
" \"total\": 20\n",
|
|
164
|
+
"}\n",
|
|
165
|
+
"\n",
|
|
166
|
+
"Passed\n",
|
|
167
|
+
" complexity | runtime | n_datums | n_labels \n",
|
|
168
|
+
"---------------------------------------------------------------------\n",
|
|
169
|
+
" 1000000 | 1.1142 | 10000 | 100 \n",
|
|
170
|
+
" 100000 | 0.1748 | 100 | 1000 \n",
|
|
171
|
+
" 100000 | 0.1086 | 1000 | 100 \n",
|
|
172
|
+
"\n",
|
|
173
|
+
"Failed\n",
|
|
174
|
+
" complexity | error | n_datums | n_labels | msg \n",
|
|
175
|
+
"---------------------------------------------------------------------------------------\n",
|
|
176
|
+
" 10000000 | MemoryError | 10000 | 1000 | Unable to allocate 7.63 MiB for an array with shape (1000, 1000) and data type int64\n",
|
|
177
|
+
" 1000000 | MemoryError | 1000 | 1000 | \n"
|
|
178
|
+
]
|
|
179
|
+
},
|
|
180
|
+
{
|
|
181
|
+
"name": "stderr",
|
|
182
|
+
"output_type": "stream",
|
|
183
|
+
"text": [
|
|
184
|
+
"\n"
|
|
185
|
+
]
|
|
186
|
+
}
|
|
187
|
+
],
|
|
188
|
+
"source": [
|
|
189
|
+
"_ = b.run(\n",
|
|
190
|
+
" benchmark=semseg_finalize,\n",
|
|
191
|
+
" n_datums=n_datums,\n",
|
|
192
|
+
" n_labels=n_labels,\n",
|
|
193
|
+
")"
|
|
194
|
+
]
|
|
195
|
+
},
|
|
196
|
+
{
|
|
197
|
+
"cell_type": "code",
|
|
198
|
+
"execution_count": 6,
|
|
199
|
+
"metadata": {},
|
|
200
|
+
"outputs": [
|
|
201
|
+
{
|
|
202
|
+
"name": "stderr",
|
|
203
|
+
"output_type": "stream",
|
|
204
|
+
"text": [
|
|
205
|
+
" 20%|██ | 4/20 [02:25<09:40, 36.28s/it]"
|
|
206
|
+
]
|
|
207
|
+
},
|
|
208
|
+
{
|
|
209
|
+
"name": "stdout",
|
|
210
|
+
"output_type": "stream",
|
|
211
|
+
"text": [
|
|
212
|
+
"=====================================================================\n",
|
|
213
|
+
"Details\n",
|
|
214
|
+
"{\n",
|
|
215
|
+
" \"benchmark\": \"benchmark_evaluate\",\n",
|
|
216
|
+
" \"limits\": {\n",
|
|
217
|
+
" \"memory_limit\": \"8.0 GB\",\n",
|
|
218
|
+
" \"time_limit\": \"5.0 seconds\",\n",
|
|
219
|
+
" \"repeat\": 1\n",
|
|
220
|
+
" },\n",
|
|
221
|
+
" \"passed\": 18,\n",
|
|
222
|
+
" \"failed\": 2,\n",
|
|
223
|
+
" \"total\": 20\n",
|
|
224
|
+
"}\n",
|
|
225
|
+
"\n",
|
|
226
|
+
"Passed\n",
|
|
227
|
+
" complexity | runtime | n_datums | n_labels \n",
|
|
228
|
+
"---------------------------------------------------------------------\n",
|
|
229
|
+
" 1000000 | 0.0537 | 10000 | 100 \n",
|
|
230
|
+
" 100000 | 0.0815 | 100 | 1000 \n",
|
|
231
|
+
" 100000 | 0.0137 | 1000 | 100 \n",
|
|
232
|
+
"\n",
|
|
233
|
+
"Failed\n",
|
|
234
|
+
" complexity | error | n_datums | n_labels | msg \n",
|
|
235
|
+
"---------------------------------------------------------------------------------------\n",
|
|
236
|
+
" 10000000 | MemoryError | 10000 | 1000 | Unable to allocate 23.8 MiB for an array with shape (1000, 1000, 25) and data type bool\n",
|
|
237
|
+
" 1000000 | MemoryError | 1000 | 1000 | Unable to allocate 3.73 GiB for an array with shape (1000, 1001, 1001) and data type int32\n"
|
|
238
|
+
]
|
|
239
|
+
},
|
|
240
|
+
{
|
|
241
|
+
"name": "stderr",
|
|
242
|
+
"output_type": "stream",
|
|
243
|
+
"text": [
|
|
244
|
+
"\n"
|
|
245
|
+
]
|
|
246
|
+
}
|
|
247
|
+
],
|
|
248
|
+
"source": [
|
|
249
|
+
"_ = b.run(\n",
|
|
250
|
+
" benchmark=semseg_evaluate,\n",
|
|
251
|
+
" n_datums=n_datums,\n",
|
|
252
|
+
" n_labels=n_labels,\n",
|
|
253
|
+
")"
|
|
254
|
+
]
|
|
255
|
+
}
|
|
256
|
+
],
|
|
257
|
+
"metadata": {
|
|
258
|
+
"kernelspec": {
|
|
259
|
+
"display_name": ".env-valor",
|
|
260
|
+
"language": "python",
|
|
261
|
+
"name": "python3"
|
|
262
|
+
},
|
|
263
|
+
"language_info": {
|
|
264
|
+
"codemirror_mode": {
|
|
265
|
+
"name": "ipython",
|
|
266
|
+
"version": 3
|
|
267
|
+
},
|
|
268
|
+
"file_extension": ".py",
|
|
269
|
+
"mimetype": "text/x-python",
|
|
270
|
+
"name": "python",
|
|
271
|
+
"nbconvert_exporter": "python",
|
|
272
|
+
"pygments_lexer": "ipython3",
|
|
273
|
+
"version": "3.10.15"
|
|
274
|
+
},
|
|
275
|
+
"orig_nbformat": 4
|
|
276
|
+
},
|
|
277
|
+
"nbformat": 4,
|
|
278
|
+
"nbformat_minor": 2
|
|
279
|
+
}
|
|
@@ -959,7 +959,7 @@
|
|
|
959
959
|
"id": "98edc4dd",
|
|
960
960
|
"metadata": {},
|
|
961
961
|
"source": [
|
|
962
|
-
"###
|
|
962
|
+
"### Unmatched Predictions"
|
|
963
963
|
]
|
|
964
964
|
},
|
|
965
965
|
{
|
|
@@ -1764,7 +1764,7 @@
|
|
|
1764
1764
|
}
|
|
1765
1765
|
],
|
|
1766
1766
|
"source": [
|
|
1767
|
-
"metric.value[\"
|
|
1767
|
+
"metric.value[\"unmatched_predictions\"]"
|
|
1768
1768
|
]
|
|
1769
1769
|
},
|
|
1770
1770
|
{
|
|
@@ -1773,7 +1773,7 @@
|
|
|
1773
1773
|
"id": "415335e4",
|
|
1774
1774
|
"metadata": {},
|
|
1775
1775
|
"source": [
|
|
1776
|
-
"### Ground Truths
|
|
1776
|
+
"### Unmatched Ground Truths"
|
|
1777
1777
|
]
|
|
1778
1778
|
},
|
|
1779
1779
|
{
|
|
@@ -2995,7 +2995,7 @@
|
|
|
2995
2995
|
}
|
|
2996
2996
|
],
|
|
2997
2997
|
"source": [
|
|
2998
|
-
"metric.value[\"
|
|
2998
|
+
"metric.value[\"unmatched_ground_truths\"]"
|
|
2999
2999
|
]
|
|
3000
3000
|
}
|
|
3001
3001
|
],
|
|
@@ -40,7 +40,7 @@ def test_compute_confusion_matrix():
|
|
|
40
40
|
|
|
41
41
|
score_thresholds = np.array([0.25, 0.75], dtype=np.float64)
|
|
42
42
|
|
|
43
|
-
confusion_matrix,
|
|
43
|
+
confusion_matrix, unmatched_ground_truths = compute_confusion_matrix(
|
|
44
44
|
data=data,
|
|
45
45
|
label_metadata=label_metadata,
|
|
46
46
|
score_thresholds=score_thresholds,
|
|
@@ -74,15 +74,15 @@ def test_compute_confusion_matrix():
|
|
|
74
74
|
)
|
|
75
75
|
).all()
|
|
76
76
|
|
|
77
|
-
assert
|
|
77
|
+
assert unmatched_ground_truths.shape == (2, 4, 1)
|
|
78
78
|
assert (
|
|
79
79
|
# score >= 0.25
|
|
80
|
-
|
|
80
|
+
unmatched_ground_truths[0, :, 0]
|
|
81
81
|
== np.array([-1.0, -1.0, -1.0, -1.0])
|
|
82
82
|
).all()
|
|
83
83
|
assert (
|
|
84
84
|
# score >= 0.75
|
|
85
|
-
|
|
85
|
+
unmatched_ground_truths[1, :, 0]
|
|
86
86
|
== np.array([-1.0, -1.0, -1.0, 1.0])
|
|
87
87
|
).all()
|
|
88
88
|
|
|
@@ -144,7 +144,7 @@ def test_confusion_matrix_basic(basic_classifications: list[Classification]):
|
|
|
144
144
|
}
|
|
145
145
|
},
|
|
146
146
|
},
|
|
147
|
-
"
|
|
147
|
+
"unmatched_ground_truths": {},
|
|
148
148
|
},
|
|
149
149
|
"parameters": {
|
|
150
150
|
"score_threshold": 0.25,
|
|
@@ -166,7 +166,7 @@ def test_confusion_matrix_basic(basic_classifications: list[Classification]):
|
|
|
166
166
|
},
|
|
167
167
|
}
|
|
168
168
|
},
|
|
169
|
-
"
|
|
169
|
+
"unmatched_ground_truths": {
|
|
170
170
|
"3": {"count": 1, "examples": [{"datum": "uid2"}]}
|
|
171
171
|
},
|
|
172
172
|
},
|
|
@@ -179,7 +179,7 @@ def test_confusion_matrix_basic(basic_classifications: list[Classification]):
|
|
|
179
179
|
for m in actual_metrics:
|
|
180
180
|
_filter_elements_with_zero_count(
|
|
181
181
|
cm=m["value"]["confusion_matrix"],
|
|
182
|
-
mp=m["value"]["
|
|
182
|
+
mp=m["value"]["unmatched_ground_truths"],
|
|
183
183
|
)
|
|
184
184
|
assert m in expected_metrics
|
|
185
185
|
for m in expected_metrics:
|
|
@@ -212,7 +212,7 @@ def test_confusion_matrix_unit(
|
|
|
212
212
|
"1": {"1": {"count": 1, "examples": []}},
|
|
213
213
|
"2": {"1": {"count": 2, "examples": []}},
|
|
214
214
|
},
|
|
215
|
-
"
|
|
215
|
+
"unmatched_ground_truths": {},
|
|
216
216
|
},
|
|
217
217
|
"parameters": {
|
|
218
218
|
"score_threshold": 0.5,
|
|
@@ -223,7 +223,7 @@ def test_confusion_matrix_unit(
|
|
|
223
223
|
for m in actual_metrics:
|
|
224
224
|
_filter_elements_with_zero_count(
|
|
225
225
|
cm=m["value"]["confusion_matrix"],
|
|
226
|
-
mp=m["value"]["
|
|
226
|
+
mp=m["value"]["unmatched_ground_truths"],
|
|
227
227
|
)
|
|
228
228
|
assert m in expected_metrics
|
|
229
229
|
for m in expected_metrics:
|
|
@@ -282,7 +282,7 @@ def test_confusion_matrix_with_animal_example(
|
|
|
282
282
|
}
|
|
283
283
|
},
|
|
284
284
|
},
|
|
285
|
-
"
|
|
285
|
+
"unmatched_ground_truths": {
|
|
286
286
|
"dog": {"count": 1, "examples": [{"datum": "uid5"}]}
|
|
287
287
|
},
|
|
288
288
|
},
|
|
@@ -295,7 +295,7 @@ def test_confusion_matrix_with_animal_example(
|
|
|
295
295
|
for m in actual_metrics:
|
|
296
296
|
_filter_elements_with_zero_count(
|
|
297
297
|
cm=m["value"]["confusion_matrix"],
|
|
298
|
-
mp=m["value"]["
|
|
298
|
+
mp=m["value"]["unmatched_ground_truths"],
|
|
299
299
|
)
|
|
300
300
|
assert m in expected_metrics
|
|
301
301
|
for m in expected_metrics:
|
|
@@ -356,7 +356,7 @@ def test_confusion_matrix_with_color_example(
|
|
|
356
356
|
}
|
|
357
357
|
},
|
|
358
358
|
},
|
|
359
|
-
"
|
|
359
|
+
"unmatched_ground_truths": {
|
|
360
360
|
"red": {"count": 1, "examples": [{"datum": "uid2"}]}
|
|
361
361
|
},
|
|
362
362
|
},
|
|
@@ -369,7 +369,7 @@ def test_confusion_matrix_with_color_example(
|
|
|
369
369
|
for m in actual_metrics:
|
|
370
370
|
_filter_elements_with_zero_count(
|
|
371
371
|
cm=m["value"]["confusion_matrix"],
|
|
372
|
-
mp=m["value"]["
|
|
372
|
+
mp=m["value"]["unmatched_ground_truths"],
|
|
373
373
|
)
|
|
374
374
|
assert m in expected_metrics
|
|
375
375
|
for m in expected_metrics:
|
|
@@ -438,7 +438,7 @@ def test_confusion_matrix_multiclass(
|
|
|
438
438
|
}
|
|
439
439
|
},
|
|
440
440
|
},
|
|
441
|
-
"
|
|
441
|
+
"unmatched_ground_truths": {},
|
|
442
442
|
},
|
|
443
443
|
"parameters": {
|
|
444
444
|
"score_threshold": 0.05,
|
|
@@ -466,7 +466,7 @@ def test_confusion_matrix_multiclass(
|
|
|
466
466
|
}
|
|
467
467
|
},
|
|
468
468
|
},
|
|
469
|
-
"
|
|
469
|
+
"unmatched_ground_truths": {
|
|
470
470
|
"cat": {
|
|
471
471
|
"count": 2,
|
|
472
472
|
"examples": [{"datum": "uid0"}, {"datum": "uid2"}],
|
|
@@ -483,7 +483,7 @@ def test_confusion_matrix_multiclass(
|
|
|
483
483
|
"type": "ConfusionMatrix",
|
|
484
484
|
"value": {
|
|
485
485
|
"confusion_matrix": {},
|
|
486
|
-
"
|
|
486
|
+
"unmatched_ground_truths": {
|
|
487
487
|
"cat": {
|
|
488
488
|
"count": 2,
|
|
489
489
|
"examples": [{"datum": "uid0"}, {"datum": "uid2"}],
|
|
@@ -504,7 +504,7 @@ def test_confusion_matrix_multiclass(
|
|
|
504
504
|
for m in actual_metrics:
|
|
505
505
|
_filter_elements_with_zero_count(
|
|
506
506
|
cm=m["value"]["confusion_matrix"],
|
|
507
|
-
mp=m["value"]["
|
|
507
|
+
mp=m["value"]["unmatched_ground_truths"],
|
|
508
508
|
)
|
|
509
509
|
assert m in expected_metrics
|
|
510
510
|
for m in expected_metrics:
|
|
@@ -560,7 +560,7 @@ def test_confusion_matrix_without_hardmax_animal_example(
|
|
|
560
560
|
},
|
|
561
561
|
}
|
|
562
562
|
},
|
|
563
|
-
"
|
|
563
|
+
"unmatched_ground_truths": {},
|
|
564
564
|
},
|
|
565
565
|
"parameters": {
|
|
566
566
|
"score_threshold": 0.05,
|
|
@@ -580,7 +580,7 @@ def test_confusion_matrix_without_hardmax_animal_example(
|
|
|
580
580
|
}
|
|
581
581
|
}
|
|
582
582
|
},
|
|
583
|
-
"
|
|
583
|
+
"unmatched_ground_truths": {},
|
|
584
584
|
},
|
|
585
585
|
"parameters": {
|
|
586
586
|
"score_threshold": 0.4,
|
|
@@ -591,7 +591,7 @@ def test_confusion_matrix_without_hardmax_animal_example(
|
|
|
591
591
|
"type": "ConfusionMatrix",
|
|
592
592
|
"value": {
|
|
593
593
|
"confusion_matrix": {},
|
|
594
|
-
"
|
|
594
|
+
"unmatched_ground_truths": {
|
|
595
595
|
"ant": {
|
|
596
596
|
"count": 1,
|
|
597
597
|
"examples": [
|
|
@@ -611,7 +611,7 @@ def test_confusion_matrix_without_hardmax_animal_example(
|
|
|
611
611
|
for m in actual_metrics:
|
|
612
612
|
_filter_elements_with_zero_count(
|
|
613
613
|
cm=m["value"]["confusion_matrix"],
|
|
614
|
-
mp=m["value"]["
|
|
614
|
+
mp=m["value"]["unmatched_ground_truths"],
|
|
615
615
|
)
|
|
616
616
|
assert m in expected_metrics
|
|
617
617
|
for m in expected_metrics:
|
|
@@ -704,7 +704,7 @@ def false_negatives_two_images_one_only_with_different_class_high_confidence_of_
|
|
|
704
704
|
|
|
705
705
|
|
|
706
706
|
@pytest.fixture
|
|
707
|
-
def
|
|
707
|
+
def detections_fp_unmatched_prediction_edge_case() -> list[Detection]:
|
|
708
708
|
return [
|
|
709
709
|
Detection(
|
|
710
710
|
uid="uid1",
|
|
@@ -1093,7 +1093,7 @@ def detections_for_detailed_counting(
|
|
|
1093
1093
|
xmax=rect4[1],
|
|
1094
1094
|
ymin=rect4[2],
|
|
1095
1095
|
ymax=rect4[3],
|
|
1096
|
-
labels=["
|
|
1096
|
+
labels=["no_overlap"],
|
|
1097
1097
|
scores=[0.1],
|
|
1098
1098
|
),
|
|
1099
1099
|
],
|
|
@@ -95,9 +95,9 @@ def test_accuracy_metrics_first_class(
|
|
|
95
95
|
groundtruths
|
|
96
96
|
datum uid1
|
|
97
97
|
box 1 - label v1 - tp
|
|
98
|
-
box 3 - label v2 - fn
|
|
98
|
+
box 3 - label v2 - fn unmatched ground truths
|
|
99
99
|
datum uid2
|
|
100
|
-
box 2 - label v1 - fn
|
|
100
|
+
box 2 - label v1 - fn unmatched ground truths
|
|
101
101
|
|
|
102
102
|
predictions
|
|
103
103
|
datum uid1
|
|
@@ -176,7 +176,7 @@ def test_accuracy_metrics_second_class(
|
|
|
176
176
|
|
|
177
177
|
groundtruths
|
|
178
178
|
datum uid1
|
|
179
|
-
box 3 - label v2 - fn
|
|
179
|
+
box 3 - label v2 - fn unmatched ground truths
|
|
180
180
|
datum uid2
|
|
181
181
|
none
|
|
182
182
|
predictions
|
|
@@ -72,7 +72,7 @@ def test_ap_metrics_first_class(
|
|
|
72
72
|
datum uid1
|
|
73
73
|
box 1 - label v1 - tp
|
|
74
74
|
datum uid2
|
|
75
|
-
box 2 - label v1 - fn
|
|
75
|
+
box 2 - label v1 - fn unmatched ground truths
|
|
76
76
|
|
|
77
77
|
predictions
|
|
78
78
|
datum uid1
|
|
@@ -192,7 +192,7 @@ def test_ap_metrics_second_class(
|
|
|
192
192
|
|
|
193
193
|
groundtruths
|
|
194
194
|
datum uid1
|
|
195
|
-
box 3 - label v2 - fn
|
|
195
|
+
box 3 - label v2 - fn unmatched ground truths
|
|
196
196
|
datum uid2
|
|
197
197
|
none
|
|
198
198
|
predictions
|