valor-lite 0.33.17__tar.gz → 0.33.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. {valor_lite-0.33.17/valor_lite.egg-info → valor_lite-0.33.19}/PKG-INFO +1 -1
  2. valor_lite-0.33.19/benchmarks/synthetic/benchmark_semantic_segmentation.py +94 -0
  3. valor_lite-0.33.19/examples/benchmarking.ipynb +279 -0
  4. {valor_lite-0.33.17 → valor_lite-0.33.19}/examples/object-detection.ipynb +4 -4
  5. {valor_lite-0.33.17 → valor_lite-0.33.19}/examples/tabular_classification.ipynb +1 -1
  6. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_confusion_matrix.py +21 -21
  7. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_dataloader.py +1 -1
  8. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/conftest.py +2 -2
  9. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_accuracy.py +3 -3
  10. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_average_precision.py +2 -2
  11. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_average_recall.py +2 -2
  12. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_confusion_matrix.py +145 -135
  13. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_counts.py +2 -2
  14. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_f1.py +3 -3
  15. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_filtering.py +6 -6
  16. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_precision.py +3 -3
  17. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_recall.py +2 -2
  18. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_annotation.py +57 -1
  19. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_confusion_matrix.py +4 -4
  20. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/classification/computation.py +6 -6
  21. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/classification/metric.py +6 -6
  22. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/classification/utilities.py +10 -8
  23. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/object_detection/computation.py +14 -14
  24. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/object_detection/manager.py +6 -2
  25. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/object_detection/metric.py +12 -12
  26. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/object_detection/utilities.py +21 -19
  27. valor_lite-0.33.19/valor_lite/profiling.py +374 -0
  28. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/semantic_segmentation/__init__.py +2 -1
  29. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/semantic_segmentation/annotation.py +84 -1
  30. valor_lite-0.33.19/valor_lite/semantic_segmentation/benchmark.py +151 -0
  31. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/semantic_segmentation/computation.py +20 -33
  32. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/semantic_segmentation/manager.py +6 -2
  33. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/semantic_segmentation/metric.py +10 -10
  34. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/semantic_segmentation/utilities.py +6 -6
  35. {valor_lite-0.33.17 → valor_lite-0.33.19/valor_lite.egg-info}/PKG-INFO +1 -1
  36. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite.egg-info/SOURCES.txt +4 -0
  37. {valor_lite-0.33.17 → valor_lite-0.33.19}/LICENSE +0 -0
  38. {valor_lite-0.33.17 → valor_lite-0.33.19}/README.md +0 -0
  39. {valor_lite-0.33.17 → valor_lite-0.33.19}/benchmarks/.gitignore +0 -0
  40. {valor_lite-0.33.17 → valor_lite-0.33.19}/benchmarks/benchmark_classification.py +0 -0
  41. {valor_lite-0.33.17 → valor_lite-0.33.19}/benchmarks/benchmark_objdet.py +0 -0
  42. {valor_lite-0.33.17 → valor_lite-0.33.19}/examples/.gitignore +0 -0
  43. {valor_lite-0.33.17 → valor_lite-0.33.19}/examples/text_generation.ipynb +0 -0
  44. {valor_lite-0.33.17 → valor_lite-0.33.19}/pyproject.toml +0 -0
  45. {valor_lite-0.33.17 → valor_lite-0.33.19}/setup.cfg +0 -0
  46. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/__init__.py +0 -0
  47. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/__init__.py +0 -0
  48. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/conftest.py +0 -0
  49. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_accuracy.py +0 -0
  50. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_counts.py +0 -0
  51. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_evaluator.py +0 -0
  52. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_f1.py +0 -0
  53. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_filtering.py +0 -0
  54. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_metric.py +0 -0
  55. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_precision.py +0 -0
  56. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_recall.py +0 -0
  57. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_rocauc.py +0 -0
  58. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_schemas.py +0 -0
  59. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/classification/test_stability.py +0 -0
  60. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/__init__.py +0 -0
  61. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_dataloader.py +0 -0
  62. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_evaluator.py +0 -0
  63. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_iou.py +0 -0
  64. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_metric.py +0 -0
  65. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_pr_curve.py +0 -0
  66. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_schemas.py +0 -0
  67. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/object_detection/test_stability.py +0 -0
  68. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/__init__.py +0 -0
  69. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/conftest.py +0 -0
  70. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_accuracy.py +0 -0
  71. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_dataloader.py +0 -0
  72. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_evaluator.py +0 -0
  73. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_f1.py +0 -0
  74. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_filtering.py +0 -0
  75. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_iou.py +0 -0
  76. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_metric.py +0 -0
  77. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_precision.py +0 -0
  78. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_recall.py +0 -0
  79. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/semantic_segmentation/test_stability.py +0 -0
  80. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/__init__.py +0 -0
  81. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/conftest.py +0 -0
  82. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/llm/__init__.py +0 -0
  83. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/llm/test_generation.py +0 -0
  84. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/llm/test_integrations.py +0 -0
  85. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/llm/test_utilities.py +0 -0
  86. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/llm/test_validators.py +0 -0
  87. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_answer_correctness.py +0 -0
  88. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_answer_relevance.py +0 -0
  89. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_bias.py +0 -0
  90. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_context_precision.py +0 -0
  91. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_context_recall.py +0 -0
  92. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_context_relevance.py +0 -0
  93. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_faithfulness.py +0 -0
  94. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_hallucination.py +0 -0
  95. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_metric.py +0 -0
  96. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_rouge.py +0 -0
  97. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_sentence_bleu.py +0 -0
  98. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_summary_coherence.py +0 -0
  99. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/metrics/test_toxicity.py +0 -0
  100. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/test_evaluator.py +0 -0
  101. {valor_lite-0.33.17 → valor_lite-0.33.19}/tests/text_generation/test_manager.py +0 -0
  102. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/LICENSE +0 -0
  103. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/__init__.py +0 -0
  104. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/classification/__init__.py +0 -0
  105. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/classification/annotation.py +0 -0
  106. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/classification/manager.py +0 -0
  107. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/object_detection/__init__.py +0 -0
  108. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/object_detection/annotation.py +0 -0
  109. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/schemas.py +0 -0
  110. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/__init__.py +0 -0
  111. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/annotation.py +0 -0
  112. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/computation.py +0 -0
  113. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/llm/__init__.py +0 -0
  114. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/llm/exceptions.py +0 -0
  115. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/llm/generation.py +0 -0
  116. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/llm/instructions.py +0 -0
  117. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/llm/integrations.py +0 -0
  118. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/llm/utilities.py +0 -0
  119. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/llm/validators.py +0 -0
  120. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/manager.py +0 -0
  121. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite/text_generation/metric.py +0 -0
  122. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite.egg-info/dependency_links.txt +0 -0
  123. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite.egg-info/requires.txt +0 -0
  124. {valor_lite-0.33.17 → valor_lite-0.33.19}/valor_lite.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: valor-lite
3
- Version: 0.33.17
3
+ Version: 0.33.19
4
4
  Summary: Compute valor metrics locally.
5
5
  License: MIT License
6
6
 
@@ -0,0 +1,94 @@
1
+ from valor_lite.profiling import Benchmark, BenchmarkError
2
+ from valor_lite.semantic_segmentation.benchmark import (
3
+ benchmark_add_data,
4
+ benchmark_evaluate,
5
+ benchmark_finalize,
6
+ )
7
+
8
+
9
+ def benchmark(
10
+ bitmask_shape: tuple[int, int],
11
+ number_of_unique_labels: int,
12
+ number_of_images: int,
13
+ *_,
14
+ memory_limit: float = 4.0,
15
+ time_limit: float = 10.0,
16
+ repeat: int = 1,
17
+ verbose: bool = False,
18
+ ):
19
+ """
20
+ Runs the add-data, finalize, and evaluate benchmarks for a single configuration.
21
+
22
+ Parameters
23
+ ----------
24
+ bitmask_shape : tuple[int, int]
25
+ The size (h, w) of the bitmask array.
26
+ number_of_unique_labels : int
27
+ The number of unique labels used in the synthetic example.
28
+ number_of_images : int
29
+ The number of distinct datums that are created.
30
+ memory_limit : float, default=4.0
31
+ The maximum amount of system memory allowed in gigabytes (GB).
32
+ time_limit : float, default=10.0
33
+ The maximum amount of time, in seconds, permitted before killing the benchmark.
34
+ repeat : int, default=1
35
+ The number of times to run a benchmark to produce an average runtime.
36
+ verbose : bool, default=False
37
+ Toggles terminal output of benchmark results.
38
+ """
39
+
40
+ b = Benchmark(
41
+ time_limit=time_limit,
42
+ memory_limit=int(memory_limit * (1024**3)),
43
+ repeat=repeat,
44
+ verbose=verbose,
45
+ )
46
+
47
+ _, failed, details = b.run(
48
+ benchmark=benchmark_add_data,
49
+ n_labels=[number_of_unique_labels],
50
+ shape=[bitmask_shape],
51
+ )
52
+ if failed:
53
+ raise BenchmarkError(
54
+ benchmark=details["benchmark"],
55
+ error_type=failed[0]["error"],
56
+ error_message=failed[0]["msg"],
57
+ )
58
+
59
+ _, failed, details = b.run(
60
+ benchmark=benchmark_finalize,
61
+ n_datums=[number_of_images],
62
+ n_labels=[number_of_unique_labels],
63
+ )
64
+ if failed:
65
+ raise BenchmarkError(
66
+ benchmark=details["benchmark"],
67
+ error_type=failed[0]["error"],
68
+ error_message=failed[0]["msg"],
69
+ )
70
+
71
+ _, failed, details = b.run(
72
+ benchmark=benchmark_evaluate,
73
+ n_datums=[number_of_images],
74
+ n_labels=[number_of_unique_labels],
75
+ )
76
+ if failed:
77
+ raise BenchmarkError(
78
+ benchmark=details["benchmark"],
79
+ error_type=failed[0]["error"],
80
+ error_message=failed[0]["msg"],
81
+ )
82
+
83
+
84
+ if __name__ == "__main__":
85
+
86
+ benchmark(
87
+ bitmask_shape=(4000, 4000),
88
+ number_of_images=1000,
89
+ number_of_unique_labels=10,
90
+ memory_limit=4.0,
91
+ time_limit=10.0,
92
+ repeat=1,
93
+ verbose=True,
94
+ )
@@ -0,0 +1,279 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from valor_lite.profiling import Benchmark\n",
10
+ "\n",
11
+ "b = Benchmark(\n",
12
+ " time_limit=5.0, # 5s\n",
13
+ " memory_limit=8 * (1024 ** 3), # 8 GB\n",
14
+ " repeat=1,\n",
15
+ " verbose=True,\n",
16
+ ")"
17
+ ]
18
+ },
19
+ {
20
+ "attachments": {},
21
+ "cell_type": "markdown",
22
+ "metadata": {},
23
+ "source": [
24
+ "# Semantic Segmentation"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": 2,
30
+ "metadata": {},
31
+ "outputs": [],
32
+ "source": [
33
+ "from valor_lite.semantic_segmentation.benchmark import (\n",
34
+ " benchmark_add_data as semseg_add_data,\n",
35
+ " benchmark_finalize as semseg_finalize,\n",
36
+ " benchmark_evaluate as semseg_evaluate,\n",
37
+ ")"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": 3,
43
+ "metadata": {},
44
+ "outputs": [],
45
+ "source": [
46
+ "n_datums = [\n",
47
+ " 10000,\n",
48
+ " 1000,\n",
49
+ " 100,\n",
50
+ " 10,\n",
51
+ " 1,\n",
52
+ "]\n",
53
+ "\n",
54
+ "n_labels = [\n",
55
+ " 1000,\n",
56
+ " 100,\n",
57
+ " 10,\n",
58
+ " 3,\n",
59
+ "]\n",
60
+ "\n",
61
+ "shapes = [\n",
62
+ " (10000, 10000),\n",
63
+ " (2500, 2500),\n",
64
+ " (1000, 1000),\n",
65
+ " (100, 100),\n",
66
+ "]"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "code",
71
+ "execution_count": 4,
72
+ "metadata": {},
73
+ "outputs": [
74
+ {
75
+ "name": "stderr",
76
+ "output_type": "stream",
77
+ "text": [
78
+ " 69%|██████▉ | 11/16 [00:46<00:21, 4.26s/it]"
79
+ ]
80
+ },
81
+ {
82
+ "name": "stdout",
83
+ "output_type": "stream",
84
+ "text": [
85
+ "=====================================================================\n",
86
+ "Details\n",
87
+ "{\n",
88
+ " \"benchmark\": \"benchmark_add_data\",\n",
89
+ " \"limits\": {\n",
90
+ " \"memory_limit\": \"8.0 GB\",\n",
91
+ " \"time_limit\": \"5.0 seconds\",\n",
92
+ " \"repeat\": 1\n",
93
+ " },\n",
94
+ " \"passed\": 8,\n",
95
+ " \"failed\": 8,\n",
96
+ " \"total\": 16\n",
97
+ "}\n",
98
+ "\n",
99
+ "Passed\n",
100
+ " complexity | runtime | n_labels | shape \n",
101
+ "---------------------------------------------------------------------\n",
102
+ " 300000000 | 1.5151 | 3 | (10000, 10000) \n",
103
+ " 62500000 | 0.5952 | 10 | (2500, 2500) \n",
104
+ " 10000000 | 0.0911 | 10 | (1000, 1000) \n",
105
+ " 1000000 | 0.0582 | 100 | (100, 100) \n",
106
+ "\n",
107
+ "Failed\n",
108
+ " complexity | error | n_labels | shape | msg \n",
109
+ "---------------------------------------------------------------------------------------\n",
110
+ " 100000000000 | MemoryError | 1000 | (10000, 10000) | Unable to allocate 186. GiB for an array with shape (1001, 20000, 10000) and data type bool\n",
111
+ " 10000000000 | MemoryError | 100 | (10000, 10000) | Unable to allocate 18.8 GiB for an array with shape (101, 20000, 10000) and data type bool\n",
112
+ " 6250000000 | MemoryError | 1000 | (2500, 2500) | Unable to allocate 11.7 GiB for an array with shape (1001, 5000, 2500) and data type bool\n",
113
+ " 1000000000 | MemoryError | 10 | (10000, 10000) | Unable to allocate 9.31 GiB for an array with shape (10, 10, 100000000) and data type bool\n",
114
+ " 1000000000 | MemoryError | 1000 | (1000, 1000) | Unable to allocate 931. GiB for an array with shape (1000, 1000, 1000000) and data type bool\n",
115
+ " 625000000 | MemoryError | 100 | (2500, 2500) | Unable to allocate 58.2 GiB for an array with shape (100, 100, 6250000) and data type bool\n",
116
+ " 100000000 | MemoryError | 100 | (1000, 1000) | Unable to allocate 9.31 GiB for an array with shape (100, 100, 1000000) and data type bool\n",
117
+ " 10000000 | MemoryError | 1000 | (100, 100) | Unable to allocate 9.31 GiB for an array with shape (1000, 1000, 10000) and data type bool\n"
118
+ ]
119
+ },
120
+ {
121
+ "name": "stderr",
122
+ "output_type": "stream",
123
+ "text": [
124
+ "\n"
125
+ ]
126
+ }
127
+ ],
128
+ "source": [
129
+ "_ = b.run(\n",
130
+ " benchmark=semseg_add_data,\n",
131
+ " n_labels=n_labels,\n",
132
+ " shape=shapes,\n",
133
+ ")"
134
+ ]
135
+ },
136
+ {
137
+ "cell_type": "code",
138
+ "execution_count": 5,
139
+ "metadata": {},
140
+ "outputs": [
141
+ {
142
+ "name": "stderr",
143
+ "output_type": "stream",
144
+ "text": [
145
+ " 20%|██ | 4/20 [02:35<10:22, 38.92s/it]"
146
+ ]
147
+ },
148
+ {
149
+ "name": "stdout",
150
+ "output_type": "stream",
151
+ "text": [
152
+ "=====================================================================\n",
153
+ "Details\n",
154
+ "{\n",
155
+ " \"benchmark\": \"benchmark_finalize\",\n",
156
+ " \"limits\": {\n",
157
+ " \"memory_limit\": \"8.0 GB\",\n",
158
+ " \"time_limit\": \"5.0 seconds\",\n",
159
+ " \"repeat\": 1\n",
160
+ " },\n",
161
+ " \"passed\": 18,\n",
162
+ " \"failed\": 2,\n",
163
+ " \"total\": 20\n",
164
+ "}\n",
165
+ "\n",
166
+ "Passed\n",
167
+ " complexity | runtime | n_datums | n_labels \n",
168
+ "---------------------------------------------------------------------\n",
169
+ " 1000000 | 1.1142 | 10000 | 100 \n",
170
+ " 100000 | 0.1748 | 100 | 1000 \n",
171
+ " 100000 | 0.1086 | 1000 | 100 \n",
172
+ "\n",
173
+ "Failed\n",
174
+ " complexity | error | n_datums | n_labels | msg \n",
175
+ "---------------------------------------------------------------------------------------\n",
176
+ " 10000000 | MemoryError | 10000 | 1000 | Unable to allocate 7.63 MiB for an array with shape (1000, 1000) and data type int64\n",
177
+ " 1000000 | MemoryError | 1000 | 1000 | \n"
178
+ ]
179
+ },
180
+ {
181
+ "name": "stderr",
182
+ "output_type": "stream",
183
+ "text": [
184
+ "\n"
185
+ ]
186
+ }
187
+ ],
188
+ "source": [
189
+ "_ = b.run(\n",
190
+ " benchmark=semseg_finalize,\n",
191
+ " n_datums=n_datums,\n",
192
+ " n_labels=n_labels,\n",
193
+ ")"
194
+ ]
195
+ },
196
+ {
197
+ "cell_type": "code",
198
+ "execution_count": 6,
199
+ "metadata": {},
200
+ "outputs": [
201
+ {
202
+ "name": "stderr",
203
+ "output_type": "stream",
204
+ "text": [
205
+ " 20%|██ | 4/20 [02:25<09:40, 36.28s/it]"
206
+ ]
207
+ },
208
+ {
209
+ "name": "stdout",
210
+ "output_type": "stream",
211
+ "text": [
212
+ "=====================================================================\n",
213
+ "Details\n",
214
+ "{\n",
215
+ " \"benchmark\": \"benchmark_evaluate\",\n",
216
+ " \"limits\": {\n",
217
+ " \"memory_limit\": \"8.0 GB\",\n",
218
+ " \"time_limit\": \"5.0 seconds\",\n",
219
+ " \"repeat\": 1\n",
220
+ " },\n",
221
+ " \"passed\": 18,\n",
222
+ " \"failed\": 2,\n",
223
+ " \"total\": 20\n",
224
+ "}\n",
225
+ "\n",
226
+ "Passed\n",
227
+ " complexity | runtime | n_datums | n_labels \n",
228
+ "---------------------------------------------------------------------\n",
229
+ " 1000000 | 0.0537 | 10000 | 100 \n",
230
+ " 100000 | 0.0815 | 100 | 1000 \n",
231
+ " 100000 | 0.0137 | 1000 | 100 \n",
232
+ "\n",
233
+ "Failed\n",
234
+ " complexity | error | n_datums | n_labels | msg \n",
235
+ "---------------------------------------------------------------------------------------\n",
236
+ " 10000000 | MemoryError | 10000 | 1000 | Unable to allocate 23.8 MiB for an array with shape (1000, 1000, 25) and data type bool\n",
237
+ " 1000000 | MemoryError | 1000 | 1000 | Unable to allocate 3.73 GiB for an array with shape (1000, 1001, 1001) and data type int32\n"
238
+ ]
239
+ },
240
+ {
241
+ "name": "stderr",
242
+ "output_type": "stream",
243
+ "text": [
244
+ "\n"
245
+ ]
246
+ }
247
+ ],
248
+ "source": [
249
+ "_ = b.run(\n",
250
+ " benchmark=semseg_evaluate,\n",
251
+ " n_datums=n_datums,\n",
252
+ " n_labels=n_labels,\n",
253
+ ")"
254
+ ]
255
+ }
256
+ ],
257
+ "metadata": {
258
+ "kernelspec": {
259
+ "display_name": ".env-valor",
260
+ "language": "python",
261
+ "name": "python3"
262
+ },
263
+ "language_info": {
264
+ "codemirror_mode": {
265
+ "name": "ipython",
266
+ "version": 3
267
+ },
268
+ "file_extension": ".py",
269
+ "mimetype": "text/x-python",
270
+ "name": "python",
271
+ "nbconvert_exporter": "python",
272
+ "pygments_lexer": "ipython3",
273
+ "version": "3.10.15"
274
+ },
275
+ "orig_nbformat": 4
276
+ },
277
+ "nbformat": 4,
278
+ "nbformat_minor": 2
279
+ }
@@ -959,7 +959,7 @@
959
959
  "id": "98edc4dd",
960
960
  "metadata": {},
961
961
  "source": [
962
- "### Hallucinations"
962
+ "### Unmatched Predictions"
963
963
  ]
964
964
  },
965
965
  {
@@ -1764,7 +1764,7 @@
1764
1764
  }
1765
1765
  ],
1766
1766
  "source": [
1767
- "metric.value[\"hallucinations\"]"
1767
+ "metric.value[\"unmatched_predictions\"]"
1768
1768
  ]
1769
1769
  },
1770
1770
  {
@@ -1773,7 +1773,7 @@
1773
1773
  "id": "415335e4",
1774
1774
  "metadata": {},
1775
1775
  "source": [
1776
- "### Ground Truths Missing Predictions"
1776
+ "### Unmatched Ground Truths"
1777
1777
  ]
1778
1778
  },
1779
1779
  {
@@ -2995,7 +2995,7 @@
2995
2995
  }
2996
2996
  ],
2997
2997
  "source": [
2998
- "metric.value[\"missing_predictions\"]"
2998
+ "metric.value[\"unmatched_ground_truths\"]"
2999
2999
  ]
3000
3000
  }
3001
3001
  ],
@@ -605,7 +605,7 @@
605
605
  }
606
606
  ],
607
607
  "source": [
608
- "cm.value[\"missing_predictions\"]"
608
+ "cm.value[\"unmatched_ground_truths\"]"
609
609
  ]
610
610
  }
611
611
  ],
@@ -40,7 +40,7 @@ def test_compute_confusion_matrix():
40
40
 
41
41
  score_thresholds = np.array([0.25, 0.75], dtype=np.float64)
42
42
 
43
- confusion_matrix, missing_predictions = compute_confusion_matrix(
43
+ confusion_matrix, unmatched_ground_truths = compute_confusion_matrix(
44
44
  data=data,
45
45
  label_metadata=label_metadata,
46
46
  score_thresholds=score_thresholds,
@@ -74,15 +74,15 @@ def test_compute_confusion_matrix():
74
74
  )
75
75
  ).all()
76
76
 
77
- assert missing_predictions.shape == (2, 4, 1)
77
+ assert unmatched_ground_truths.shape == (2, 4, 1)
78
78
  assert (
79
79
  # score >= 0.25
80
- missing_predictions[0, :, 0]
80
+ unmatched_ground_truths[0, :, 0]
81
81
  == np.array([-1.0, -1.0, -1.0, -1.0])
82
82
  ).all()
83
83
  assert (
84
84
  # score >= 0.75
85
- missing_predictions[1, :, 0]
85
+ unmatched_ground_truths[1, :, 0]
86
86
  == np.array([-1.0, -1.0, -1.0, 1.0])
87
87
  ).all()
88
88
 
@@ -144,7 +144,7 @@ def test_confusion_matrix_basic(basic_classifications: list[Classification]):
144
144
  }
145
145
  },
146
146
  },
147
- "missing_predictions": {},
147
+ "unmatched_ground_truths": {},
148
148
  },
149
149
  "parameters": {
150
150
  "score_threshold": 0.25,
@@ -166,7 +166,7 @@ def test_confusion_matrix_basic(basic_classifications: list[Classification]):
166
166
  },
167
167
  }
168
168
  },
169
- "missing_predictions": {
169
+ "unmatched_ground_truths": {
170
170
  "3": {"count": 1, "examples": [{"datum": "uid2"}]}
171
171
  },
172
172
  },
@@ -179,7 +179,7 @@ def test_confusion_matrix_basic(basic_classifications: list[Classification]):
179
179
  for m in actual_metrics:
180
180
  _filter_elements_with_zero_count(
181
181
  cm=m["value"]["confusion_matrix"],
182
- mp=m["value"]["missing_predictions"],
182
+ mp=m["value"]["unmatched_ground_truths"],
183
183
  )
184
184
  assert m in expected_metrics
185
185
  for m in expected_metrics:
@@ -212,7 +212,7 @@ def test_confusion_matrix_unit(
212
212
  "1": {"1": {"count": 1, "examples": []}},
213
213
  "2": {"1": {"count": 2, "examples": []}},
214
214
  },
215
- "missing_predictions": {},
215
+ "unmatched_ground_truths": {},
216
216
  },
217
217
  "parameters": {
218
218
  "score_threshold": 0.5,
@@ -223,7 +223,7 @@ def test_confusion_matrix_unit(
223
223
  for m in actual_metrics:
224
224
  _filter_elements_with_zero_count(
225
225
  cm=m["value"]["confusion_matrix"],
226
- mp=m["value"]["missing_predictions"],
226
+ mp=m["value"]["unmatched_ground_truths"],
227
227
  )
228
228
  assert m in expected_metrics
229
229
  for m in expected_metrics:
@@ -282,7 +282,7 @@ def test_confusion_matrix_with_animal_example(
282
282
  }
283
283
  },
284
284
  },
285
- "missing_predictions": {
285
+ "unmatched_ground_truths": {
286
286
  "dog": {"count": 1, "examples": [{"datum": "uid5"}]}
287
287
  },
288
288
  },
@@ -295,7 +295,7 @@ def test_confusion_matrix_with_animal_example(
295
295
  for m in actual_metrics:
296
296
  _filter_elements_with_zero_count(
297
297
  cm=m["value"]["confusion_matrix"],
298
- mp=m["value"]["missing_predictions"],
298
+ mp=m["value"]["unmatched_ground_truths"],
299
299
  )
300
300
  assert m in expected_metrics
301
301
  for m in expected_metrics:
@@ -356,7 +356,7 @@ def test_confusion_matrix_with_color_example(
356
356
  }
357
357
  },
358
358
  },
359
- "missing_predictions": {
359
+ "unmatched_ground_truths": {
360
360
  "red": {"count": 1, "examples": [{"datum": "uid2"}]}
361
361
  },
362
362
  },
@@ -369,7 +369,7 @@ def test_confusion_matrix_with_color_example(
369
369
  for m in actual_metrics:
370
370
  _filter_elements_with_zero_count(
371
371
  cm=m["value"]["confusion_matrix"],
372
- mp=m["value"]["missing_predictions"],
372
+ mp=m["value"]["unmatched_ground_truths"],
373
373
  )
374
374
  assert m in expected_metrics
375
375
  for m in expected_metrics:
@@ -438,7 +438,7 @@ def test_confusion_matrix_multiclass(
438
438
  }
439
439
  },
440
440
  },
441
- "missing_predictions": {},
441
+ "unmatched_ground_truths": {},
442
442
  },
443
443
  "parameters": {
444
444
  "score_threshold": 0.05,
@@ -466,7 +466,7 @@ def test_confusion_matrix_multiclass(
466
466
  }
467
467
  },
468
468
  },
469
- "missing_predictions": {
469
+ "unmatched_ground_truths": {
470
470
  "cat": {
471
471
  "count": 2,
472
472
  "examples": [{"datum": "uid0"}, {"datum": "uid2"}],
@@ -483,7 +483,7 @@ def test_confusion_matrix_multiclass(
483
483
  "type": "ConfusionMatrix",
484
484
  "value": {
485
485
  "confusion_matrix": {},
486
- "missing_predictions": {
486
+ "unmatched_ground_truths": {
487
487
  "cat": {
488
488
  "count": 2,
489
489
  "examples": [{"datum": "uid0"}, {"datum": "uid2"}],
@@ -504,7 +504,7 @@ def test_confusion_matrix_multiclass(
504
504
  for m in actual_metrics:
505
505
  _filter_elements_with_zero_count(
506
506
  cm=m["value"]["confusion_matrix"],
507
- mp=m["value"]["missing_predictions"],
507
+ mp=m["value"]["unmatched_ground_truths"],
508
508
  )
509
509
  assert m in expected_metrics
510
510
  for m in expected_metrics:
@@ -560,7 +560,7 @@ def test_confusion_matrix_without_hardmax_animal_example(
560
560
  },
561
561
  }
562
562
  },
563
- "missing_predictions": {},
563
+ "unmatched_ground_truths": {},
564
564
  },
565
565
  "parameters": {
566
566
  "score_threshold": 0.05,
@@ -580,7 +580,7 @@ def test_confusion_matrix_without_hardmax_animal_example(
580
580
  }
581
581
  }
582
582
  },
583
- "missing_predictions": {},
583
+ "unmatched_ground_truths": {},
584
584
  },
585
585
  "parameters": {
586
586
  "score_threshold": 0.4,
@@ -591,7 +591,7 @@ def test_confusion_matrix_without_hardmax_animal_example(
591
591
  "type": "ConfusionMatrix",
592
592
  "value": {
593
593
  "confusion_matrix": {},
594
- "missing_predictions": {
594
+ "unmatched_ground_truths": {
595
595
  "ant": {
596
596
  "count": 1,
597
597
  "examples": [
@@ -611,7 +611,7 @@ def test_confusion_matrix_without_hardmax_animal_example(
611
611
  for m in actual_metrics:
612
612
  _filter_elements_with_zero_count(
613
613
  cm=m["value"]["confusion_matrix"],
614
- mp=m["value"]["missing_predictions"],
614
+ mp=m["value"]["unmatched_ground_truths"],
615
615
  )
616
616
  assert m in expected_metrics
617
617
  for m in expected_metrics:
@@ -8,7 +8,7 @@ def test_no_data():
8
8
  loader.finalize()
9
9
 
10
10
 
11
- def test_missing_predictions(
11
+ def test_unmatched_ground_truths(
12
12
  classifications_no_predictions: list[Classification],
13
13
  ):
14
14
  loader = DataLoader()
@@ -704,7 +704,7 @@ def false_negatives_two_images_one_only_with_different_class_high_confidence_of_
704
704
 
705
705
 
706
706
  @pytest.fixture
707
- def detections_fp_hallucination_edge_case() -> list[Detection]:
707
+ def detections_fp_unmatched_prediction_edge_case() -> list[Detection]:
708
708
  return [
709
709
  Detection(
710
710
  uid="uid1",
@@ -1093,7 +1093,7 @@ def detections_for_detailed_counting(
1093
1093
  xmax=rect4[1],
1094
1094
  ymin=rect4[2],
1095
1095
  ymax=rect4[3],
1096
- labels=["hallucination"],
1096
+ labels=["no_overlap"],
1097
1097
  scores=[0.1],
1098
1098
  ),
1099
1099
  ],
@@ -95,9 +95,9 @@ def test_accuracy_metrics_first_class(
95
95
  groundtruths
96
96
  datum uid1
97
97
  box 1 - label v1 - tp
98
- box 3 - label v2 - fn missing prediction
98
+ box 3 - label v2 - fn unmatched ground truths
99
99
  datum uid2
100
- box 2 - label v1 - fn missing prediction
100
+ box 2 - label v1 - fn unmatched ground truths
101
101
 
102
102
  predictions
103
103
  datum uid1
@@ -176,7 +176,7 @@ def test_accuracy_metrics_second_class(
176
176
 
177
177
  groundtruths
178
178
  datum uid1
179
- box 3 - label v2 - fn missing prediction
179
+ box 3 - label v2 - fn unmatched ground truths
180
180
  datum uid2
181
181
  none
182
182
  predictions
@@ -72,7 +72,7 @@ def test_ap_metrics_first_class(
72
72
  datum uid1
73
73
  box 1 - label v1 - tp
74
74
  datum uid2
75
- box 2 - label v1 - fn missing prediction
75
+ box 2 - label v1 - fn unmatched ground truths
76
76
 
77
77
  predictions
78
78
  datum uid1
@@ -192,7 +192,7 @@ def test_ap_metrics_second_class(
192
192
 
193
193
  groundtruths
194
194
  datum uid1
195
- box 3 - label v2 - fn missing prediction
195
+ box 3 - label v2 - fn unmatched ground truths
196
196
  datum uid2
197
197
  none
198
198
  predictions