ultralytics-opencv-headless 8.4.7__py3-none-any.whl → 8.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/test_cli.py +10 -3
- tests/test_cuda.py +1 -1
- tests/test_exports.py +64 -43
- tests/test_python.py +16 -12
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +1 -0
- ultralytics/cfg/default.yaml +1 -0
- ultralytics/data/augment.py +2 -2
- ultralytics/data/converter.py +11 -0
- ultralytics/engine/exporter.py +13 -16
- ultralytics/engine/predictor.py +5 -0
- ultralytics/engine/trainer.py +3 -3
- ultralytics/engine/tuner.py +2 -2
- ultralytics/engine/validator.py +5 -0
- ultralytics/models/sam/predict.py +2 -2
- ultralytics/models/yolo/classify/train.py +14 -1
- ultralytics/models/yolo/detect/train.py +4 -2
- ultralytics/models/yolo/pose/train.py +2 -1
- ultralytics/models/yolo/world/train_world.py +21 -1
- ultralytics/models/yolo/yoloe/train.py +1 -2
- ultralytics/nn/autobackend.py +22 -6
- ultralytics/nn/modules/head.py +13 -2
- ultralytics/nn/tasks.py +18 -0
- ultralytics/solutions/security_alarm.py +1 -1
- ultralytics/utils/benchmarks.py +3 -9
- ultralytics/utils/checks.py +18 -3
- ultralytics/utils/dist.py +9 -3
- ultralytics/utils/loss.py +4 -5
- ultralytics/utils/tal.py +15 -5
- ultralytics/utils/torch_utils.py +2 -1
- {ultralytics_opencv_headless-8.4.7.dist-info → ultralytics_opencv_headless-8.4.9.dist-info}/METADATA +3 -3
- {ultralytics_opencv_headless-8.4.7.dist-info → ultralytics_opencv_headless-8.4.9.dist-info}/RECORD +36 -36
- {ultralytics_opencv_headless-8.4.7.dist-info → ultralytics_opencv_headless-8.4.9.dist-info}/WHEEL +1 -1
- {ultralytics_opencv_headless-8.4.7.dist-info → ultralytics_opencv_headless-8.4.9.dist-info}/entry_points.txt +0 -0
- {ultralytics_opencv_headless-8.4.7.dist-info → ultralytics_opencv_headless-8.4.9.dist-info}/licenses/LICENSE +0 -0
- {ultralytics_opencv_headless-8.4.7.dist-info → ultralytics_opencv_headless-8.4.9.dist-info}/top_level.txt +0 -0
tests/test_cli.py
CHANGED
|
@@ -34,19 +34,26 @@ def test_train(task: str, model: str, data: str) -> None:
|
|
|
34
34
|
@pytest.mark.parametrize("task,model,data", TASK_MODEL_DATA)
|
|
35
35
|
def test_val(task: str, model: str, data: str) -> None:
|
|
36
36
|
"""Test YOLO validation process for specified task, model, and data using a shell command."""
|
|
37
|
-
|
|
37
|
+
for end2end in {False, True}:
|
|
38
|
+
run(
|
|
39
|
+
f"yolo val {task} model={model} data={data} imgsz=32 save_txt save_json visualize end2end={end2end} max_det=100 agnostic_nms"
|
|
40
|
+
)
|
|
38
41
|
|
|
39
42
|
|
|
40
43
|
@pytest.mark.parametrize("task,model,data", TASK_MODEL_DATA)
|
|
41
44
|
def test_predict(task: str, model: str, data: str) -> None:
|
|
42
45
|
"""Test YOLO prediction on provided sample assets for specified task and model."""
|
|
43
|
-
|
|
46
|
+
for end2end in {False, True}:
|
|
47
|
+
run(
|
|
48
|
+
f"yolo {task} predict model={model} source={ASSETS} imgsz=32 save save_crop save_txt visualize end2end={end2end} max_det=100"
|
|
49
|
+
)
|
|
44
50
|
|
|
45
51
|
|
|
46
52
|
@pytest.mark.parametrize("model", MODELS)
|
|
47
53
|
def test_export(model: str) -> None:
|
|
48
54
|
"""Test exporting a YOLO model to TorchScript format."""
|
|
49
|
-
|
|
55
|
+
for end2end in {False, True}:
|
|
56
|
+
run(f"yolo export model={model} format=torchscript imgsz=32 end2end={end2end} max_det=100")
|
|
50
57
|
|
|
51
58
|
|
|
52
59
|
@pytest.mark.skipif(not TORCH_1_11, reason="RTDETR requires torch>=1.11")
|
tests/test_cuda.py
CHANGED
|
@@ -120,7 +120,7 @@ def test_train():
|
|
|
120
120
|
device = tuple(DEVICES) if len(DEVICES) > 1 else DEVICES[0]
|
|
121
121
|
# NVIDIA Jetson only has one GPU and therefore skipping checks
|
|
122
122
|
if not IS_JETSON:
|
|
123
|
-
results = YOLO(MODEL).train(data="coco8.yaml", imgsz=64, epochs=1, device=device, batch=15)
|
|
123
|
+
results = YOLO(MODEL).train(data="coco8.yaml", imgsz=64, epochs=1, device=device, batch=15, compile=True)
|
|
124
124
|
results = YOLO(MODEL).train(data="coco128.yaml", imgsz=64, epochs=1, device=device, batch=15, val=False)
|
|
125
125
|
visible = eval(os.environ["CUDA_VISIBLE_DEVICES"])
|
|
126
126
|
assert visible == device, f"Passed GPUs '{device}', but used GPUs '{visible}'"
|
tests/test_exports.py
CHANGED
|
@@ -16,38 +16,42 @@ from ultralytics.utils import ARM64, IS_RASPBERRYPI, LINUX, MACOS, MACOS_VERSION
|
|
|
16
16
|
from ultralytics.utils.torch_utils import TORCH_1_10, TORCH_1_11, TORCH_1_13, TORCH_2_0, TORCH_2_1, TORCH_2_8, TORCH_2_9
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
|
|
19
|
+
@pytest.mark.parametrize("end2end", [False, True])
|
|
20
|
+
def test_export_torchscript(end2end):
|
|
20
21
|
"""Test YOLO model export to TorchScript format for compatibility and correctness."""
|
|
21
|
-
file = YOLO(MODEL).export(format="torchscript", optimize=False, imgsz=32)
|
|
22
|
+
file = YOLO(MODEL).export(format="torchscript", optimize=False, imgsz=32, end2end=end2end)
|
|
22
23
|
YOLO(file)(SOURCE, imgsz=32) # exported model inference
|
|
23
24
|
|
|
24
25
|
|
|
25
|
-
|
|
26
|
+
@pytest.mark.parametrize("end2end", [False, True])
|
|
27
|
+
def test_export_onnx(end2end):
|
|
26
28
|
"""Test YOLO model export to ONNX format with dynamic axes."""
|
|
27
|
-
file = YOLO(MODEL).export(format="onnx", dynamic=True, imgsz=32)
|
|
29
|
+
file = YOLO(MODEL).export(format="onnx", dynamic=True, imgsz=32, end2end=end2end)
|
|
28
30
|
YOLO(file)(SOURCE, imgsz=32) # exported model inference
|
|
29
31
|
|
|
30
32
|
|
|
31
33
|
@pytest.mark.skipif(not TORCH_2_1, reason="OpenVINO requires torch>=2.1")
|
|
32
|
-
|
|
34
|
+
@pytest.mark.parametrize("end2end", [False, True])
|
|
35
|
+
def test_export_openvino(end2end):
|
|
33
36
|
"""Test YOLO export to OpenVINO format for model inference compatibility."""
|
|
34
|
-
file = YOLO(MODEL).export(format="openvino", imgsz=32)
|
|
37
|
+
file = YOLO(MODEL).export(format="openvino", imgsz=32, end2end=end2end)
|
|
35
38
|
YOLO(file)(SOURCE, imgsz=32) # exported model inference
|
|
36
39
|
|
|
37
40
|
|
|
38
41
|
@pytest.mark.slow
|
|
39
42
|
@pytest.mark.skipif(not TORCH_2_1, reason="OpenVINO requires torch>=2.1")
|
|
40
43
|
@pytest.mark.parametrize(
|
|
41
|
-
"task, dynamic, int8, half, batch, nms",
|
|
44
|
+
"task, dynamic, int8, half, batch, nms, end2end",
|
|
42
45
|
[ # generate all combinations except for exclusion cases
|
|
43
|
-
(task, dynamic, int8, half, batch, nms)
|
|
44
|
-
for task, dynamic, int8, half, batch, nms in product(
|
|
45
|
-
TASKS, [True, False], [True, False], [True, False], [1, 2], [True, False]
|
|
46
|
+
(task, dynamic, int8, half, batch, nms, end2end)
|
|
47
|
+
for task, dynamic, int8, half, batch, nms, end2end in product(
|
|
48
|
+
TASKS, [True, False], [True, False], [True, False], [1, 2], [True, False], [True]
|
|
46
49
|
)
|
|
47
|
-
if not ((int8 and half) or (task == "classify" and nms))
|
|
50
|
+
if not ((int8 and half) or (task == "classify" and nms) or (end2end and nms))
|
|
48
51
|
],
|
|
49
52
|
)
|
|
50
|
-
|
|
53
|
+
# disable end2end=False test for now due to github runner OOM during openvino tests
|
|
54
|
+
def test_export_openvino_matrix(task, dynamic, int8, half, batch, nms, end2end):
|
|
51
55
|
"""Test YOLO model export to OpenVINO under various configuration matrix conditions."""
|
|
52
56
|
file = YOLO(TASK2MODEL[task]).export(
|
|
53
57
|
format="openvino",
|
|
@@ -58,6 +62,7 @@ def test_export_openvino_matrix(task, dynamic, int8, half, batch, nms):
|
|
|
58
62
|
batch=batch,
|
|
59
63
|
data=TASK2DATA[task],
|
|
60
64
|
nms=nms,
|
|
65
|
+
end2end=end2end,
|
|
61
66
|
)
|
|
62
67
|
if WINDOWS:
|
|
63
68
|
# Use unique filenames due to Windows file permissions bug possibly due to latent threaded use
|
|
@@ -70,19 +75,27 @@ def test_export_openvino_matrix(task, dynamic, int8, half, batch, nms):
|
|
|
70
75
|
|
|
71
76
|
@pytest.mark.slow
|
|
72
77
|
@pytest.mark.parametrize(
|
|
73
|
-
"task, dynamic, int8, half, batch, simplify, nms",
|
|
78
|
+
"task, dynamic, int8, half, batch, simplify, nms, end2end",
|
|
74
79
|
[ # generate all combinations except for exclusion cases
|
|
75
|
-
(task, dynamic, int8, half, batch, simplify, nms)
|
|
76
|
-
for task, dynamic, int8, half, batch, simplify, nms in product(
|
|
77
|
-
TASKS, [True, False], [False], [False], [1, 2], [True, False], [True, False]
|
|
80
|
+
(task, dynamic, int8, half, batch, simplify, nms, end2end)
|
|
81
|
+
for task, dynamic, int8, half, batch, simplify, nms, end2end in product(
|
|
82
|
+
TASKS, [True, False], [False], [False], [1, 2], [True, False], [True, False], [True, False]
|
|
78
83
|
)
|
|
79
|
-
if not ((int8 and half) or (task == "classify" and nms) or (nms and not TORCH_1_13))
|
|
84
|
+
if not ((int8 and half) or (task == "classify" and nms) or (nms and not TORCH_1_13) or (end2end and nms))
|
|
80
85
|
],
|
|
81
86
|
)
|
|
82
|
-
def test_export_onnx_matrix(task, dynamic, int8, half, batch, simplify, nms):
|
|
87
|
+
def test_export_onnx_matrix(task, dynamic, int8, half, batch, simplify, nms, end2end):
|
|
83
88
|
"""Test YOLO export to ONNX format with various configurations and parameters."""
|
|
84
89
|
file = YOLO(TASK2MODEL[task]).export(
|
|
85
|
-
format="onnx",
|
|
90
|
+
format="onnx",
|
|
91
|
+
imgsz=32,
|
|
92
|
+
dynamic=dynamic,
|
|
93
|
+
int8=int8,
|
|
94
|
+
half=half,
|
|
95
|
+
batch=batch,
|
|
96
|
+
simplify=simplify,
|
|
97
|
+
nms=nms,
|
|
98
|
+
end2end=end2end,
|
|
86
99
|
)
|
|
87
100
|
YOLO(file)([SOURCE] * batch, imgsz=64 if dynamic else 32) # exported model inference
|
|
88
101
|
Path(file).unlink() # cleanup
|
|
@@ -90,19 +103,19 @@ def test_export_onnx_matrix(task, dynamic, int8, half, batch, simplify, nms):
|
|
|
90
103
|
|
|
91
104
|
@pytest.mark.slow
|
|
92
105
|
@pytest.mark.parametrize(
|
|
93
|
-
"task, dynamic, int8, half, batch, nms",
|
|
106
|
+
"task, dynamic, int8, half, batch, nms, end2end",
|
|
94
107
|
[ # generate all combinations except for exclusion cases
|
|
95
|
-
(task, dynamic, int8, half, batch, nms)
|
|
96
|
-
for task, dynamic, int8, half, batch, nms in product(
|
|
97
|
-
TASKS, [False, True], [False], [False, True], [1, 2], [True, False]
|
|
108
|
+
(task, dynamic, int8, half, batch, nms, end2end)
|
|
109
|
+
for task, dynamic, int8, half, batch, nms, end2end in product(
|
|
110
|
+
TASKS, [False, True], [False], [False, True], [1, 2], [True, False], [True, False]
|
|
98
111
|
)
|
|
99
|
-
if not (task == "classify" and nms)
|
|
112
|
+
if not ((task == "classify" and nms) or (end2end and nms))
|
|
100
113
|
],
|
|
101
114
|
)
|
|
102
|
-
def test_export_torchscript_matrix(task, dynamic, int8, half, batch, nms):
|
|
115
|
+
def test_export_torchscript_matrix(task, dynamic, int8, half, batch, nms, end2end):
|
|
103
116
|
"""Test YOLO model export to TorchScript format under varied configurations."""
|
|
104
117
|
file = YOLO(TASK2MODEL[task]).export(
|
|
105
|
-
format="torchscript", imgsz=32, dynamic=dynamic, int8=int8, half=half, batch=batch, nms=nms
|
|
118
|
+
format="torchscript", imgsz=32, dynamic=dynamic, int8=int8, half=half, batch=batch, nms=nms, end2end=end2end
|
|
106
119
|
)
|
|
107
120
|
YOLO(file)([SOURCE] * batch, imgsz=64 if dynamic else 32) # exported model inference
|
|
108
121
|
Path(file).unlink() # cleanup
|
|
@@ -116,19 +129,20 @@ def test_export_torchscript_matrix(task, dynamic, int8, half, batch, nms):
|
|
|
116
129
|
MACOS and MACOS_VERSION and MACOS_VERSION >= "15", reason="CoreML YOLO26 matrix test crashes on macOS 15+"
|
|
117
130
|
)
|
|
118
131
|
@pytest.mark.parametrize(
|
|
119
|
-
"task, dynamic, int8, half, nms, batch",
|
|
132
|
+
"task, dynamic, int8, half, nms, batch, end2end",
|
|
120
133
|
[ # generate all combinations except for exclusion cases
|
|
121
|
-
(task, dynamic, int8, half, nms, batch)
|
|
122
|
-
for task, dynamic, int8, half, nms, batch in product(
|
|
123
|
-
TASKS, [True, False], [True, False], [True, False], [True, False], [1]
|
|
134
|
+
(task, dynamic, int8, half, nms, batch, end2end)
|
|
135
|
+
for task, dynamic, int8, half, nms, batch, end2end in product(
|
|
136
|
+
TASKS, [True, False], [True, False], [True, False], [True, False], [1], [True, False]
|
|
124
137
|
)
|
|
125
138
|
if not (int8 and half)
|
|
126
139
|
and not (task != "detect" and nms)
|
|
127
140
|
and not (dynamic and nms)
|
|
128
141
|
and not (task == "classify" and dynamic)
|
|
142
|
+
and not (end2end and nms)
|
|
129
143
|
],
|
|
130
144
|
)
|
|
131
|
-
def test_export_coreml_matrix(task, dynamic, int8, half, nms, batch):
|
|
145
|
+
def test_export_coreml_matrix(task, dynamic, int8, half, nms, batch, end2end):
|
|
132
146
|
"""Test YOLO export to CoreML format with various parameter configurations."""
|
|
133
147
|
file = YOLO(TASK2MODEL[task]).export(
|
|
134
148
|
format="coreml",
|
|
@@ -138,6 +152,7 @@ def test_export_coreml_matrix(task, dynamic, int8, half, nms, batch):
|
|
|
138
152
|
half=half,
|
|
139
153
|
batch=batch,
|
|
140
154
|
nms=nms,
|
|
155
|
+
end2end=end2end,
|
|
141
156
|
)
|
|
142
157
|
YOLO(file)([SOURCE] * batch, imgsz=32) # exported model inference
|
|
143
158
|
shutil.rmtree(file) # cleanup
|
|
@@ -152,19 +167,25 @@ def test_export_coreml_matrix(task, dynamic, int8, half, nms, batch):
|
|
|
152
167
|
reason="Test disabled as TF suffers from install conflicts on Windows, macOS and Raspberry Pi",
|
|
153
168
|
)
|
|
154
169
|
@pytest.mark.parametrize(
|
|
155
|
-
"task, dynamic, int8, half, batch, nms",
|
|
170
|
+
"task, dynamic, int8, half, batch, nms, end2end",
|
|
156
171
|
[ # generate all combinations except for exclusion cases
|
|
157
|
-
(task, dynamic, int8, half, batch, nms)
|
|
158
|
-
for task, dynamic, int8, half, batch, nms in product(
|
|
159
|
-
TASKS, [False], [True, False], [True, False], [1], [True, False]
|
|
172
|
+
(task, dynamic, int8, half, batch, nms, end2end)
|
|
173
|
+
for task, dynamic, int8, half, batch, nms, end2end in product(
|
|
174
|
+
TASKS, [False], [True, False], [True, False], [1], [True, False], [True, False]
|
|
175
|
+
)
|
|
176
|
+
if not (
|
|
177
|
+
(int8 and half)
|
|
178
|
+
or (task == "classify" and nms)
|
|
179
|
+
or (ARM64 and nms)
|
|
180
|
+
or (nms and not TORCH_1_13)
|
|
181
|
+
or (end2end and nms)
|
|
160
182
|
)
|
|
161
|
-
if not ((int8 and half) or (task == "classify" and nms) or (ARM64 and nms) or (nms and not TORCH_1_13))
|
|
162
183
|
],
|
|
163
184
|
)
|
|
164
|
-
def test_export_tflite_matrix(task, dynamic, int8, half, batch, nms):
|
|
185
|
+
def test_export_tflite_matrix(task, dynamic, int8, half, batch, nms, end2end):
|
|
165
186
|
"""Test YOLO export to TFLite format considering various export configurations."""
|
|
166
187
|
file = YOLO(TASK2MODEL[task]).export(
|
|
167
|
-
format="tflite", imgsz=32, dynamic=dynamic, int8=int8, half=half, batch=batch, nms=nms
|
|
188
|
+
format="tflite", imgsz=32, dynamic=dynamic, int8=int8, half=half, batch=batch, nms=nms, end2end=end2end
|
|
168
189
|
)
|
|
169
190
|
YOLO(file)([SOURCE] * batch, imgsz=32) # exported model inference
|
|
170
191
|
Path(file).unlink() # cleanup
|
|
@@ -225,16 +246,16 @@ def test_export_mnn():
|
|
|
225
246
|
@pytest.mark.slow
|
|
226
247
|
@pytest.mark.skipif(not TORCH_1_10, reason="MNN export requires torch>=1.10")
|
|
227
248
|
@pytest.mark.parametrize(
|
|
228
|
-
"task, int8, half, batch",
|
|
249
|
+
"task, int8, half, batch, end2end",
|
|
229
250
|
[ # generate all combinations except for exclusion cases
|
|
230
|
-
(task, int8, half, batch)
|
|
231
|
-
for task, int8, half, batch in product(TASKS, [True, False], [True, False], [1, 2])
|
|
251
|
+
(task, int8, half, batch, end2end)
|
|
252
|
+
for task, int8, half, batch, end2end in product(TASKS, [True, False], [True, False], [1, 2], [True, False])
|
|
232
253
|
if not (int8 and half)
|
|
233
254
|
],
|
|
234
255
|
)
|
|
235
|
-
def test_export_mnn_matrix(task, int8, half, batch):
|
|
256
|
+
def test_export_mnn_matrix(task, int8, half, batch, end2end):
|
|
236
257
|
"""Test YOLO export to MNN format considering various export configurations."""
|
|
237
|
-
file = YOLO(TASK2MODEL[task]).export(format="mnn", imgsz=32, int8=int8, half=half, batch=batch)
|
|
258
|
+
file = YOLO(TASK2MODEL[task]).export(format="mnn", imgsz=32, int8=int8, half=half, batch=batch, end2end=end2end)
|
|
238
259
|
YOLO(file)([SOURCE] * batch, imgsz=32) # exported model inference
|
|
239
260
|
Path(file).unlink() # cleanup
|
|
240
261
|
|
tests/test_python.py
CHANGED
|
@@ -168,13 +168,13 @@ def test_predict_all_image_formats():
|
|
|
168
168
|
dataset_path = Path(data["path"])
|
|
169
169
|
|
|
170
170
|
# Collect all images from train and val
|
|
171
|
-
|
|
172
|
-
images
|
|
171
|
+
expected = {"avif", "bmp", "dng", "heic", "jp2", "jpeg", "jpg", "mpo", "png", "tif", "tiff", "webp"}
|
|
172
|
+
images = [im for im in (dataset_path / "images" / "train").glob("*.*") if im.suffix.lower().lstrip(".") in expected]
|
|
173
|
+
images += [im for im in (dataset_path / "images" / "val").glob("*.*") if im.suffix.lower().lstrip(".") in expected]
|
|
173
174
|
assert len(images) == 12, f"Expected 12 images, found {len(images)}"
|
|
174
175
|
|
|
175
176
|
# Verify all format extensions are represented
|
|
176
177
|
extensions = {img.suffix.lower().lstrip(".") for img in images}
|
|
177
|
-
expected = {"avif", "bmp", "dng", "heic", "jp2", "jpeg", "jpg", "mpo", "png", "tif", "tiff", "webp"}
|
|
178
178
|
assert extensions == expected, f"Missing formats: {expected - extensions}"
|
|
179
179
|
|
|
180
180
|
# Run inference on all images
|
|
@@ -697,7 +697,7 @@ def test_yolo_world():
|
|
|
697
697
|
checks.IS_PYTHON_3_8 and LINUX and ARM64,
|
|
698
698
|
reason="YOLOE with CLIP is not supported in Python 3.8 and aarch64 Linux",
|
|
699
699
|
)
|
|
700
|
-
def test_yoloe():
|
|
700
|
+
def test_yoloe(tmp_path):
|
|
701
701
|
"""Test YOLOE models with MobileClip support."""
|
|
702
702
|
# Predict
|
|
703
703
|
# text-prompts
|
|
@@ -739,14 +739,18 @@ def test_yoloe():
|
|
|
739
739
|
imgsz=32,
|
|
740
740
|
)
|
|
741
741
|
# Train, from scratch
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
742
|
+
data_dict = dict(train=dict(yolo_data=["coco128-seg.yaml"]), val=dict(yolo_data=["coco128-seg.yaml"]))
|
|
743
|
+
data_yaml = tmp_path / "yoloe-data.yaml"
|
|
744
|
+
YAML.save(data=data_dict, file=data_yaml)
|
|
745
|
+
for data in [data_dict, data_yaml]:
|
|
746
|
+
model = YOLOE("yoloe-11s-seg.yaml")
|
|
747
|
+
model.train(
|
|
748
|
+
data=data,
|
|
749
|
+
epochs=1,
|
|
750
|
+
close_mosaic=1,
|
|
751
|
+
trainer=YOLOESegTrainerFromScratch,
|
|
752
|
+
imgsz=32,
|
|
753
|
+
)
|
|
750
754
|
|
|
751
755
|
# prompt-free
|
|
752
756
|
# predict
|
ultralytics/__init__.py
CHANGED
ultralytics/cfg/__init__.py
CHANGED
ultralytics/cfg/default.yaml
CHANGED
|
@@ -56,6 +56,7 @@ max_det: 300 # (int) maximum number of detections per image
|
|
|
56
56
|
half: False # (bool) use half precision (FP16) if supported
|
|
57
57
|
dnn: False # (bool) use OpenCV DNN for ONNX inference
|
|
58
58
|
plots: True # (bool) save plots and images during train/val
|
|
59
|
+
end2end: # (bool, optional) whether to use the end2end head (YOLO26, YOLOv10) for predict/val/export
|
|
59
60
|
|
|
60
61
|
# Predict settings -----------------------------------------------------------------------------------------------------
|
|
61
62
|
source: # (str, optional) path/dir/URL/stream for images or videos; e.g. 'ultralytics/assets' or '0' for webcam
|
ultralytics/data/augment.py
CHANGED
|
@@ -1745,7 +1745,7 @@ class CopyPaste(BaseMixTransform):
|
|
|
1745
1745
|
instances.convert_bbox(format="xyxy")
|
|
1746
1746
|
instances.denormalize(w, h)
|
|
1747
1747
|
|
|
1748
|
-
im_new = np.zeros(im.shape, np.uint8)
|
|
1748
|
+
im_new = np.zeros(im.shape[:2], np.uint8)
|
|
1749
1749
|
instances2 = labels2.pop("instances", None)
|
|
1750
1750
|
if instances2 is None:
|
|
1751
1751
|
instances2 = deepcopy(instances)
|
|
@@ -1758,7 +1758,7 @@ class CopyPaste(BaseMixTransform):
|
|
|
1758
1758
|
for j in indexes[: round(self.p * n)]:
|
|
1759
1759
|
cls = np.concatenate((cls, labels2.get("cls", cls)[[j]]), axis=0)
|
|
1760
1760
|
instances = Instances.concatenate((instances, instances2[[j]]), axis=0)
|
|
1761
|
-
cv2.drawContours(im_new, instances2.segments[[j]].astype(np.int32), -1,
|
|
1761
|
+
cv2.drawContours(im_new, instances2.segments[[j]].astype(np.int32), -1, 1, cv2.FILLED)
|
|
1762
1762
|
|
|
1763
1763
|
result = labels2.get("img", cv2.flip(im, 1)) # augment segments
|
|
1764
1764
|
if result.ndim == 2: # cv2.flip would eliminate the last dimension for grayscale images
|
ultralytics/data/converter.py
CHANGED
|
@@ -796,6 +796,17 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
|
|
|
796
796
|
# Check if this is a classification dataset
|
|
797
797
|
is_classification = dataset_record.get("task") == "classify"
|
|
798
798
|
class_names = {int(k): v for k, v in dataset_record.get("class_names", {}).items()}
|
|
799
|
+
len(class_names)
|
|
800
|
+
|
|
801
|
+
# Validate required fields before downloading images
|
|
802
|
+
task = dataset_record.get("task", "detect")
|
|
803
|
+
if not is_classification:
|
|
804
|
+
if "train" not in splits:
|
|
805
|
+
raise ValueError(f"Dataset missing required 'train' split. Found splits: {sorted(splits)}")
|
|
806
|
+
if "val" not in splits and "test" not in splits:
|
|
807
|
+
raise ValueError(f"Dataset missing required 'val' split. Found splits: {sorted(splits)}")
|
|
808
|
+
if task == "pose" and "kpt_shape" not in dataset_record:
|
|
809
|
+
raise ValueError("Pose dataset missing required 'kpt_shape'. See https://docs.ultralytics.com/datasets/pose/")
|
|
799
810
|
|
|
800
811
|
# Create base directories
|
|
801
812
|
dataset_dir.mkdir(parents=True, exist_ok=True)
|
ultralytics/engine/exporter.py
CHANGED
|
@@ -87,7 +87,6 @@ from ultralytics.utils import (
|
|
|
87
87
|
IS_COLAB,
|
|
88
88
|
IS_DEBIAN_BOOKWORM,
|
|
89
89
|
IS_DEBIAN_TRIXIE,
|
|
90
|
-
IS_DOCKER,
|
|
91
90
|
IS_JETSON,
|
|
92
91
|
IS_RASPBERRYPI,
|
|
93
92
|
IS_UBUNTU,
|
|
@@ -108,6 +107,7 @@ from ultralytics.utils.checks import (
|
|
|
108
107
|
IS_PYTHON_3_10,
|
|
109
108
|
IS_PYTHON_MINIMUM_3_9,
|
|
110
109
|
check_apt_requirements,
|
|
110
|
+
check_executorch_requirements,
|
|
111
111
|
check_imgsz,
|
|
112
112
|
check_requirements,
|
|
113
113
|
check_version,
|
|
@@ -404,6 +404,13 @@ class Exporter:
|
|
|
404
404
|
if not hasattr(model, "names"):
|
|
405
405
|
model.names = default_class_names()
|
|
406
406
|
model.names = check_class_names(model.names)
|
|
407
|
+
if hasattr(model, "end2end"):
|
|
408
|
+
if self.args.end2end is not None:
|
|
409
|
+
model.end2end = self.args.end2end
|
|
410
|
+
if rknn or ncnn or executorch or paddle or imx:
|
|
411
|
+
# Disable end2end branch for certain export formats as they do not support topk
|
|
412
|
+
model.end2end = False
|
|
413
|
+
LOGGER.warning(f"{fmt.upper()} export does not support end2end models, disabling end2end branch.")
|
|
407
414
|
if self.args.half and self.args.int8:
|
|
408
415
|
LOGGER.warning("half=True and int8=True are mutually exclusive, setting half=False.")
|
|
409
416
|
self.args.half = False
|
|
@@ -463,9 +470,6 @@ class Exporter:
|
|
|
463
470
|
)
|
|
464
471
|
if tfjs and (ARM64 and LINUX):
|
|
465
472
|
raise SystemError("TF.js exports are not currently supported on ARM64 Linux")
|
|
466
|
-
if ncnn and hasattr(model.model[-1], "one2one_cv2"):
|
|
467
|
-
del model.model[-1].one2one_cv2 # Disable end2end branch for NCNN export as it does not support topk
|
|
468
|
-
LOGGER.warning("NCNN export does not support end2end models, disabling end2end branch.")
|
|
469
473
|
# Recommend OpenVINO if export and Intel CPU
|
|
470
474
|
if SETTINGS.get("openvino_msg"):
|
|
471
475
|
if is_intel():
|
|
@@ -509,6 +513,7 @@ class Exporter:
|
|
|
509
513
|
# Clamp max_det to anchor count for small image sizes (required for TensorRT compatibility)
|
|
510
514
|
anchors = sum(int(self.imgsz[0] / s) * int(self.imgsz[1] / s) for s in model.stride.tolist())
|
|
511
515
|
m.max_det = min(self.args.max_det, anchors)
|
|
516
|
+
m.agnostic_nms = self.args.agnostic_nms
|
|
512
517
|
m.xyxy = self.args.nms and not coreml
|
|
513
518
|
m.shape = None # reset cached shape for new export input size
|
|
514
519
|
if hasattr(model, "pe") and hasattr(m, "fuse"): # for YOLOE models
|
|
@@ -549,6 +554,7 @@ class Exporter:
|
|
|
549
554
|
"names": model.names,
|
|
550
555
|
"args": {k: v for k, v in self.args if k in fmt_keys},
|
|
551
556
|
"channels": model.yaml.get("channels", 3),
|
|
557
|
+
"end2end": getattr(model, "end2end", False),
|
|
552
558
|
} # model metadata
|
|
553
559
|
if dla is not None:
|
|
554
560
|
self.metadata["dla"] = dla # make sure `AutoBackend` uses correct dla device if it has one
|
|
@@ -556,8 +562,6 @@ class Exporter:
|
|
|
556
562
|
self.metadata["kpt_shape"] = model.model[-1].kpt_shape
|
|
557
563
|
if hasattr(model, "kpt_names"):
|
|
558
564
|
self.metadata["kpt_names"] = model.kpt_names
|
|
559
|
-
if getattr(model.model[-1], "end2end", False):
|
|
560
|
-
self.metadata["end2end"] = True
|
|
561
565
|
|
|
562
566
|
LOGGER.info(
|
|
563
567
|
f"\n{colorstr('PyTorch:')} starting from '{file}' with input shape {tuple(im.shape)} BCHW and "
|
|
@@ -1045,7 +1049,7 @@ class Exporter:
|
|
|
1045
1049
|
"onnx_graphsurgeon>=0.3.26", # required by 'onnx2tf' package
|
|
1046
1050
|
"ai-edge-litert>=1.2.0" + (",<1.4.0" if MACOS else ""), # required by 'onnx2tf' package
|
|
1047
1051
|
"onnx>=1.12.0,<2.0.0",
|
|
1048
|
-
"onnx2tf>=1.26.3",
|
|
1052
|
+
"onnx2tf>=1.26.3,<1.29.0", # pin to avoid h5py build issues on aarch64
|
|
1049
1053
|
"onnxslim>=0.1.71",
|
|
1050
1054
|
"onnxruntime-gpu" if cuda else "onnxruntime",
|
|
1051
1055
|
"protobuf>=5",
|
|
@@ -1193,16 +1197,9 @@ class Exporter:
|
|
|
1193
1197
|
following Ultralytics conventions.
|
|
1194
1198
|
"""
|
|
1195
1199
|
LOGGER.info(f"\n{prefix} starting export with ExecuTorch...")
|
|
1196
|
-
assert TORCH_2_9, f"ExecuTorch
|
|
1200
|
+
assert TORCH_2_9, f"ExecuTorch requires torch>=2.9.0 but torch=={TORCH_VERSION} is installed"
|
|
1197
1201
|
|
|
1198
|
-
|
|
1199
|
-
if LINUX and ARM64 and IS_DOCKER:
|
|
1200
|
-
check_requirements("packaging>=22.0")
|
|
1201
|
-
|
|
1202
|
-
check_requirements("ruamel.yaml<0.19.0")
|
|
1203
|
-
check_requirements("executorch==1.0.1", "flatbuffers")
|
|
1204
|
-
# Pin numpy to avoid coremltools errors with numpy>=2.4.0, must be separate
|
|
1205
|
-
check_requirements("numpy<=2.3.5")
|
|
1202
|
+
check_executorch_requirements()
|
|
1206
1203
|
|
|
1207
1204
|
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
|
|
1208
1205
|
from executorch.exir import to_edge_transform_and_lower
|
ultralytics/engine/predictor.py
CHANGED
|
@@ -387,6 +387,11 @@ class BasePredictor:
|
|
|
387
387
|
model (str | Path | torch.nn.Module, optional): Model to load or use.
|
|
388
388
|
verbose (bool): Whether to print verbose output.
|
|
389
389
|
"""
|
|
390
|
+
if hasattr(model, "end2end"):
|
|
391
|
+
if self.args.end2end is not None:
|
|
392
|
+
model.end2end = self.args.end2end
|
|
393
|
+
if model.end2end:
|
|
394
|
+
model.set_head_attr(max_det=self.args.max_det, agnostic_nms=self.args.agnostic_nms)
|
|
390
395
|
self.model = AutoBackend(
|
|
391
396
|
model=model or self.args.model,
|
|
392
397
|
device=select_device(self.args.device, verbose=verbose),
|
ultralytics/engine/trainer.py
CHANGED
|
@@ -948,7 +948,7 @@ class BaseTrainer:
|
|
|
948
948
|
)
|
|
949
949
|
nc = self.data.get("nc", 10) # number of classes
|
|
950
950
|
lr_fit = round(0.002 * 5 / (4 + nc), 6) # lr0 fit equation to 6 decimal places
|
|
951
|
-
name, lr, momentum = ("MuSGD", 0.01 if iterations > 10000 else lr_fit, 0.9)
|
|
951
|
+
name, lr, momentum = ("MuSGD", 0.01, 0.9) if iterations > 10000 else ("AdamW", lr_fit, 0.9)
|
|
952
952
|
self.args.warmup_bias_lr = 0.0 # no higher than 0.01 for Adam
|
|
953
953
|
|
|
954
954
|
use_muon = name == "MuSGD"
|
|
@@ -985,14 +985,14 @@ class BaseTrainer:
|
|
|
985
985
|
g[2] = {"params": g[2], **optim_args, "param_group": "bias"}
|
|
986
986
|
g[0] = {"params": g[0], **optim_args, "weight_decay": decay, "param_group": "weight"}
|
|
987
987
|
g[1] = {"params": g[1], **optim_args, "weight_decay": 0.0, "param_group": "bn"}
|
|
988
|
-
muon, sgd = (0.
|
|
988
|
+
muon, sgd = (0.2, 1.0)
|
|
989
989
|
if use_muon:
|
|
990
990
|
num_params[0] = len(g[3]) # update number of params
|
|
991
991
|
g[3] = {"params": g[3], **optim_args, "weight_decay": decay, "use_muon": True, "param_group": "muon"}
|
|
992
992
|
import re
|
|
993
993
|
|
|
994
994
|
# higher lr for certain parameters in MuSGD when fine-tuning
|
|
995
|
-
pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg
|
|
995
|
+
pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg")
|
|
996
996
|
g_ = [] # new param groups
|
|
997
997
|
for x in g:
|
|
998
998
|
p = x.pop("params")
|
ultralytics/engine/tuner.py
CHANGED
|
@@ -26,7 +26,7 @@ from datetime import datetime
|
|
|
26
26
|
import numpy as np
|
|
27
27
|
import torch
|
|
28
28
|
|
|
29
|
-
from ultralytics.cfg import get_cfg, get_save_dir
|
|
29
|
+
from ultralytics.cfg import CFG_INT_KEYS, get_cfg, get_save_dir
|
|
30
30
|
from ultralytics.utils import DEFAULT_CFG, LOGGER, YAML, callbacks, colorstr, remove_colorstr
|
|
31
31
|
from ultralytics.utils.checks import check_requirements
|
|
32
32
|
from ultralytics.utils.patches import torch_load
|
|
@@ -448,7 +448,7 @@ class Tuner:
|
|
|
448
448
|
f"{self.prefix}Best fitness model is {best_save_dir}"
|
|
449
449
|
)
|
|
450
450
|
LOGGER.info("\n" + header)
|
|
451
|
-
data = {k:
|
|
451
|
+
data = {k: int(v) if k in CFG_INT_KEYS else float(v) for k, v in zip(self.space.keys(), x[best_idx, 1:])}
|
|
452
452
|
YAML.save(
|
|
453
453
|
self.tune_dir / "best_hyperparameters.yaml",
|
|
454
454
|
data=data,
|
ultralytics/engine/validator.py
CHANGED
|
@@ -156,6 +156,11 @@ class BaseValidator:
|
|
|
156
156
|
if str(self.args.model).endswith(".yaml") and model is None:
|
|
157
157
|
LOGGER.warning("validating an untrained model YAML will result in 0 mAP.")
|
|
158
158
|
callbacks.add_integration_callbacks(self)
|
|
159
|
+
if hasattr(model, "end2end"):
|
|
160
|
+
if self.args.end2end is not None:
|
|
161
|
+
model.end2end = self.args.end2end
|
|
162
|
+
if model.end2end:
|
|
163
|
+
model.set_head_attr(max_det=self.args.max_det, agnostic_nms=self.args.agnostic_nms)
|
|
159
164
|
model = AutoBackend(
|
|
160
165
|
model=model or self.args.model,
|
|
161
166
|
device=select_device(self.args.device) if RANK == -1 else torch.device("cuda", RANK),
|
|
@@ -2619,6 +2619,7 @@ class SAM3VideoSemanticPredictor(SAM3SemanticPredictor):
|
|
|
2619
2619
|
if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list
|
|
2620
2620
|
orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
|
|
2621
2621
|
|
|
2622
|
+
names = []
|
|
2622
2623
|
if len(curr_obj_ids) == 0:
|
|
2623
2624
|
pred_masks, pred_boxes = None, torch.zeros((0, 7), device=self.device)
|
|
2624
2625
|
else:
|
|
@@ -2656,9 +2657,8 @@ class SAM3VideoSemanticPredictor(SAM3SemanticPredictor):
|
|
|
2656
2657
|
background_value=0,
|
|
2657
2658
|
).squeeze(1)
|
|
2658
2659
|
) > 0
|
|
2660
|
+
names = self.model.names or dict(enumerate(str(i) for i in range(pred_boxes[:, 6].int().max())))
|
|
2659
2661
|
|
|
2660
|
-
# names = getattr(self.model, "names", [str(i) for i in range(pred_scores.shape[0])])
|
|
2661
|
-
names = dict(enumerate(str(i) for i in range(pred_boxes.shape[0])))
|
|
2662
2662
|
results = []
|
|
2663
2663
|
for masks, boxes, orig_img, img_path in zip([pred_masks], [pred_boxes], orig_imgs, self.batch[0]):
|
|
2664
2664
|
results.append(Results(orig_img, path=img_path, names=names, masks=masks, boxes=boxes))
|
|
@@ -11,7 +11,7 @@ from ultralytics.data import ClassificationDataset, build_dataloader
|
|
|
11
11
|
from ultralytics.engine.trainer import BaseTrainer
|
|
12
12
|
from ultralytics.models import yolo
|
|
13
13
|
from ultralytics.nn.tasks import ClassificationModel
|
|
14
|
-
from ultralytics.utils import DEFAULT_CFG, RANK
|
|
14
|
+
from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
|
|
15
15
|
from ultralytics.utils.plotting import plot_images
|
|
16
16
|
from ultralytics.utils.torch_utils import is_parallel, torch_distributed_zero_first
|
|
17
17
|
|
|
@@ -138,6 +138,19 @@ class ClassificationTrainer(BaseTrainer):
|
|
|
138
138
|
with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
|
|
139
139
|
dataset = self.build_dataset(dataset_path, mode)
|
|
140
140
|
|
|
141
|
+
# Filter out samples with class indices >= nc (prevents CUDA assertion errors)
|
|
142
|
+
nc = self.data.get("nc", 0)
|
|
143
|
+
dataset_nc = len(dataset.base.classes)
|
|
144
|
+
if nc and dataset_nc > nc:
|
|
145
|
+
extra_classes = dataset.base.classes[nc:]
|
|
146
|
+
original_count = len(dataset.samples)
|
|
147
|
+
dataset.samples = [s for s in dataset.samples if s[1] < nc]
|
|
148
|
+
skipped = original_count - len(dataset.samples)
|
|
149
|
+
LOGGER.warning(
|
|
150
|
+
f"{mode} split has {dataset_nc} classes but model expects {nc}. "
|
|
151
|
+
f"Skipping {skipped} samples from extra classes: {extra_classes}"
|
|
152
|
+
)
|
|
153
|
+
|
|
141
154
|
loader = build_dataloader(dataset, batch_size, self.args.workers, rank=rank, drop_last=self.args.compile)
|
|
142
155
|
# Attach inference transforms
|
|
143
156
|
if mode != "train":
|
|
@@ -73,7 +73,7 @@ class DetectionTrainer(BaseTrainer):
|
|
|
73
73
|
Returns:
|
|
74
74
|
(Dataset): YOLO dataset object configured for the specified mode.
|
|
75
75
|
"""
|
|
76
|
-
gs = max(int(unwrap_model(self.model).stride.max()
|
|
76
|
+
gs = max(int(unwrap_model(self.model).stride.max()), 32)
|
|
77
77
|
return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, rect=mode == "val", stride=gs)
|
|
78
78
|
|
|
79
79
|
def get_dataloader(self, dataset_path: str, batch_size: int = 16, rank: int = 0, mode: str = "train"):
|
|
@@ -92,7 +92,7 @@ class DetectionTrainer(BaseTrainer):
|
|
|
92
92
|
with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
|
|
93
93
|
dataset = self.build_dataset(dataset_path, mode, batch_size)
|
|
94
94
|
shuffle = mode == "train"
|
|
95
|
-
if getattr(dataset, "rect", False) and shuffle:
|
|
95
|
+
if getattr(dataset, "rect", False) and shuffle and not np.all(dataset.batch_shapes == dataset.batch_shapes[0]):
|
|
96
96
|
LOGGER.warning("'rect=True' is incompatible with DataLoader shuffle, setting shuffle=False")
|
|
97
97
|
shuffle = False
|
|
98
98
|
return build_dataloader(
|
|
@@ -145,6 +145,8 @@ class DetectionTrainer(BaseTrainer):
|
|
|
145
145
|
self.model.nc = self.data["nc"] # attach number of classes to model
|
|
146
146
|
self.model.names = self.data["names"] # attach class names to model
|
|
147
147
|
self.model.args = self.args # attach hyperparameters to model
|
|
148
|
+
if getattr(self.model, "end2end"):
|
|
149
|
+
self.model.set_head_attr(max_det=self.args.max_det)
|
|
148
150
|
# TODO: self.model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc
|
|
149
151
|
|
|
150
152
|
def get_model(self, cfg: str | None = None, weights: str | None = None, verbose: bool = True):
|
|
@@ -9,6 +9,7 @@ from typing import Any
|
|
|
9
9
|
from ultralytics.models import yolo
|
|
10
10
|
from ultralytics.nn.tasks import PoseModel
|
|
11
11
|
from ultralytics.utils import DEFAULT_CFG
|
|
12
|
+
from ultralytics.utils.torch_utils import unwrap_model
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
class PoseTrainer(yolo.detect.DetectionTrainer):
|
|
@@ -91,7 +92,7 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
|
|
|
91
92
|
def get_validator(self):
|
|
92
93
|
"""Return an instance of the PoseValidator class for validation."""
|
|
93
94
|
self.loss_names = "box_loss", "pose_loss", "kobj_loss", "cls_loss", "dfl_loss"
|
|
94
|
-
if getattr(self.model.model[-1], "flow_model", None) is not None:
|
|
95
|
+
if getattr(unwrap_model(self.model).model[-1], "flow_model", None) is not None:
|
|
95
96
|
self.loss_names += ("rle_loss",)
|
|
96
97
|
return yolo.pose.PoseValidator(
|
|
97
98
|
self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
|