ultralytics-opencv-headless 8.4.4__py3-none-any.whl → 8.4.8__py3-none-any.whl

This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (41)
  1. tests/test_cli.py +10 -3
  2. tests/test_exports.py +64 -43
  3. tests/test_python.py +40 -11
  4. ultralytics/__init__.py +1 -1
  5. ultralytics/cfg/__init__.py +5 -4
  6. ultralytics/cfg/datasets/coco12-formats.yaml +101 -0
  7. ultralytics/cfg/default.yaml +2 -1
  8. ultralytics/data/augment.py +8 -0
  9. ultralytics/data/converter.py +32 -9
  10. ultralytics/data/utils.py +2 -2
  11. ultralytics/engine/exporter.py +10 -6
  12. ultralytics/engine/predictor.py +5 -0
  13. ultralytics/engine/results.py +8 -3
  14. ultralytics/engine/trainer.py +6 -4
  15. ultralytics/engine/tuner.py +2 -2
  16. ultralytics/engine/validator.py +5 -0
  17. ultralytics/models/sam/predict.py +2 -2
  18. ultralytics/models/yolo/classify/train.py +14 -1
  19. ultralytics/models/yolo/detect/train.py +8 -4
  20. ultralytics/models/yolo/pose/train.py +2 -1
  21. ultralytics/models/yolo/world/train_world.py +21 -1
  22. ultralytics/models/yolo/yoloe/train.py +1 -2
  23. ultralytics/nn/autobackend.py +1 -1
  24. ultralytics/nn/modules/head.py +13 -2
  25. ultralytics/nn/tasks.py +18 -0
  26. ultralytics/solutions/security_alarm.py +1 -1
  27. ultralytics/trackers/byte_tracker.py +7 -7
  28. ultralytics/utils/benchmarks.py +3 -9
  29. ultralytics/utils/callbacks/platform.py +2 -1
  30. ultralytics/utils/callbacks/wb.py +6 -1
  31. ultralytics/utils/dist.py +1 -0
  32. ultralytics/utils/loss.py +18 -9
  33. ultralytics/utils/patches.py +42 -0
  34. ultralytics/utils/tal.py +15 -5
  35. ultralytics/utils/torch_utils.py +1 -1
  36. {ultralytics_opencv_headless-8.4.4.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/METADATA +4 -5
  37. {ultralytics_opencv_headless-8.4.4.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/RECORD +41 -40
  38. {ultralytics_opencv_headless-8.4.4.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/WHEEL +1 -1
  39. {ultralytics_opencv_headless-8.4.4.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/entry_points.txt +0 -0
  40. {ultralytics_opencv_headless-8.4.4.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/licenses/LICENSE +0 -0
  41. {ultralytics_opencv_headless-8.4.4.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/top_level.txt +0 -0
tests/test_cli.py CHANGED
@@ -34,19 +34,26 @@ def test_train(task: str, model: str, data: str) -> None:
 @pytest.mark.parametrize("task,model,data", TASK_MODEL_DATA)
 def test_val(task: str, model: str, data: str) -> None:
     """Test YOLO validation process for specified task, model, and data using a shell command."""
-    run(f"yolo val {task} model={model} data={data} imgsz=32 save_txt save_json visualize")
+    for end2end in {False, True}:
+        run(
+            f"yolo val {task} model={model} data={data} imgsz=32 save_txt save_json visualize end2end={end2end} max_det=100 agnostic_nms"
+        )


 @pytest.mark.parametrize("task,model,data", TASK_MODEL_DATA)
 def test_predict(task: str, model: str, data: str) -> None:
     """Test YOLO prediction on provided sample assets for specified task and model."""
-    run(f"yolo {task} predict model={model} source={ASSETS} imgsz=32 save save_crop save_txt visualize")
+    for end2end in {False, True}:
+        run(
+            f"yolo {task} predict model={model} source={ASSETS} imgsz=32 save save_crop save_txt visualize end2end={end2end} max_det=100"
+        )


 @pytest.mark.parametrize("model", MODELS)
 def test_export(model: str) -> None:
     """Test exporting a YOLO model to TorchScript format."""
-    run(f"yolo export model={model} format=torchscript imgsz=32")
+    for end2end in {False, True}:
+        run(f"yolo export model={model} format=torchscript imgsz=32 end2end={end2end} max_det=100")


 @pytest.mark.skipif(not TORCH_1_11, reason="RTDETR requires torch>=1.11")
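Taken together, these CLI tests sweep the new end2end flag through val, predict, and export. A minimal Python sketch of the same toggle, assuming an end2end-capable checkpoint such as yolo26n.pt (checkpoint and source names here are illustrative, not from the diff):

from ultralytics import YOLO

model = YOLO("yolo26n.pt")  # assumed end2end-capable (YOLO26/YOLOv10-style) weights
for end2end in (False, True):
    # end2end=True selects the NMS-free one-to-one head; end2end=False keeps the NMS path
    model.predict("ultralytics/assets", imgsz=32, end2end=end2end, max_det=100)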
tests/test_exports.py CHANGED
@@ -16,38 +16,42 @@ from ultralytics.utils import ARM64, IS_RASPBERRYPI, LINUX, MACOS, MACOS_VERSION
 from ultralytics.utils.torch_utils import TORCH_1_10, TORCH_1_11, TORCH_1_13, TORCH_2_0, TORCH_2_1, TORCH_2_8, TORCH_2_9


-def test_export_torchscript():
+@pytest.mark.parametrize("end2end", [False, True])
+def test_export_torchscript(end2end):
     """Test YOLO model export to TorchScript format for compatibility and correctness."""
-    file = YOLO(MODEL).export(format="torchscript", optimize=False, imgsz=32)
+    file = YOLO(MODEL).export(format="torchscript", optimize=False, imgsz=32, end2end=end2end)
     YOLO(file)(SOURCE, imgsz=32)  # exported model inference


-def test_export_onnx():
+@pytest.mark.parametrize("end2end", [False, True])
+def test_export_onnx(end2end):
     """Test YOLO model export to ONNX format with dynamic axes."""
-    file = YOLO(MODEL).export(format="onnx", dynamic=True, imgsz=32)
+    file = YOLO(MODEL).export(format="onnx", dynamic=True, imgsz=32, end2end=end2end)
     YOLO(file)(SOURCE, imgsz=32)  # exported model inference


 @pytest.mark.skipif(not TORCH_2_1, reason="OpenVINO requires torch>=2.1")
-def test_export_openvino():
+@pytest.mark.parametrize("end2end", [False, True])
+def test_export_openvino(end2end):
     """Test YOLO export to OpenVINO format for model inference compatibility."""
-    file = YOLO(MODEL).export(format="openvino", imgsz=32)
+    file = YOLO(MODEL).export(format="openvino", imgsz=32, end2end=end2end)
     YOLO(file)(SOURCE, imgsz=32)  # exported model inference


 @pytest.mark.slow
 @pytest.mark.skipif(not TORCH_2_1, reason="OpenVINO requires torch>=2.1")
 @pytest.mark.parametrize(
-    "task, dynamic, int8, half, batch, nms",
+    "task, dynamic, int8, half, batch, nms, end2end",
     [  # generate all combinations except for exclusion cases
-        (task, dynamic, int8, half, batch, nms)
-        for task, dynamic, int8, half, batch, nms in product(
-            TASKS, [True, False], [True, False], [True, False], [1, 2], [True, False]
+        (task, dynamic, int8, half, batch, nms, end2end)
+        for task, dynamic, int8, half, batch, nms, end2end in product(
+            TASKS, [True, False], [True, False], [True, False], [1, 2], [True, False], [True]
         )
-        if not ((int8 and half) or (task == "classify" and nms))
+        if not ((int8 and half) or (task == "classify" and nms) or (end2end and nms))
     ],
 )
-def test_export_openvino_matrix(task, dynamic, int8, half, batch, nms):
+# disable end2end=False test for now due to github runner OOM during openvino tests
+def test_export_openvino_matrix(task, dynamic, int8, half, batch, nms, end2end):
     """Test YOLO model export to OpenVINO under various configuration matrix conditions."""
     file = YOLO(TASK2MODEL[task]).export(
         format="openvino",
@@ -58,6 +62,7 @@ def test_export_openvino_matrix(task, dynamic, int8, half, batch, nms):
         batch=batch,
         data=TASK2DATA[task],
         nms=nms,
+        end2end=end2end,
     )
     if WINDOWS:
         # Use unique filenames due to Windows file permissions bug possibly due to latent threaded use
@@ -70,19 +75,27 @@ def test_export_openvino_matrix(task, dynamic, int8, half, batch, nms):

 @pytest.mark.slow
 @pytest.mark.parametrize(
-    "task, dynamic, int8, half, batch, simplify, nms",
+    "task, dynamic, int8, half, batch, simplify, nms, end2end",
     [  # generate all combinations except for exclusion cases
-        (task, dynamic, int8, half, batch, simplify, nms)
-        for task, dynamic, int8, half, batch, simplify, nms in product(
-            TASKS, [True, False], [False], [False], [1, 2], [True, False], [True, False]
+        (task, dynamic, int8, half, batch, simplify, nms, end2end)
+        for task, dynamic, int8, half, batch, simplify, nms, end2end in product(
+            TASKS, [True, False], [False], [False], [1, 2], [True, False], [True, False], [True, False]
         )
-        if not ((int8 and half) or (task == "classify" and nms) or (nms and not TORCH_1_13))
+        if not ((int8 and half) or (task == "classify" and nms) or (nms and not TORCH_1_13) or (end2end and nms))
     ],
 )
-def test_export_onnx_matrix(task, dynamic, int8, half, batch, simplify, nms):
+def test_export_onnx_matrix(task, dynamic, int8, half, batch, simplify, nms, end2end):
     """Test YOLO export to ONNX format with various configurations and parameters."""
     file = YOLO(TASK2MODEL[task]).export(
-        format="onnx", imgsz=32, dynamic=dynamic, int8=int8, half=half, batch=batch, simplify=simplify, nms=nms
+        format="onnx",
+        imgsz=32,
+        dynamic=dynamic,
+        int8=int8,
+        half=half,
+        batch=batch,
+        simplify=simplify,
+        nms=nms,
+        end2end=end2end,
     )
     YOLO(file)([SOURCE] * batch, imgsz=64 if dynamic else 32)  # exported model inference
     Path(file).unlink()  # cleanup
@@ -90,19 +103,19 @@ def test_export_onnx_matrix(task, dynamic, int8, half, batch, simplify, nms):

 @pytest.mark.slow
 @pytest.mark.parametrize(
-    "task, dynamic, int8, half, batch, nms",
+    "task, dynamic, int8, half, batch, nms, end2end",
     [  # generate all combinations except for exclusion cases
-        (task, dynamic, int8, half, batch, nms)
-        for task, dynamic, int8, half, batch, nms in product(
-            TASKS, [False, True], [False], [False, True], [1, 2], [True, False]
+        (task, dynamic, int8, half, batch, nms, end2end)
+        for task, dynamic, int8, half, batch, nms, end2end in product(
+            TASKS, [False, True], [False], [False, True], [1, 2], [True, False], [True, False]
         )
-        if not (task == "classify" and nms)
+        if not ((task == "classify" and nms) or (end2end and nms))
     ],
 )
-def test_export_torchscript_matrix(task, dynamic, int8, half, batch, nms):
+def test_export_torchscript_matrix(task, dynamic, int8, half, batch, nms, end2end):
     """Test YOLO model export to TorchScript format under varied configurations."""
     file = YOLO(TASK2MODEL[task]).export(
-        format="torchscript", imgsz=32, dynamic=dynamic, int8=int8, half=half, batch=batch, nms=nms
+        format="torchscript", imgsz=32, dynamic=dynamic, int8=int8, half=half, batch=batch, nms=nms, end2end=end2end
     )
     YOLO(file)([SOURCE] * batch, imgsz=64 if dynamic else 32)  # exported model inference
     Path(file).unlink()  # cleanup
@@ -116,19 +129,20 @@ def test_export_torchscript_matrix(task, dynamic, int8, half, batch, nms):
     MACOS and MACOS_VERSION and MACOS_VERSION >= "15", reason="CoreML YOLO26 matrix test crashes on macOS 15+"
 )
 @pytest.mark.parametrize(
-    "task, dynamic, int8, half, nms, batch",
+    "task, dynamic, int8, half, nms, batch, end2end",
     [  # generate all combinations except for exclusion cases
-        (task, dynamic, int8, half, nms, batch)
-        for task, dynamic, int8, half, nms, batch in product(
-            TASKS, [True, False], [True, False], [True, False], [True, False], [1]
+        (task, dynamic, int8, half, nms, batch, end2end)
+        for task, dynamic, int8, half, nms, batch, end2end in product(
+            TASKS, [True, False], [True, False], [True, False], [True, False], [1], [True, False]
         )
         if not (int8 and half)
         and not (task != "detect" and nms)
         and not (dynamic and nms)
         and not (task == "classify" and dynamic)
+        and not (end2end and nms)
     ],
 )
-def test_export_coreml_matrix(task, dynamic, int8, half, nms, batch):
+def test_export_coreml_matrix(task, dynamic, int8, half, nms, batch, end2end):
     """Test YOLO export to CoreML format with various parameter configurations."""
     file = YOLO(TASK2MODEL[task]).export(
         format="coreml",
@@ -138,6 +152,7 @@ def test_export_coreml_matrix(task, dynamic, int8, half, nms, batch):
         half=half,
         batch=batch,
         nms=nms,
+        end2end=end2end,
     )
     YOLO(file)([SOURCE] * batch, imgsz=32)  # exported model inference
     shutil.rmtree(file)  # cleanup
@@ -152,19 +167,25 @@ def test_export_coreml_matrix(task, dynamic, int8, half, nms, batch):
     reason="Test disabled as TF suffers from install conflicts on Windows, macOS and Raspberry Pi",
 )
 @pytest.mark.parametrize(
-    "task, dynamic, int8, half, batch, nms",
+    "task, dynamic, int8, half, batch, nms, end2end",
     [  # generate all combinations except for exclusion cases
-        (task, dynamic, int8, half, batch, nms)
-        for task, dynamic, int8, half, batch, nms in product(
-            TASKS, [False], [True, False], [True, False], [1], [True, False]
+        (task, dynamic, int8, half, batch, nms, end2end)
+        for task, dynamic, int8, half, batch, nms, end2end in product(
+            TASKS, [False], [True, False], [True, False], [1], [True, False], [True, False]
+        )
+        if not (
+            (int8 and half)
+            or (task == "classify" and nms)
+            or (ARM64 and nms)
+            or (nms and not TORCH_1_13)
+            or (end2end and nms)
         )
-        if not ((int8 and half) or (task == "classify" and nms) or (ARM64 and nms) or (nms and not TORCH_1_13))
     ],
 )
-def test_export_tflite_matrix(task, dynamic, int8, half, batch, nms):
+def test_export_tflite_matrix(task, dynamic, int8, half, batch, nms, end2end):
     """Test YOLO export to TFLite format considering various export configurations."""
     file = YOLO(TASK2MODEL[task]).export(
-        format="tflite", imgsz=32, dynamic=dynamic, int8=int8, half=half, batch=batch, nms=nms
+        format="tflite", imgsz=32, dynamic=dynamic, int8=int8, half=half, batch=batch, nms=nms, end2end=end2end
    )
     YOLO(file)([SOURCE] * batch, imgsz=32)  # exported model inference
     Path(file).unlink()  # cleanup
@@ -225,16 +246,16 @@ def test_export_mnn():
 @pytest.mark.slow
 @pytest.mark.skipif(not TORCH_1_10, reason="MNN export requires torch>=1.10")
 @pytest.mark.parametrize(
-    "task, int8, half, batch",
+    "task, int8, half, batch, end2end",
     [  # generate all combinations except for exclusion cases
-        (task, int8, half, batch)
-        for task, int8, half, batch in product(TASKS, [True, False], [True, False], [1, 2])
+        (task, int8, half, batch, end2end)
+        for task, int8, half, batch, end2end in product(TASKS, [True, False], [True, False], [1, 2], [True, False])
         if not (int8 and half)
     ],
 )
-def test_export_mnn_matrix(task, int8, half, batch):
+def test_export_mnn_matrix(task, int8, half, batch, end2end):
     """Test YOLO export to MNN format considering various export configurations."""
-    file = YOLO(TASK2MODEL[task]).export(format="mnn", imgsz=32, int8=int8, half=half, batch=batch)
+    file = YOLO(TASK2MODEL[task]).export(format="mnn", imgsz=32, int8=int8, half=half, batch=batch, end2end=end2end)
     YOLO(file)([SOURCE] * batch, imgsz=32)  # exported model inference
     Path(file).unlink()  # cleanup

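Every exporter path above now threads end2end through to the Exporter and excludes the end2end+nms combination (the one-to-one head needs no NMS). A minimal sketch of the pattern the matrix tests follow, with an assumed checkpoint name:

from pathlib import Path

from ultralytics import YOLO

for end2end in (False, True):
    file = YOLO("yolo26n.pt").export(format="onnx", dynamic=True, imgsz=32, end2end=end2end)
    YOLO(file)("ultralytics/assets/bus.jpg", imgsz=32)  # exported model inference
    Path(file).unlink()  # cleanup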
tests/test_python.py CHANGED
@@ -159,6 +159,30 @@ def test_predict_gray_and_4ch(tmp_path):
     f.unlink()  # cleanup


+@pytest.mark.slow
+@pytest.mark.skipif(not ONLINE, reason="environment is offline")
+def test_predict_all_image_formats():
+    """Test YOLO prediction on all 12 image formats (AVIF, BMP, DNG, HEIC, JP2, JPEG, JPG, MPO, PNG, TIF, TIFF, WebP)."""
+    # Download dataset if needed
+    data = check_det_dataset("coco12-formats.yaml")
+    dataset_path = Path(data["path"])
+
+    # Collect all images from train and val
+    expected = {"avif", "bmp", "dng", "heic", "jp2", "jpeg", "jpg", "mpo", "png", "tif", "tiff", "webp"}
+    images = [im for im in (dataset_path / "images" / "train").glob("*.*") if im.suffix.lower().lstrip(".") in expected]
+    images += [im for im in (dataset_path / "images" / "val").glob("*.*") if im.suffix.lower().lstrip(".") in expected]
+    assert len(images) == 12, f"Expected 12 images, found {len(images)}"
+
+    # Verify all format extensions are represented
+    extensions = {img.suffix.lower().lstrip(".") for img in images}
+    assert extensions == expected, f"Missing formats: {expected - extensions}"
+
+    # Run inference on all images
+    model = YOLO(MODEL)
+    results = model(images, imgsz=32)
+    assert len(results) == 12, f"Expected 12 results, got {len(results)}"
+
+
 @pytest.mark.slow
 @pytest.mark.skipif(not ONLINE, reason="environment is offline")
 @pytest.mark.skipif(is_github_action_running(), reason="No auth https://github.com/JuanBindez/pytubefix/issues/166")
@@ -209,11 +233,12 @@ def test_val(task: str, weight: str, data: str) -> None:
     metrics.confusion_matrix.to_json()


+@pytest.mark.skipif(not ONLINE, reason="environment is offline")
 @pytest.mark.skipif(IS_JETSON or IS_RASPBERRYPI, reason="Edge devices not intended for training")
 def test_train_scratch():
-    """Test training the YOLO model from scratch using the provided configuration."""
+    """Test training the YOLO model from scratch on 12 different image types in the COCO12-Formats dataset."""
     model = YOLO(CFG)
-    model.train(data="coco8.yaml", epochs=2, imgsz=32, cache="disk", batch=-1, close_mosaic=1, name="model")
+    model.train(data="coco12-formats.yaml", epochs=2, imgsz=32, cache="disk", batch=-1, close_mosaic=1, name="model")
     model(SOURCE)


@@ -672,7 +697,7 @@ def test_yolo_world():
     checks.IS_PYTHON_3_8 and LINUX and ARM64,
     reason="YOLOE with CLIP is not supported in Python 3.8 and aarch64 Linux",
 )
-def test_yoloe():
+def test_yoloe(tmp_path):
     """Test YOLOE models with MobileClip support."""
     # Predict
     # text-prompts
@@ -714,14 +739,18 @@ def test_yoloe():
         imgsz=32,
     )
     # Train, from scratch
-    model = YOLOE("yoloe-11s-seg.yaml")
-    model.train(
-        data=dict(train=dict(yolo_data=["coco128-seg.yaml"]), val=dict(yolo_data=["coco128-seg.yaml"])),
-        epochs=1,
-        close_mosaic=1,
-        trainer=YOLOESegTrainerFromScratch,
-        imgsz=32,
-    )
+    data_dict = dict(train=dict(yolo_data=["coco128-seg.yaml"]), val=dict(yolo_data=["coco128-seg.yaml"]))
+    data_yaml = tmp_path / "yoloe-data.yaml"
+    YAML.save(data=data_dict, file=data_yaml)
+    for data in [data_dict, data_yaml]:
+        model = YOLOE("yoloe-11s-seg.yaml")
+        model.train(
+            data=data,
+            epochs=1,
+            close_mosaic=1,
+            trainer=YOLOESegTrainerFromScratch,
+            imgsz=32,
+        )

     # prompt-free
     # predict
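The reworked from-scratch block verifies that the multi-source data spec may be passed either as an in-memory dict or as a YAML file path. A standalone sketch, with import paths assumed from the ultralytics package layout:

from ultralytics import YOLOE
from ultralytics.models.yolo.yoloe import YOLOESegTrainerFromScratch
from ultralytics.utils import YAML

data_dict = dict(train=dict(yolo_data=["coco128-seg.yaml"]), val=dict(yolo_data=["coco128-seg.yaml"]))
YAML.save(data=data_dict, file="yoloe-data.yaml")  # dict and file forms are now interchangeable
for data in (data_dict, "yoloe-data.yaml"):
    YOLOE("yoloe-11s-seg.yaml").train(data=data, epochs=1, imgsz=32, trainer=YOLOESegTrainerFromScratch)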
ultralytics/__init__.py CHANGED
@@ -1,6 +1,6 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

-__version__ = "8.4.4"
+__version__ = "8.4.8"

 import importlib
 import os
ultralytics/cfg/__init__.py CHANGED
@@ -238,6 +238,7 @@ CFG_BOOL_KEYS = frozenset(
         "simplify",
         "nms",
         "profile",
+        "end2end",
     }
 )
@@ -401,16 +402,16 @@ def get_save_dir(args: SimpleNamespace, name: str | None = None) -> Path:
         >>> args = SimpleNamespace(project="my_project", task="detect", mode="train", exist_ok=True)
         >>> save_dir = get_save_dir(args)
         >>> print(save_dir)
-        my_project/detect/train
+        runs/detect/my_project/train
     """
     if getattr(args, "save_dir", None):
         save_dir = args.save_dir
     else:
         from ultralytics.utils.files import increment_path

-        runs = (ROOT.parent / "tests/tmp/runs" if TESTS_RUNNING else RUNS_DIR) / args.task
-        nested = args.project and len(Path(args.project).parts) > 1  # e.g. "user/project" or "org\repo"
-        project = runs / args.project if nested else args.project or runs
+        project = args.project or ""
+        if not Path(project).is_absolute():
+            project = (ROOT.parent / "tests/tmp/runs" if TESTS_RUNNING else RUNS_DIR) / args.task / project
     name = name or args.name or f"{args.mode}"
     save_dir = increment_path(Path(project) / name, exist_ok=args.exist_ok if RANK in {-1, 0} else True)
ultralytics/cfg/datasets/coco12-formats.yaml ADDED
@@ -0,0 +1,101 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# COCO12-Formats dataset (12 images testing all supported image formats) by Ultralytics
+# Documentation: https://docs.ultralytics.com/datasets/detect/coco12-formats/
+# Example usage: yolo train data=coco12-formats.yaml
+# parent
+# ├── ultralytics
+# └── datasets
+#     └── coco12-formats ← downloads here (1 MB)
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: coco12-formats # dataset root dir
+train: images/train # train images (relative to 'path') 6 images
+val: images/val # val images (relative to 'path') 6 images
+test: # test images (optional)
+
+# Classes
+names:
+  0: person
+  1: bicycle
+  2: car
+  3: motorcycle
+  4: airplane
+  5: bus
+  6: train
+  7: truck
+  8: boat
+  9: traffic light
+  10: fire hydrant
+  11: stop sign
+  12: parking meter
+  13: bench
+  14: bird
+  15: cat
+  16: dog
+  17: horse
+  18: sheep
+  19: cow
+  20: elephant
+  21: bear
+  22: zebra
+  23: giraffe
+  24: backpack
+  25: umbrella
+  26: handbag
+  27: tie
+  28: suitcase
+  29: frisbee
+  30: skis
+  31: snowboard
+  32: sports ball
+  33: kite
+  34: baseball bat
+  35: baseball glove
+  36: skateboard
+  37: surfboard
+  38: tennis racket
+  39: bottle
+  40: wine glass
+  41: cup
+  42: fork
+  43: knife
+  44: spoon
+  45: bowl
+  46: banana
+  47: apple
+  48: sandwich
+  49: orange
+  50: broccoli
+  51: carrot
+  52: hot dog
+  53: pizza
+  54: donut
+  55: cake
+  56: chair
+  57: couch
+  58: potted plant
+  59: bed
+  60: dining table
+  61: toilet
+  62: tv
+  63: laptop
+  64: mouse
+  65: remote
+  66: keyboard
+  67: cell phone
+  68: microwave
+  69: oven
+  70: toaster
+  71: sink
+  72: refrigerator
+  73: book
+  74: clock
+  75: vase
+  76: scissors
+  77: teddy bear
+  78: hair drier
+  79: toothbrush
+
+# Download script/URL (optional)
+download: https://github.com/ultralytics/assets/releases/download/v0.0.0/coco12-formats.zip
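The dataset pairs one COCO-labeled image per supported suffix, six per split, so a single short run touches every image-loading code path. A minimal smoke-train mirroring the updated test_train_scratch (the model cfg name is illustrative):

from ultralytics import YOLO

model = YOLO("yolo11n.yaml")  # any detection model cfg; trains from scratch
model.train(data="coco12-formats.yaml", epochs=2, imgsz=32)  # dataset auto-downloads (~1 MB)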
ultralytics/cfg/default.yaml CHANGED
@@ -36,7 +36,7 @@ amp: True # (bool) Automatic Mixed Precision (AMP) training; True runs AMP capab
 fraction: 1.0 # (float) fraction of training dataset to use (1.0 = all)
 profile: False # (bool) profile ONNX/TensorRT speeds during training for loggers
 freeze: # (int | list, optional) freeze first N layers (int) or specific layer indices (list)
-multi_scale: 0.0 # (float) multiscale training by varying image size
+multi_scale: 0.0 # (float) multi-scale range as a fraction of imgsz; sizes are rounded to stride multiples
 compile: False # (bool | str) enable torch.compile() backend='inductor'; True="default", False=off, or "default|reduce-overhead|max-autotune-no-cudagraphs"

 # Segmentation
@@ -56,6 +56,7 @@ max_det: 300 # (int) maximum number of detections per image
 half: False # (bool) use half precision (FP16) if supported
 dnn: False # (bool) use OpenCV DNN for ONNX inference
 plots: True # (bool) save plots and images during train/val
+end2end: # (bool, optional) whether to use the end2end head (YOLO26, YOLOv10) for predict/val/export

 # Predict settings -----------------------------------------------------------------------------------------------------
 source: # (str, optional) path/dir/URL/stream for images or videos; e.g. 'ultralytics/assets' or '0' for webcam
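Because end2end defaults to unset, the checkpoint's own end2end attribute decides the head unless a user forces it; passing True or False overrides per call. A sketch of an explicit override at val time, mirroring the updated CLI tests (checkpoint name assumed):

from ultralytics import YOLO

model = YOLO("yolo26n.pt")  # assumed end2end-capable weights
model.val(data="coco8.yaml", imgsz=32, end2end=False, max_det=100, agnostic_nms=True)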
ultralytics/data/augment.py CHANGED
@@ -2066,7 +2066,15 @@ class Format:
             if self.mask_overlap:
                 sem_masks = cls_tensor[masks[0].long() - 1]  # (H, W) from (1, H, W) instance indices
             else:
+                # Create sem_masks consistent with mask_overlap=True
                 sem_masks = (masks * cls_tensor[:, None, None]).max(0).values  # (H, W) from (N, H, W) binary
+                overlap = masks.sum(dim=0) > 1  # (H, W)
+                if overlap.any():
+                    weights = masks.sum(axis=(1, 2))
+                    weighted_masks = masks * weights[:, None, None]  # (N, H, W)
+                    weighted_masks[masks == 0] = weights.max() + 1  # handle background
+                    smallest_idx = weighted_masks.argmin(dim=0)  # (H, W)
+                    sem_masks[overlap] = cls_tensor[smallest_idx[overlap]]
         else:
             masks = torch.zeros(
                 1 if self.mask_overlap else nl, img.shape[0] // self.mask_ratio, img.shape[1] // self.mask_ratio
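The new branch aligns the mask_overlap=False path with mask_overlap=True on contested pixels: where instances overlap, the smallest instance (fewest mask pixels) supplies the semantic class. A standalone toy check of that rule:

import torch

masks = torch.tensor([[[1, 1], [1, 0]], [[0, 1], [0, 0]]])  # two binary instance masks, (N=2, H=2, W=2)
cls_tensor = torch.tensor([7, 3])  # per-instance class ids
sem_masks = (masks * cls_tensor[:, None, None]).max(0).values
overlap = masks.sum(dim=0) > 1
weights = masks.sum(dim=(1, 2))  # instance sizes: tensor([3, 1])
weighted = masks * weights[:, None, None]
weighted[masks == 0] = weights.max() + 1  # background pixels never win the argmin
sem_masks[overlap] = cls_tensor[weighted.argmin(dim=0)[overlap]]
print(sem_masks)  # tensor([[7, 3], [7, 0]]); the contested pixel takes class 3 from the smaller instance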
ultralytics/data/converter.py CHANGED
@@ -796,6 +796,17 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
     # Check if this is a classification dataset
     is_classification = dataset_record.get("task") == "classify"
     class_names = {int(k): v for k, v in dataset_record.get("class_names", {}).items()}
+    len(class_names)
+
+    # Validate required fields before downloading images
+    task = dataset_record.get("task", "detect")
+    if not is_classification:
+        if "train" not in splits:
+            raise ValueError(f"Dataset missing required 'train' split. Found splits: {sorted(splits)}")
+        if "val" not in splits and "test" not in splits:
+            raise ValueError(f"Dataset missing required 'val' split. Found splits: {sorted(splits)}")
+        if task == "pose" and "kpt_shape" not in dataset_record:
+            raise ValueError("Pose dataset missing required 'kpt_shape'. See https://docs.ultralytics.com/datasets/pose/")

     # Create base directories
     dataset_dir.mkdir(parents=True, exist_ok=True)
@@ -838,14 +849,19 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
             if http_url := record.get("url"):
                 if not image_path.exists():
                     image_path.parent.mkdir(parents=True, exist_ok=True)
-                    try:
-                        async with session.get(http_url, timeout=aiohttp.ClientTimeout(total=30)) as response:
-                            response.raise_for_status()
-                            image_path.write_bytes(await response.read())
-                        return True
-                    except Exception as e:
-                        LOGGER.warning(f"Failed to download {http_url}: {e}")
-                        return False
+                    # Retry with exponential backoff (3 attempts, sleeping 1s then 2s between retries)
+                    for attempt in range(3):
+                        try:
+                            async with session.get(http_url, timeout=aiohttp.ClientTimeout(total=30)) as response:
+                                response.raise_for_status()
+                                image_path.write_bytes(await response.read())
+                            return True
+                        except Exception as e:
+                            if attempt < 2:  # Don't sleep after last attempt
+                                await asyncio.sleep(2**attempt)  # 1s, 2s backoff
+                            else:
+                                LOGGER.warning(f"Failed to download {http_url} after 3 attempts: {e}")
+                                return False
             return True

     # Process all images with async downloads (limit connections for small datasets)
@@ -861,9 +877,16 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
             pbar.update(1)
             return result

-    await asyncio.gather(*[tracked_process(record) for record in image_records])
+    results = await asyncio.gather(*[tracked_process(record) for record in image_records])
     pbar.close()

+    # Validate images were downloaded successfully
+    success_count = sum(1 for r in results if r)
+    if success_count == 0:
+        raise RuntimeError(f"Failed to download any images from {ndjson_path}. Check network connection and URLs.")
+    if success_count < len(image_records):
+        LOGGER.warning(f"Downloaded {success_count}/{len(image_records)} images from {ndjson_path}")
+
     if is_classification:
         # Classification: return dataset directory (check_cls_dataset expects a directory path)
         return dataset_dir
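Pulled out of context, the new download policy is a bounded retry with exponential backoff plus a post-gather success audit. A minimal standalone sketch of the retry half (the helper name is mine, not the package's):

import asyncio

import aiohttp

async def fetch_with_backoff(session: aiohttp.ClientSession, url: str, attempts: int = 3) -> bytes | None:
    """Return the response body, sleeping 1s then 2s between retries; None after the final failure."""
    for attempt in range(attempts):
        try:
            async with session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as response:
                response.raise_for_status()
                return await response.read()
        except Exception:
            if attempt < attempts - 1:
                await asyncio.sleep(2**attempt)
    return None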
ultralytics/data/utils.py CHANGED
@@ -37,8 +37,8 @@ from ultralytics.utils.downloads import download, safe_download, unzip_file
 from ultralytics.utils.ops import segments2boxes

 HELP_URL = "See https://docs.ultralytics.com/datasets for dataset formatting guidance."
-IMG_FORMATS = {"bmp", "dng", "jpeg", "jpg", "mpo", "png", "tif", "tiff", "webp", "pfm", "heic"}  # image suffixes
-VID_FORMATS = {"asf", "avi", "gif", "m4v", "mkv", "mov", "mp4", "mpeg", "mpg", "ts", "wmv", "webm"}  # video suffixes
+IMG_FORMATS = {"avif", "bmp", "dng", "heic", "jp2", "jpeg", "jpeg2000", "jpg", "mpo", "png", "tif", "tiff", "webp"}
+VID_FORMATS = {"asf", "avi", "gif", "m4v", "mkv", "mov", "mp4", "mpeg", "mpg", "ts", "wmv", "webm"}  # videos
 FORMATS_HELP_MSG = f"Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}"

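IMG_FORMATS gains avif, jp2, and jpeg2000 and drops pfm, matching the new COCO12-Formats coverage; since the set is importable, user code can stay in sync with the loaders. For example (directory path illustrative):

from pathlib import Path

from ultralytics.data.utils import IMG_FORMATS

train_dir = Path("datasets/coco12-formats/images/train")
images = sorted(p for p in train_dir.glob("*.*") if p.suffix.lower().lstrip(".") in IMG_FORMATS)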
ultralytics/engine/exporter.py CHANGED
@@ -404,6 +404,13 @@ class Exporter:
         if not hasattr(model, "names"):
             model.names = default_class_names()
         model.names = check_class_names(model.names)
+        if hasattr(model, "end2end"):
+            if self.args.end2end is not None:
+                model.end2end = self.args.end2end
+            if rknn or ncnn or executorch or paddle or imx:
+                # Disable end2end branch for certain export formats as they do not support topk
+                model.end2end = False
+                LOGGER.warning(f"{fmt.upper()} export does not support end2end models, disabling end2end branch.")
         if self.args.half and self.args.int8:
             LOGGER.warning("half=True and int8=True are mutually exclusive, setting half=False.")
             self.args.half = False
@@ -463,9 +470,6 @@ class Exporter:
         )
         if tfjs and (ARM64 and LINUX):
             raise SystemError("TF.js exports are not currently supported on ARM64 Linux")
-        if ncnn and hasattr(model.model[-1], "one2one_cv2"):
-            del model.model[-1].one2one_cv2  # Disable end2end branch for NCNN export as it does not support topk
-            LOGGER.warning("NCNN export does not support end2end models, disabling end2end branch.")
         # Recommend OpenVINO if export and Intel CPU
         if SETTINGS.get("openvino_msg"):
             if is_intel():
@@ -509,6 +513,7 @@ class Exporter:
             # Clamp max_det to anchor count for small image sizes (required for TensorRT compatibility)
             anchors = sum(int(self.imgsz[0] / s) * int(self.imgsz[1] / s) for s in model.stride.tolist())
             m.max_det = min(self.args.max_det, anchors)
+            m.agnostic_nms = self.args.agnostic_nms
             m.xyxy = self.args.nms and not coreml
             m.shape = None  # reset cached shape for new export input size
         if hasattr(model, "pe") and hasattr(m, "fuse"):  # for YOLOE models
@@ -549,6 +554,7 @@ class Exporter:
             "names": model.names,
             "args": {k: v for k, v in self.args if k in fmt_keys},
             "channels": model.yaml.get("channels", 3),
+            "end2end": getattr(model, "end2end", False),
         }  # model metadata
         if dla is not None:
             self.metadata["dla"] = dla  # make sure `AutoBackend` uses correct dla device if it has one
@@ -556,8 +562,6 @@ class Exporter:
             self.metadata["kpt_shape"] = model.model[-1].kpt_shape
         if hasattr(model, "kpt_names"):
             self.metadata["kpt_names"] = model.kpt_names
-        if getattr(model.model[-1], "end2end", False):
-            self.metadata["end2end"] = True

         LOGGER.info(
             f"\n{colorstr('PyTorch:')} starting from '{file}' with input shape {tuple(im.shape)} BCHW and "
@@ -1045,7 +1049,7 @@ class Exporter:
             "onnx_graphsurgeon>=0.3.26",  # required by 'onnx2tf' package
             "ai-edge-litert>=1.2.0" + (",<1.4.0" if MACOS else ""),  # required by 'onnx2tf' package
             "onnx>=1.12.0,<2.0.0",
-            "onnx2tf>=1.26.3",
+            "onnx2tf>=1.26.3,<1.29.0",  # pin to avoid h5py build issues on aarch64
             "onnxslim>=0.1.71",
             "onnxruntime-gpu" if cuda else "onnxruntime",
             "protobuf>=5",
ultralytics/engine/predictor.py CHANGED
@@ -387,6 +387,11 @@ class BasePredictor:
             model (str | Path | torch.nn.Module, optional): Model to load or use.
             verbose (bool): Whether to print verbose output.
         """
+        if hasattr(model, "end2end"):
+            if self.args.end2end is not None:
+                model.end2end = self.args.end2end
+            if model.end2end:
+                model.set_head_attr(max_det=self.args.max_det, agnostic_nms=self.args.agnostic_nms)
         self.model = AutoBackend(
             model=model or self.args.model,
             device=select_device(self.args.device, verbose=verbose),