ultralytics 8.3.202__py3-none-any.whl → 8.3.204__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their public registry.
Files changed (38)
  1. tests/test_cli.py +7 -9
  2. tests/test_cuda.py +4 -1
  3. tests/test_exports.py +7 -7
  4. tests/test_python.py +18 -10
  5. tests/test_solutions.py +13 -11
  6. ultralytics/__init__.py +1 -1
  7. ultralytics/data/build.py +4 -1
  8. ultralytics/data/utils.py +5 -0
  9. ultralytics/engine/exporter.py +45 -6
  10. ultralytics/engine/trainer.py +14 -12
  11. ultralytics/engine/tuner.py +1 -1
  12. ultralytics/engine/validator.py +1 -1
  13. ultralytics/models/fastsam/predict.py +2 -1
  14. ultralytics/models/rtdetr/model.py +2 -0
  15. ultralytics/models/sam/modules/sam.py +1 -1
  16. ultralytics/models/sam/predict.py +9 -5
  17. ultralytics/models/yolo/classify/train.py +2 -2
  18. ultralytics/models/yolo/classify/val.py +2 -2
  19. ultralytics/models/yolo/detect/train.py +1 -1
  20. ultralytics/models/yolo/detect/val.py +1 -1
  21. ultralytics/models/yolo/model.py +1 -0
  22. ultralytics/models/yolo/world/train.py +4 -2
  23. ultralytics/models/yolo/yoloe/train.py +1 -13
  24. ultralytics/nn/autobackend.py +1 -1
  25. ultralytics/nn/modules/head.py +3 -3
  26. ultralytics/nn/modules/transformer.py +3 -1
  27. ultralytics/solutions/similarity_search.py +3 -2
  28. ultralytics/solutions/streamlit_inference.py +2 -3
  29. ultralytics/utils/checks.py +27 -0
  30. ultralytics/utils/metrics.py +3 -3
  31. ultralytics/utils/tal.py +3 -5
  32. ultralytics/utils/torch_utils.py +5 -34
  33. {ultralytics-8.3.202.dist-info → ultralytics-8.3.204.dist-info}/METADATA +21 -21
  34. {ultralytics-8.3.202.dist-info → ultralytics-8.3.204.dist-info}/RECORD +38 -38
  35. {ultralytics-8.3.202.dist-info → ultralytics-8.3.204.dist-info}/WHEEL +0 -0
  36. {ultralytics-8.3.202.dist-info → ultralytics-8.3.204.dist-info}/entry_points.txt +0 -0
  37. {ultralytics-8.3.202.dist-info → ultralytics-8.3.204.dist-info}/licenses/LICENSE +0 -0
  38. {ultralytics-8.3.202.dist-info → ultralytics-8.3.204.dist-info}/top_level.txt +0 -0
tests/test_cli.py CHANGED
@@ -1,13 +1,14 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

  import subprocess
+ from pathlib import Path

  import pytest
  from PIL import Image

  from tests import CUDA_DEVICE_COUNT, CUDA_IS_AVAILABLE, MODELS, TASK_MODEL_DATA
  from ultralytics.utils import ARM64, ASSETS, LINUX, WEIGHTS_DIR, checks
- from ultralytics.utils.torch_utils import TORCH_1_9
+ from ultralytics.utils.torch_utils import TORCH_1_11


  def run(cmd: str) -> None:
@@ -33,7 +34,7 @@ def test_train(task: str, model: str, data: str) -> None:
  @pytest.mark.parametrize("task,model,data", TASK_MODEL_DATA)
  def test_val(task: str, model: str, data: str) -> None:
      """Test YOLO validation process for specified task, model, and data using a shell command."""
-     run(f"yolo val {task} model={model} data={data} imgsz=32 save_txt save_json")
+     run(f"yolo val {task} model={model} data={data} imgsz=32 save_txt save_json visualize")


  @pytest.mark.parametrize("task,model,data", TASK_MODEL_DATA)
@@ -48,15 +49,12 @@ def test_export(model: str) -> None:
      run(f"yolo export model={model} format=torchscript imgsz=32")


- def test_rtdetr(task: str = "detect", model: str = "yolov8n-rtdetr.yaml", data: str = "coco8.yaml") -> None:
+ @pytest.mark.skipif(not TORCH_1_11, reason="RTDETR requires torch>=1.11")
+ def test_rtdetr(task: str = "detect", model: Path = WEIGHTS_DIR / "rtdetr-l.pt", data: str = "coco8.yaml") -> None:
      """Test the RTDETR functionality within Ultralytics for detection tasks using specified model and data."""
-     # Warning: must use imgsz=640 (note also add comma, spaces, fraction=0.25 args to test single-image training)
-     run(f"yolo train {task} model={model} data={data} --imgsz= 160 epochs =1, cache = disk fraction=0.25")  # spaces
+     # Add comma, spaces, fraction=0.25 args to test single-image training
      run(f"yolo predict {task} model={model} source={ASSETS / 'bus.jpg'} imgsz=160 save save_crop save_txt")
-     if TORCH_1_9:
-         weights = WEIGHTS_DIR / "rtdetr-l.pt"
-         run(f"yolo predict {task} model={weights} source={ASSETS / 'bus.jpg'} imgsz=160 save save_crop save_txt")
-         run(f"yolo train {task} model={weights} epochs=1 imgsz=160 cache=disk data=coco8.yaml")
+     run(f"yolo train {task} model={model} data={data} --imgsz= 160 epochs =1, cache = disk fraction=0.25")


  @pytest.mark.skipif(checks.IS_PYTHON_3_12, reason="MobileSAM with CLIP is not supported in Python 3.12")
tests/test_cuda.py CHANGED
@@ -70,6 +70,7 @@ def test_export_onnx_matrix(task, dynamic, int8, half, batch, simplify, nms):
          simplify=simplify,
          nms=nms,
          device=DEVICES[0],
+         # opset=20 if nms else None,  # fix ONNX Runtime errors with NMS
      )
      YOLO(file)([SOURCE] * batch, imgsz=64 if dynamic else 32, device=DEVICES[0])  # exported model inference
      Path(file).unlink()  # cleanup
@@ -114,7 +115,9 @@ def test_train():
      device = tuple(DEVICES) if len(DEVICES) > 1 else DEVICES[0]
      # NVIDIA Jetson only has one GPU and therefore skipping checks
      if not IS_JETSON:
-         results = YOLO(MODEL).train(data="coco8.yaml", imgsz=64, epochs=1, device=device)  # requires imgsz>=64
+         results = YOLO(MODEL).train(
+             data="coco8.yaml", imgsz=64, epochs=1, device=device, batch=15
+         )  # requires imgsz>=64
          visible = eval(os.environ["CUDA_VISIBLE_DEVICES"])
          assert visible == device, f"Passed GPUs '{device}', but used GPUs '{visible}'"
          assert (
tests/test_exports.py CHANGED
@@ -20,7 +20,7 @@ from ultralytics.utils import (
      WINDOWS,
      checks,
  )
- from ultralytics.utils.torch_utils import TORCH_1_9, TORCH_1_13
+ from ultralytics.utils.torch_utils import TORCH_1_11, TORCH_1_13, TORCH_2_1


  def test_export_torchscript():
@@ -35,7 +35,7 @@ def test_export_onnx():
      YOLO(file)(SOURCE, imgsz=32)  # exported model inference


- @pytest.mark.skipif(not TORCH_1_13, reason="OpenVINO requires torch>=1.13")
+ @pytest.mark.skipif(not TORCH_2_1, reason="OpenVINO requires torch>=2.1")
  def test_export_openvino():
      """Test YOLO export to OpenVINO format for model inference compatibility."""
      file = YOLO(MODEL).export(format="openvino", imgsz=32)
@@ -43,7 +43,7 @@ def test_export_openvino():


  @pytest.mark.slow
- @pytest.mark.skipif(not TORCH_1_13, reason="OpenVINO requires torch>=1.13")
+ @pytest.mark.skipif(not TORCH_2_1, reason="OpenVINO requires torch>=2.1")
  @pytest.mark.parametrize(
      "task, dynamic, int8, half, batch, nms",
      [  # generate all combinations except for exclusion cases
@@ -83,7 +83,7 @@ def test_export_openvino_matrix(task, dynamic, int8, half, batch, nms):
          for task, dynamic, int8, half, batch, simplify, nms in product(
              TASKS, [True, False], [False], [False], [1, 2], [True, False], [True, False]
          )
-         if not ((int8 and half) or (task == "classify" and nms) or (task == "obb" and nms and not TORCH_1_13))
+         if not ((int8 and half) or (task == "classify" and nms) or (nms and not TORCH_1_13))
      ],
  )
  def test_export_onnx_matrix(task, dynamic, int8, half, batch, simplify, nms):
@@ -117,7 +117,7 @@ def test_export_torchscript_matrix(task, dynamic, int8, half, batch, nms):

  @pytest.mark.slow
  @pytest.mark.skipif(not MACOS, reason="CoreML inference only supported on macOS")
- @pytest.mark.skipif(not TORCH_1_9, reason="CoreML>=7.2 not supported with PyTorch<=1.8")
+ @pytest.mark.skipif(not TORCH_1_11, reason="CoreML export requires torch>=1.11")
  @pytest.mark.skipif(checks.IS_PYTHON_3_13, reason="CoreML not supported in Python 3.13")
  @pytest.mark.parametrize(
      "task, dynamic, int8, half, nms, batch",
@@ -157,7 +157,7 @@ def test_export_coreml_matrix(task, dynamic, int8, half, nms, batch):
          for task, dynamic, int8, half, batch, nms in product(
              TASKS, [False], [True, False], [True, False], [1], [True, False]
          )
-         if not ((int8 and half) or (task == "classify" and nms) or (ARM64 and nms))
+         if not ((int8 and half) or (task == "classify" and nms) or (ARM64 and nms) or (nms and not TORCH_1_13))
      ],
  )
  def test_export_tflite_matrix(task, dynamic, int8, half, batch, nms):
@@ -169,7 +169,7 @@ def test_export_tflite_matrix(task, dynamic, int8, half, batch, nms):
      Path(file).unlink()  # cleanup


- @pytest.mark.skipif(not TORCH_1_9, reason="CoreML>=7.2 not supported with PyTorch<=1.8")
+ @pytest.mark.skipif(not TORCH_1_11, reason="CoreML export requires torch>=1.11")
  @pytest.mark.skipif(WINDOWS, reason="CoreML not supported on Windows")  # RuntimeError: BlobWriter not loaded
  @pytest.mark.skipif(LINUX and ARM64, reason="CoreML not supported on aarch64 Linux")
  @pytest.mark.skipif(checks.IS_PYTHON_3_13, reason="CoreML not supported in Python 3.13")
tests/test_python.py CHANGED
@@ -34,7 +34,7 @@ from ultralytics.utils import (
      is_github_action_running,
  )
  from ultralytics.utils.downloads import download
- from ultralytics.utils.torch_utils import TORCH_1_9
+ from ultralytics.utils.torch_utils import TORCH_1_11, TORCH_1_13

  IS_TMP_WRITEABLE = is_dir_writeable(TMP)  # WARNING: must be run once tests start as TMP does not exist on tests/init

@@ -125,7 +125,9 @@ def test_predict_img(model_name):
      batch = [
          str(SOURCE),  # filename
          Path(SOURCE),  # Path
-         "https://github.com/ultralytics/assets/releases/download/v0.0.0/zidane.jpg" if ONLINE else SOURCE,  # URI
+         "https://github.com/ultralytics/assets/releases/download/v0.0.0/zidane.jpg?token=123"
+         if ONLINE
+         else SOURCE,  # URI
          im,  # OpenCV
          Image.open(SOURCE),  # PIL
          np.zeros((320, 640, channels), dtype=np.uint8),  # numpy
@@ -246,7 +248,7 @@ def test_all_model_yamls():
      """Test YOLO model creation for all available YAML configurations in the `cfg/models` directory."""
      for m in (ROOT / "cfg" / "models").rglob("*.yaml"):
          if "rtdetr" in m.name:
-             if TORCH_1_9:  # torch<=1.8 issue - TypeError: __init__() got an unexpected keyword argument 'batch_first'
+             if TORCH_1_11:
                  _ = RTDETR(m.name)(SOURCE, imgsz=640)  # must be 640
          else:
              YOLO(m.name)
@@ -634,7 +636,7 @@ def test_yolo_world():
      )


- @pytest.mark.skipif(checks.IS_PYTHON_3_12 or not TORCH_1_9, reason="YOLOE with CLIP is not supported in Python 3.12")
+ @pytest.mark.skipif(not TORCH_1_13, reason="YOLOE with CLIP requires torch>=1.13")
+ @pytest.mark.skipif(checks.IS_PYTHON_3_12, reason="YOLOE with CLIP is not supported in Python 3.12")
  @pytest.mark.skipif(
      checks.IS_PYTHON_3_8 and LINUX and ARM64,
      reason="YOLOE with CLIP is not supported in Python 3.8 and aarch64 Linux",
@@ -648,16 +651,12 @@ def test_yoloe():
      model.set_classes(names, model.get_text_pe(names))
      model(SOURCE, conf=0.01)

-     import numpy as np
-
      from ultralytics import YOLOE
      from ultralytics.models.yolo.yoloe import YOLOEVPSegPredictor

      # visual-prompts
      visuals = dict(
-         bboxes=np.array(
-             [[221.52, 405.8, 344.98, 857.54], [120, 425, 160, 445]],
-         ),
+         bboxes=np.array([[221.52, 405.8, 344.98, 857.54], [120, 425, 160, 445]]),
          cls=np.array([0, 1]),
      )
      model.predict(
@@ -674,7 +673,7 @@ def test_yoloe():
      model.val(data="coco128-seg.yaml", load_vp=True, imgsz=32)

      # Train, fine-tune
-     from ultralytics.models.yolo.yoloe import YOLOEPESegTrainer
+     from ultralytics.models.yolo.yoloe import YOLOEPESegTrainer, YOLOESegTrainerFromScratch

      model = YOLOE("yoloe-11s-seg.pt")
      model.train(
@@ -684,6 +683,15 @@ def test_yoloe():
          trainer=YOLOEPESegTrainer,
          imgsz=32,
      )
+     # Train, from scratch
+     model = YOLOE("yoloe-11s-seg.yaml")
+     model.train(
+         data=dict(train=dict(yolo_data=["coco128-seg.yaml"]), val=dict(yolo_data=["coco128-seg.yaml"])),
+         epochs=1,
+         close_mosaic=1,
+         trainer=YOLOESegTrainerFromScratch,
+         imgsz=32,
+     )

      # prompt-free
      # predict
tests/test_solutions.py CHANGED
@@ -12,8 +12,9 @@ import pytest

  from tests import MODEL, TMP
  from ultralytics import solutions
- from ultralytics.utils import ASSETS_URL, IS_RASPBERRYPI, checks
+ from ultralytics.utils import ASSETS_URL, IS_RASPBERRYPI, TORCH_VERSION, checks
  from ultralytics.utils.downloads import safe_download
+ from ultralytics.utils.torch_utils import TORCH_2_4

  # Pre-defined arguments values
  SHOW = False
@@ -205,15 +206,6 @@ def test_solution(name, solution_class, needs_frame_count, video, kwargs):
      )


- @pytest.mark.skipif(checks.IS_PYTHON_3_8, reason="Disabled due to unsupported CLIP dependencies.")
- @pytest.mark.skipif(IS_RASPBERRYPI, reason="Disabled due to slow performance on Raspberry Pi.")
- def test_similarity_search():
-     """Test similarity search solution with sample images and text query."""
-     safe_download(f"{ASSETS_URL}/4-imgs-similaritysearch.zip", dir=TMP)  # 4 dog images for testing in a zip file
-     searcher = solutions.VisualAISearch(data=str(TMP / "4-imgs-similaritysearch"))
-     _ = searcher("a dog sitting on a bench")  # Returns the results in format "- img name | similarity score"
-
-
  def test_left_click_selection():
      """Test distance calculation left click selection functionality."""
      dc = solutions.DistanceCalculation()
@@ -297,7 +289,16 @@ def test_streamlit_handle_video_upload_creates_file():
      os.remove("ultralytics.mp4")


- @pytest.mark.skipif(checks.IS_PYTHON_3_8, reason="Disabled due to unsupported CLIP dependencies.")
+ @pytest.mark.skipif(not TORCH_2_4, reason=f"VisualAISearch requires torch>=2.4 (found torch=={TORCH_VERSION})")
+ @pytest.mark.skipif(IS_RASPBERRYPI, reason="Disabled due to slow performance on Raspberry Pi.")
+ def test_similarity_search():
+     """Test similarity search solution with sample images and text query."""
+     safe_download(f"{ASSETS_URL}/4-imgs-similaritysearch.zip", dir=TMP)  # 4 dog images for testing in a zip file
+     searcher = solutions.VisualAISearch(data=str(TMP / "4-imgs-similaritysearch"))
+     _ = searcher("a dog sitting on a bench")  # Returns the results in format "- img name | similarity score"
+
+
+ @pytest.mark.skipif(not TORCH_2_4, reason=f"VisualAISearch requires torch>=2.4 (found torch=={TORCH_VERSION})")
  @pytest.mark.skipif(IS_RASPBERRYPI, reason="Disabled due to slow performance on Raspberry Pi.")
  def test_similarity_search_app_init():
      """Test SearchApp initializes with required attributes."""
@@ -306,6 +307,7 @@ def test_similarity_search_app_init():
      assert hasattr(app, "run")


+ @pytest.mark.skipif(not TORCH_2_4, reason=f"VisualAISearch requires torch>=2.4 (found torch=={TORCH_VERSION})")
  @pytest.mark.skipif(IS_RASPBERRYPI, reason="Disabled due to slow performance on Raspberry Pi.")
  def test_similarity_search_complete(tmp_path):
      """Test VisualAISearch end-to-end with sample image and query."""
ultralytics/__init__.py CHANGED
@@ -1,6 +1,6 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

- __version__ = "8.3.202"
+ __version__ = "8.3.204"

  import importlib
  import os
ultralytics/data/build.py CHANGED
@@ -7,6 +7,7 @@ import random
  from collections.abc import Iterator
  from pathlib import Path
  from typing import Any
+ from urllib.parse import urlsplit

  import numpy as np
  import torch
@@ -247,8 +248,10 @@ def check_source(source):
      if isinstance(source, (str, int, Path)):  # int for local usb camera
          source = str(source)
          source_lower = source.lower()
-         is_file = source_lower.rpartition(".")[-1] in (IMG_FORMATS | VID_FORMATS)
          is_url = source_lower.startswith(("https://", "http://", "rtsp://", "rtmp://", "tcp://"))
+         is_file = (urlsplit(source_lower).path if is_url else source_lower).rpartition(".")[-1] in (
+             IMG_FORMATS | VID_FORMATS
+         )
          webcam = source.isnumeric() or source.endswith(".streams") or (is_url and not is_file)
          screenshot = source_lower == "screen"
          if is_url and is_file:
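Note on the `check_source` change above: the file-extension check used to run on the full source string, so a tokenized URL such as `.../zidane.jpg?token=123` was not recognized as a file. A minimal standalone sketch of the new idea, using a toy format set rather than the real `IMG_FORMATS | VID_FORMATS` constants:

```python
from urllib.parse import urlsplit

IMG_FORMATS = {"jpg", "jpeg", "png"}  # toy subset, not the real ultralytics constants


def looks_like_file(source: str) -> bool:
    """Return True if the source ends in a known image suffix, ignoring any URL query string."""
    source_lower = source.lower()
    is_url = source_lower.startswith(("https://", "http://", "rtsp://", "rtmp://", "tcp://"))
    # urlsplit(...).path drops "?token=123", so the suffix check sees "jpg" instead of "jpg?token=123"
    path = urlsplit(source_lower).path if is_url else source_lower
    return path.rpartition(".")[-1] in IMG_FORMATS


print(looks_like_file("https://example.com/zidane.jpg?token=123"))  # True with the new logic
print("zidane.jpg?token=123".rpartition(".")[-1])  # "jpg?token=123", which is why the old check failed
```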
ultralytics/data/utils.py CHANGED
@@ -512,6 +512,11 @@ def check_cls_dataset(dataset: str | Path, split: str = "") -> dict[str, Any]:
      dataset = Path(dataset)
      data_dir = (dataset if dataset.is_dir() else (DATASETS_DIR / dataset)).resolve()
      if not data_dir.is_dir():
+         if data_dir.suffix != "":
+             raise ValueError(
+                 f'Classification datasets must be a directory (data="path/to/dir") not a file (data="{dataset}"), '
+                 "See https://docs.ultralytics.com/datasets/classify/"
+             )
          LOGGER.info("")
          LOGGER.warning(f"Dataset not found, missing path {data_dir}, attempting download...")
          t = time.time()
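The new guard in `check_cls_dataset` distinguishes directory-style `data` arguments from file paths by the suffix of the resolved path. A quick illustration of the `pathlib.Path.suffix` behavior it relies on (standalone, not code from the package):

```python
from pathlib import Path

print(Path("datasets/imagenet10").suffix)  # "" -> treated as a (possibly missing) directory, flow continues
print(Path("datasets/data.yaml").suffix)   # ".yaml" -> non-empty suffix, now rejected with a clear ValueError
```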
ultralytics/engine/exporter.py CHANGED
@@ -112,7 +112,7 @@ from ultralytics.utils.metrics import batch_probiou
  from ultralytics.utils.nms import TorchNMS
  from ultralytics.utils.ops import Profile
  from ultralytics.utils.patches import arange_patch
- from ultralytics.utils.torch_utils import TORCH_1_13, get_latest_opset, select_device
+ from ultralytics.utils.torch_utils import TORCH_1_11, TORCH_1_13, TORCH_2_1, TORCH_2_4, select_device


  def export_formats():
@@ -152,6 +152,34 @@ def export_formats():
      return dict(zip(["Format", "Argument", "Suffix", "CPU", "GPU", "Arguments"], zip(*x)))


+ def best_onnx_opset(onnx, cuda=False) -> int:
+     """Return max ONNX opset for this torch version with ONNX fallback."""
+     version = ".".join(TORCH_VERSION.split(".")[:2])
+     if TORCH_2_4:  # _constants.ONNX_MAX_OPSET first defined in torch 1.13
+         opset = torch.onnx.utils._constants.ONNX_MAX_OPSET - 1  # use second-latest version for safety
+         if cuda:
+             opset -= 2  # fix CUDA ONNXRuntime NMS squeeze op errors
+     else:
+         opset = {
+             "1.8": 12,
+             "1.9": 12,
+             "1.10": 13,
+             "1.11": 14,
+             "1.12": 15,
+             "1.13": 17,
+             "2.0": 17,  # reduced from 18 to fix ONNX errors
+             "2.1": 17,  # reduced from 19
+             "2.2": 17,  # reduced from 19
+             "2.3": 17,  # reduced from 19
+             "2.4": 20,
+             "2.5": 20,
+             "2.6": 20,
+             "2.7": 20,
+             "2.8": 23,
+         }.get(version, 12)
+     return min(opset, onnx.defs.onnx_opset_version())
+
+
  def validate_args(format, passed_args, valid_args):
      """
      Validate arguments based on the export format.
@@ -355,6 +383,8 @@ class Exporter:
          if self.args.nms:
              assert not isinstance(model, ClassificationModel), "'nms=True' is not valid for classification models."
              assert not tflite or not ARM64 or not LINUX, "TFLite export with NMS unsupported on ARM64 Linux"
+             assert not is_tf_format or TORCH_1_13, "TensorFlow exports with NMS require torch>=1.13"
+             assert not onnx or TORCH_1_13, "ONNX export with NMS requires torch>=1.13"
              if getattr(model, "end2end", False):
                  LOGGER.warning("'nms=True' is not available for end2end models. Forcing 'nms=False'.")
                  self.args.nms = False
@@ -586,8 +616,11 @@ class Exporter:
          check_requirements(requirements)
          import onnx  # noqa

-         opset_version = self.args.opset or get_latest_opset()
-         LOGGER.info(f"\n{prefix} starting export with onnx {onnx.__version__} opset {opset_version}...")
+         opset = self.args.opset or best_onnx_opset(onnx, cuda="cuda" in self.device.type)
+         LOGGER.info(f"\n{prefix} starting export with onnx {onnx.__version__} opset {opset}...")
+         if self.args.nms:
+             assert TORCH_1_13, f"'nms=True' ONNX export requires torch>=1.13 (found torch=={TORCH_VERSION})"
+
          f = str(self.file.with_suffix(".onnx"))
          output_names = ["output0", "output1"] if isinstance(self.model, SegmentationModel) else ["output0"]
          dynamic = self.args.dynamic
@@ -601,14 +634,14 @@ class Exporter:
          if self.args.nms:  # only batch size is dynamic with NMS
              dynamic["output0"].pop(2)
          if self.args.nms and self.model.task == "obb":
-             self.args.opset = opset_version  # for NMSModel
+             self.args.opset = opset  # for NMSModel

          with arange_patch(self.args):
              torch2onnx(
                  NMSModel(self.model, self.args) if self.args.nms else self.model,
                  self.im,
                  f,
-                 opset=opset_version,
+                 opset=opset,
                  input_names=["images"],
                  output_names=output_names,
                  dynamic=dynamic or None,
@@ -633,6 +666,11 @@ class Exporter:
              meta = model_onnx.metadata_props.add()
              meta.key, meta.value = k, str(v)

+         # IR version
+         if getattr(model_onnx, "ir_version", 0) > 10:
+             LOGGER.info(f"{prefix} limiting IR version {model_onnx.ir_version} to 10 for ONNXRuntime compatibility...")
+             model_onnx.ir_version = 10
+
          onnx.save(model_onnx, f)
          return f

@@ -644,7 +682,7 @@ class Exporter:
          import openvino as ov

          LOGGER.info(f"\n{prefix} starting export with openvino {ov.__version__}...")
-         assert TORCH_1_13, f"OpenVINO export requires torch>=1.13.0 but torch=={TORCH_VERSION} is installed"
+         assert TORCH_2_1, f"OpenVINO export requires torch>=2.1 but torch=={TORCH_VERSION} is installed"
          ov_model = ov.convert_model(
              NMSModel(self.model, self.args) if self.args.nms else self.model,
              input=None if self.args.dynamic else [self.im.shape],
@@ -837,6 +875,7 @@ class Exporter:

          LOGGER.info(f"\n{prefix} starting export with coremltools {ct.__version__}...")
          assert not WINDOWS, "CoreML export is not supported on Windows, please run on macOS or Linux."
+         assert TORCH_1_11, "CoreML export requires torch>=1.11"
          assert self.args.batch == 1, "CoreML batch sizes > 1 are not supported. Please retry at 'batch=1'."
          f = self.file.with_suffix(".mlmodel" if mlmodel else ".mlpackage")
          if f.is_dir():
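To make the new `best_onnx_opset` logic concrete, here is a hedged standalone sketch of the same selection strategy: map the installed torch version to a known-good opset, then cap it by what the installed `onnx` package can serialize. The version table below is abbreviated and illustrative, not the package's full table:

```python
import onnx
import torch

# Abbreviated, illustrative torch->opset table; see best_onnx_opset() above for the real mapping
TORCH_TO_OPSET = {"1.13": 17, "2.0": 17, "2.4": 20, "2.8": 23}


def pick_opset() -> int:
    """Pick an ONNX opset supported by both the installed torch and onnx packages."""
    torch_mm = ".".join(torch.__version__.split(".")[:2])  # e.g. "2.4" from "2.4.1+cu121"
    opset = TORCH_TO_OPSET.get(torch_mm, 12)  # conservative fallback for unknown versions
    return min(opset, onnx.defs.onnx_opset_version())  # never exceed what onnx itself supports


print(pick_opset())
```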
ultralytics/engine/trainer.py CHANGED
@@ -123,7 +123,7 @@ class BaseTrainer:
          self.hub_session = overrides.pop("session", None)  # HUB
          self.args = get_cfg(cfg, overrides)
          self.check_resume(overrides)
-         self.device = select_device(self.args.device, self.args.batch)
+         self.device = select_device(self.args.device)
          # Update "-1" devices so post-training val does not repeat search
          self.args.device = os.getenv("CUDA_VISIBLE_DEVICES") if "cuda" in str(self.device) else str(self.device)
          self.validator = None
@@ -216,10 +216,10 @@ class BaseTrainer:
                  LOGGER.warning("'rect=True' is incompatible with Multi-GPU training, setting 'rect=False'")
                  self.args.rect = False
              if self.args.batch < 1.0:
-                 LOGGER.warning(
-                     "'batch<1' for AutoBatch is incompatible with Multi-GPU training, setting default 'batch=16'"
+                 raise ValueError(
+                     "AutoBatch with batch<1 not supported for Multi-GPU training, "
+                     f"please specify a valid batch size multiple of GPU count {self.world_size}, i.e. batch={self.world_size * 8}."
                  )
-                 self.args.batch = 16

              # Command
              cmd, file = generate_ddp_command(self)
@@ -260,10 +260,6 @@ class BaseTrainer:
          self.model = self.model.to(self.device)
          self.set_model_attributes()

-         # Initialize loss criterion before compilation for torch.compile compatibility
-         if hasattr(self.model, "init_criterion"):
-             self.model.criterion = self.model.init_criterion()
-
          # Compile model
          self.model = attempt_compile(self.model, device=self.device, mode=self.args.compile)

@@ -415,9 +411,12 @@ class BaseTrainer:
                  # Forward
                  with autocast(self.amp):
                      batch = self.preprocess_batch(batch)
-                     # decouple inference and loss calculations for torch.compile convenience
-                     preds = self.model(batch["img"])
-                     loss, self.loss_items = unwrap_model(self.model).loss(batch, preds)
+                     if self.args.compile:
+                         # Decouple inference and loss calculations for improved compile performance
+                         preds = self.model(batch["img"])
+                         loss, self.loss_items = unwrap_model(self.model).loss(batch, preds)
+                     else:
+                         loss, self.loss_items = self.model(batch)
                  self.loss = loss.sum()
                  if RANK != -1:
                      self.loss *= self.world_size
@@ -581,6 +580,7 @@ class BaseTrainer:
              "ema": deepcopy(unwrap_model(self.ema.ema)).half(),
              "updates": self.ema.updates,
              "optimizer": convert_optimizer_state_dict_to_fp16(deepcopy(self.optimizer.state_dict())),
+             "scaler": self.scaler.state_dict(),
              "train_args": vars(self.args),  # save as dict
              "train_metrics": {**self.metrics, **{"fitness": self.fitness}},
              "train_results": self.read_results_csv(),
@@ -809,9 +809,11 @@ class BaseTrainer:
              return
          best_fitness = 0.0
          start_epoch = ckpt.get("epoch", -1) + 1
-         if ckpt.get("optimizer", None) is not None:
+         if ckpt.get("optimizer") is not None:
              self.optimizer.load_state_dict(ckpt["optimizer"])  # optimizer
              best_fitness = ckpt["best_fitness"]
+         if ckpt.get("scaler") is not None:
+             self.scaler.load_state_dict(ckpt["scaler"])
          if self.ema and ckpt.get("ema"):
              self.ema.ema.load_state_dict(ckpt["ema"].float().state_dict())  # EMA
              self.ema.updates = ckpt["updates"]
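The new `"scaler"` checkpoint entry means AMP loss-scaling state now survives a resume instead of re-warming from its default scale. A minimal standalone sketch of the same save and restore pattern (the model, optimizer, and `last.pt` path are hypothetical, not the trainer's actual objects):

```python
import torch

model = torch.nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())

# Save: persist scaler state alongside model/optimizer, mirroring the new "scaler" checkpoint key
ckpt = {
    "model": model.state_dict(),
    "optimizer": optimizer.state_dict(),
    "scaler": scaler.state_dict(),  # loss-scale value and growth counters
}
torch.save(ckpt, "last.pt")  # hypothetical path

# Resume: restore scaler state only if the checkpoint has it (older checkpoints will not)
ckpt = torch.load("last.pt")
model.load_state_dict(ckpt["model"])
optimizer.load_state_dict(ckpt["optimizer"])
if ckpt.get("scaler") is not None:
    scaler.load_state_dict(ckpt["scaler"])
```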
ultralytics/engine/tuner.py CHANGED
@@ -435,7 +435,7 @@ class Tuner:
                  best_metrics = {k: round(v, 5) for k, v in metrics.items()}
                  for ckpt in weights_dir.glob("*.pt"):
                      shutil.copy2(ckpt, self.tune_dir / "weights")
-             elif cleanup:
+             elif cleanup and best_save_dir:
                  shutil.rmtree(best_save_dir, ignore_errors=True)  # remove iteration dirs to reduce storage space

              # Plot tune results
ultralytics/engine/validator.py CHANGED
@@ -160,7 +160,7 @@ class BaseValidator:
              callbacks.add_integration_callbacks(self)
              model = AutoBackend(
                  model=model or self.args.model,
-                 device=select_device(self.args.device, self.args.batch),
+                 device=select_device(self.args.device),
                  dnn=self.args.dnn,
                  data=self.args.data,
                  fp16=self.args.half,
ultralytics/models/fastsam/predict.py CHANGED
@@ -7,6 +7,7 @@ from ultralytics.models.yolo.segment import SegmentationPredictor
  from ultralytics.utils import DEFAULT_CFG, checks
  from ultralytics.utils.metrics import box_iou
  from ultralytics.utils.ops import scale_masks
+ from ultralytics.utils.torch_utils import TORCH_1_10

  from .utils import adjust_bboxes_to_image_border

@@ -135,7 +136,7 @@ class FastSAMPredictor(SegmentationPredictor):
          crop_ims, filter_idx = [], []
          for i, b in enumerate(result.boxes.xyxy.tolist()):
              x1, y1, x2, y2 = (int(x) for x in b)
-             if masks[i].sum() <= 100:
+             if (masks[i].sum() if TORCH_1_10 else masks[i].sum(0).sum()) <= 100:  # torch 1.9 bug workaround
                  filter_idx.append(i)
                  continue
              crop_ims.append(Image.fromarray(result.orig_img[y1:y2, x1:x2, ::-1]))
ultralytics/models/rtdetr/model.py CHANGED
@@ -11,6 +11,7 @@ References:

  from ultralytics.engine.model import Model
  from ultralytics.nn.tasks import RTDETRDetectionModel
+ from ultralytics.utils.torch_utils import TORCH_1_11

  from .predict import RTDETRPredictor
  from .train import RTDETRTrainer
@@ -44,6 +45,7 @@ class RTDETR(Model):
          Args:
              model (str): Path to the pre-trained model. Supports .pt, .yaml, and .yml formats.
          """
+         assert TORCH_1_11, "RTDETR requires torch>=1.11"
          super().__init__(model=model, task="detect")

      @property
ultralytics/models/sam/modules/sam.py CHANGED
@@ -712,7 +712,7 @@ class SAM2Model(torch.nn.Module):
                  continue  # skip padding frames
              # "maskmem_features" might have been offloaded to CPU in demo use cases,
              # so we load it back to inference device (it's a no-op if it's already on device).
-             feats = prev["maskmem_features"].to(device=device, non_blocking=True)
+             feats = prev["maskmem_features"].to(device=device, non_blocking=device.type == "cuda")
              to_cat_memory.append(feats.flatten(2).permute(2, 0, 1))
              # Spatial positional encoding (it might have been offloaded to CPU in eval)
              maskmem_enc = prev["maskmem_pos_enc"][-1].to(device=device)
ultralytics/models/sam/predict.py CHANGED
@@ -1126,7 +1126,9 @@ class SAM2VideoPredictor(SAM2Predictor):
          )

          if prev_out is not None and prev_out.get("pred_masks") is not None:
-             prev_sam_mask_logits = prev_out["pred_masks"].to(device=self.device, non_blocking=True)
+             prev_sam_mask_logits = prev_out["pred_masks"].to(
+                 device=self.device, non_blocking=self.device.type == "cuda"
+             )
              # Clamp the scale of prev_sam_mask_logits to avoid rare numerical issues.
              prev_sam_mask_logits.clamp_(-32.0, 32.0)
          current_out = self._run_single_frame_inference(
@@ -1418,12 +1420,12 @@ class SAM2VideoPredictor(SAM2Predictor):
          maskmem_features = current_out["maskmem_features"]
          if maskmem_features is not None:
              current_out["maskmem_features"] = maskmem_features.to(
-                 dtype=torch.float16, device=self.device, non_blocking=True
+                 dtype=torch.float16, device=self.device, non_blocking=self.device.type == "cuda"
              )
          # NOTE: Do not support the `fill_holes_in_mask_scores` function since it needs cuda extensions
          # potentially fill holes in the predicted masks
          # if self.fill_hole_area > 0:
-         #     pred_masks = current_out["pred_masks"].to(self.device, non_blocking=True)
+         #     pred_masks = current_out["pred_masks"].to(self.device, non_blocking=self.device.type == "cuda")
          #     pred_masks = fill_holes_in_mask_scores(pred_masks, self.fill_hole_area)

          # "maskmem_pos_enc" is the same across frames, so we only need to store one copy of it
@@ -1636,7 +1638,9 @@ class SAM2VideoPredictor(SAM2Predictor):

          # "maskmem_pos_enc" is the same across frames, so we only need to store one copy of it
          maskmem_pos_enc = self._get_maskmem_pos_enc(maskmem_pos_enc)
-         return maskmem_features.to(dtype=torch.float16, device=self.device, non_blocking=True), maskmem_pos_enc
+         return maskmem_features.to(
+             dtype=torch.float16, device=self.device, non_blocking=self.device.type == "cuda"
+         ), maskmem_pos_enc

      def _add_output_per_object(self, frame_idx, current_out, storage_key):
          """
@@ -1906,7 +1910,7 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
              consolidated_out["object_score_logits"][obj_idx : obj_idx + 1] = out["object_score_logits"]

          high_res_masks = F.interpolate(
-             consolidated_out["pred_masks"].to(self.device, non_blocking=True),
+             consolidated_out["pred_masks"].to(self.device, non_blocking=self.device.type == "cuda"),
              size=self.imgsz,
              mode="bilinear",
              align_corners=False,
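Several of the SAM edits above replace `non_blocking=True` with `non_blocking=(device.type == "cuda")`, since asynchronous copies only pay off for CUDA transfers and the flag is better left off on CPU or MPS targets. A tiny isolated sketch of the pattern (plain PyTorch, nothing package-specific assumed):

```python
import torch


def to_device(t: torch.Tensor, device: torch.device) -> torch.Tensor:
    """Move a tensor, requesting an async copy only when the target is a CUDA device."""
    return t.to(device=device, non_blocking=device.type == "cuda")


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
y = to_device(torch.zeros(2, 3), device)
print(y.device)
```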
ultralytics/models/yolo/classify/train.py CHANGED
@@ -155,8 +155,8 @@ class ClassificationTrainer(BaseTrainer):

      def preprocess_batch(self, batch: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
          """Preprocess a batch of images and classes."""
-         batch["img"] = batch["img"].to(self.device, non_blocking=True)
-         batch["cls"] = batch["cls"].to(self.device, non_blocking=True)
+         batch["img"] = batch["img"].to(self.device, non_blocking=self.device.type == "cuda")
+         batch["cls"] = batch["cls"].to(self.device, non_blocking=self.device.type == "cuda")
          return batch

      def progress_string(self) -> str:
ultralytics/models/yolo/classify/val.py CHANGED
@@ -89,9 +89,9 @@ class ClassificationValidator(BaseValidator):

      def preprocess(self, batch: dict[str, Any]) -> dict[str, Any]:
          """Preprocess input batch by moving data to device and converting to appropriate dtype."""
-         batch["img"] = batch["img"].to(self.device, non_blocking=True)
+         batch["img"] = batch["img"].to(self.device, non_blocking=self.device.type == "cuda")
          batch["img"] = batch["img"].half() if self.args.half else batch["img"].float()
-         batch["cls"] = batch["cls"].to(self.device, non_blocking=True)
+         batch["cls"] = batch["cls"].to(self.device, non_blocking=self.device.type == "cuda")
          return batch

      def update_metrics(self, preds: torch.Tensor, batch: dict[str, Any]) -> None:
ultralytics/models/yolo/detect/train.py CHANGED
@@ -120,7 +120,7 @@ class DetectionTrainer(BaseTrainer):
          """
          for k, v in batch.items():
              if isinstance(v, torch.Tensor):
-                 batch[k] = v.to(self.device, non_blocking=True)
+                 batch[k] = v.to(self.device, non_blocking=self.device.type == "cuda")
          batch["img"] = batch["img"].float() / 255
          if self.args.multi_scale:
              imgs = batch["img"]
ultralytics/models/yolo/detect/val.py CHANGED
@@ -73,7 +73,7 @@ class DetectionValidator(BaseValidator):
          """
          for k, v in batch.items():
              if isinstance(v, torch.Tensor):
-                 batch[k] = v.to(self.device, non_blocking=True)
+                 batch[k] = v.to(self.device, non_blocking=self.device.type == "cuda")
          batch["img"] = (batch["img"].half() if self.args.half else batch["img"].float()) / 255
          return batch