ultralytics 8.3.202__py3-none-any.whl → 8.3.204__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/test_cli.py +7 -9
- tests/test_cuda.py +4 -1
- tests/test_exports.py +7 -7
- tests/test_python.py +18 -10
- tests/test_solutions.py +13 -11
- ultralytics/__init__.py +1 -1
- ultralytics/data/build.py +4 -1
- ultralytics/data/utils.py +5 -0
- ultralytics/engine/exporter.py +45 -6
- ultralytics/engine/trainer.py +14 -12
- ultralytics/engine/tuner.py +1 -1
- ultralytics/engine/validator.py +1 -1
- ultralytics/models/fastsam/predict.py +2 -1
- ultralytics/models/rtdetr/model.py +2 -0
- ultralytics/models/sam/modules/sam.py +1 -1
- ultralytics/models/sam/predict.py +9 -5
- ultralytics/models/yolo/classify/train.py +2 -2
- ultralytics/models/yolo/classify/val.py +2 -2
- ultralytics/models/yolo/detect/train.py +1 -1
- ultralytics/models/yolo/detect/val.py +1 -1
- ultralytics/models/yolo/model.py +1 -0
- ultralytics/models/yolo/world/train.py +4 -2
- ultralytics/models/yolo/yoloe/train.py +1 -13
- ultralytics/nn/autobackend.py +1 -1
- ultralytics/nn/modules/head.py +3 -3
- ultralytics/nn/modules/transformer.py +3 -1
- ultralytics/solutions/similarity_search.py +3 -2
- ultralytics/solutions/streamlit_inference.py +2 -3
- ultralytics/utils/checks.py +27 -0
- ultralytics/utils/metrics.py +3 -3
- ultralytics/utils/tal.py +3 -5
- ultralytics/utils/torch_utils.py +5 -34
- {ultralytics-8.3.202.dist-info → ultralytics-8.3.204.dist-info}/METADATA +21 -21
- {ultralytics-8.3.202.dist-info → ultralytics-8.3.204.dist-info}/RECORD +38 -38
- {ultralytics-8.3.202.dist-info → ultralytics-8.3.204.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.202.dist-info → ultralytics-8.3.204.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.202.dist-info → ultralytics-8.3.204.dist-info}/licenses/LICENSE +0 -0
- {ultralytics-8.3.202.dist-info → ultralytics-8.3.204.dist-info}/top_level.txt +0 -0
tests/test_cli.py
CHANGED
@@ -1,13 +1,14 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
 import subprocess
+from pathlib import Path
 
 import pytest
 from PIL import Image
 
 from tests import CUDA_DEVICE_COUNT, CUDA_IS_AVAILABLE, MODELS, TASK_MODEL_DATA
 from ultralytics.utils import ARM64, ASSETS, LINUX, WEIGHTS_DIR, checks
-from ultralytics.utils.torch_utils import
+from ultralytics.utils.torch_utils import TORCH_1_11
 
 
 def run(cmd: str) -> None:
@@ -33,7 +34,7 @@ def test_train(task: str, model: str, data: str) -> None:
 @pytest.mark.parametrize("task,model,data", TASK_MODEL_DATA)
 def test_val(task: str, model: str, data: str) -> None:
     """Test YOLO validation process for specified task, model, and data using a shell command."""
-    run(f"yolo val {task} model={model} data={data} imgsz=32 save_txt save_json")
+    run(f"yolo val {task} model={model} data={data} imgsz=32 save_txt save_json visualize")
 
 
 @pytest.mark.parametrize("task,model,data", TASK_MODEL_DATA)
@@ -48,15 +49,12 @@ def test_export(model: str) -> None:
     run(f"yolo export model={model} format=torchscript imgsz=32")
 
 
-
+@pytest.mark.skipif(not TORCH_1_11, reason="RTDETR requires torch>=1.11")
+def test_rtdetr(task: str = "detect", model: Path = WEIGHTS_DIR / "rtdetr-l.pt", data: str = "coco8.yaml") -> None:
     """Test the RTDETR functionality within Ultralytics for detection tasks using specified model and data."""
-    #
-    run(f"yolo train {task} model={model} data={data} --imgsz= 160 epochs =1, cache = disk fraction=0.25")  # spaces
+    # Add comma, spaces, fraction=0.25 args to test single-image training
     run(f"yolo predict {task} model={model} source={ASSETS / 'bus.jpg'} imgsz=160 save save_crop save_txt")
-
-    weights = WEIGHTS_DIR / "rtdetr-l.pt"
-    run(f"yolo predict {task} model={weights} source={ASSETS / 'bus.jpg'} imgsz=160 save save_crop save_txt")
-    run(f"yolo train {task} model={weights} epochs=1 imgsz=160 cache=disk data=coco8.yaml")
+    run(f"yolo train {task} model={model} data={data} --imgsz= 160 epochs =1, cache = disk fraction=0.25")
 
 
 @pytest.mark.skipif(checks.IS_PYTHON_3_12, reason="MobileSAM with CLIP is not supported in Python 3.12")
tests/test_cuda.py
CHANGED
@@ -70,6 +70,7 @@ def test_export_onnx_matrix(task, dynamic, int8, half, batch, simplify, nms):
         simplify=simplify,
         nms=nms,
         device=DEVICES[0],
+        # opset=20 if nms else None,  # fix ONNX Runtime errors with NMS
     )
     YOLO(file)([SOURCE] * batch, imgsz=64 if dynamic else 32, device=DEVICES[0])  # exported model inference
     Path(file).unlink()  # cleanup
@@ -114,7 +115,9 @@ def test_train():
     device = tuple(DEVICES) if len(DEVICES) > 1 else DEVICES[0]
     # NVIDIA Jetson only has one GPU and therefore skipping checks
     if not IS_JETSON:
-        results = YOLO(MODEL).train(
+        results = YOLO(MODEL).train(
+            data="coco8.yaml", imgsz=64, epochs=1, device=device, batch=15
+        )  # requires imgsz>=64
         visible = eval(os.environ["CUDA_VISIBLE_DEVICES"])
         assert visible == device, f"Passed GPUs '{device}', but used GPUs '{visible}'"
         assert (
tests/test_exports.py
CHANGED
@@ -20,7 +20,7 @@ from ultralytics.utils import (
     WINDOWS,
     checks,
 )
-from ultralytics.utils.torch_utils import
+from ultralytics.utils.torch_utils import TORCH_1_11, TORCH_1_13, TORCH_2_1
 
 
 def test_export_torchscript():
@@ -35,7 +35,7 @@ def test_export_onnx():
     YOLO(file)(SOURCE, imgsz=32)  # exported model inference
 
 
-@pytest.mark.skipif(not
+@pytest.mark.skipif(not TORCH_2_1, reason="OpenVINO requires torch>=2.1")
 def test_export_openvino():
     """Test YOLO export to OpenVINO format for model inference compatibility."""
     file = YOLO(MODEL).export(format="openvino", imgsz=32)
@@ -43,7 +43,7 @@ def test_export_openvino():
 
 
 @pytest.mark.slow
-@pytest.mark.skipif(not
+@pytest.mark.skipif(not TORCH_2_1, reason="OpenVINO requires torch>=2.1")
 @pytest.mark.parametrize(
     "task, dynamic, int8, half, batch, nms",
     [  # generate all combinations except for exclusion cases
@@ -83,7 +83,7 @@ def test_export_openvino_matrix(task, dynamic, int8, half, batch, nms):
        for task, dynamic, int8, half, batch, simplify, nms in product(
             TASKS, [True, False], [False], [False], [1, 2], [True, False], [True, False]
         )
-        if not ((int8 and half) or (task == "classify" and nms) or (
+        if not ((int8 and half) or (task == "classify" and nms) or (nms and not TORCH_1_13))
     ],
 )
 def test_export_onnx_matrix(task, dynamic, int8, half, batch, simplify, nms):
@@ -117,7 +117,7 @@ def test_export_torchscript_matrix(task, dynamic, int8, half, batch, nms):
 
 @pytest.mark.slow
 @pytest.mark.skipif(not MACOS, reason="CoreML inference only supported on macOS")
-@pytest.mark.skipif(not
+@pytest.mark.skipif(not TORCH_1_11, reason="CoreML export requires torch>=1.11")
 @pytest.mark.skipif(checks.IS_PYTHON_3_13, reason="CoreML not supported in Python 3.13")
 @pytest.mark.parametrize(
     "task, dynamic, int8, half, nms, batch",
@@ -157,7 +157,7 @@ def test_export_coreml_matrix(task, dynamic, int8, half, nms, batch):
        for task, dynamic, int8, half, batch, nms in product(
             TASKS, [False], [True, False], [True, False], [1], [True, False]
         )
-        if not ((int8 and half) or (task == "classify" and nms) or (ARM64 and nms))
+        if not ((int8 and half) or (task == "classify" and nms) or (ARM64 and nms) or (nms and not TORCH_1_13))
     ],
 )
 def test_export_tflite_matrix(task, dynamic, int8, half, batch, nms):
@@ -169,7 +169,7 @@ def test_export_tflite_matrix(task, dynamic, int8, half, batch, nms):
     Path(file).unlink()  # cleanup
 
 
-@pytest.mark.skipif(not
+@pytest.mark.skipif(not TORCH_1_11, reason="CoreML export requires torch>=1.11")
 @pytest.mark.skipif(WINDOWS, reason="CoreML not supported on Windows")  # RuntimeError: BlobWriter not loaded
 @pytest.mark.skipif(LINUX and ARM64, reason="CoreML not supported on aarch64 Linux")
 @pytest.mark.skipif(checks.IS_PYTHON_3_13, reason="CoreML not supported in Python 3.13")
tests/test_python.py
CHANGED
@@ -34,7 +34,7 @@ from ultralytics.utils import (
     is_github_action_running,
 )
 from ultralytics.utils.downloads import download
-from ultralytics.utils.torch_utils import
+from ultralytics.utils.torch_utils import TORCH_1_11, TORCH_1_13
 
 IS_TMP_WRITEABLE = is_dir_writeable(TMP)  # WARNING: must be run once tests start as TMP does not exist on tests/init
 
@@ -125,7 +125,9 @@ def test_predict_img(model_name):
     batch = [
         str(SOURCE),  # filename
         Path(SOURCE),  # Path
-        "https://github.com/ultralytics/assets/releases/download/v0.0.0/zidane.jpg"
+        "https://github.com/ultralytics/assets/releases/download/v0.0.0/zidane.jpg?token=123"
+        if ONLINE
+        else SOURCE,  # URI
         im,  # OpenCV
         Image.open(SOURCE),  # PIL
         np.zeros((320, 640, channels), dtype=np.uint8),  # numpy
@@ -246,7 +248,7 @@ def test_all_model_yamls():
     """Test YOLO model creation for all available YAML configurations in the `cfg/models` directory."""
     for m in (ROOT / "cfg" / "models").rglob("*.yaml"):
         if "rtdetr" in m.name:
-            if
+            if TORCH_1_11:
                 _ = RTDETR(m.name)(SOURCE, imgsz=640)  # must be 640
             else:
                 YOLO(m.name)
@@ -634,7 +636,8 @@ def test_yolo_world():
     )
 
 
-@pytest.mark.skipif(
+@pytest.mark.skipif(not TORCH_1_13, reason="YOLOE with CLIP requires torch>=1.13")
+@pytest.mark.skipif(checks.IS_PYTHON_3_12, reason="YOLOE with CLIP is not supported in Python 3.12")
 @pytest.mark.skipif(
     checks.IS_PYTHON_3_8 and LINUX and ARM64,
     reason="YOLOE with CLIP is not supported in Python 3.8 and aarch64 Linux",
@@ -648,16 +651,12 @@ def test_yoloe():
     model.set_classes(names, model.get_text_pe(names))
     model(SOURCE, conf=0.01)
 
-    import numpy as np
-
     from ultralytics import YOLOE
     from ultralytics.models.yolo.yoloe import YOLOEVPSegPredictor
 
     # visual-prompts
     visuals = dict(
-        bboxes=np.array(
-            [[221.52, 405.8, 344.98, 857.54], [120, 425, 160, 445]],
-        ),
+        bboxes=np.array([[221.52, 405.8, 344.98, 857.54], [120, 425, 160, 445]]),
         cls=np.array([0, 1]),
     )
     model.predict(
@@ -674,7 +673,7 @@ def test_yoloe():
     model.val(data="coco128-seg.yaml", load_vp=True, imgsz=32)
 
     # Train, fine-tune
-    from ultralytics.models.yolo.yoloe import YOLOEPESegTrainer
+    from ultralytics.models.yolo.yoloe import YOLOEPESegTrainer, YOLOESegTrainerFromScratch
 
     model = YOLOE("yoloe-11s-seg.pt")
     model.train(
@@ -684,6 +683,15 @@ def test_yoloe():
         trainer=YOLOEPESegTrainer,
         imgsz=32,
     )
+    # Train, from scratch
+    model = YOLOE("yoloe-11s-seg.yaml")
+    model.train(
+        data=dict(train=dict(yolo_data=["coco128-seg.yaml"]), val=dict(yolo_data=["coco128-seg.yaml"])),
+        epochs=1,
+        close_mosaic=1,
+        trainer=YOLOESegTrainerFromScratch,
+        imgsz=32,
+    )
 
     # prompt-free
     # predict
tests/test_solutions.py
CHANGED
@@ -12,8 +12,9 @@ import pytest
 
 from tests import MODEL, TMP
 from ultralytics import solutions
-from ultralytics.utils import ASSETS_URL, IS_RASPBERRYPI, checks
+from ultralytics.utils import ASSETS_URL, IS_RASPBERRYPI, TORCH_VERSION, checks
 from ultralytics.utils.downloads import safe_download
+from ultralytics.utils.torch_utils import TORCH_2_4
 
 # Pre-defined arguments values
 SHOW = False
@@ -205,15 +206,6 @@ def test_solution(name, solution_class, needs_frame_count, video, kwargs):
     )
 
 
-@pytest.mark.skipif(checks.IS_PYTHON_3_8, reason="Disabled due to unsupported CLIP dependencies.")
-@pytest.mark.skipif(IS_RASPBERRYPI, reason="Disabled due to slow performance on Raspberry Pi.")
-def test_similarity_search():
-    """Test similarity search solution with sample images and text query."""
-    safe_download(f"{ASSETS_URL}/4-imgs-similaritysearch.zip", dir=TMP)  # 4 dog images for testing in a zip file
-    searcher = solutions.VisualAISearch(data=str(TMP / "4-imgs-similaritysearch"))
-    _ = searcher("a dog sitting on a bench")  # Returns the results in format "- img name | similarity score"
-
-
 def test_left_click_selection():
     """Test distance calculation left click selection functionality."""
     dc = solutions.DistanceCalculation()
@@ -297,7 +289,16 @@ def test_streamlit_handle_video_upload_creates_file():
     os.remove("ultralytics.mp4")
 
 
-@pytest.mark.skipif(
+@pytest.mark.skipif(not TORCH_2_4, reason=f"VisualAISearch requires torch>=2.4 (found torch=={TORCH_VERSION})")
+@pytest.mark.skipif(IS_RASPBERRYPI, reason="Disabled due to slow performance on Raspberry Pi.")
+def test_similarity_search():
+    """Test similarity search solution with sample images and text query."""
+    safe_download(f"{ASSETS_URL}/4-imgs-similaritysearch.zip", dir=TMP)  # 4 dog images for testing in a zip file
+    searcher = solutions.VisualAISearch(data=str(TMP / "4-imgs-similaritysearch"))
+    _ = searcher("a dog sitting on a bench")  # Returns the results in format "- img name | similarity score"
+
+
+@pytest.mark.skipif(not TORCH_2_4, reason=f"VisualAISearch requires torch>=2.4 (found torch=={TORCH_VERSION})")
 @pytest.mark.skipif(IS_RASPBERRYPI, reason="Disabled due to slow performance on Raspberry Pi.")
 def test_similarity_search_app_init():
     """Test SearchApp initializes with required attributes."""
@@ -306,6 +307,7 @@ def test_similarity_search_app_init():
     assert hasattr(app, "run")
 
 
+@pytest.mark.skipif(not TORCH_2_4, reason=f"VisualAISearch requires torch>=2.4 (found torch=={TORCH_VERSION})")
 @pytest.mark.skipif(IS_RASPBERRYPI, reason="Disabled due to slow performance on Raspberry Pi.")
 def test_similarity_search_complete(tmp_path):
     """Test VisualAISearch end-to-end with sample image and query."""
ultralytics/__init__.py
CHANGED
ultralytics/data/build.py
CHANGED
@@ -7,6 +7,7 @@ import random
 from collections.abc import Iterator
 from pathlib import Path
 from typing import Any
+from urllib.parse import urlsplit
 
 import numpy as np
 import torch
@@ -247,8 +248,10 @@ def check_source(source):
     if isinstance(source, (str, int, Path)):  # int for local usb camera
         source = str(source)
         source_lower = source.lower()
-        is_file = source_lower.rpartition(".")[-1] in (IMG_FORMATS | VID_FORMATS)
         is_url = source_lower.startswith(("https://", "http://", "rtsp://", "rtmp://", "tcp://"))
+        is_file = (urlsplit(source_lower).path if is_url else source_lower).rpartition(".")[-1] in (
+            IMG_FORMATS | VID_FORMATS
+        )
         webcam = source.isnumeric() or source.endswith(".streams") or (is_url and not is_file)
         screenshot = source_lower == "screen"
         if is_url and is_file:
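The new `is_file` check strips the URL query string before testing the suffix, so a download link such as `.../zidane.jpg?token=123` is classified as a file rather than a stream. A minimal standalone sketch of the behavior (the formats set below is an illustrative subset, not the full set used by ultralytics):

```python
from urllib.parse import urlsplit

IMG_FORMATS = {"jpg", "jpeg", "png"}  # illustrative subset only

source = "https://github.com/ultralytics/assets/releases/download/v0.0.0/zidane.jpg?token=123"
source_lower = source.lower()
is_url = source_lower.startswith(("https://", "http://", "rtsp://", "rtmp://", "tcp://"))

# Old check: the suffix becomes "jpg?token=123" and the URL is misclassified as a stream
old_is_file = source_lower.rpartition(".")[-1] in IMG_FORMATS  # False

# New check: urlsplit().path drops the query, leaving ".../zidane.jpg"
new_is_file = (urlsplit(source_lower).path if is_url else source_lower).rpartition(".")[-1] in IMG_FORMATS  # True
print(old_is_file, new_is_file)
```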
ultralytics/data/utils.py
CHANGED
@@ -512,6 +512,11 @@ def check_cls_dataset(dataset: str | Path, split: str = "") -> dict[str, Any]:
     dataset = Path(dataset)
     data_dir = (dataset if dataset.is_dir() else (DATASETS_DIR / dataset)).resolve()
     if not data_dir.is_dir():
+        if data_dir.suffix != "":
+            raise ValueError(
+                f'Classification datasets must be a directory (data="path/to/dir") not a file (data="{dataset}"), '
+                "See https://docs.ultralytics.com/datasets/classify/"
+            )
         LOGGER.info("")
         LOGGER.warning(f"Dataset not found, missing path {data_dir}, attempting download...")
         t = time.time()
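Based on this new check, passing a file path (for example a YAML) as `data` for a classification task now fails fast with a clear message instead of attempting a download. A hedged sketch of the expected behavior (the built-in dataset names are illustrative):

```python
from ultralytics import YOLO

# Classification datasets are directories of class subfolders, not YAML files,
# so a file-like "data" argument should now hit the ValueError added above.
try:
    YOLO("yolo11n-cls.pt").train(data="coco8.yaml", epochs=1, imgsz=32)
except Exception as e:  # surfaced as ValueError, possibly re-wrapped by the trainer's dataset handling
    print(e)

# A directory-style dataset such as "mnist160" remains valid:
# YOLO("yolo11n-cls.pt").train(data="mnist160", epochs=1, imgsz=32)
```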
ultralytics/engine/exporter.py
CHANGED
@@ -112,7 +112,7 @@ from ultralytics.utils.metrics import batch_probiou
 from ultralytics.utils.nms import TorchNMS
 from ultralytics.utils.ops import Profile
 from ultralytics.utils.patches import arange_patch
-from ultralytics.utils.torch_utils import TORCH_1_13,
+from ultralytics.utils.torch_utils import TORCH_1_11, TORCH_1_13, TORCH_2_1, TORCH_2_4, select_device
 
 
 def export_formats():
@@ -152,6 +152,34 @@ def export_formats():
     return dict(zip(["Format", "Argument", "Suffix", "CPU", "GPU", "Arguments"], zip(*x)))
 
 
+def best_onnx_opset(onnx, cuda=False) -> int:
+    """Return max ONNX opset for this torch version with ONNX fallback."""
+    version = ".".join(TORCH_VERSION.split(".")[:2])
+    if TORCH_2_4:  # _constants.ONNX_MAX_OPSET first defined in torch 1.13
+        opset = torch.onnx.utils._constants.ONNX_MAX_OPSET - 1  # use second-latest version for safety
+        if cuda:
+            opset -= 2  # fix CUDA ONNXRuntime NMS squeeze op errors
+    else:
+        opset = {
+            "1.8": 12,
+            "1.9": 12,
+            "1.10": 13,
+            "1.11": 14,
+            "1.12": 15,
+            "1.13": 17,
+            "2.0": 17,  # reduced from 18 to fix ONNX errors
+            "2.1": 17,  # reduced from 19
+            "2.2": 17,  # reduced from 19
+            "2.3": 17,  # reduced from 19
+            "2.4": 20,
+            "2.5": 20,
+            "2.6": 20,
+            "2.7": 20,
+            "2.8": 23,
+        }.get(version, 12)
+    return min(opset, onnx.defs.onnx_opset_version())
+
+
 def validate_args(format, passed_args, valid_args):
     """
     Validate arguments based on the export format.
@@ -355,6 +383,8 @@ class Exporter:
         if self.args.nms:
             assert not isinstance(model, ClassificationModel), "'nms=True' is not valid for classification models."
             assert not tflite or not ARM64 or not LINUX, "TFLite export with NMS unsupported on ARM64 Linux"
+            assert not is_tf_format or TORCH_1_13, "TensorFlow exports with NMS require torch>=1.13"
+            assert not onnx or TORCH_1_13, "ONNX export with NMS requires torch>=1.13"
             if getattr(model, "end2end", False):
                 LOGGER.warning("'nms=True' is not available for end2end models. Forcing 'nms=False'.")
                 self.args.nms = False
@@ -586,8 +616,11 @@ class Exporter:
         check_requirements(requirements)
         import onnx  # noqa
 
-
-        LOGGER.info(f"\n{prefix} starting export with onnx {onnx.__version__} opset {
+        opset = self.args.opset or best_onnx_opset(onnx, cuda="cuda" in self.device.type)
+        LOGGER.info(f"\n{prefix} starting export with onnx {onnx.__version__} opset {opset}...")
+        if self.args.nms:
+            assert TORCH_1_13, f"'nms=True' ONNX export requires torch>=1.13 (found torch=={TORCH_VERSION})"
+
         f = str(self.file.with_suffix(".onnx"))
         output_names = ["output0", "output1"] if isinstance(self.model, SegmentationModel) else ["output0"]
         dynamic = self.args.dynamic
@@ -601,14 +634,14 @@ class Exporter:
             if self.args.nms:  # only batch size is dynamic with NMS
                 dynamic["output0"].pop(2)
         if self.args.nms and self.model.task == "obb":
-            self.args.opset =
+            self.args.opset = opset  # for NMSModel
 
         with arange_patch(self.args):
             torch2onnx(
                 NMSModel(self.model, self.args) if self.args.nms else self.model,
                 self.im,
                 f,
-                opset=
+                opset=opset,
                 input_names=["images"],
                 output_names=output_names,
                 dynamic=dynamic or None,
@@ -633,6 +666,11 @@ class Exporter:
             meta = model_onnx.metadata_props.add()
             meta.key, meta.value = k, str(v)
 
+        # IR version
+        if getattr(model_onnx, "ir_version", 0) > 10:
+            LOGGER.info(f"{prefix} limiting IR version {model_onnx.ir_version} to 10 for ONNXRuntime compatibility...")
+            model_onnx.ir_version = 10
+
         onnx.save(model_onnx, f)
         return f
 
@@ -644,7 +682,7 @@ class Exporter:
         import openvino as ov
 
         LOGGER.info(f"\n{prefix} starting export with openvino {ov.__version__}...")
-        assert
+        assert TORCH_2_1, f"OpenVINO export requires torch>=2.1 but torch=={TORCH_VERSION} is installed"
         ov_model = ov.convert_model(
             NMSModel(self.model, self.args) if self.args.nms else self.model,
             input=None if self.args.dynamic else [self.im.shape],
@@ -837,6 +875,7 @@ class Exporter:
 
         LOGGER.info(f"\n{prefix} starting export with coremltools {ct.__version__}...")
         assert not WINDOWS, "CoreML export is not supported on Windows, please run on macOS or Linux."
+        assert TORCH_1_11, "CoreML export requires torch>=1.11"
        assert self.args.batch == 1, "CoreML batch sizes > 1 are not supported. Please retry at 'batch=1'."
         f = self.file.with_suffix(".mlmodel" if mlmodel else ".mlpackage")
         if f.is_dir():
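With `best_onnx_opset`, the ONNX opset is now derived from the running torch version (and capped by the installed onnx package) whenever `opset` is left unset; an explicitly passed value still wins. A brief usage sketch based on the hunks above:

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")

# opset omitted: the exporter calls best_onnx_opset(onnx, cuda=...) internally
model.export(format="onnx", imgsz=64)

# opset given: self.args.opset takes precedence over the auto-selected value
model.export(format="onnx", imgsz=64, opset=17)
```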
ultralytics/engine/trainer.py
CHANGED
@@ -123,7 +123,7 @@ class BaseTrainer:
         self.hub_session = overrides.pop("session", None)  # HUB
         self.args = get_cfg(cfg, overrides)
         self.check_resume(overrides)
-        self.device = select_device(self.args.device
+        self.device = select_device(self.args.device)
         # Update "-1" devices so post-training val does not repeat search
         self.args.device = os.getenv("CUDA_VISIBLE_DEVICES") if "cuda" in str(self.device) else str(self.device)
         self.validator = None
@@ -216,10 +216,10 @@ class BaseTrainer:
                 LOGGER.warning("'rect=True' is incompatible with Multi-GPU training, setting 'rect=False'")
                 self.args.rect = False
             if self.args.batch < 1.0:
-
-                    "
+                raise ValueError(
+                    "AutoBatch with batch<1 not supported for Multi-GPU training, "
+                    f"please specify a valid batch size multiple of GPU count {self.world_size}, i.e. batch={self.world_size * 8}."
                 )
-                self.args.batch = 16
 
             # Command
             cmd, file = generate_ddp_command(self)
@@ -260,10 +260,6 @@ class BaseTrainer:
         self.model = self.model.to(self.device)
         self.set_model_attributes()
 
-        # Initialize loss criterion before compilation for torch.compile compatibility
-        if hasattr(self.model, "init_criterion"):
-            self.model.criterion = self.model.init_criterion()
-
         # Compile model
         self.model = attempt_compile(self.model, device=self.device, mode=self.args.compile)
 
@@ -415,9 +411,12 @@ class BaseTrainer:
                 # Forward
                 with autocast(self.amp):
                     batch = self.preprocess_batch(batch)
-
-
-
+                    if self.args.compile:
+                        # Decouple inference and loss calculations for improved compile performance
+                        preds = self.model(batch["img"])
+                        loss, self.loss_items = unwrap_model(self.model).loss(batch, preds)
+                    else:
+                        loss, self.loss_items = self.model(batch)
                 self.loss = loss.sum()
                 if RANK != -1:
                     self.loss *= self.world_size
@@ -581,6 +580,7 @@ class BaseTrainer:
             "ema": deepcopy(unwrap_model(self.ema.ema)).half(),
             "updates": self.ema.updates,
             "optimizer": convert_optimizer_state_dict_to_fp16(deepcopy(self.optimizer.state_dict())),
+            "scaler": self.scaler.state_dict(),
             "train_args": vars(self.args),  # save as dict
             "train_metrics": {**self.metrics, **{"fitness": self.fitness}},
             "train_results": self.read_results_csv(),
@@ -809,9 +809,11 @@ class BaseTrainer:
             return
         best_fitness = 0.0
         start_epoch = ckpt.get("epoch", -1) + 1
-        if ckpt.get("optimizer"
+        if ckpt.get("optimizer") is not None:
             self.optimizer.load_state_dict(ckpt["optimizer"])  # optimizer
             best_fitness = ckpt["best_fitness"]
+        if ckpt.get("scaler") is not None:
+            self.scaler.load_state_dict(ckpt["scaler"])
         if self.ema and ckpt.get("ema"):
             self.ema.ema.load_state_dict(ckpt["ema"].float().state_dict())  # EMA
             self.ema.updates = ckpt["updates"]
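Checkpoints now carry the AMP GradScaler state under a "scaler" key, and `resume_training` restores it alongside the optimizer, so mixed-precision loss scaling continues across resumed runs rather than re-warming from the default scale. A minimal resume sketch (the run path is illustrative):

```python
from ultralytics import YOLO

# Resuming from a checkpoint written by 8.3.204 also restores optimizer and GradScaler state
YOLO("runs/detect/train/weights/last.pt").train(resume=True)
```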
ultralytics/engine/tuner.py
CHANGED
@@ -435,7 +435,7 @@ class Tuner:
                 best_metrics = {k: round(v, 5) for k, v in metrics.items()}
                 for ckpt in weights_dir.glob("*.pt"):
                     shutil.copy2(ckpt, self.tune_dir / "weights")
-            elif cleanup:
+            elif cleanup and best_save_dir:
                 shutil.rmtree(best_save_dir, ignore_errors=True)  # remove iteration dirs to reduce storage space
 
             # Plot tune results
ultralytics/engine/validator.py
CHANGED
@@ -160,7 +160,7 @@ class BaseValidator:
             callbacks.add_integration_callbacks(self)
             model = AutoBackend(
                 model=model or self.args.model,
-                device=select_device(self.args.device
+                device=select_device(self.args.device),
                 dnn=self.args.dnn,
                 data=self.args.data,
                 fp16=self.args.half,
ultralytics/models/fastsam/predict.py
CHANGED
@@ -7,6 +7,7 @@ from ultralytics.models.yolo.segment import SegmentationPredictor
 from ultralytics.utils import DEFAULT_CFG, checks
 from ultralytics.utils.metrics import box_iou
 from ultralytics.utils.ops import scale_masks
+from ultralytics.utils.torch_utils import TORCH_1_10
 
 from .utils import adjust_bboxes_to_image_border
 
@@ -135,7 +136,7 @@ class FastSAMPredictor(SegmentationPredictor):
         crop_ims, filter_idx = [], []
         for i, b in enumerate(result.boxes.xyxy.tolist()):
             x1, y1, x2, y2 = (int(x) for x in b)
-            if masks[i].sum() <= 100:
+            if (masks[i].sum() if TORCH_1_10 else masks[i].sum(0).sum()) <= 100:  # torch 1.9 bug workaround
                 filter_idx.append(i)
                 continue
             crop_ims.append(Image.fromarray(result.orig_img[y1:y2, x1:x2, ::-1]))
ultralytics/models/rtdetr/model.py
CHANGED
@@ -11,6 +11,7 @@ References:
 
 from ultralytics.engine.model import Model
 from ultralytics.nn.tasks import RTDETRDetectionModel
+from ultralytics.utils.torch_utils import TORCH_1_11
 
 from .predict import RTDETRPredictor
 from .train import RTDETRTrainer
@@ -44,6 +45,7 @@ class RTDETR(Model):
         Args:
             model (str): Path to the pre-trained model. Supports .pt, .yaml, and .yml formats.
         """
+        assert TORCH_1_11, "RTDETR requires torch>=1.11"
         super().__init__(model=model, task="detect")
 
     @property
ultralytics/models/sam/modules/sam.py
CHANGED
@@ -712,7 +712,7 @@ class SAM2Model(torch.nn.Module):
                     continue  # skip padding frames
                 # "maskmem_features" might have been offloaded to CPU in demo use cases,
                 # so we load it back to inference device (it's a no-op if it's already on device).
-                feats = prev["maskmem_features"].to(device=device, non_blocking=
+                feats = prev["maskmem_features"].to(device=device, non_blocking=device.type == "cuda")
                 to_cat_memory.append(feats.flatten(2).permute(2, 0, 1))
                 # Spatial positional encoding (it might have been offloaded to CPU in eval)
                 maskmem_enc = prev["maskmem_pos_enc"][-1].to(device=device)
ultralytics/models/sam/predict.py
CHANGED
@@ -1126,7 +1126,9 @@ class SAM2VideoPredictor(SAM2Predictor):
             )
 
             if prev_out is not None and prev_out.get("pred_masks") is not None:
-                prev_sam_mask_logits = prev_out["pred_masks"].to(
+                prev_sam_mask_logits = prev_out["pred_masks"].to(
+                    device=self.device, non_blocking=self.device.type == "cuda"
+                )
                 # Clamp the scale of prev_sam_mask_logits to avoid rare numerical issues.
                 prev_sam_mask_logits.clamp_(-32.0, 32.0)
             current_out = self._run_single_frame_inference(
@@ -1418,12 +1420,12 @@ class SAM2VideoPredictor(SAM2Predictor):
         maskmem_features = current_out["maskmem_features"]
         if maskmem_features is not None:
             current_out["maskmem_features"] = maskmem_features.to(
-                dtype=torch.float16, device=self.device, non_blocking=
+                dtype=torch.float16, device=self.device, non_blocking=self.device.type == "cuda"
             )
         # NOTE: Do not support the `fill_holes_in_mask_scores` function since it needs cuda extensions
         # potentially fill holes in the predicted masks
         # if self.fill_hole_area > 0:
-        #     pred_masks = current_out["pred_masks"].to(self.device, non_blocking=
+        #     pred_masks = current_out["pred_masks"].to(self.device, non_blocking=self.device.type == "cuda")
         #     pred_masks = fill_holes_in_mask_scores(pred_masks, self.fill_hole_area)
 
         # "maskmem_pos_enc" is the same across frames, so we only need to store one copy of it
@@ -1636,7 +1638,9 @@ class SAM2VideoPredictor(SAM2Predictor):
 
         # "maskmem_pos_enc" is the same across frames, so we only need to store one copy of it
         maskmem_pos_enc = self._get_maskmem_pos_enc(maskmem_pos_enc)
-        return maskmem_features.to(
+        return maskmem_features.to(
+            dtype=torch.float16, device=self.device, non_blocking=self.device.type == "cuda"
+        ), maskmem_pos_enc
 
     def _add_output_per_object(self, frame_idx, current_out, storage_key):
         """
@@ -1906,7 +1910,7 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
         consolidated_out["object_score_logits"][obj_idx : obj_idx + 1] = out["object_score_logits"]
 
         high_res_masks = F.interpolate(
-            consolidated_out["pred_masks"].to(self.device, non_blocking=
+            consolidated_out["pred_masks"].to(self.device, non_blocking=self.device.type == "cuda"),
             size=self.imgsz,
             mode="bilinear",
             align_corners=False,
ultralytics/models/yolo/classify/train.py
CHANGED
@@ -155,8 +155,8 @@ class ClassificationTrainer(BaseTrainer):
 
     def preprocess_batch(self, batch: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
         """Preprocess a batch of images and classes."""
-        batch["img"] = batch["img"].to(self.device, non_blocking=
-        batch["cls"] = batch["cls"].to(self.device, non_blocking=
+        batch["img"] = batch["img"].to(self.device, non_blocking=self.device.type == "cuda")
+        batch["cls"] = batch["cls"].to(self.device, non_blocking=self.device.type == "cuda")
         return batch
 
     def progress_string(self) -> str:
ultralytics/models/yolo/classify/val.py
CHANGED
@@ -89,9 +89,9 @@ class ClassificationValidator(BaseValidator):
 
     def preprocess(self, batch: dict[str, Any]) -> dict[str, Any]:
         """Preprocess input batch by moving data to device and converting to appropriate dtype."""
-        batch["img"] = batch["img"].to(self.device, non_blocking=
+        batch["img"] = batch["img"].to(self.device, non_blocking=self.device.type == "cuda")
         batch["img"] = batch["img"].half() if self.args.half else batch["img"].float()
-        batch["cls"] = batch["cls"].to(self.device, non_blocking=
+        batch["cls"] = batch["cls"].to(self.device, non_blocking=self.device.type == "cuda")
         return batch
 
     def update_metrics(self, preds: torch.Tensor, batch: dict[str, Any]) -> None:
ultralytics/models/yolo/detect/train.py
CHANGED
@@ -120,7 +120,7 @@ class DetectionTrainer(BaseTrainer):
         """
         for k, v in batch.items():
             if isinstance(v, torch.Tensor):
-                batch[k] = v.to(self.device, non_blocking=
+                batch[k] = v.to(self.device, non_blocking=self.device.type == "cuda")
         batch["img"] = batch["img"].float() / 255
         if self.args.multi_scale:
             imgs = batch["img"]
ultralytics/models/yolo/detect/val.py
CHANGED
@@ -73,7 +73,7 @@ class DetectionValidator(BaseValidator):
         """
         for k, v in batch.items():
             if isinstance(v, torch.Tensor):
-                batch[k] = v.to(self.device, non_blocking=
+                batch[k] = v.to(self.device, non_blocking=self.device.type == "cuda")
         batch["img"] = (batch["img"].half() if self.args.half else batch["img"].float()) / 255
         return batch
 
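The repeated `non_blocking=self.device.type == "cuda"` edits across the trainers, validators and SAM predictors make asynchronous host-to-device copies conditional on an actual CUDA target; for CPU or MPS devices the argument now evaluates to False and the copy is synchronous. A minimal sketch of the pattern:

```python
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
x = torch.zeros(1, 3, 640, 640)

# Async copy only helps when the destination is a CUDA device (ideally from pinned memory);
# non_blocking=True for CPU/MPS targets is at best a no-op, so it is gated on device.type.
x = x.to(device, non_blocking=device.type == "cuda")
```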