ultralytics-opencv-headless 8.3.253__py3-none-any.whl → 8.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. tests/__init__.py +2 -2
  2. tests/conftest.py +1 -1
  3. tests/test_cuda.py +8 -2
  4. tests/test_engine.py +6 -6
  5. tests/test_exports.py +10 -3
  6. tests/test_integrations.py +9 -9
  7. tests/test_python.py +14 -14
  8. tests/test_solutions.py +3 -3
  9. ultralytics/__init__.py +1 -1
  10. ultralytics/cfg/__init__.py +6 -6
  11. ultralytics/cfg/default.yaml +3 -1
  12. ultralytics/cfg/models/26/yolo26-cls.yaml +33 -0
  13. ultralytics/cfg/models/26/yolo26-obb.yaml +52 -0
  14. ultralytics/cfg/models/26/yolo26-p2.yaml +60 -0
  15. ultralytics/cfg/models/26/yolo26-p6.yaml +60 -0
  16. ultralytics/cfg/models/26/yolo26-pose.yaml +53 -0
  17. ultralytics/cfg/models/26/yolo26-seg.yaml +52 -0
  18. ultralytics/cfg/models/26/yolo26.yaml +52 -0
  19. ultralytics/cfg/models/26/yoloe-26-seg.yaml +53 -0
  20. ultralytics/cfg/models/26/yoloe-26.yaml +53 -0
  21. ultralytics/data/augment.py +7 -0
  22. ultralytics/data/dataset.py +1 -1
  23. ultralytics/engine/exporter.py +10 -3
  24. ultralytics/engine/model.py +1 -1
  25. ultralytics/engine/trainer.py +40 -15
  26. ultralytics/engine/tuner.py +15 -7
  27. ultralytics/models/fastsam/predict.py +1 -1
  28. ultralytics/models/yolo/detect/train.py +3 -2
  29. ultralytics/models/yolo/detect/val.py +6 -0
  30. ultralytics/models/yolo/model.py +1 -1
  31. ultralytics/models/yolo/obb/predict.py +1 -1
  32. ultralytics/models/yolo/obb/train.py +1 -1
  33. ultralytics/models/yolo/pose/train.py +1 -1
  34. ultralytics/models/yolo/segment/predict.py +1 -1
  35. ultralytics/models/yolo/segment/train.py +1 -1
  36. ultralytics/models/yolo/segment/val.py +3 -1
  37. ultralytics/models/yolo/yoloe/train.py +6 -1
  38. ultralytics/models/yolo/yoloe/train_seg.py +6 -1
  39. ultralytics/nn/autobackend.py +7 -3
  40. ultralytics/nn/modules/__init__.py +8 -0
  41. ultralytics/nn/modules/block.py +127 -8
  42. ultralytics/nn/modules/head.py +818 -205
  43. ultralytics/nn/tasks.py +74 -29
  44. ultralytics/nn/text_model.py +5 -2
  45. ultralytics/optim/__init__.py +5 -0
  46. ultralytics/optim/muon.py +338 -0
  47. ultralytics/utils/benchmarks.py +1 -0
  48. ultralytics/utils/callbacks/platform.py +9 -7
  49. ultralytics/utils/downloads.py +3 -1
  50. ultralytics/utils/export/engine.py +19 -10
  51. ultralytics/utils/export/imx.py +22 -11
  52. ultralytics/utils/export/tensorflow.py +1 -41
  53. ultralytics/utils/loss.py +584 -203
  54. ultralytics/utils/metrics.py +1 -0
  55. ultralytics/utils/ops.py +11 -2
  56. ultralytics/utils/tal.py +98 -19
  57. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.0.dist-info}/METADATA +31 -39
  58. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.0.dist-info}/RECORD +62 -51
  59. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.0.dist-info}/WHEEL +0 -0
  60. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.0.dist-info}/entry_points.txt +0 -0
  61. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.0.dist-info}/licenses/LICENSE +0 -0
  62. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.0.dist-info}/top_level.txt +0 -0
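The headline addition in 8.4.0 is the YOLO26 model family under ultralytics/cfg/models/26/ (plus the new ultralytics/optim package). Assuming these configs resolve like earlier generations, with the scale letter parsed from the filename, they should load through the usual YOLO API; a minimal sketch, with the model name and dataset chosen purely for illustration:

from ultralytics import YOLO

# Build a YOLO26-nano detector from the config added in 8.4.0
model = YOLO("yolo26n.yaml")  # 'n' selects the [0.50, 0.25, 1024] entry from the 'scales' block

# Standard training and prediction calls are unchanged
model.train(data="coco8.yaml", epochs=3, imgsz=640)
results = model.predict("https://ultralytics.com/images/bus.jpg")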
ultralytics/cfg/models/26/yolo26-pose.yaml
@@ -0,0 +1,53 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ # Ultralytics YOLO26 object detection model with P3/8 - P5/32 outputs
+ # Model docs: https://docs.ultralytics.com/models/yolo26
+ # Task docs: https://docs.ultralytics.com/tasks/pose
+
+ # Parameters
+ nc: 80 # number of classes
+ end2end: True # whether to use end-to-end mode
+ reg_max: 1 # DFL bins
+ kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+ scales: # model compound scaling constants, i.e. 'model=YOLO26n.yaml' will call YOLO26.yaml with scale 'n'
+ # [depth, width, max_channels]
+ n: [0.50, 0.25, 1024] # summary: 363 layers, 3,747,554 parameters, 3,747,554 gradients, 10.7 GFLOPs
+ s: [0.50, 0.50, 1024] # summary: 363 layers, 11,870,498 parameters, 11,870,498 gradients, 29.6 GFLOPs
+ m: [0.50, 1.00, 512] # summary: 383 layers, 24,344,482 parameters, 24,344,482 gradients, 85.9 GFLOPs
+ l: [1.00, 1.00, 512] # summary: 495 layers, 28,747,938 parameters, 28,747,938 gradients, 104.3 GFLOPs
+ x: [1.00, 1.50, 512] # summary: 495 layers, 62,914,350 parameters, 62,914,350 gradients, 226.3 GFLOPs
+
+ # YOLO26n backbone
+ backbone:
+ # [from, repeats, module, args]
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+ - [-1, 2, C3k2, [256, False, 0.25]]
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+ - [-1, 2, C3k2, [512, False, 0.25]]
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+ - [-1, 2, C3k2, [512, True]]
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+ - [-1, 2, C3k2, [1024, True]]
+ - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+ - [-1, 2, C2PSA, [1024]] # 10
+
+ # YOLO26n head
+ head:
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+ - [-1, 2, C3k2, [512, True]] # 13
+
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+ - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+ - [-1, 1, Conv, [256, 3, 2]]
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
+ - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+ - [-1, 1, Conv, [512, 3, 2]]
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
+ - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+ - [[16, 19, 22], 1, Pose26, [nc, kpt_shape]] # Detect(P3, P4, P5)
ultralytics/cfg/models/26/yolo26-seg.yaml
@@ -0,0 +1,52 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ # Ultralytics YOLO26 object detection model with P3/8 - P5/32 outputs
+ # Model docs: https://docs.ultralytics.com/models/yolo26
+ # Task docs: https://docs.ultralytics.com/tasks/segment
+
+ # Parameters
+ nc: 80 # number of classes
+ end2end: True # whether to use end-to-end mode
+ reg_max: 1 # DFL bins
+ scales: # model compound scaling constants, i.e. 'model=YOLO26n.yaml' will call YOLO26.yaml with scale 'n'
+ # [depth, width, max_channels]
+ n: [0.50, 0.25, 1024] # summary: 309 layers, 3,126,280 parameters, 3,126,280 gradients, 10.5 GFLOPs
+ s: [0.50, 0.50, 1024] # summary: 309 layers, 11,505,800 parameters, 11,505,800 gradients, 37.4 GFLOPs
+ m: [0.50, 1.00, 512] # summary: 329 layers, 27,112,072 parameters, 27,112,072 gradients, 132.5 GFLOPs
+ l: [1.00, 1.00, 512] # summary: 441 layers, 31,515,528 parameters, 31,515,528 gradients, 150.9 GFLOPs
+ x: [1.00, 1.50, 512] # summary: 441 layers, 70,693,800 parameters, 70,693,800 gradients, 337.7 GFLOPs
+
+ # YOLO26n backbone
+ backbone:
+ # [from, repeats, module, args]
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+ - [-1, 2, C3k2, [256, False, 0.25]]
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+ - [-1, 2, C3k2, [512, False, 0.25]]
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+ - [-1, 2, C3k2, [512, True]]
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+ - [-1, 2, C3k2, [1024, True]]
+ - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+ - [-1, 2, C2PSA, [1024]] # 10
+
+ # YOLO26n head
+ head:
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+ - [-1, 2, C3k2, [512, True]] # 13
+
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+ - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+ - [-1, 1, Conv, [256, 3, 2]]
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
+ - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+ - [-1, 1, Conv, [512, 3, 2]]
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
+ - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+ - [[16, 19, 22], 1, Segment26, [nc, 32, 256]] # Segment(P3, P4, P5)
ultralytics/cfg/models/26/yolo26.yaml
@@ -0,0 +1,52 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ # Ultralytics YOLO26 object detection model with P3/8 - P5/32 outputs
+ # Model docs: https://docs.ultralytics.com/models/yolo26
+ # Task docs: https://docs.ultralytics.com/tasks/detect
+
+ # Parameters
+ nc: 80 # number of classes
+ end2end: True # whether to use end-to-end mode
+ reg_max: 1 # DFL bins
+ scales: # model compound scaling constants, i.e. 'model=YOLO26n.yaml' will call YOLO26.yaml with scale 'n'
+ # [depth, width, max_channels]
+ n: [0.50, 0.25, 1024] # summary: 260 layers, 2,572,280 parameters, 2,572,280 gradients, 6.1 GFLOPs
+ s: [0.50, 0.50, 1024] # summary: 260 layers, 10,009,784 parameters, 10,009,784 gradients, 22.8 GFLOPs
+ m: [0.50, 1.00, 512] # summary: 280 layers, 21,896,248 parameters, 21,896,248 gradients, 75.4 GFLOPs
+ l: [1.00, 1.00, 512] # summary: 392 layers, 26,299,704 parameters, 26,299,704 gradients, 93.8 GFLOPs
+ x: [1.00, 1.50, 512] # summary: 392 layers, 58,993,368 parameters, 58,993,368 gradients, 209.5 GFLOPs
+
+ # YOLO26n backbone
+ backbone:
+ # [from, repeats, module, args]
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+ - [-1, 2, C3k2, [256, False, 0.25]]
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+ - [-1, 2, C3k2, [512, False, 0.25]]
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+ - [-1, 2, C3k2, [512, True]]
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+ - [-1, 2, C3k2, [1024, True]]
+ - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+ - [-1, 2, C2PSA, [1024]] # 10
+
+ # YOLO26n head
+ head:
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+ - [-1, 2, C3k2, [512, True]] # 13
+
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+ - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+ - [-1, 1, Conv, [256, 3, 2]]
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
+ - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+ - [-1, 1, Conv, [512, 3, 2]]
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
+ - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+ - [[16, 19, 22], 1, Detect, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/26/yoloe-26-seg.yaml
@@ -0,0 +1,53 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ # Ultralytics YOLO26 object detection model with P3/8 - P5/32 outputs
+ # Model docs: https://docs.ultralytics.com/models/yolo26
+ # Task docs: https://docs.ultralytics.com/tasks/segment
+
+ # Parameters
+ nc: 80 # number of classes
+ end2end: True # whether to use end-to-end mode
+ reg_max: 1 # DFL bins
+ text_model: mobileclip2:b
+ scales: # model compound scaling constants, i.e. 'model=YOLO26n.yaml' will call YOLO26.yaml with scale 'n'
+ # [depth, width, max_channels]
+ n: [0.50, 0.25, 1024] # summary: 181 layers, 2624080 parameters, 2624064 gradients, 6.6 GFLOPs
+ s: [0.50, 0.50, 1024] # summary: 181 layers, 9458752 parameters, 9458736 gradients, 21.7 GFLOPs
+ m: [0.50, 1.00, 512] # summary: 231 layers, 20114688 parameters, 20114672 gradients, 68.5 GFLOPs
+ l: [1.00, 1.00, 512] # summary: 357 layers, 25372160 parameters, 25372144 gradients, 87.6 GFLOPs
+ x: [1.00, 1.50, 512] # summary: 357 layers, 56966176 parameters, 56966160 gradients, 196.0 GFLOPs
+
+ # YOLO26n backbone
+ backbone:
+ # [from, repeats, module, args]
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+ - [-1, 2, C3k2, [256, False, 0.25]]
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+ - [-1, 2, C3k2, [512, False, 0.25]]
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+ - [-1, 2, C3k2, [512, True]]
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+ - [-1, 2, C3k2, [1024, True]]
+ - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+ - [-1, 2, C2PSA, [1024]] # 10
+
+ # YOLO26n head
+ head:
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+ - [-1, 2, C3k2, [512, True]] # 13
+
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+ - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+ - [-1, 1, Conv, [256, 3, 2]]
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
+ - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+ - [-1, 1, Conv, [512, 3, 2]]
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
+ - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+ - [[16, 19, 22], 1, YOLOESegment26, [nc, 32, 256, 512, True]] # Detect(P3, P4, P5)
ultralytics/cfg/models/26/yoloe-26.yaml
@@ -0,0 +1,53 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ # Ultralytics YOLO26 object detection model with P3/8 - P5/32 outputs
+ # Model docs: https://docs.ultralytics.com/models/yolo26
+ # Task docs: https://docs.ultralytics.com/tasks/detect
+
+ # Parameters
+ nc: 80 # number of classes
+ end2end: True # whether to use end-to-end mode
+ reg_max: 1 # DFL bins
+ text_model: mobileclip2:b
+ scales: # model compound scaling constants, i.e. 'model=YOLO26n.yaml' will call YOLO26.yaml with scale 'n'
+ # [depth, width, max_channels]
+ n: [0.50, 0.25, 1024] # summary: 181 layers, 2624080 parameters, 2624064 gradients, 6.6 GFLOPs
+ s: [0.50, 0.50, 1024] # summary: 181 layers, 9458752 parameters, 9458736 gradients, 21.7 GFLOPs
+ m: [0.50, 1.00, 512] # summary: 231 layers, 20114688 parameters, 20114672 gradients, 68.5 GFLOPs
+ l: [1.00, 1.00, 512] # summary: 357 layers, 25372160 parameters, 25372144 gradients, 87.6 GFLOPs
+ x: [1.00, 1.50, 512] # summary: 357 layers, 56966176 parameters, 56966160 gradients, 196.0 GFLOPs
+
+ # YOLO26n backbone
+ backbone:
+ # [from, repeats, module, args]
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+ - [-1, 2, C3k2, [256, False, 0.25]]
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+ - [-1, 2, C3k2, [512, False, 0.25]]
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+ - [-1, 2, C3k2, [512, True]]
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+ - [-1, 2, C3k2, [1024, True]]
+ - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+ - [-1, 2, C2PSA, [1024]] # 10
+
+ # YOLO26n head
+ head:
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+ - [-1, 2, C3k2, [512, True]] # 13
+
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+ - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+ - [-1, 1, Conv, [256, 3, 2]]
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
+ - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+ - [-1, 1, Conv, [512, 3, 2]]
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
+ - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+ - [[16, 19, 22], 1, YOLOEDetect, [nc, 512, True]] # Detect(P3, P4, P5)
ultralytics/data/augment.py
@@ -2062,11 +2062,18 @@ class Format:
  if nl:
  masks, instances, cls = self._format_segments(instances, cls, w, h)
  masks = torch.from_numpy(masks)
+ cls_tensor = torch.from_numpy(cls.squeeze(1))
+ if self.mask_overlap:
+ sem_masks = cls_tensor[masks[0].long() - 1] # (H, W) from (1, H, W) instance indices
+ else:
+ sem_masks = (masks * cls_tensor[:, None, None]).max(0).values # (H, W) from (N, H, W) binary
  else:
  masks = torch.zeros(
  1 if self.mask_overlap else nl, img.shape[0] // self.mask_ratio, img.shape[1] // self.mask_ratio
  )
+ sem_masks = torch.zeros(img.shape[0] // self.mask_ratio, img.shape[1] // self.mask_ratio)
  labels["masks"] = masks
+ labels["sem_masks"] = sem_masks.float()
  labels["img"] = self._format_img(img)
  labels["cls"] = torch.from_numpy(cls) if nl else torch.zeros(nl, 1)
  labels["bboxes"] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
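The new sem_masks entry is a per-pixel class map derived from the instance masks. A self-contained toy run of the mask_overlap=False branch above (tensors invented for the example):

import torch

# Two binary instance masks (N=2, H=2, W=3) and their class ids
masks = torch.tensor([[[1, 1, 0],
                       [0, 0, 0]],
                      [[0, 0, 1],
                       [0, 0, 1]]], dtype=torch.float32)
cls_tensor = torch.tensor([2.0, 5.0])

# Weight each instance mask by its class id, then take the per-pixel max
sem_masks = (masks * cls_tensor[:, None, None]).max(0).values
print(sem_masks)
# tensor([[2., 2., 5.],
#         [0., 0., 5.]])

In the mask_overlap=True branch, masks[0] instead holds instance indices (0 = background), so cls_tensor[masks[0].long() - 1] maps each pixel's instance index to its class id.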
ultralytics/data/dataset.py
@@ -294,7 +294,7 @@ class YOLODataset(BaseDataset):
  values = list(zip(*[list(b.values()) for b in batch]))
  for i, k in enumerate(keys):
  value = values[i]
- if k in {"img", "text_feats"}:
+ if k in {"img", "text_feats", "sem_masks"}:
  value = torch.stack(value, 0)
  elif k == "visuals":
  value = torch.nn.utils.rnn.pad_sequence(value, batch_first=True)
ultralytics/engine/exporter.py
@@ -503,7 +503,9 @@ class Exporter:
  m.dynamic = self.args.dynamic
  m.export = True
  m.format = self.args.format
- m.max_det = self.args.max_det
+ # Clamp max_det to anchor count for small image sizes (required for TensorRT compatibility)
+ anchors = sum(int(self.imgsz[0] / s) * int(self.imgsz[1] / s) for s in model.stride.tolist())
+ m.max_det = min(self.args.max_det, anchors)
  m.xyxy = self.args.nms and not coreml
  m.shape = None # reset cached shape for new export input size
  if hasattr(model, "pe") and hasattr(m, "fuse"): # for YOLOE models
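The clamp above counts one anchor per output cell across the model strides, so it only takes effect at small export sizes. A quick check of the arithmetic, assuming the default detection strides (8, 16, 32):

# 640x640 export: 80*80 + 40*40 + 20*20 = 8400 anchors, so max_det=300 is left untouched.
# 64x64 export:   8*8   + 4*4   + 2*2   = 84 anchors, so max_det=300 is clamped down to 84.
anchors = sum(int(640 / s) * int(640 / s) for s in (8.0, 16.0, 32.0))
assert anchors == 8400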
@@ -551,6 +553,8 @@
  self.metadata["kpt_shape"] = model.model[-1].kpt_shape
  if hasattr(model, "kpt_names"):
  self.metadata["kpt_names"] = model.kpt_names
+ if getattr(model.model[-1], "end2end", False):
+ self.metadata["end2end"] = True

  LOGGER.info(
  f"\n{colorstr('PyTorch:')} starting from '{file}' with input shape {tuple(im.shape)} BCHW and "
@@ -860,8 +864,11 @@
  @try_export
  def export_ncnn(self, prefix=colorstr("NCNN:")):
  """Export YOLO model to NCNN format using PNNX https://github.com/pnnx/pnnx."""
- # use git source for ARM64 due to broken PyPI packages https://github.com/Tencent/ncnn/issues/6509
- check_requirements("git+https://github.com/Tencent/ncnn.git" if ARM64 else "ncnn", cmds="--no-deps")
+ if ARM64:
+ raise NotImplementedError(
+ "NCNN export is not supported on ARM64"
+ ) # https://github.com/Tencent/ncnn/issues/6509
+ check_requirements("ncnn", cmds="--no-deps") # no deps to avoid installing opencv-python
  check_requirements("pnnx")
  import ncnn
  import pnnx
ultralytics/engine/model.py
@@ -825,7 +825,7 @@ class Model(torch.nn.Module):

  custom = {} # method defaults
  args = {**self.overrides, **custom, **kwargs, "mode": "train"} # highest priority args on the right
- return Tuner(args=args, _callbacks=self.callbacks)(model=self, iterations=iterations)
+ return Tuner(args=args, _callbacks=self.callbacks)(iterations=iterations)

  def _apply(self, fn) -> Model:
  """Apply a function to model tensors that are not parameters or registered buffers.
ultralytics/engine/trainer.py
@@ -27,6 +27,7 @@ from ultralytics import __version__
  from ultralytics.cfg import get_cfg, get_save_dir
  from ultralytics.data.utils import check_cls_dataset, check_det_dataset
  from ultralytics.nn.tasks import load_checkpoint
+ from ultralytics.optim import MuSGD
  from ultralytics.utils import (
  DEFAULT_CFG,
  GIT,
@@ -464,6 +465,9 @@ class BaseTrainer:

  self.run_callbacks("on_train_batch_end")

+ if hasattr(unwrap_model(self.model).criterion, "update"):
+ unwrap_model(self.model).criterion.update()
+
  self.lr = {f"lr/pg{ir}": x["lr"] for ir, x in enumerate(self.optimizer.param_groups)} # for loggers

  self.run_callbacks("on_train_epoch_end")
@@ -930,7 +934,7 @@
  Returns:
  (torch.optim.Optimizer): The constructed optimizer.
  """
- g = [], [], [] # optimizer parameter groups
+ g = [{}, {}, {}, {}] # optimizer parameter groups
  bn = tuple(v for k, v in nn.__dict__.items() if "Norm" in k) # normalization layers, i.e. BatchNorm2d()
  if name == "auto":
  LOGGER.info(
@@ -940,38 +944,59 @@
  )
  nc = self.data.get("nc", 10) # number of classes
  lr_fit = round(0.002 * 5 / (4 + nc), 6) # lr0 fit equation to 6 decimal places
- name, lr, momentum = ("SGD", 0.01, 0.9) if iterations > 10000 else ("AdamW", lr_fit, 0.9)
+ name, lr, momentum = ("SGD", 0.01, 0.9) if iterations > 10000 else ("MuSGD", lr_fit, 0.9)
  self.args.warmup_bias_lr = 0.0 # no higher than 0.01 for Adam

- for module_name, module in model.named_modules():
+ use_muon = name == "MuSGD"
+ for module_name, module in unwrap_model(model).named_modules():
  for param_name, param in module.named_parameters(recurse=False):
  fullname = f"{module_name}.{param_name}" if module_name else param_name
- if "bias" in fullname: # bias (no decay)
- g[2].append(param)
+ if param.ndim >= 2 and use_muon:
+ g[3][fullname] = param # muon params
+ elif "bias" in fullname: # bias (no decay)
+ g[2][fullname] = param
  elif isinstance(module, bn) or "logit_scale" in fullname: # weight (no decay)
  # ContrastiveHead and BNContrastiveHead included here with 'logit_scale'
- g[1].append(param)
+ g[1][fullname] = param
  else: # weight (with decay)
- g[0].append(param)
+ g[0][fullname] = param
+ if not use_muon:
+ g = [x.values() for x in g[:3]] # convert to list of params

- optimizers = {"Adam", "Adamax", "AdamW", "NAdam", "RAdam", "RMSProp", "SGD", "auto"}
+ optimizers = {"Adam", "Adamax", "AdamW", "NAdam", "RAdam", "RMSProp", "SGD", "MuSGD", "auto"}
  name = {x.lower(): x for x in optimizers}.get(name.lower())
  if name in {"Adam", "Adamax", "AdamW", "NAdam", "RAdam"}:
- optimizer = getattr(optim, name, optim.Adam)(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0)
+ optim_args = dict(lr=lr, betas=(momentum, 0.999), weight_decay=0.0)
  elif name == "RMSProp":
- optimizer = optim.RMSprop(g[2], lr=lr, momentum=momentum)
- elif name == "SGD":
- optimizer = optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True)
+ optim_args = dict(lr=lr, momentum=momentum)
+ elif name == "SGD" or name == "MuSGD":
+ optim_args = dict(lr=lr, momentum=momentum, nesterov=True)
  else:
  raise NotImplementedError(
  f"Optimizer '{name}' not found in list of available optimizers {optimizers}. "
  "Request support for addition optimizers at https://github.com/ultralytics/ultralytics."
  )

- optimizer.add_param_group({"params": g[0], "weight_decay": decay}) # add g0 with weight_decay
- optimizer.add_param_group({"params": g[1], "weight_decay": 0.0}) # add g1 (BatchNorm2d weights)
+ g[2] = {"params": g[2], **optim_args}
+ g[0] = {"params": g[0], **optim_args, "weight_decay": decay}
+ g[1] = {"params": g[1], **optim_args, "weight_decay": 0.0}
+ if name == "MuSGD":
+ g[3] = {"params": g[3], **optim_args, "weight_decay": decay, "use_muon": True}
+ import re
+
+ # higher lr for certain parameters in MuSGD
+ pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg|flow_model")
+ g_ = [] # new param groups
+ for x in g:
+ p = x.pop("params")
+ p1 = [v for k, v in p.items() if pattern.search(k)]
+ p2 = [v for k, v in p.items() if not pattern.search(k)]
+ g_.extend([{"params": p1, **x, "lr": lr * 3}, {"params": p2, **x}])
+ g = g_
+ optimizer = getattr(optim, name, MuSGD)(params=g)
+
  LOGGER.info(
  f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}, momentum={momentum}) with parameter groups "
- f"{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias(decay=0.0)"
+ f"{len(g[1]['params'])} weight(decay=0.0), {len(g[0]['params']) if len(g[0]) else len(g[3]['params'])} weight(decay={decay}), {len(g[2]['params'])} bias(decay=0.0)"
  )
  return optimizer
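MuSGD comes from the new ultralytics/optim/muon.py module listed above: matrix-shaped parameters (param.ndim >= 2) are routed into the use_muon=True group, while biases and norm weights keep SGD-style updates. Muon-family optimizers replace the raw momentum of each weight matrix with an approximately orthogonalized direction computed by a Newton-Schulz iteration; the sketch below shows that core step using the coefficients from the publicly described Muon recipe, and is not taken from this package's implementation:

import torch


def newton_schulz_orthogonalize(g: torch.Tensor, steps: int = 5) -> torch.Tensor:
    """Approximately orthogonalize a 2D update matrix (the Muon-style direction)."""
    a, b, c = 3.4445, -4.7750, 2.0315  # quintic iteration coefficients from the Muon reference recipe
    x = g / (g.norm() + 1e-7)  # normalize so the iteration converges
    transposed = x.shape[0] > x.shape[1]
    if transposed:  # iterate on the wide orientation
        x = x.T
    for _ in range(steps):
        xxt = x @ x.T
        x = a * x + (b * xxt + c * xxt @ xxt) @ x
    return x.T if transposed else x


# The returned direction has its singular values pushed toward 1; a Muon-style group applies it
# (scaled by the learning rate) in place of the raw momentum for ndim >= 2 weights.
update = newton_schulz_orthogonalize(torch.randn(64, 128))
print(torch.linalg.svdvals(update)[:5])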
ultralytics/engine/tuner.py
@@ -90,15 +90,15 @@ class Tuner:
  """
  self.space = args.pop("space", None) or { # key: (min, max, gain(optional))
  # 'optimizer': tune.choice(['SGD', 'Adam', 'AdamW', 'NAdam', 'RAdam', 'RMSProp']),
- "lr0": (1e-5, 1e-1), # initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
- "lrf": (0.0001, 0.1), # final OneCycleLR learning rate (lr0 * lrf)
+ "lr0": (1e-5, 1e-2), # initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
+ "lrf": (0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf)
  "momentum": (0.7, 0.98, 0.3), # SGD momentum/Adam beta1
  "weight_decay": (0.0, 0.001), # optimizer weight decay 5e-4
  "warmup_epochs": (0.0, 5.0), # warmup epochs (fractions ok)
  "warmup_momentum": (0.0, 0.95), # warmup initial momentum
  "box": (1.0, 20.0), # box loss gain
  "cls": (0.1, 4.0), # cls loss gain (scale with pixels)
- "dfl": (0.4, 6.0), # dfl loss gain
+ "dfl": (0.4, 12.0), # dfl loss gain
  "hsv_h": (0.0, 0.1), # image HSV-Hue augmentation (fraction)
  "hsv_s": (0.0, 0.9), # image HSV-Saturation augmentation (fraction)
  "hsv_v": (0.0, 0.9), # image HSV-Value augmentation (fraction)
@@ -254,7 +254,7 @@ class Tuner:
  f.write(headers)
  for result in all_results:
  fitness = result["fitness"]
- hyp_values = [result["hyperparameters"][k] for k in self.space.keys()]
+ hyp_values = [result["hyperparameters"].get(k, self.args.get(k)) for k in self.space.keys()]
  log_row = [round(fitness, 5), *hyp_values]
  f.write(",".join(map(str, log_row)) + "\n")

@@ -273,6 +273,8 @@ class Tuner:
  parents_mat = np.stack([x[i][1:] for i in idxs], 0) # (k, ng) strip fitness
  lo, hi = parents_mat.min(0), parents_mat.max(0)
  span = hi - lo
+ # given a small value when span is zero to avoid no mutation
+ span = np.where(span == 0, np.random.uniform(0.01, 0.1, span.shape), span)
  return np.random.uniform(lo - alpha * span, hi + alpha * span)

  def _mutate(
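The np.where guard matters when every selected parent shares the same value for a gene: the span collapses to zero and the sampled crossover range degenerates to a point, so that gene can never mutate. A small illustration (the alpha value is illustrative, not the Tuner's actual constant):

import numpy as np

lo = hi = np.array([0.01, 0.5, 3.0])  # all parents agree on every gene
span = hi - lo  # -> [0., 0., 0.], no room to move
span = np.where(span == 0, np.random.uniform(0.01, 0.1, span.shape), span)
alpha = 0.2
child = np.random.uniform(lo - alpha * span, hi + alpha * span)  # can now drift off the shared values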
@@ -297,7 +299,12 @@ class Tuner:
  if self.mongodb:
  if results := self._get_mongodb_results(n):
  # MongoDB already sorted by fitness DESC, so results[0] is best
- x = np.array([[r["fitness"]] + [r["hyperparameters"][k] for k in self.space.keys()] for r in results])
+ x = np.array(
+ [
+ [r["fitness"]] + [r["hyperparameters"].get(k, self.args.get(k)) for k in self.space.keys()]
+ for r in results
+ ]
+ )
  elif self.collection.name in self.collection.database.list_collection_names(): # Tuner started elsewhere
  x = np.array([[0.0] + [getattr(self.args, k) for k in self.space.keys()]])

@@ -335,10 +342,12 @@ class Tuner:
  # Update types
  if "close_mosaic" in hyp:
  hyp["close_mosaic"] = round(hyp["close_mosaic"])
+ if "epochs" in hyp:
+ hyp["epochs"] = round(hyp["epochs"])

  return hyp

- def __call__(self, model=None, iterations: int = 10, cleanup: bool = True):
+ def __call__(self, iterations: int = 10, cleanup: bool = True):
  """Execute the hyperparameter evolution process when the Tuner instance is called.

  This method iterates through the specified number of iterations, performing the following steps:
@@ -349,7 +358,6 @@
  5. Track the best performing configuration across all iterations

  Args:
- model (Model | None, optional): A pre-initialized YOLO model to be used for training.
  iterations (int): The number of generations to run the evolution for.
  cleanup (bool): Whether to delete iteration weights to reduce storage space during tuning.
  """
ultralytics/models/fastsam/predict.py
@@ -63,7 +63,7 @@ class FastSAMPredictor(SegmentationPredictor):
  results = super().postprocess(preds, img, orig_imgs)
  for result in results:
  full_box = torch.tensor(
- [0, 0, result.orig_shape[1], result.orig_shape[0]], device=preds[0].device, dtype=torch.float32
+ [0, 0, result.orig_shape[1], result.orig_shape[0]], device=result.boxes.data.device, dtype=torch.float32
  )
  boxes = adjust_bboxes_to_image_border(result.boxes.xyxy, result.orig_shape)
  idx = torch.nonzero(box_iou(full_box[None], boxes) > 0.9).flatten()
ultralytics/models/yolo/detect/train.py
@@ -117,10 +117,11 @@ class DetectionTrainer(BaseTrainer):
  if isinstance(v, torch.Tensor):
  batch[k] = v.to(self.device, non_blocking=self.device.type == "cuda")
  batch["img"] = batch["img"].float() / 255
- if self.args.multi_scale:
+ multi_scale = self.args.multi_scale
+ if random.random() < multi_scale:
  imgs = batch["img"]
  sz = (
- random.randrange(int(self.args.imgsz * 0.5), int(self.args.imgsz * 1.5 + self.stride))
+ random.randrange(int(self.args.imgsz * 0.5), int(self.args.imgsz * 1 + self.stride))
  // self.stride
  * self.stride
  ) # size
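Note the semantics change: multi_scale is now treated as a per-batch probability rather than a boolean, and the sampled size tops out at imgsz instead of 1.5 * imgsz. A quick check of the new size range, assuming imgsz=640 and stride=32:

import random

imgsz, stride = 640, 32
sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1 + stride)) // stride * stride
# sz is a multiple of 32 in [320, 640]; the old upper bound of 1.5 * imgsz allowed sizes up to 960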
ultralytics/models/yolo/detect/val.py
@@ -494,6 +494,12 @@ class DetectionValidator(BaseValidator):
  # update mAP50-95 and mAP50
  stats[f"metrics/mAP50({suffix[i][0]})"] = val.stats_as_dict["AP_50"]
  stats[f"metrics/mAP50-95({suffix[i][0]})"] = val.stats_as_dict["AP_all"]
+ # record mAP for small, medium, large objects as well
+ stats["metrics/mAP_small(B)"] = val.stats_as_dict["AP_small"]
+ stats["metrics/mAP_medium(B)"] = val.stats_as_dict["AP_medium"]
+ stats["metrics/mAP_large(B)"] = val.stats_as_dict["AP_large"]
+ # update fitness
+ stats["fitness"] = 0.9 * val.stats_as_dict["AP_all"] + 0.1 * val.stats_as_dict["AP_50"]

  if self.is_lvis:
  stats[f"metrics/APr({suffix[i][0]})"] = val.stats_as_dict["APr"]
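With this, the COCO-eval path reports its own fitness using the usual detection weighting: for example, AP50-95 = 0.50 and AP50 = 0.70 give fitness = 0.9 * 0.50 + 0.1 * 0.70 = 0.52.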
ultralytics/models/yolo/model.py
@@ -399,7 +399,7 @@ class YOLOE(Model):
  "batch": 1,
  "device": kwargs.get("device", None),
  "half": kwargs.get("half", False),
- "imgsz": kwargs.get("imgsz", self.overrides["imgsz"]),
+ "imgsz": kwargs.get("imgsz", self.overrides.get("imgsz", 640)),
  },
  _callbacks=self.callbacks,
  )
ultralytics/models/yolo/obb/predict.py
@@ -50,7 +50,7 @@ class OBBPredictor(DetectionPredictor):
  (Results): The result object containing the original image, image path, class names, and oriented bounding
  boxes.
  """
- rboxes = ops.regularize_rboxes(torch.cat([pred[:, :4], pred[:, -1:]], dim=-1))
+ rboxes = torch.cat([pred[:, :4], pred[:, -1:]], dim=-1)
  rboxes[:, :4] = ops.scale_boxes(img.shape[2:], rboxes[:, :4], orig_img.shape, xywh=True)
  obb = torch.cat([rboxes, pred[:, 4:6]], dim=-1)
  return Results(orig_img, path=img_path, names=self.model.names, obb=obb)
ultralytics/models/yolo/obb/train.py
@@ -73,7 +73,7 @@ class OBBTrainer(yolo.detect.DetectionTrainer):

  def get_validator(self):
  """Return an instance of OBBValidator for validation of YOLO model."""
- self.loss_names = "box_loss", "cls_loss", "dfl_loss"
+ self.loss_names = "box_loss", "cls_loss", "dfl_loss", "angle_loss"
  return yolo.obb.OBBValidator(
  self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
  )
ultralytics/models/yolo/pose/train.py
@@ -90,7 +90,7 @@ class PoseTrainer(yolo.detect.DetectionTrainer):

  def get_validator(self):
  """Return an instance of the PoseValidator class for validation."""
- self.loss_names = "box_loss", "pose_loss", "kobj_loss", "cls_loss", "dfl_loss"
+ self.loss_names = "box_loss", "pose_loss", "kobj_loss", "cls_loss", "dfl_loss", "rle_loss"
  return yolo.pose.PoseValidator(
  self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
  )
ultralytics/models/yolo/segment/predict.py
@@ -60,7 +60,7 @@ class SegmentationPredictor(DetectionPredictor):
  >>> results = predictor.postprocess(preds, img, orig_img)
  """
  # Extract protos - tuple if PyTorch model or array if exported
- protos = preds[1][-1] if isinstance(preds[1], tuple) else preds[1]
+ protos = preds[0][-1] if isinstance(preds[0], tuple) else preds[-1]
  return super().postprocess(preds[0], img, orig_imgs, protos=protos)

  def construct_results(self, preds, img, orig_imgs, protos):
ultralytics/models/yolo/segment/train.py
@@ -63,7 +63,7 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):

  def get_validator(self):
  """Return an instance of SegmentationValidator for validation of YOLO model."""
- self.loss_names = "box_loss", "seg_loss", "cls_loss", "dfl_loss"
+ self.loss_names = "box_loss", "seg_loss", "cls_loss", "dfl_loss", "sem_loss"
  return yolo.segment.SegmentationValidator(
  self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
  )
ultralytics/models/yolo/segment/val.py
@@ -99,7 +99,9 @@ class SegmentationValidator(DetectionValidator):
  Returns:
  list[dict[str, torch.Tensor]]: Processed detection predictions with masks.
  """
- proto = preds[1][-1] if len(preds[1]) == 3 else preds[1] # second output is len 3 if pt, but only 1 if exported
+ proto = (
+ preds[0][-1] if isinstance(preds[0], tuple) else preds[-1]
+ ) # second output is len 3 if pt, but only 1 if exported
  preds = super().postprocess(preds[0])
  imgsz = [4 * x for x in proto.shape[2:]] # get image size from proto
  for i, pred in enumerate(preds):
ultralytics/models/yolo/yoloe/train.py
@@ -147,7 +147,12 @@ class YOLOEPETrainer(DetectionTrainer):
  model.model[-1].cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
  model.model[-1].cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
  model.model[-1].cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
- del model.pe
+
+ if getattr(model.model[-1], "one2one_cv3", None) is not None:
+ model.model[-1].one2one_cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
+ model.model[-1].one2one_cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
+ model.model[-1].one2one_cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
+
  model.train()

  return model