ultralytics-opencv-headless 8.3.253__py3-none-any.whl → 8.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. tests/__init__.py +2 -2
  2. tests/conftest.py +1 -1
  3. tests/test_cuda.py +8 -2
  4. tests/test_engine.py +8 -8
  5. tests/test_exports.py +11 -4
  6. tests/test_integrations.py +9 -9
  7. tests/test_python.py +14 -14
  8. tests/test_solutions.py +3 -3
  9. ultralytics/__init__.py +1 -1
  10. ultralytics/cfg/__init__.py +7 -9
  11. ultralytics/cfg/default.yaml +3 -1
  12. ultralytics/cfg/models/26/yolo26-cls.yaml +33 -0
  13. ultralytics/cfg/models/26/yolo26-obb.yaml +52 -0
  14. ultralytics/cfg/models/26/yolo26-p2.yaml +60 -0
  15. ultralytics/cfg/models/26/yolo26-p6.yaml +62 -0
  16. ultralytics/cfg/models/26/yolo26-pose.yaml +53 -0
  17. ultralytics/cfg/models/26/yolo26-seg.yaml +52 -0
  18. ultralytics/cfg/models/26/yolo26.yaml +52 -0
  19. ultralytics/cfg/models/26/yoloe-26-seg.yaml +53 -0
  20. ultralytics/cfg/models/26/yoloe-26.yaml +53 -0
  21. ultralytics/data/augment.py +7 -0
  22. ultralytics/data/converter.py +49 -30
  23. ultralytics/data/dataset.py +1 -1
  24. ultralytics/engine/exporter.py +9 -4
  25. ultralytics/engine/model.py +1 -1
  26. ultralytics/engine/results.py +19 -10
  27. ultralytics/engine/trainer.py +48 -25
  28. ultralytics/engine/tuner.py +15 -7
  29. ultralytics/models/fastsam/predict.py +1 -1
  30. ultralytics/models/yolo/detect/train.py +3 -2
  31. ultralytics/models/yolo/detect/val.py +6 -0
  32. ultralytics/models/yolo/model.py +1 -1
  33. ultralytics/models/yolo/obb/predict.py +1 -1
  34. ultralytics/models/yolo/obb/train.py +1 -1
  35. ultralytics/models/yolo/pose/train.py +1 -1
  36. ultralytics/models/yolo/segment/predict.py +1 -1
  37. ultralytics/models/yolo/segment/train.py +1 -1
  38. ultralytics/models/yolo/segment/val.py +3 -1
  39. ultralytics/models/yolo/yoloe/train.py +6 -1
  40. ultralytics/models/yolo/yoloe/train_seg.py +6 -1
  41. ultralytics/nn/autobackend.py +3 -3
  42. ultralytics/nn/modules/__init__.py +8 -0
  43. ultralytics/nn/modules/block.py +128 -8
  44. ultralytics/nn/modules/head.py +789 -204
  45. ultralytics/nn/tasks.py +74 -29
  46. ultralytics/nn/text_model.py +5 -2
  47. ultralytics/optim/__init__.py +5 -0
  48. ultralytics/optim/muon.py +338 -0
  49. ultralytics/utils/callbacks/platform.py +9 -7
  50. ultralytics/utils/downloads.py +3 -1
  51. ultralytics/utils/export/engine.py +19 -10
  52. ultralytics/utils/export/imx.py +22 -11
  53. ultralytics/utils/export/tensorflow.py +21 -21
  54. ultralytics/utils/loss.py +587 -203
  55. ultralytics/utils/metrics.py +1 -0
  56. ultralytics/utils/ops.py +11 -2
  57. ultralytics/utils/tal.py +98 -19
  58. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.2.dist-info}/METADATA +31 -39
  59. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.2.dist-info}/RECORD +63 -52
  60. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.2.dist-info}/WHEEL +0 -0
  61. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.2.dist-info}/entry_points.txt +0 -0
  62. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.2.dist-info}/licenses/LICENSE +0 -0
  63. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.2.dist-info}/top_level.txt +0 -0
ultralytics/cfg/models/26/yolo26-p6.yaml
@@ -0,0 +1,62 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLO26 object detection model with P3/8 - P6/64 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo26
+# Task docs: https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+end2end: True # whether to use end-to-end mode
+reg_max: 1 # DFL bins
+scales: # model compound scaling constants, i.e. 'model=yolo26n-p6.yaml' will call yolo26-p6.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.50, 0.25, 1024] # summary: 349 layers, 4,063,872 parameters, 4,063,872 gradients, 6.0 GFLOPs
+  s: [0.50, 0.50, 1024] # summary: 349 layers, 15,876,448 parameters, 15,876,448 gradients, 22.3 GFLOPs
+  m: [0.50, 1.00, 512] # summary: 369 layers, 32,400,096 parameters, 32,400,096 gradients, 77.3 GFLOPs
+  l: [1.00, 1.00, 512] # summary: 523 layers, 39,365,600 parameters, 39,365,600 gradients, 97.0 GFLOPs
+  x: [1.00, 1.50, 512] # summary: 523 layers, 88,330,368 parameters, 88,330,368 gradients, 216.6 GFLOPs
+
+# YOLO26n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 2, C3k2, [256, False, 0.25]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 2, C3k2, [512, False, 0.25]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 2, C3k2, [512, True]]
+  - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
+  - [-1, 2, C3k2, [768, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
+  - [-1, 2, C3k2, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 11
+  - [-1, 2, C2PSA, [1024]] # 12
+
+# YOLO26n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 8], 1, Concat, [1]] # cat backbone P5
+  - [-1, 2, C3k2, [768, True]] # 15
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 2, C3k2, [512, True]] # 18
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 2, C3k2, [256, True]] # 21 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 18], 1, Concat, [1]] # cat head P4
+  - [-1, 2, C3k2, [512, True]] # 24 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 15], 1, Concat, [1]] # cat head P5
+  - [-1, 2, C3k2, [768, True]] # 27 (P5/32-large)
+
+  - [-1, 1, Conv, [768, 3, 2]]
+  - [[-1, 12], 1, Concat, [1]] # cat head P6
+  - [-1, 1, C3k2, [1024, True, 0.5, True]] # 30 (P6/64-large)
+
+  - [[21, 24, 27, 30], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
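Note on the scales table above: a scale row rescales block repeats by depth and channel counts by width, capped at max_channels. A minimal sketch of that arithmetic, assuming make_divisible-style rounding in the spirit of Ultralytics' parse_model (the shipped rounding rules are not part of this diff):

import math

def make_divisible(x: float, divisor: int = 8) -> int:
    """Round a channel count up to the nearest multiple of divisor."""
    return math.ceil(x / divisor) * divisor

def apply_scale(repeats: int, channels: int, scale: tuple[float, float, int]) -> tuple[int, int]:
    """Apply one [depth, width, max_channels] row to a layer spec (illustrative, not package code)."""
    depth, width, max_channels = scale
    n = max(round(repeats * depth), 1) if repeats > 1 else repeats  # depth scales repeats
    c = make_divisible(min(channels, max_channels) * width)  # width scales channels, capped
    return n, c

print(apply_scale(2, 512, (0.50, 0.25, 1024)))  # a C3k2 block at scale 'n': (1, 128)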
ultralytics/cfg/models/26/yolo26-pose.yaml
@@ -0,0 +1,53 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLO26-pose keypoints/pose estimation model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo26
+# Task docs: https://docs.ultralytics.com/tasks/pose
+
+# Parameters
+nc: 80 # number of classes
+end2end: True # whether to use end-to-end mode
+reg_max: 1 # DFL bins
+kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+scales: # model compound scaling constants, i.e. 'model=yolo26n-pose.yaml' will call yolo26-pose.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.50, 0.25, 1024] # summary: 363 layers, 3,747,554 parameters, 3,747,554 gradients, 10.7 GFLOPs
+  s: [0.50, 0.50, 1024] # summary: 363 layers, 11,870,498 parameters, 11,870,498 gradients, 29.6 GFLOPs
+  m: [0.50, 1.00, 512] # summary: 383 layers, 24,344,482 parameters, 24,344,482 gradients, 85.9 GFLOPs
+  l: [1.00, 1.00, 512] # summary: 495 layers, 28,747,938 parameters, 28,747,938 gradients, 104.3 GFLOPs
+  x: [1.00, 1.50, 512] # summary: 495 layers, 62,914,350 parameters, 62,914,350 gradients, 226.3 GFLOPs
+
+# YOLO26n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 2, C3k2, [256, False, 0.25]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 2, C3k2, [512, False, 0.25]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 2, C3k2, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 2, C3k2, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+  - [-1, 2, C2PSA, [1024]] # 10
+
+# YOLO26n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 2, C3k2, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, Pose26, [nc, kpt_shape]] # Detect(P3, P4, P5)
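For reference, kpt_shape: [17, 3] means 17 keypoints with (x, y, visibility) per point, so the pose branch carries 17 * 3 = 51 keypoint values per detection (a worked reading of the parameter, not code from the package):

kpt_shape = [17, 3]  # 17 keypoints, 3 values each: x, y, visibility
print(kpt_shape[0] * kpt_shape[1])  # 51 keypoint channels per detection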
ultralytics/cfg/models/26/yolo26-seg.yaml
@@ -0,0 +1,52 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLO26-seg instance segmentation model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo26
+# Task docs: https://docs.ultralytics.com/tasks/segment
+
+# Parameters
+nc: 80 # number of classes
+end2end: True # whether to use end-to-end mode
+reg_max: 1 # DFL bins
+scales: # model compound scaling constants, i.e. 'model=yolo26n-seg.yaml' will call yolo26-seg.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.50, 0.25, 1024] # summary: 309 layers, 3,126,280 parameters, 3,126,280 gradients, 10.5 GFLOPs
+  s: [0.50, 0.50, 1024] # summary: 309 layers, 11,505,800 parameters, 11,505,800 gradients, 37.4 GFLOPs
+  m: [0.50, 1.00, 512] # summary: 329 layers, 27,112,072 parameters, 27,112,072 gradients, 132.5 GFLOPs
+  l: [1.00, 1.00, 512] # summary: 441 layers, 31,515,528 parameters, 31,515,528 gradients, 150.9 GFLOPs
+  x: [1.00, 1.50, 512] # summary: 441 layers, 70,693,800 parameters, 70,693,800 gradients, 337.7 GFLOPs
+
+# YOLO26n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 2, C3k2, [256, False, 0.25]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 2, C3k2, [512, False, 0.25]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 2, C3k2, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 2, C3k2, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+  - [-1, 2, C2PSA, [1024]] # 10
+
+# YOLO26n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 2, C3k2, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, Segment26, [nc, 32, 256]] # Segment(P3, P4, P5)
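The Segment26 args [nc, 32, 256] follow the usual YOLO segmentation-head convention: 32 mask coefficients per detection and a 256-channel prototype branch. A hedged sketch of the standard coefficient-times-prototype mask assembly (whether Segment26 alters this is not visible in the diff):

import torch

n_det, n_coeffs, hw = 5, 32, 160        # 5 detections, 32 coefficients, 160x160 prototypes
protos = torch.randn(n_coeffs, hw, hw)  # shared prototype masks from the proto branch
coeffs = torch.randn(n_det, n_coeffs)   # one coefficient vector per detection

# Each detection's mask is the sigmoid of a linear combination of the prototypes
masks = (coeffs @ protos.view(n_coeffs, -1)).sigmoid().view(n_det, hw, hw)
print(masks.shape)  # torch.Size([5, 160, 160])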
ultralytics/cfg/models/26/yolo26.yaml
@@ -0,0 +1,52 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLO26 object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo26
+# Task docs: https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+end2end: True # whether to use end-to-end mode
+reg_max: 1 # DFL bins
+scales: # model compound scaling constants, i.e. 'model=yolo26n.yaml' will call yolo26.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.50, 0.25, 1024] # summary: 260 layers, 2,572,280 parameters, 2,572,280 gradients, 6.1 GFLOPs
+  s: [0.50, 0.50, 1024] # summary: 260 layers, 10,009,784 parameters, 10,009,784 gradients, 22.8 GFLOPs
+  m: [0.50, 1.00, 512] # summary: 280 layers, 21,896,248 parameters, 21,896,248 gradients, 75.4 GFLOPs
+  l: [1.00, 1.00, 512] # summary: 392 layers, 26,299,704 parameters, 26,299,704 gradients, 93.8 GFLOPs
+  x: [1.00, 1.50, 512] # summary: 392 layers, 58,993,368 parameters, 58,993,368 gradients, 209.5 GFLOPs
+
+# YOLO26n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 2, C3k2, [256, False, 0.25]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 2, C3k2, [512, False, 0.25]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 2, C3k2, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 2, C3k2, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+  - [-1, 2, C2PSA, [1024]] # 10
+
+# YOLO26n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 2, C3k2, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, Detect, [nc]] # Detect(P3, P4, P5)
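As the scales comment notes, a scale suffix on the YAML name selects a row. A usage sketch with the standard Ultralytics API (building from the config; no yolo26n pretrained weights are assumed here):

from ultralytics import YOLO

model = YOLO("yolo26n.yaml")  # build an untrained YOLO26 at scale 'n' from this config
model.train(data="coco8.yaml", epochs=1, imgsz=640)  # quick smoke test on the coco8 sample dataset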
ultralytics/cfg/models/26/yoloe-26-seg.yaml
@@ -0,0 +1,53 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOE-26 open-vocabulary instance segmentation model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo26
+# Task docs: https://docs.ultralytics.com/tasks/segment
+
+# Parameters
+nc: 80 # number of classes
+end2end: True # whether to use end-to-end mode
+reg_max: 1 # DFL bins
+text_model: mobileclip2:b
+scales: # model compound scaling constants, i.e. 'model=yoloe-26n-seg.yaml' will call yoloe-26-seg.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.50, 0.25, 1024] # summary: 347 layers, 5,615,540 parameters, 5,615,540 gradients, 11.7 GFLOPs
+  s: [0.50, 0.50, 1024] # summary: 347 layers, 15,272,852 parameters, 15,272,852 gradients, 39.3 GFLOPs
+  m: [0.50, 1.00, 512] # summary: 367 layers, 34,922,132 parameters, 34,922,132 gradients, 136.3 GFLOPs
+  l: [1.00, 1.00, 512] # summary: 479 layers, 39,325,588 parameters, 39,325,588 gradients, 154.7 GFLOPs
+  x: [1.00, 1.50, 512] # summary: 479 layers, 85,397,684 parameters, 85,397,684 gradients, 343.3 GFLOPs
+
+# YOLOE26n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 2, C3k2, [256, False, 0.25]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 2, C3k2, [512, False, 0.25]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 2, C3k2, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 2, C3k2, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+  - [-1, 2, C2PSA, [1024]] # 10
+
+# YOLOE26n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 2, C3k2, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, YOLOESegment26, [nc, 32, 256, 512, True]] # YOLOESegment26(P3, P4, P5)
ultralytics/cfg/models/26/yoloe-26.yaml
@@ -0,0 +1,53 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOE-26 open-vocabulary object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo26
+# Task docs: https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+end2end: True # whether to use end-to-end mode
+reg_max: 1 # DFL bins
+text_model: mobileclip2:b
+scales: # model compound scaling constants, i.e. 'model=yoloe-26n.yaml' will call yoloe-26.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.50, 0.25, 1024] # summary: 298 layers, 5,061,540 parameters, 5,061,540 gradients, 7.3 GFLOPs
+  s: [0.50, 0.50, 1024] # summary: 298 layers, 13,776,836 parameters, 13,776,836 gradients, 24.8 GFLOPs
+  m: [0.50, 1.00, 512] # summary: 318 layers, 29,706,308 parameters, 29,706,308 gradients, 79.2 GFLOPs
+  l: [1.00, 1.00, 512] # summary: 430 layers, 34,109,764 parameters, 34,109,764 gradients, 97.6 GFLOPs
+  x: [1.00, 1.50, 512] # summary: 430 layers, 73,697,252 parameters, 73,697,252 gradients, 215.2 GFLOPs
+
+# YOLOE26n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 2, C3k2, [256, False, 0.25]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 2, C3k2, [512, False, 0.25]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 2, C3k2, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 2, C3k2, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+  - [-1, 2, C2PSA, [1024]] # 10
+
+# YOLOE26n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 2, C3k2, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, YOLOEDetect, [nc, 512, True]] # YOLOEDetect(P3, P4, P5)
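A hedged sketch of the open-vocabulary workflow the YOLOEDetect head enables, using the existing Ultralytics YOLOE API (set_classes/get_text_pe); that the new yoloe-26 configs plug into it unchanged is an assumption:

from ultralytics import YOLOE

model = YOLOE("yoloe-26s.yaml")  # build at scale 's'; text_model above names the mobileclip2:b encoder
names = ["person", "bus"]
model.set_classes(names, model.get_text_pe(names))  # embed text prompts as class weights
results = model.predict("bus.jpg")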
ultralytics/data/augment.py
@@ -2062,11 +2062,18 @@ class Format:
             if nl:
                 masks, instances, cls = self._format_segments(instances, cls, w, h)
                 masks = torch.from_numpy(masks)
+                cls_tensor = torch.from_numpy(cls.squeeze(1))
+                if self.mask_overlap:
+                    sem_masks = cls_tensor[masks[0].long() - 1]  # (H, W) from (1, H, W) instance indices
+                else:
+                    sem_masks = (masks * cls_tensor[:, None, None]).max(0).values  # (H, W) from (N, H, W) binary
             else:
                 masks = torch.zeros(
                     1 if self.mask_overlap else nl, img.shape[0] // self.mask_ratio, img.shape[1] // self.mask_ratio
                 )
+                sem_masks = torch.zeros(img.shape[0] // self.mask_ratio, img.shape[1] // self.mask_ratio)
             labels["masks"] = masks
+            labels["sem_masks"] = sem_masks.float()
         labels["img"] = self._format_img(img)
         labels["cls"] = torch.from_numpy(cls) if nl else torch.zeros(nl, 1)
         labels["bboxes"] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
ultralytics/data/converter.py
@@ -749,12 +749,13 @@ def convert_to_multispectral(path: str | Path, n_channels: int = 10, replace: bo
 async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Path | None = None) -> Path:
     """Convert NDJSON dataset format to Ultralytics YOLO11 dataset structure.
 
-    This function converts datasets stored in NDJSON (Newline Delimited JSON) format to the standard YOLO format with
-    separate directories for images and labels. It supports parallel processing for efficient conversion of large
-    datasets and can download images from URLs if they don't exist locally.
+    This function converts datasets stored in NDJSON (Newline Delimited JSON) format to the standard YOLO format. For
+    detection/segmentation/pose/obb tasks, it creates separate directories for images and labels. For classification
+    tasks, it creates the ImageNet-style {split}/{class_name}/ folder structure. It supports parallel processing for
+    efficient conversion of large datasets and can download images from URLs.
 
     The NDJSON format consists of:
-    - First line: Dataset metadata with class names and configuration
+    - First line: Dataset metadata with class names, task type, and configuration
     - Subsequent lines: Individual image records with annotations and optional URLs
 
     Args:
@@ -763,7 +764,7 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
             None, uses the parent directory of the NDJSON file. Defaults to None.
 
     Returns:
-        (Path): Path to the generated data.yaml file that can be used for YOLO training.
+        (Path): Path to the generated data.yaml file (detection) or dataset directory (classification).
 
     Examples:
         Convert a local NDJSON file:
@@ -790,36 +791,51 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
     dataset_dir = output_path / ndjson_path.stem
     splits = {record["split"] for record in image_records}
 
-    # Create directories and prepare YAML structure
-    dataset_dir.mkdir(parents=True, exist_ok=True)
-    data_yaml = dict(dataset_record)
-    data_yaml["names"] = {int(k): v for k, v in dataset_record.get("class_names", {}).items()}
-    data_yaml.pop("class_names")
+    # Check if this is a classification dataset
+    is_classification = dataset_record.get("task") == "classify"
+    class_names = {int(k): v for k, v in dataset_record.get("class_names", {}).items()}
 
-    for split in sorted(splits):
-        (dataset_dir / "images" / split).mkdir(parents=True, exist_ok=True)
-        (dataset_dir / "labels" / split).mkdir(parents=True, exist_ok=True)
-        data_yaml[split] = f"images/{split}"
+    # Create base directories
+    dataset_dir.mkdir(parents=True, exist_ok=True)
+    data_yaml = None
+
+    if not is_classification:
+        # Detection/segmentation/pose/obb: prepare YAML and create base structure
+        data_yaml = dict(dataset_record)
+        data_yaml["names"] = class_names
+        data_yaml.pop("class_names", None)
+        data_yaml.pop("type", None)  # Remove NDJSON-specific fields
+        for split in sorted(splits):
+            (dataset_dir / "images" / split).mkdir(parents=True, exist_ok=True)
+            (dataset_dir / "labels" / split).mkdir(parents=True, exist_ok=True)
+            data_yaml[split] = f"images/{split}"
 
     async def process_record(session, semaphore, record):
         """Process single image record with async session."""
         async with semaphore:
             split, original_name = record["split"], record["file"]
-            label_path = dataset_dir / "labels" / split / f"{Path(original_name).stem}.txt"
-            image_path = dataset_dir / "images" / split / original_name
-
             annotations = record.get("annotations", {})
-            lines_to_write = []
-            for key in annotations.keys():
-                lines_to_write = [" ".join(map(str, item)) for item in annotations[key]]
-                break
-            if "classification" in annotations:
-                lines_to_write = [str(cls) for cls in annotations["classification"]]
-
-            label_path.write_text("\n".join(lines_to_write) + "\n" if lines_to_write else "")
 
+            if is_classification:
+                # Classification: place image in {split}/{class_name}/ folder
+                class_ids = annotations.get("classification", [])
+                class_id = class_ids[0] if class_ids else 0
+                class_name = class_names.get(class_id, str(class_id))
+                image_path = dataset_dir / split / class_name / original_name
+            else:
+                # Detection: write label file and place image in images/{split}/
+                image_path = dataset_dir / "images" / split / original_name
+                label_path = dataset_dir / "labels" / split / f"{Path(original_name).stem}.txt"
+                lines_to_write = []
+                for key in annotations.keys():
+                    lines_to_write = [" ".join(map(str, item)) for item in annotations[key]]
+                    break
+                label_path.write_text("\n".join(lines_to_write) + "\n" if lines_to_write else "")
+
+            # Download image if URL provided and file doesn't exist
             if http_url := record.get("url"):
                 if not image_path.exists():
+                    image_path.parent.mkdir(parents=True, exist_ok=True)  # Ensure parent dir exists
                    try:
                        async with session.get(http_url, timeout=aiohttp.ClientTimeout(total=30)) as response:
                            response.raise_for_status()
@@ -848,8 +864,11 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
         await asyncio.gather(*[tracked_process(record) for record in image_records])
         pbar.close()
 
-    # Write data.yaml
-    yaml_path = dataset_dir / "data.yaml"
-    YAML.save(yaml_path, data_yaml)
-
-    return yaml_path
+    if is_classification:
+        # Classification: return dataset directory (check_cls_dataset expects a directory path)
+        return dataset_dir
+    else:
+        # Detection: write data.yaml and return its path
+        yaml_path = dataset_dir / "data.yaml"
+        YAML.save(yaml_path, data_yaml)
+        return yaml_path
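A usage sketch of the converter after this change (the NDJSON filename is illustrative). The function is async, so it is driven with asyncio; per the hunk above it returns a data.yaml path for detection-style tasks or the dataset directory for classification:

import asyncio

from ultralytics.data.converter import convert_ndjson_to_yolo

out = asyncio.run(convert_ndjson_to_yolo("coco8.ndjson"))
print(out)  # .../coco8/data.yaml (detection) or .../coco8 (classification)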
ultralytics/data/dataset.py
@@ -294,7 +294,7 @@ class YOLODataset(BaseDataset):
         values = list(zip(*[list(b.values()) for b in batch]))
         for i, k in enumerate(keys):
             value = values[i]
-            if k in {"img", "text_feats"}:
+            if k in {"img", "text_feats", "sem_masks"}:
                 value = torch.stack(value, 0)
             elif k == "visuals":
                 value = torch.nn.utils.rnn.pad_sequence(value, batch_first=True)
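Adding "sem_masks" to the stack branch works because every sample in a batch carries one fixed-size (H, W) semantic map, unlike variable-length fields such as "visuals" that still need pad_sequence. A minimal illustration:

import torch

sem_masks = [torch.zeros(160, 160), torch.ones(160, 160)]  # one fixed-size map per image
print(torch.stack(sem_masks, 0).shape)  # torch.Size([2, 160, 160])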
ultralytics/engine/exporter.py
@@ -463,6 +463,9 @@ class Exporter:
             )
         if tfjs and (ARM64 and LINUX):
             raise SystemError("TF.js exports are not currently supported on ARM64 Linux")
+        if ncnn and hasattr(model.model[-1], "one2one_cv2"):
+            del model.model[-1].one2one_cv2  # Disable end2end branch for NCNN export as it does not support topk
+            LOGGER.warning("NCNN export does not support end2end models, disabling end2end branch.")
         # Recommend OpenVINO if export and Intel CPU
         if SETTINGS.get("openvino_msg"):
             if is_intel():
@@ -503,7 +506,9 @@ class Exporter:
            m.dynamic = self.args.dynamic
            m.export = True
            m.format = self.args.format
-            m.max_det = self.args.max_det
+            # Clamp max_det to anchor count for small image sizes (required for TensorRT compatibility)
+            anchors = sum(int(self.imgsz[0] / s) * int(self.imgsz[1] / s) for s in model.stride.tolist())
+            m.max_det = min(self.args.max_det, anchors)
            m.xyxy = self.args.nms and not coreml
            m.shape = None  # reset cached shape for new export input size
            if hasattr(model, "pe") and hasattr(m, "fuse"):  # for YOLOE models
@@ -551,6 +556,8 @@ class Exporter:
             self.metadata["kpt_shape"] = model.model[-1].kpt_shape
         if hasattr(model, "kpt_names"):
             self.metadata["kpt_names"] = model.kpt_names
+        if getattr(model.model[-1], "end2end", False):
+            self.metadata["end2end"] = True
 
         LOGGER.info(
             f"\n{colorstr('PyTorch:')} starting from '{file}' with input shape {tuple(im.shape)} BCHW and "
@@ -787,7 +794,6 @@ class Exporter:
                     f".*{head_module_name}/.*/Sub*",
                     f".*{head_module_name}/.*/Mul*",
                     f".*{head_module_name}/.*/Div*",
-                    f".*{head_module_name}\\.dfl.*",
                 ],
                 types=["Sigmoid"],
             )
@@ -860,8 +866,7 @@ class Exporter:
     @try_export
     def export_ncnn(self, prefix=colorstr("NCNN:")):
         """Export YOLO model to NCNN format using PNNX https://github.com/pnnx/pnnx."""
-        # use git source for ARM64 due to broken PyPI packages https://github.com/Tencent/ncnn/issues/6509
-        check_requirements("git+https://github.com/Tencent/ncnn.git" if ARM64 else "ncnn", cmds="--no-deps")
+        check_requirements("ncnn", cmds="--no-deps")  # no deps to avoid installing opencv-python
         check_requirements("pnnx")
         import ncnn
         import pnnx
ultralytics/engine/model.py
@@ -825,7 +825,7 @@ class Model(torch.nn.Module):
 
         custom = {}  # method defaults
         args = {**self.overrides, **custom, **kwargs, "mode": "train"}  # highest priority args on the right
-        return Tuner(args=args, _callbacks=self.callbacks)(model=self, iterations=iterations)
+        return Tuner(args=args, _callbacks=self.callbacks)(iterations=iterations)
 
     def _apply(self, fn) -> Model:
         """Apply a function to model tensors that are not parameters or registered buffers.
ultralytics/engine/results.py
@@ -750,8 +750,8 @@ class Results(SimpleClass, DataExportMixin):
         """Convert inference results to a summarized dictionary with optional normalization for box coordinates.
 
         This method creates a list of detection dictionaries, each containing information about a single detection or
-        classification result. For classification tasks, it returns the top class and its
-        confidence. For detection tasks, it includes class information, bounding box coordinates, and
+        classification result. For classification tasks, it returns the top 5 classes and their
+        confidences. For detection tasks, it includes class information, bounding box coordinates, and
         optionally mask segments and keypoints.
 
         Args:
@@ -772,14 +772,23 @@
         # Create list of detection dictionaries
         results = []
         if self.probs is not None:
-            class_id = self.probs.top1
-            results.append(
-                {
-                    "name": self.names[class_id],
-                    "class": class_id,
-                    "confidence": round(self.probs.top1conf.item(), decimals),
-                }
-            )
+            # Return top 5 classification results
+            for class_id, conf in zip(self.probs.top5, self.probs.top5conf.tolist()):
+                class_id = int(class_id)
+                results.append(
+                    {
+                        "name": self.names[class_id],
+                        "class": class_id,
+                        "confidence": round(conf, decimals),
+                    }
+                )
+            results.append(
+                {
+                    "name": self.names[class_id],
+                    "class": class_id,
+                    "confidence": round(conf, decimals),
+                }
+            )
             return results
 
         is_obb = self.obb is not None
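A usage sketch of the summary() change above: classification results now yield one dict per top-5 class rather than a single top-1 entry (model and image names are illustrative):

from ultralytics import YOLO

results = YOLO("yolo11n-cls.pt")("bus.jpg")
for row in results[0].summary(decimals=3):
    print(row["class"], row["name"], row["confidence"])  # highest-confidence classes first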