ultralytics 8.2.37__py3-none-any.whl → 8.2.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ultralytics might be problematic.
- tests/test_python.py +9 -0
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/models/v10/yolov10b.yaml +42 -0
- ultralytics/cfg/models/v10/yolov10l.yaml +42 -0
- ultralytics/cfg/models/v10/yolov10m.yaml +42 -0
- ultralytics/cfg/models/v10/yolov10n.yaml +42 -0
- ultralytics/cfg/models/v10/yolov10s.yaml +42 -0
- ultralytics/cfg/models/v10/yolov10x.yaml +42 -0
- ultralytics/cfg/models/v8/yolov8-p6.yaml +5 -5
- ultralytics/data/augment.py +13 -16
- ultralytics/data/converter.py +10 -11
- ultralytics/data/split_dota.py +4 -4
- ultralytics/engine/exporter.py +3 -2
- ultralytics/engine/model.py +0 -1
- ultralytics/models/sam/modules/tiny_encoder.py +6 -7
- ultralytics/nn/modules/__init__.py +14 -1
- ultralytics/nn/modules/block.py +256 -1
- ultralytics/nn/modules/head.py +114 -4
- ultralytics/nn/tasks.py +40 -18
- ultralytics/solutions/__init__.py +1 -0
- ultralytics/utils/__init__.py +1 -1
- ultralytics/utils/benchmarks.py +5 -0
- ultralytics/utils/downloads.py +1 -0
- ultralytics/utils/loss.py +20 -2
- ultralytics/utils/metrics.py +2 -1
- ultralytics/utils/ops.py +3 -0
- {ultralytics-8.2.37.dist-info → ultralytics-8.2.39.dist-info}/METADATA +6 -6
- {ultralytics-8.2.37.dist-info → ultralytics-8.2.39.dist-info}/RECORD +32 -26
- {ultralytics-8.2.37.dist-info → ultralytics-8.2.39.dist-info}/LICENSE +0 -0
- {ultralytics-8.2.37.dist-info → ultralytics-8.2.39.dist-info}/WHEEL +0 -0
- {ultralytics-8.2.37.dist-info → ultralytics-8.2.39.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.2.37.dist-info → ultralytics-8.2.39.dist-info}/top_level.txt +0 -0
tests/test_python.py
CHANGED
@@ -577,3 +577,12 @@ def test_yolo_world():
         close_mosaic=1,
         trainer=WorldTrainerFromScratch,
     )
+
+
+def test_yolov10():
+    """A simple test for yolov10 for now."""
+    model = YOLO("yolov10n.yaml")
+    # train/val/predict
+    model.train(data="coco8.yaml", epochs=1, imgsz=32, close_mosaic=1, cache="disk")
+    model.val(data="coco8.yaml", imgsz=32)
+    model(SOURCE)
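For reference, the new test drives the same public API an end user would. A minimal YOLOv10 quickstart mirroring it (the sample image URL is illustrative):

from ultralytics import YOLO

model = YOLO("yolov10n.yaml")  # build YOLOv10n from the bundled config
model.train(data="coco8.yaml", epochs=1, imgsz=32)  # short smoke-training run
model.val(data="coco8.yaml", imgsz=32)  # validate on the same tiny dataset
model("https://ultralytics.com/images/bus.jpg")  # predict on a sample image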
ultralytics/__init__.py
CHANGED
-__version__ = "8.2.37"
+__version__ = "8.2.39"
ultralytics/cfg/models/v10/yolov10b.yaml
ADDED
@@ -0,0 +1,42 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv10 object detection model. For Usage examples see https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+  # [depth, width, max_channels]
+  b: [0.67, 1.00, 512]
+
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2fCIB, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 1, PSA, [1024]] # 10
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2fCIB, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, SCDown, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/v10/yolov10l.yaml
ADDED
@@ -0,0 +1,42 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv10 object detection model. For Usage examples see https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+  # [depth, width, max_channels]
+  l: [1.00, 1.00, 512]
+
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2fCIB, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 1, PSA, [1024]] # 10
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2fCIB, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, SCDown, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/v10/yolov10m.yaml
ADDED
@@ -0,0 +1,42 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv10 object detection model. For Usage examples see https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+  # [depth, width, max_channels]
+  m: [0.67, 0.75, 768]
+
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2fCIB, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 1, PSA, [1024]] # 10
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2f, [512]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, SCDown, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/v10/yolov10n.yaml
ADDED
@@ -0,0 +1,42 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv10 object detection model. For Usage examples see https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024]
+
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2f, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 1, PSA, [1024]] # 10
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2f, [512]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2f, [512]] # 19 (P4/16-medium)
+
+  - [-1, 1, SCDown, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/v10/yolov10s.yaml
ADDED
@@ -0,0 +1,42 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv10 object detection model. For Usage examples see https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+  # [depth, width, max_channels]
+  s: [0.33, 0.50, 1024]
+
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2fCIB, [1024, True, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 1, PSA, [1024]] # 10
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2f, [512]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2f, [512]] # 19 (P4/16-medium)
+
+  - [-1, 1, SCDown, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/v10/yolov10x.yaml
ADDED
@@ -0,0 +1,42 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv10 object detection model. For Usage examples see https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+  # [depth, width, max_channels]
+  x: [1.00, 1.25, 512]
+
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2fCIB, [512, True]]
+  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2fCIB, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 1, PSA, [1024]] # 10
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2fCIB, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, SCDown, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
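All six configs share one layout; they differ in the scales triple and in where C2fCIB and SCDown replace C2f and Conv. Roughly, a [depth, width, max_channels] triple rescales each layer's repeats and channels at parse time; the sketch below mirrors the usual Ultralytics parsing rule, with a simplified rounding helper that is a stand-in, not the library's exact implementation:

import math

def make_divisible(x, divisor=8):
    # Channel counts are rounded up to a multiple of 8
    return math.ceil(x / divisor) * divisor

def scale_layer(repeats, channels, depth, width, max_channels):
    r = max(round(repeats * depth), 1) if repeats > 1 else repeats  # scale module repeats
    c = make_divisible(min(channels, max_channels) * width)  # scale output channels
    return r, c

# With the n scale [0.33, 0.25, 1024], a 6-repeat, 512-channel C2f stage becomes:
print(scale_layer(6, 512, 0.33, 0.25, 1024))  # (2, 128)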
ultralytics/cfg/models/v8/yolov8-p6.yaml
CHANGED
@@ -5,11 +5,11 @@
 nc: 80 # number of classes
 scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
   # [depth, width, max_channels]
-  n: [0.33, 0.25, 1024]
-  s: [0.33, 0.50, 1024]
-  m: [0.67, 0.75, 768]
-  l: [1.00, 1.00, 512]
-  x: [1.00, 1.25, 512]
+  n: [0.33, 0.25, 1024] # YOLOv8n-p6 summary (fused): 220 layers, 4976656 parameters, 42560 gradients, 8.7 GFLOPs
+  s: [0.33, 0.50, 1024] # YOLOv8s-p6 summary (fused): 220 layers, 17897168 parameters, 57920 gradients, 28.5 GFLOPs
+  m: [0.67, 0.75, 768] # YOLOv8m-p6 summary (fused): 285 layers, 44862352 parameters, 78400 gradients, 83.1 GFLOPs
+  l: [1.00, 1.00, 512] # YOLOv8l-p6 summary (fused): 350 layers, 62351440 parameters, 98880 gradients, 167.3 GFLOPs
+  x: [1.00, 1.25, 512] # YOLOv8x-p6 summary (fused): 350 layers, 97382352 parameters, 123456 gradients, 261.1 GFLOPs

 # YOLOv8.0x6 backbone
 backbone:
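The only change to this file is documentation: each scale now records its fused model summary in a trailing comment. Figures like these can be regenerated with the model info API (a sketch; exact counts vary by version):

from ultralytics import YOLO

model = YOLO("yolov8n-p6.yaml")  # build from the config
model.info()  # prints layers, parameters, gradients and GFLOPs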
ultralytics/data/augment.py
CHANGED
@@ -1223,16 +1223,13 @@ def classify_transforms(
     else:
         # Resize the shortest edge to matching target dim for non-square target
         tfl = [T.Resize(scale_size)]
-    tfl += [
-        T.CenterCrop(size),
-        T.ToTensor(),
-        T.Normalize(
-            mean=torch.tensor(mean),
-            std=torch.tensor(std),
-        ),
-    ]
-
+    tfl.extend(
+        [
+            T.CenterCrop(size),
+            T.ToTensor(),
+            T.Normalize(mean=torch.tensor(mean), std=torch.tensor(std)),
+        ]
+    )
     return T.Compose(tfl)


@@ -1284,9 +1281,9 @@ def classify_augmentations(
     ratio = tuple(ratio or (3.0 / 4.0, 4.0 / 3.0)) # default imagenet ratio range
     primary_tfl = [T.RandomResizedCrop(size, scale=scale, ratio=ratio, interpolation=interpolation)]
     if hflip > 0.0:
-        primary_tfl += [T.RandomHorizontalFlip(p=hflip)]
+        primary_tfl.append(T.RandomHorizontalFlip(p=hflip))
     if vflip > 0.0:
-        primary_tfl += [T.RandomVerticalFlip(p=vflip)]
+        primary_tfl.append(T.RandomVerticalFlip(p=vflip))

     secondary_tfl = []
     disable_color_jitter = False
@@ -1298,19 +1295,19 @@ def classify_augmentations(

     if auto_augment == "randaugment":
         if TORCHVISION_0_11:
-            secondary_tfl += [T.RandAugment(interpolation=interpolation)]
+            secondary_tfl.append(T.RandAugment(interpolation=interpolation))
         else:
             LOGGER.warning('"auto_augment=randaugment" requires torchvision >= 0.11.0. Disabling it.')

     elif auto_augment == "augmix":
         if TORCHVISION_0_13:
-            secondary_tfl += [T.AugMix(interpolation=interpolation)]
+            secondary_tfl.append(T.AugMix(interpolation=interpolation))
         else:
             LOGGER.warning('"auto_augment=augmix" requires torchvision >= 0.13.0. Disabling it.')

     elif auto_augment == "autoaugment":
         if TORCHVISION_0_10:
-            secondary_tfl += [T.AutoAugment(interpolation=interpolation)]
+            secondary_tfl.append(T.AutoAugment(interpolation=interpolation))
         else:
             LOGGER.warning('"auto_augment=autoaugment" requires torchvision >= 0.10.0. Disabling it.')

@@ -1321,7 +1318,7 @@ def classify_augmentations(
     )

     if not disable_color_jitter:
-        secondary_tfl += [T.ColorJitter(brightness=hsv_v, contrast=hsv_v, saturation=hsv_s, hue=hsv_h)]
+        secondary_tfl.append(T.ColorJitter(brightness=hsv_v, contrast=hsv_v, saturation=hsv_s, hue=hsv_h))

     final_tfl = [
         T.ToTensor(),
ultralytics/data/converter.py
CHANGED
@@ -329,8 +329,7 @@ def convert_coco(

     if lvis:
         with open((Path(save_dir) / json_file.name.replace("lvis_v1_", "").replace(".json", ".txt")), "a") as f:
-            for l in image_txt:
-                f.write(f"{l}\n")
+            f.writelines(f"{line}\n" for line in image_txt)

     LOGGER.info(f"{'LVIS' if lvis else 'COCO'} data converted successfully.\nResults saved to {save_dir.resolve()}")

@@ -534,25 +533,25 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):

     LOGGER.info("Detection labels detected, generating segment labels by SAM model!")
     sam_model = SAM(sam_model)
-    for l in tqdm(dataset.labels, total=len(dataset.labels), desc="Generating segment labels"):
-        h, w = l["shape"]
-        boxes = l["bboxes"]
+    for label in tqdm(dataset.labels, total=len(dataset.labels), desc="Generating segment labels"):
+        h, w = label["shape"]
+        boxes = label["bboxes"]
         if len(boxes) == 0: # skip empty labels
             continue
         boxes[:, [0, 2]] *= w
         boxes[:, [1, 3]] *= h
-        im = cv2.imread(l["im_file"])
+        im = cv2.imread(label["im_file"])
         sam_results = sam_model(im, bboxes=xywh2xyxy(boxes), verbose=False, save=False)
-        l["segments"] = sam_results[0].masks.xyn
+        label["segments"] = sam_results[0].masks.xyn

     save_dir = Path(save_dir) if save_dir else Path(im_dir).parent / "labels-segment"
     save_dir.mkdir(parents=True, exist_ok=True)
-    for l in dataset.labels:
+    for label in dataset.labels:
         texts = []
-        lb_name = Path(l["im_file"]).with_suffix(".txt").name
+        lb_name = Path(label["im_file"]).with_suffix(".txt").name
         txt_file = save_dir / lb_name
-        cls = l["cls"]
-        for i, s in enumerate(l["segments"]):
+        cls = label["cls"]
+        for i, s in enumerate(label["segments"]):
             line = (int(cls[i]), *s.reshape(-1))
             texts.append(("%g " * len(line)).rstrip() % line)
         if texts:
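The converter changes are a lint-driven rename (the ambiguous loop variable l becomes label or line, per E741) plus a writelines simplification; behavior is unchanged. Typical invocation of the affected helper (paths illustrative; downloads sam_b.pt on first use):

from ultralytics.data.converter import yolo_bbox2segment

yolo_bbox2segment(im_dir="path/to/images", save_dir=None, sam_model="sam_b.pt")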
ultralytics/data/split_dota.py
CHANGED
@@ -26,8 +26,8 @@ def bbox_iof(polygon1, bbox2, eps=1e-6):
         bbox2 (np.ndarray): Bounding boxes, (n ,4).
     """
     polygon1 = polygon1.reshape(-1, 4, 2)
-    lt_point = np.min(polygon1, axis=-2)
-    rb_point = np.max(polygon1, axis=-2)
+    lt_point = np.min(polygon1, axis=-2) # left-top
+    rb_point = np.max(polygon1, axis=-2) # right-bottom
     bbox1 = np.concatenate([lt_point, rb_point], axis=-1)

     lt = np.maximum(bbox1[:, None, :2], bbox2[..., :2])
@@ -35,8 +35,8 @@ def bbox_iof(polygon1, bbox2, eps=1e-6):
     wh = np.clip(rb - lt, 0, np.inf)
     h_overlaps = wh[..., 0] * wh[..., 1]

-    l, t, r, b = (bbox2[..., i] for i in range(4))
-    polygon2 = np.stack([l, t, r, t, r, b, l, b], axis=-1).reshape(-1, 4, 2)
+    left, top, right, bottom = (bbox2[..., i] for i in range(4))
+    polygon2 = np.stack([left, top, right, top, right, bottom, left, bottom], axis=-1).reshape(-1, 4, 2)
     sg_polys1 = [Polygon(p) for p in polygon1]
     sg_polys2 = [Polygon(p) for p in polygon2]
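bbox_iof returns intersection-over-foreground: how much of each quadrilateral falls inside each axis-aligned box. A small sketch (array shapes follow the docstring; values chosen so the overlap is a quarter of the polygon):

import numpy as np
from ultralytics.data.split_dota import bbox_iof

polygon1 = np.array([[0, 0, 2, 0, 2, 2, 0, 2]], dtype=np.float32)  # one square as 4 (x, y) corners
bbox2 = np.array([[1, 1, 3, 3]], dtype=np.float32)  # one xyxy box overlapping a corner
print(bbox_iof(polygon1, bbox2))  # ~[[0.25]]: fraction of the polygon covered by the box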
ultralytics/engine/exporter.py
CHANGED
@@ -388,7 +388,7 @@ class Exporter:
         """YOLOv8 ONNX export."""
         requirements = ["onnx>=1.12.0"]
         if self.args.simplify:
-            requirements += ["onnxslim
+            requirements += ["onnxslim>=0.1.31", "onnxruntime" + ("-gpu" if torch.cuda.is_available() else "")]
         check_requirements(requirements)
         import onnx # noqa

@@ -827,7 +827,7 @@ class Exporter:
             "onnx>=1.12.0",
             "onnx2tf>1.17.5,<=1.22.3",
             "sng4onnx>=1.0.1",
-            "onnxslim
+            "onnxslim>=0.1.31",
             "onnx_graphsurgeon>=0.3.26",
             "tflite_support<=0.4.3" if IS_JETSON else "tflite_support", # fix ImportError 'GLIBCXX_3.4.29'
             "flatbuffers>=23.5.26,<100", # update old 'flatbuffers' included inside tensorflow package
@@ -920,6 +920,7 @@ class Exporter:
     @try_export
     def export_tflite(self, keras_model, nms, agnostic_nms, prefix=colorstr("TensorFlow Lite:")):
         """YOLOv8 TensorFlow Lite export."""
+        # BUG https://github.com/ultralytics/ultralytics/issues/13436
         import tensorflow as tf # noqa

         LOGGER.info(f"\n{prefix} starting export with tensorflow {tf.__version__}...")
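The exporter now asks for onnxslim>=0.1.31 (the removed lines above are truncated in the registry view, so the old pin is not shown), installed only when simplification is requested, and the TFLite path gains a tracking comment for a known upstream bug. A call that exercises the simplify path (weights name illustrative):

from ultralytics import YOLO

model = YOLO("yolov8n.pt")
model.export(format="onnx", simplify=True)  # installs onnxslim>=0.1.31 and onnxruntime on demand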
ultralytics/models/sam/modules/tiny_encoder.py
CHANGED
@@ -384,8 +384,8 @@ class TinyViTBlock(nn.Module):
         convolution.
         """
         h, w = self.input_resolution
-        b, l, c = x.shape
-        assert l == h * w, "input feature has wrong size"
+        b, hw, c = x.shape # batch, height*width, channels
+        assert hw == h * w, "input feature has wrong size"
         res_x = x
         if h == self.window_size and w == self.window_size:
             x = self.attn(x)
@@ -394,13 +394,13 @@ class TinyViTBlock(nn.Module):
             pad_b = (self.window_size - h % self.window_size) % self.window_size
             pad_r = (self.window_size - w % self.window_size) % self.window_size
             padding = pad_b > 0 or pad_r > 0
-
             if padding:
                 x = F.pad(x, (0, 0, 0, pad_r, 0, pad_b))

             pH, pW = h + pad_b, w + pad_r
             nH = pH // self.window_size
             nW = pW // self.window_size
+
             # Window partition
             x = (
                 x.view(b, nH, self.window_size, nW, self.window_size, c)
@@ -408,19 +408,18 @@ class TinyViTBlock(nn.Module):
                 .reshape(b * nH * nW, self.window_size * self.window_size, c)
             )
             x = self.attn(x)
+
             # Window reverse
             x = x.view(b, nH, nW, self.window_size, self.window_size, c).transpose(2, 3).reshape(b, pH, pW, c)
-
             if padding:
                 x = x[:, :h, :w].contiguous()

-            x = x.view(b, l, c)
+            x = x.view(b, hw, c)

         x = res_x + self.drop_path(x)
-
         x = x.transpose(1, 2).reshape(b, c, h, w)
         x = self.local_conv(x)
-        x = x.view(b, c, l).transpose(1, 2)
+        x = x.view(b, c, hw).transpose(1, 2)

         return x + self.drop_path(self.mlp(x))

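These edits rename the ambiguous l to hw (E741 again) and move blank lines; the window partition/reverse math is untouched. A standalone round trip of that logic in plain PyTorch (shapes are illustrative):

import torch

b, h, w, c, ws = 2, 8, 8, 16, 4  # batch, height, width, channels, window size
x = torch.randn(b, h, w, c)
nH, nW = h // ws, w // ws

windows = x.view(b, nH, ws, nW, ws, c).transpose(2, 3).reshape(b * nH * nW, ws * ws, c)  # partition
restored = windows.view(b, nH, nW, ws, ws, c).transpose(2, 3).reshape(b, h, w, c)  # reverse
assert torch.equal(x, restored)  # lossless when h and w divide evenly by ws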
ultralytics/nn/modules/__init__.py
CHANGED
@@ -22,18 +22,22 @@ from .block import (
     C2,
     C3,
     C3TR,
+    CIB,
     DFL,
     ELAN1,
+    PSA,
     SPP,
     SPPELAN,
     SPPF,
     AConv,
     ADown,
+    Attention,
     BNContrastiveHead,
     Bottleneck,
     BottleneckCSP,
     C2f,
     C2fAttn,
+    C2fCIB,
     C3Ghost,
     C3x,
     CBFuse,
@@ -46,7 +50,9 @@ from .block import (
     Proto,
     RepC3,
     RepNCSPELAN4,
+    RepVGGDW,
     ResNetLayer,
+    SCDown,
 )
 from .conv import (
     CBAM,
@@ -63,7 +69,7 @@ from .conv import (
     RepConv,
     SpatialAttention,
 )
-from .head import OBB, Classify, Detect, Pose, RTDETRDecoder, Segment, WorldDetect
+from .head import OBB, Classify, Detect, Pose, RTDETRDecoder, Segment, WorldDetect, v10Detect
 from .transformer import (
     AIFI,
     MLP,
@@ -127,6 +133,7 @@ __all__ = (
     "ResNetLayer",
     "OBB",
     "WorldDetect",
+    "v10Detect",
     "ImagePoolingAttn",
     "ContrastiveHead",
     "BNContrastiveHead",
@@ -137,4 +144,10 @@ __all__ = (
     "CBLinear",
     "AConv",
     "ELAN1",
+    "RepVGGDW",
+    "CIB",
+    "C2fCIB",
+    "Attention",
+    "PSA",
+    "SCDown",
 )