ultralytics 8.1.29__py3-none-any.whl → 8.3.62__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/__init__.py +22 -0
- tests/conftest.py +83 -0
- tests/test_cli.py +122 -0
- tests/test_cuda.py +155 -0
- tests/test_engine.py +131 -0
- tests/test_exports.py +216 -0
- tests/test_integrations.py +150 -0
- tests/test_python.py +615 -0
- tests/test_solutions.py +94 -0
- ultralytics/__init__.py +11 -8
- ultralytics/cfg/__init__.py +569 -131
- ultralytics/cfg/datasets/Argoverse.yaml +2 -1
- ultralytics/cfg/datasets/DOTAv1.5.yaml +3 -2
- ultralytics/cfg/datasets/DOTAv1.yaml +3 -2
- ultralytics/cfg/datasets/GlobalWheat2020.yaml +3 -2
- ultralytics/cfg/datasets/ImageNet.yaml +2 -1
- ultralytics/cfg/datasets/Objects365.yaml +5 -4
- ultralytics/cfg/datasets/SKU-110K.yaml +2 -1
- ultralytics/cfg/datasets/VOC.yaml +3 -2
- ultralytics/cfg/datasets/VisDrone.yaml +6 -5
- ultralytics/cfg/datasets/african-wildlife.yaml +25 -0
- ultralytics/cfg/datasets/brain-tumor.yaml +23 -0
- ultralytics/cfg/datasets/carparts-seg.yaml +3 -2
- ultralytics/cfg/datasets/coco-pose.yaml +7 -6
- ultralytics/cfg/datasets/coco.yaml +3 -2
- ultralytics/cfg/datasets/coco128-seg.yaml +4 -3
- ultralytics/cfg/datasets/coco128.yaml +4 -3
- ultralytics/cfg/datasets/coco8-pose.yaml +3 -2
- ultralytics/cfg/datasets/coco8-seg.yaml +3 -2
- ultralytics/cfg/datasets/coco8.yaml +3 -2
- ultralytics/cfg/datasets/crack-seg.yaml +3 -2
- ultralytics/cfg/datasets/dog-pose.yaml +24 -0
- ultralytics/cfg/datasets/dota8.yaml +3 -2
- ultralytics/cfg/datasets/hand-keypoints.yaml +26 -0
- ultralytics/cfg/datasets/lvis.yaml +1236 -0
- ultralytics/cfg/datasets/medical-pills.yaml +22 -0
- ultralytics/cfg/datasets/open-images-v7.yaml +2 -1
- ultralytics/cfg/datasets/package-seg.yaml +5 -4
- ultralytics/cfg/datasets/signature.yaml +21 -0
- ultralytics/cfg/datasets/tiger-pose.yaml +3 -2
- ultralytics/cfg/datasets/xView.yaml +2 -1
- ultralytics/cfg/default.yaml +14 -11
- ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml +24 -0
- ultralytics/cfg/models/11/yolo11-cls.yaml +33 -0
- ultralytics/cfg/models/11/yolo11-obb.yaml +50 -0
- ultralytics/cfg/models/11/yolo11-pose.yaml +51 -0
- ultralytics/cfg/models/11/yolo11-seg.yaml +50 -0
- ultralytics/cfg/models/11/yolo11.yaml +50 -0
- ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +5 -2
- ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +5 -2
- ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +5 -2
- ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +5 -2
- ultralytics/cfg/models/v10/yolov10b.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10l.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10m.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10n.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10s.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10x.yaml +45 -0
- ultralytics/cfg/models/v3/yolov3-spp.yaml +5 -2
- ultralytics/cfg/models/v3/yolov3-tiny.yaml +5 -2
- ultralytics/cfg/models/v3/yolov3.yaml +5 -2
- ultralytics/cfg/models/v5/yolov5-p6.yaml +5 -2
- ultralytics/cfg/models/v5/yolov5.yaml +5 -2
- ultralytics/cfg/models/v6/yolov6.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-cls.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +6 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +6 -2
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-obb.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-p2.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-p6.yaml +10 -7
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-pose.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-seg.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-world.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8.yaml +5 -2
- ultralytics/cfg/models/v9/yolov9c-seg.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9c.yaml +30 -25
- ultralytics/cfg/models/v9/yolov9e-seg.yaml +64 -0
- ultralytics/cfg/models/v9/yolov9e.yaml +46 -42
- ultralytics/cfg/models/v9/yolov9m.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9s.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9t.yaml +41 -0
- ultralytics/cfg/solutions/default.yaml +24 -0
- ultralytics/cfg/trackers/botsort.yaml +8 -5
- ultralytics/cfg/trackers/bytetrack.yaml +8 -5
- ultralytics/data/__init__.py +14 -3
- ultralytics/data/annotator.py +37 -15
- ultralytics/data/augment.py +1783 -289
- ultralytics/data/base.py +62 -27
- ultralytics/data/build.py +36 -8
- ultralytics/data/converter.py +196 -36
- ultralytics/data/dataset.py +233 -94
- ultralytics/data/loaders.py +199 -96
- ultralytics/data/split_dota.py +39 -29
- ultralytics/data/utils.py +110 -40
- ultralytics/engine/__init__.py +1 -1
- ultralytics/engine/exporter.py +569 -242
- ultralytics/engine/model.py +604 -252
- ultralytics/engine/predictor.py +22 -11
- ultralytics/engine/results.py +1228 -218
- ultralytics/engine/trainer.py +190 -129
- ultralytics/engine/tuner.py +18 -18
- ultralytics/engine/validator.py +18 -15
- ultralytics/hub/__init__.py +31 -13
- ultralytics/hub/auth.py +11 -7
- ultralytics/hub/google/__init__.py +159 -0
- ultralytics/hub/session.py +128 -94
- ultralytics/hub/utils.py +20 -21
- ultralytics/models/__init__.py +4 -2
- ultralytics/models/fastsam/__init__.py +2 -3
- ultralytics/models/fastsam/model.py +26 -4
- ultralytics/models/fastsam/predict.py +127 -63
- ultralytics/models/fastsam/utils.py +1 -44
- ultralytics/models/fastsam/val.py +1 -1
- ultralytics/models/nas/__init__.py +1 -1
- ultralytics/models/nas/model.py +21 -10
- ultralytics/models/nas/predict.py +3 -6
- ultralytics/models/nas/val.py +4 -4
- ultralytics/models/rtdetr/__init__.py +1 -1
- ultralytics/models/rtdetr/model.py +1 -1
- ultralytics/models/rtdetr/predict.py +6 -8
- ultralytics/models/rtdetr/train.py +6 -2
- ultralytics/models/rtdetr/val.py +3 -3
- ultralytics/models/sam/__init__.py +3 -3
- ultralytics/models/sam/amg.py +29 -23
- ultralytics/models/sam/build.py +211 -13
- ultralytics/models/sam/model.py +91 -30
- ultralytics/models/sam/modules/__init__.py +1 -1
- ultralytics/models/sam/modules/blocks.py +1129 -0
- ultralytics/models/sam/modules/decoders.py +381 -53
- ultralytics/models/sam/modules/encoders.py +515 -324
- ultralytics/models/sam/modules/memory_attention.py +237 -0
- ultralytics/models/sam/modules/sam.py +969 -21
- ultralytics/models/sam/modules/tiny_encoder.py +425 -154
- ultralytics/models/sam/modules/transformer.py +159 -60
- ultralytics/models/sam/modules/utils.py +293 -0
- ultralytics/models/sam/predict.py +1263 -132
- ultralytics/models/utils/__init__.py +1 -1
- ultralytics/models/utils/loss.py +36 -24
- ultralytics/models/utils/ops.py +3 -7
- ultralytics/models/yolo/__init__.py +3 -3
- ultralytics/models/yolo/classify/__init__.py +1 -1
- ultralytics/models/yolo/classify/predict.py +7 -8
- ultralytics/models/yolo/classify/train.py +17 -22
- ultralytics/models/yolo/classify/val.py +8 -4
- ultralytics/models/yolo/detect/__init__.py +1 -1
- ultralytics/models/yolo/detect/predict.py +3 -5
- ultralytics/models/yolo/detect/train.py +11 -4
- ultralytics/models/yolo/detect/val.py +90 -52
- ultralytics/models/yolo/model.py +14 -9
- ultralytics/models/yolo/obb/__init__.py +1 -1
- ultralytics/models/yolo/obb/predict.py +2 -2
- ultralytics/models/yolo/obb/train.py +5 -3
- ultralytics/models/yolo/obb/val.py +41 -23
- ultralytics/models/yolo/pose/__init__.py +1 -1
- ultralytics/models/yolo/pose/predict.py +3 -5
- ultralytics/models/yolo/pose/train.py +2 -2
- ultralytics/models/yolo/pose/val.py +51 -17
- ultralytics/models/yolo/segment/__init__.py +1 -1
- ultralytics/models/yolo/segment/predict.py +3 -5
- ultralytics/models/yolo/segment/train.py +2 -2
- ultralytics/models/yolo/segment/val.py +60 -19
- ultralytics/models/yolo/world/__init__.py +5 -0
- ultralytics/models/yolo/world/train.py +92 -0
- ultralytics/models/yolo/world/train_world.py +109 -0
- ultralytics/nn/__init__.py +1 -1
- ultralytics/nn/autobackend.py +228 -93
- ultralytics/nn/modules/__init__.py +39 -14
- ultralytics/nn/modules/activation.py +21 -0
- ultralytics/nn/modules/block.py +526 -66
- ultralytics/nn/modules/conv.py +24 -7
- ultralytics/nn/modules/head.py +177 -34
- ultralytics/nn/modules/transformer.py +6 -5
- ultralytics/nn/modules/utils.py +1 -2
- ultralytics/nn/tasks.py +225 -77
- ultralytics/solutions/__init__.py +30 -1
- ultralytics/solutions/ai_gym.py +96 -143
- ultralytics/solutions/analytics.py +247 -0
- ultralytics/solutions/distance_calculation.py +78 -135
- ultralytics/solutions/heatmap.py +93 -247
- ultralytics/solutions/object_counter.py +184 -259
- ultralytics/solutions/parking_management.py +246 -0
- ultralytics/solutions/queue_management.py +112 -0
- ultralytics/solutions/region_counter.py +116 -0
- ultralytics/solutions/security_alarm.py +144 -0
- ultralytics/solutions/solutions.py +178 -0
- ultralytics/solutions/speed_estimation.py +86 -174
- ultralytics/solutions/streamlit_inference.py +190 -0
- ultralytics/solutions/trackzone.py +68 -0
- ultralytics/trackers/__init__.py +1 -1
- ultralytics/trackers/basetrack.py +32 -13
- ultralytics/trackers/bot_sort.py +61 -28
- ultralytics/trackers/byte_tracker.py +83 -51
- ultralytics/trackers/track.py +21 -6
- ultralytics/trackers/utils/__init__.py +1 -1
- ultralytics/trackers/utils/gmc.py +62 -48
- ultralytics/trackers/utils/kalman_filter.py +166 -35
- ultralytics/trackers/utils/matching.py +40 -21
- ultralytics/utils/__init__.py +511 -239
- ultralytics/utils/autobatch.py +40 -22
- ultralytics/utils/benchmarks.py +266 -85
- ultralytics/utils/callbacks/__init__.py +1 -1
- ultralytics/utils/callbacks/base.py +1 -3
- ultralytics/utils/callbacks/clearml.py +7 -6
- ultralytics/utils/callbacks/comet.py +39 -17
- ultralytics/utils/callbacks/dvc.py +1 -1
- ultralytics/utils/callbacks/hub.py +16 -16
- ultralytics/utils/callbacks/mlflow.py +28 -24
- ultralytics/utils/callbacks/neptune.py +6 -2
- ultralytics/utils/callbacks/raytune.py +3 -4
- ultralytics/utils/callbacks/tensorboard.py +18 -18
- ultralytics/utils/callbacks/wb.py +27 -20
- ultralytics/utils/checks.py +160 -100
- ultralytics/utils/dist.py +2 -1
- ultralytics/utils/downloads.py +40 -34
- ultralytics/utils/errors.py +1 -1
- ultralytics/utils/files.py +72 -38
- ultralytics/utils/instance.py +41 -19
- ultralytics/utils/loss.py +83 -55
- ultralytics/utils/metrics.py +61 -56
- ultralytics/utils/ops.py +94 -89
- ultralytics/utils/patches.py +30 -14
- ultralytics/utils/plotting.py +600 -269
- ultralytics/utils/tal.py +67 -26
- ultralytics/utils/torch_utils.py +302 -102
- ultralytics/utils/triton.py +2 -1
- ultralytics/utils/tuner.py +21 -12
- ultralytics-8.3.62.dist-info/METADATA +370 -0
- ultralytics-8.3.62.dist-info/RECORD +241 -0
- {ultralytics-8.1.29.dist-info → ultralytics-8.3.62.dist-info}/WHEEL +1 -1
- ultralytics/data/explorer/__init__.py +0 -5
- ultralytics/data/explorer/explorer.py +0 -472
- ultralytics/data/explorer/gui/__init__.py +0 -1
- ultralytics/data/explorer/gui/dash.py +0 -268
- ultralytics/data/explorer/utils.py +0 -166
- ultralytics/models/fastsam/prompt.py +0 -357
- ultralytics-8.1.29.dist-info/METADATA +0 -373
- ultralytics-8.1.29.dist-info/RECORD +0 -197
- {ultralytics-8.1.29.dist-info → ultralytics-8.3.62.dist-info}/LICENSE +0 -0
- {ultralytics-8.1.29.dist-info → ultralytics-8.3.62.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.1.29.dist-info → ultralytics-8.3.62.dist-info}/top_level.txt +0 -0
ultralytics/nn/modules/conv.py
CHANGED
@@ -1,4 +1,4 @@
-# Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Convolution modules."""
 
 import math
@@ -21,6 +21,7 @@ __all__ = (
     "CBAM",
     "Concat",
     "RepConv",
+    "Index",
 )
 
 
@@ -50,7 +51,7 @@ class Conv(nn.Module):
         return self.act(self.bn(self.conv(x)))
 
     def forward_fuse(self, x):
-        """
+        """Apply convolution and activation without batch normalization."""
         return self.act(self.conv(x))
 
 
@@ -158,9 +159,7 @@ class GhostConv(nn.Module):
     """Ghost Convolution https://github.com/huawei-noah/ghostnet."""
 
     def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
-        """Initializes
-        activation.
-        """
+        """Initializes Ghost Convolution module with primary and cheap operations for efficient feature learning."""
         super().__init__()
         c_ = c2 // 2  # hidden channels
         self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
@@ -211,7 +210,8 @@ class RepConv(nn.Module):
         kernelid, biasid = self._fuse_bn_tensor(self.bn)
         return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
 
-    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
+    @staticmethod
+    def _pad_1x1_to_3x3_tensor(kernel1x1):
         """Pads a 1x1 tensor to a 3x3 tensor."""
         if kernel1x1 is None:
             return 0
@@ -296,7 +296,7 @@ class SpatialAttention(nn.Module):
     def __init__(self, kernel_size=7):
         """Initialize Spatial-attention module with kernel size argument."""
         super().__init__()
-        assert kernel_size in (3, 7), "kernel size must be 3 or 7"
+        assert kernel_size in {3, 7}, "kernel size must be 3 or 7"
        padding = 3 if kernel_size == 7 else 1
         self.cv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
         self.act = nn.Sigmoid()
@@ -331,3 +331,20 @@ class Concat(nn.Module):
     def forward(self, x):
         """Forward pass for the YOLOv8 mask Proto module."""
         return torch.cat(x, self.d)
+
+
+class Index(nn.Module):
+    """Returns a particular index of the input."""
+
+    def __init__(self, c1, c2, index=0):
+        """Returns a particular index of the input."""
+        super().__init__()
+        self.index = index
+
+    def forward(self, x):
+        """
+        Forward pass.
+
+        Expects a list of tensors as input.
+        """
+        return x[self.index]
ultralytics/nn/modules/head.py
CHANGED
@@ -1,6 +1,7 @@
-# Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Model head modules."""
 
+import copy
 import math
 
 import torch
@@ -8,25 +9,30 @@ import torch.nn as nn
 from torch.nn.init import constant_, xavier_uniform_
 
 from ultralytics.utils.tal import TORCH_1_10, dist2bbox, dist2rbox, make_anchors
-
-from .
+
+from .block import DFL, BNContrastiveHead, ContrastiveHead, Proto
+from .conv import Conv, DWConv
 from .transformer import MLP, DeformableTransformerDecoder, DeformableTransformerDecoderLayer
 from .utils import bias_init_with_prob, linear_init
 
-__all__ = "Detect", "Segment", "Pose", "Classify", "OBB", "RTDETRDecoder"
+__all__ = "Detect", "Segment", "Pose", "Classify", "OBB", "RTDETRDecoder", "v10Detect"
 
 
 class Detect(nn.Module):
-    """
+    """YOLO Detect head for detection models."""
 
     dynamic = False  # force grid reconstruction
     export = False  # export mode
+    format = None  # export format
+    end2end = False  # end2end
+    max_det = 300  # max_det
     shape = None
     anchors = torch.empty(0)  # init
     strides = torch.empty(0)  # init
+    legacy = False  # backward compatibility for v3/v5/v8/v9 models
 
     def __init__(self, nc=80, ch=()):
-        """Initializes the
+        """Initializes the YOLO detection layer with specified number of classes and channels."""
         super().__init__()
         self.nc = nc  # number of classes
         self.nl = len(ch)  # number of detection layers
@@ -37,30 +43,76 @@ class Detect(nn.Module):
         self.cv2 = nn.ModuleList(
             nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch
         )
-        self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
+        self.cv3 = (
+            nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
+            if self.legacy
+            else nn.ModuleList(
+                nn.Sequential(
+                    nn.Sequential(DWConv(x, x, 3), Conv(x, c3, 1)),
+                    nn.Sequential(DWConv(c3, c3, 3), Conv(c3, c3, 1)),
+                    nn.Conv2d(c3, self.nc, 1),
+                )
+                for x in ch
+            )
+        )
         self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()
 
+        if self.end2end:
+            self.one2one_cv2 = copy.deepcopy(self.cv2)
+            self.one2one_cv3 = copy.deepcopy(self.cv3)
+
     def forward(self, x):
         """Concatenates and returns predicted bounding boxes and class probabilities."""
+        if self.end2end:
+            return self.forward_end2end(x)
+
         for i in range(self.nl):
             x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
         if self.training:  # Training path
             return x
+        y = self._inference(x)
+        return y if self.export else (y, x)
+
+    def forward_end2end(self, x):
+        """
+        Performs forward pass of the v10Detect module.
+
+        Args:
+            x (tensor): Input tensor.
+
+        Returns:
+            (dict, tensor): If not in training mode, returns a dictionary containing the outputs of both one2many and one2one detections.
+                If in training mode, returns a dictionary containing the outputs of one2many and one2one detections separately.
+        """
+        x_detach = [xi.detach() for xi in x]
+        one2one = [
+            torch.cat((self.one2one_cv2[i](x_detach[i]), self.one2one_cv3[i](x_detach[i])), 1) for i in range(self.nl)
+        ]
+        for i in range(self.nl):
+            x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
+        if self.training:  # Training path
+            return {"one2many": x, "one2one": one2one}
+
+        y = self._inference(one2one)
+        y = self.postprocess(y.permute(0, 2, 1), self.max_det, self.nc)
+        return y if self.export else (y, {"one2many": x, "one2one": one2one})
 
+    def _inference(self, x):
+        """Decode predicted bounding boxes and class probabilities based on multiple-level feature maps."""
         # Inference path
         shape = x[0].shape  # BCHW
         x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
-        if self.dynamic or self.shape != shape:
+        if self.format != "imx" and (self.dynamic or self.shape != shape):
             self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
             self.shape = shape
 
-        if self.export and self.format in ("saved_model", "pb", "tflite", "edgetpu", "tfjs"):  # avoid TF FlexSplitV ops
+        if self.export and self.format in {"saved_model", "pb", "tflite", "edgetpu", "tfjs"}:  # avoid TF FlexSplitV ops
             box = x_cat[:, : self.reg_max * 4]
             cls = x_cat[:, self.reg_max * 4 :]
         else:
             box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
 
-        if self.export and self.format in ("tflite", "edgetpu"):
+        if self.export and self.format in {"tflite", "edgetpu"}:
             # Precompute normalization factor to increase numerical stability
             # See https://github.com/ultralytics/ultralytics/issues/7371
             grid_h = shape[2]
@@ -68,11 +120,15 @@ class Detect(nn.Module):
             grid_size = torch.tensor([grid_w, grid_h, grid_w, grid_h], device=box.device).reshape(1, 4, 1)
             norm = self.strides / (self.stride[0] * grid_size)
             dbox = self.decode_bboxes(self.dfl(box) * norm, self.anchors.unsqueeze(0) * norm[:, :2])
+        elif self.export and self.format == "imx":
+            dbox = self.decode_bboxes(
+                self.dfl(box) * self.strides, self.anchors.unsqueeze(0) * self.strides, xywh=False
+            )
+            return dbox.transpose(1, 2), cls.sigmoid().permute(0, 2, 1)
         else:
             dbox = self.decode_bboxes(self.dfl(box), self.anchors.unsqueeze(0)) * self.strides
 
-        y = torch.cat((dbox, cls.sigmoid()), 1)
-        return y if self.export else (y, x)
+        return torch.cat((dbox, cls.sigmoid()), 1)
 
     def bias_init(self):
         """Initialize Detect() biases, WARNING: requires stride availability."""
@@ -82,14 +138,42 @@ class Detect(nn.Module):
         for a, b, s in zip(m.cv2, m.cv3, m.stride):  # from
             a[-1].bias.data[:] = 1.0  # box
             b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
+        if self.end2end:
+            for a, b, s in zip(m.one2one_cv2, m.one2one_cv3, m.stride):  # from
+                a[-1].bias.data[:] = 1.0  # box
+                b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
 
-    def decode_bboxes(self, bboxes, anchors):
+    def decode_bboxes(self, bboxes, anchors, xywh=True):
         """Decode bounding boxes."""
-        return dist2bbox(bboxes, anchors, xywh=
+        return dist2bbox(bboxes, anchors, xywh=xywh and (not self.end2end), dim=1)
+
+    @staticmethod
+    def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80):
+        """
+        Post-processes YOLO model predictions.
+
+        Args:
+            preds (torch.Tensor): Raw predictions with shape (batch_size, num_anchors, 4 + nc) with last dimension
+                format [x, y, w, h, class_probs].
+            max_det (int): Maximum detections per image.
+            nc (int, optional): Number of classes. Default: 80.
+
+        Returns:
+            (torch.Tensor): Processed predictions with shape (batch_size, min(max_det, num_anchors), 6) and last
+                dimension format [x, y, w, h, max_class_prob, class_index].
+        """
+        batch_size, anchors, _ = preds.shape  # i.e. shape(16,8400,84)
+        boxes, scores = preds.split([4, nc], dim=-1)
+        index = scores.amax(dim=-1).topk(min(max_det, anchors))[1].unsqueeze(-1)
+        boxes = boxes.gather(dim=1, index=index.repeat(1, 1, 4))
+        scores = scores.gather(dim=1, index=index.repeat(1, 1, nc))
+        scores, index = scores.flatten(1).topk(min(max_det, anchors))
+        i = torch.arange(batch_size)[..., None]  # batch indices
+        return torch.cat([boxes[i, index // nc], scores[..., None], (index % nc)[..., None].float()], dim=-1)
 
 
 class Segment(Detect):
-    """
+    """YOLO Segment head for segmentation models."""
 
     def __init__(self, nc=80, nm=32, npr=256, ch=()):
         """Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers."""
@@ -97,7 +181,6 @@ class Segment(Detect):
         self.nm = nm  # number of masks
         self.npr = npr  # number of protos
         self.proto = Proto(ch[0], self.npr, self.nm)  # protos
-        self.detect = Detect.forward
 
         c4 = max(ch[0] // 4, self.nm)
         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch)
@@ -108,20 +191,19 @@ class Segment(Detect):
         bs = p.shape[0]  # batch size
 
         mc = torch.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2)  # mask coefficients
-        x = self.detect(self, x)
+        x = Detect.forward(self, x)
         if self.training:
             return x, mc, p
         return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))
 
 
 class OBB(Detect):
-    """
+    """YOLO OBB detection head for detection with rotation models."""
 
     def __init__(self, nc=80, ne=1, ch=()):
         """Initialize OBB with number of classes `nc` and layer channels `ch`."""
         super().__init__(nc, ch)
         self.ne = ne  # number of extra parameters
-        self.detect = Detect.forward
 
         c4 = max(ch[0] // 4, self.ne)
         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.ne, 1)) for x in ch)
@@ -135,7 +217,7 @@ class OBB(Detect):
         # angle = angle.sigmoid() * math.pi / 2  # [0, pi/2]
         if not self.training:
             self.angle = angle
-        x = self.detect(self, x)
+        x = Detect.forward(self, x)
         if self.training:
             return x, angle
         return torch.cat([x, angle], 1) if self.export else (torch.cat([x[0], angle], 1), (x[1], angle))
@@ -146,14 +228,13 @@ class OBB(Detect):
 
 
 class Pose(Detect):
-    """
+    """YOLO Pose head for keypoints models."""
 
     def __init__(self, nc=80, kpt_shape=(17, 3), ch=()):
         """Initialize YOLO network with default parameters and Convolutional Layers."""
         super().__init__(nc, ch)
         self.kpt_shape = kpt_shape  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
         self.nk = kpt_shape[0] * kpt_shape[1]  # number of keypoints total
-        self.detect = Detect.forward
 
         c4 = max(ch[0] // 4, self.nk)
         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch)
@@ -162,7 +243,7 @@ class Pose(Detect):
         """Perform forward pass through YOLO model and return predictions."""
         bs = x[0].shape[0]  # batch size
         kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1)  # (bs, 17*3, h*w)
-        x = self.detect(self, x)
+        x = Detect.forward(self, x)
         if self.training:
             return x, kpt
         pred_kpt = self.kpts_decode(bs, kpt)
@@ -171,9 +252,21 @@ class Pose(Detect):
     def kpts_decode(self, bs, kpts):
         """Decodes keypoints."""
         ndim = self.kpt_shape[1]
-        if self.export:
-            y = kpts.view(bs, *self.kpt_shape, -1)
-            a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
+        if self.export:
+            if self.format in {
+                "tflite",
+                "edgetpu",
+            }:  # required for TFLite export to avoid 'PLACEHOLDER_FOR_GREATER_OP_CODES' bug
+                # Precompute normalization factor to increase numerical stability
+                y = kpts.view(bs, *self.kpt_shape, -1)
+                grid_h, grid_w = self.shape[2], self.shape[3]
+                grid_size = torch.tensor([grid_w, grid_h], device=y.device).reshape(1, 2, 1)
+                norm = self.strides / (self.stride[0] * grid_size)
+                a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * norm
+            else:
+                # NCNN fix
+                y = kpts.view(bs, *self.kpt_shape, -1)
+                a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
         if ndim == 3:
             a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2)
         return a.view(bs, self.nk, -1)
@@ -187,12 +280,12 @@ class Pose(Detect):
 
 
 class Classify(nn.Module):
-    """
+    """YOLO classification head, i.e. x(b,c1,20,20) to x(b,c2)."""
+
+    export = False  # export mode
 
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1):
-        """Initializes
-        padding, and groups.
-        """
+        """Initializes YOLO classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape."""
         super().__init__()
         c_ = 1280  # efficientnet_b0 size
         self.conv = Conv(c1, c_, k, s, p, g)
@@ -205,12 +298,17 @@ class Classify(nn.Module):
         if isinstance(x, list):
             x = torch.cat(x, 1)
         x = self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
-        return x if self.training else x.softmax(1)
+        if self.training:
+            return x
+        y = x.softmax(1)  # get final output
+        return y if self.export else (y, x)
 
 
 class WorldDetect(Detect):
+    """Head for integrating YOLO detection models with semantic understanding from text embeddings."""
+
     def __init__(self, nc=80, embed=512, with_bn=False, ch=()):
-        """Initialize
+        """Initialize YOLO detection layer with nc classes and layer channels ch."""
         super().__init__(nc, ch)
         c3 = max(ch[0], min(self.nc, 100))
         self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, embed, 1)) for x in ch)
@@ -230,13 +328,13 @@ class WorldDetect(Detect):
             self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
             self.shape = shape
 
-        if self.export and self.format in ("saved_model", "pb", "tflite", "edgetpu", "tfjs"):  # avoid TF FlexSplitV ops
+        if self.export and self.format in {"saved_model", "pb", "tflite", "edgetpu", "tfjs"}:  # avoid TF FlexSplitV ops
             box = x_cat[:, : self.reg_max * 4]
             cls = x_cat[:, self.reg_max * 4 :]
         else:
             box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
 
-        if self.export and self.format in ("tflite", "edgetpu"):
+        if self.export and self.format in {"tflite", "edgetpu"}:
             # Precompute normalization factor to increase numerical stability
             # See https://github.com/ultralytics/ultralytics/issues/7371
             grid_h = shape[2]
@@ -250,6 +348,15 @@ class WorldDetect(Detect):
         y = torch.cat((dbox, cls.sigmoid()), 1)
         return y if self.export else (y, x)
 
+    def bias_init(self):
+        """Initialize Detect() biases, WARNING: requires stride availability."""
+        m = self  # self.model[-1]  # Detect() module
+        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1
+        # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # nominal class frequency
+        for a, b, s in zip(m.cv2, m.cv3, m.stride):  # from
+            a[-1].bias.data[:] = 1.0  # box
+            # b[-1].bias.data[:] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
+
 
 class RTDETRDecoder(nn.Module):
     """
@@ -480,3 +587,39 @@ class RTDETRDecoder(nn.Module):
         xavier_uniform_(self.query_pos_head.layers[1].weight)
         for layer in self.input_proj:
             xavier_uniform_(layer[0].weight)
+
+
+class v10Detect(Detect):
+    """
+    v10 Detection head from https://arxiv.org/pdf/2405.14458.
+
+    Args:
+        nc (int): Number of classes.
+        ch (tuple): Tuple of channel sizes.
+
+    Attributes:
+        max_det (int): Maximum number of detections.
+
+    Methods:
+        __init__(self, nc=80, ch=()): Initializes the v10Detect object.
+        forward(self, x): Performs forward pass of the v10Detect module.
+        bias_init(self): Initializes biases of the Detect module.
+    """
+
+    end2end = True
+
+    def __init__(self, nc=80, ch=()):
+        """Initializes the v10Detect object with the specified number of classes and input channels."""
+        super().__init__(nc, ch)
+        c3 = max(ch[0], min(self.nc, 100))  # channels
+        # Light cls head
+        self.cv3 = nn.ModuleList(
+            nn.Sequential(
+                nn.Sequential(Conv(x, x, 3, g=x), Conv(x, c3, 1)),
+                nn.Sequential(Conv(c3, c3, 3, g=c3), Conv(c3, c3, 1)),
+                nn.Conv2d(c3, self.nc, 1),
+            )
+            for x in ch
+        )
+        self.one2one_cv3 = copy.deepcopy(self.cv3)
ultralytics/nn/modules/transformer.py
CHANGED

@@ -1,4 +1,4 @@
-# Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Transformer modules."""
 
 import math
@@ -174,18 +174,20 @@ class MLPBlock(nn.Module):
 class MLP(nn.Module):
     """Implements a simple multi-layer perceptron (also called FFN)."""
 
-    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
+    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, act=nn.ReLU, sigmoid=False):
         """Initialize the MLP with specified input, hidden, output dimensions and number of layers."""
         super().__init__()
         self.num_layers = num_layers
         h = [hidden_dim] * (num_layers - 1)
         self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
+        self.sigmoid = sigmoid
+        self.act = act()
 
     def forward(self, x):
         """Forward pass for the entire MLP."""
         for i, layer in enumerate(self.layers):
-            x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
-        return x
+            x = getattr(self, "act", nn.ReLU())(layer(x)) if i < self.num_layers - 1 else layer(x)
+        return x.sigmoid() if getattr(self, "sigmoid", False) else x
 
 
 class LayerNorm2d(nn.Module):
@@ -350,7 +352,6 @@ class DeformableTransformerDecoderLayer(nn.Module):
 
     def forward(self, embed, refer_bbox, feats, shapes, padding_mask=None, attn_mask=None, query_pos=None):
         """Perform the forward pass through the entire decoder layer."""
-
         # Self attention
         q = k = self.with_pos_embed(embed, query_pos)
         tgt = self.self_attn(q.transpose(0, 1), k.transpose(0, 1), embed.transpose(0, 1), attn_mask=attn_mask)[
ultralytics/nn/modules/utils.py
CHANGED
@@ -1,4 +1,4 @@
-# Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Module utils."""
 
 import copy
@@ -50,7 +50,6 @@ def multi_scale_deformable_attn_pytorch(
 
     https://github.com/IDEA-Research/detrex/blob/main/detrex/layers/multi_scale_deform_attn.py
     """
-
     bs, _, num_heads, embed_dims = value.shape
     _, num_queries, num_heads, num_levels, num_points, _ = sampling_locations.shape
     value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1)