ultralytics-opencv-headless 8.3.253__py3-none-any.whl → 8.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/__init__.py +2 -2
- tests/conftest.py +1 -1
- tests/test_cuda.py +8 -2
- tests/test_engine.py +8 -8
- tests/test_exports.py +13 -4
- tests/test_integrations.py +9 -9
- tests/test_python.py +14 -14
- tests/test_solutions.py +3 -3
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +6 -6
- ultralytics/cfg/default.yaml +3 -1
- ultralytics/cfg/models/26/yolo26-cls.yaml +33 -0
- ultralytics/cfg/models/26/yolo26-obb.yaml +52 -0
- ultralytics/cfg/models/26/yolo26-p2.yaml +60 -0
- ultralytics/cfg/models/26/yolo26-p6.yaml +62 -0
- ultralytics/cfg/models/26/yolo26-pose.yaml +53 -0
- ultralytics/cfg/models/26/yolo26-seg.yaml +52 -0
- ultralytics/cfg/models/26/yolo26.yaml +52 -0
- ultralytics/cfg/models/26/yoloe-26-seg.yaml +53 -0
- ultralytics/cfg/models/26/yoloe-26.yaml +53 -0
- ultralytics/data/augment.py +7 -0
- ultralytics/data/dataset.py +1 -1
- ultralytics/engine/exporter.py +9 -4
- ultralytics/engine/model.py +1 -1
- ultralytics/engine/trainer.py +40 -15
- ultralytics/engine/tuner.py +15 -7
- ultralytics/models/fastsam/predict.py +1 -1
- ultralytics/models/yolo/detect/train.py +3 -2
- ultralytics/models/yolo/detect/val.py +6 -0
- ultralytics/models/yolo/model.py +1 -1
- ultralytics/models/yolo/obb/predict.py +1 -1
- ultralytics/models/yolo/obb/train.py +1 -1
- ultralytics/models/yolo/pose/train.py +1 -1
- ultralytics/models/yolo/segment/predict.py +1 -1
- ultralytics/models/yolo/segment/train.py +1 -1
- ultralytics/models/yolo/segment/val.py +3 -1
- ultralytics/models/yolo/yoloe/train.py +6 -1
- ultralytics/models/yolo/yoloe/train_seg.py +6 -1
- ultralytics/nn/autobackend.py +3 -3
- ultralytics/nn/modules/__init__.py +8 -0
- ultralytics/nn/modules/block.py +128 -8
- ultralytics/nn/modules/head.py +789 -204
- ultralytics/nn/tasks.py +74 -29
- ultralytics/nn/text_model.py +5 -2
- ultralytics/optim/__init__.py +5 -0
- ultralytics/optim/muon.py +338 -0
- ultralytics/utils/callbacks/platform.py +9 -7
- ultralytics/utils/downloads.py +3 -1
- ultralytics/utils/export/engine.py +19 -10
- ultralytics/utils/export/imx.py +22 -11
- ultralytics/utils/export/tensorflow.py +21 -21
- ultralytics/utils/loss.py +587 -203
- ultralytics/utils/metrics.py +1 -0
- ultralytics/utils/ops.py +11 -2
- ultralytics/utils/tal.py +98 -19
- {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.1.dist-info}/METADATA +31 -39
- {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.1.dist-info}/RECORD +61 -50
- {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.1.dist-info}/WHEEL +0 -0
- {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.1.dist-info}/entry_points.txt +0 -0
- {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.1.dist-info}/licenses/LICENSE +0 -0
- {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.1.dist-info}/top_level.txt +0 -0
@@ -147,7 +147,12 @@ class YOLOEPETrainer(DetectionTrainer):
         model.model[-1].cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
         model.model[-1].cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
         model.model[-1].cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
-
+
+        if getattr(model.model[-1], "one2one_cv3", None) is not None:
+            model.model[-1].one2one_cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
+            model.model[-1].one2one_cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
+            model.model[-1].one2one_cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
+
         model.train()

         return model
ultralytics/models/yolo/yoloe/train_seg.py
CHANGED

@@ -104,7 +104,12 @@ class YOLOEPESegTrainer(SegmentationTrainer):
         model.model[-1].cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
         model.model[-1].cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
         model.model[-1].cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
-
+
+        if getattr(model.model[-1], "one2one_cv3", None) is not None:
+            model.model[-1].one2one_cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
+            model.model[-1].one2one_cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
+            model.model[-1].one2one_cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
+
         model.train()

         return model
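Both YOLOE trainer patches follow the same pattern: the unfrozen classification branches in cv3 are mirrored into the optional one2one_cv3 branch used by end-to-end (NMS-free) heads, guarded so older heads without that attribute are unaffected. A minimal sketch of the pattern with a toy head standing in for the real Detect module (all names here are hypothetical):

import torch.nn as nn
from copy import deepcopy

# Toy stand-in: cv3 holds per-scale classification branches,
# one2one_cv3 (optional) holds the end-to-end (NMS-free) counterparts.
class ToyHead(nn.Module):
    def __init__(self, end2end: bool = True):
        super().__init__()
        self.cv3 = nn.ModuleList(
            nn.Sequential(nn.Identity(), nn.Identity(), nn.Conv2d(8, 80, 1)) for _ in range(3)
        )
        self.one2one_cv3 = deepcopy(self.cv3) if end2end else None

head = ToyHead(end2end=True)
# Same guard as in the diff: only touch one2one_cv3 when the head actually has it.
if getattr(head, "one2one_cv3", None) is not None:
    for i in range(3):
        head.one2one_cv3[i][2] = deepcopy(head.cv3[i][2]).requires_grad_(True)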
ultralytics/nn/autobackend.py
CHANGED
@@ -221,6 +221,7 @@ class AutoBackend(nn.Module):
             for p in model.parameters():
                 p.requires_grad = False
             self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
+            end2end = getattr(model, "end2end", False)

         # TorchScript
         elif jit:
@@ -545,8 +546,7 @@
         # NCNN
         elif ncnn:
             LOGGER.info(f"Loading {w} for NCNN inference...")
-
-            check_requirements("git+https://github.com/Tencent/ncnn.git" if ARM64 else "ncnn", cmds="--no-deps")
+            check_requirements("ncnn", cmds="--no-deps")
             import ncnn as pyncnn

             net = pyncnn.Net()
@@ -657,7 +657,7 @@
             names = metadata["names"]
             kpt_shape = metadata.get("kpt_shape")
             kpt_names = metadata.get("kpt_names")
-            end2end = metadata.get("args", {}).get("nms", False)
+            end2end = metadata.get("end2end", False) or metadata.get("args", {}).get("nms", False)
             dynamic = metadata.get("args", {}).get("dynamic", dynamic)
             ch = metadata.get("channels", 3)
         elif not (pt or triton or nn_module):
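Together with the end2end attribute now read from in-memory PyTorch modules earlier in this file, this change lets exported models advertise end-to-end decoding through a top-level end2end metadata key while still honouring the older args.nms flag. A small sketch of the fallback lookup, using plain dicts with hypothetical metadata contents:

def resolve_end2end(metadata: dict) -> bool:
    # New top-level key wins; otherwise fall back to the legacy args.nms flag.
    return metadata.get("end2end", False) or metadata.get("args", {}).get("nms", False)

print(resolve_end2end({"end2end": True}))         # True (new-style metadata)
print(resolve_end2end({"args": {"nms": True}}))   # True (legacy flag)
print(resolve_end2end({"args": {"imgsz": 640}}))  # False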
ultralytics/nn/modules/__init__.py
CHANGED

@@ -78,15 +78,19 @@ from .conv import (
 )
 from .head import (
     OBB,
+    OBB26,
     Classify,
     Detect,
     LRPCHead,
     Pose,
+    Pose26,
     RTDETRDecoder,
     Segment,
+    Segment26,
     WorldDetect,
     YOLOEDetect,
     YOLOESegment,
+    YOLOESegment26,
     v10Detect,
 )
 from .transformer import (
@@ -115,6 +119,7 @@ __all__ = (
     "ELAN1",
     "MLP",
     "OBB",
+    "OBB26",
     "PSA",
     "SPP",
     "SPPELAN",
@@ -161,6 +166,7 @@ __all__ = (
     "MSDeformAttn",
     "MaxSigmoidAttnBlock",
     "Pose",
+    "Pose26",
     "Proto",
     "RTDETRDecoder",
     "RepC3",
@@ -170,6 +176,7 @@ __all__ = (
     "ResNetLayer",
     "SCDown",
     "Segment",
+    "Segment26",
     "SpatialAttention",
     "TorchVision",
     "TransformerBlock",
@@ -178,5 +185,6 @@ __all__ = (
     "WorldDetect",
     "YOLOEDetect",
     "YOLOESegment",
+    "YOLOESegment26",
     "v10Detect",
 )
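With these re-exports in place, the YOLO26 head variants should be importable directly from ultralytics.nn.modules in 8.4.1, alongside the existing heads (assuming head.py defines the classes as this import list indicates):

# Hypothetical usage; requires the 8.4.1 package where these symbols exist.
from ultralytics.nn.modules import OBB26, Pose26, Segment26, YOLOESegment26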
ultralytics/nn/modules/block.py
CHANGED
@@ -208,28 +208,33 @@ class SPP(nn.Module):
 class SPPF(nn.Module):
     """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""

-    def __init__(self, c1: int, c2: int, k: int = 5):
+    def __init__(self, c1: int, c2: int, k: int = 5, n: int = 3, shortcut: bool = False):
         """Initialize the SPPF layer with given input/output channels and kernel size.

         Args:
             c1 (int): Input channels.
             c2 (int): Output channels.
             k (int): Kernel size.
+            n (int): Number of pooling iterations.
+            shortcut (bool): Whether to use shortcut connection.

         Notes:
             This module is equivalent to SPP(k=(5, 9, 13)).
         """
         super().__init__()
         c_ = c1 // 2  # hidden channels
-        self.cv1 = Conv(c1, c_, 1, 1)
-        self.cv2 = Conv(c_ * 4, c2, 1, 1)
+        self.cv1 = Conv(c1, c_, 1, 1, act=False)
+        self.cv2 = Conv(c_ * (n + 1), c2, 1, 1)
         self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
+        self.n = n
+        self.add = shortcut and c1 == c2

     def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Apply sequential pooling operations to input and return concatenated feature maps."""
         y = [self.cv1(x)]
-        y.extend(self.m(y[-1]) for _ in range(3))
-        return self.cv2(torch.cat(y, 1))
+        y.extend(self.m(y[-1]) for _ in range(getattr(self, "n", 3)))
+        y = self.cv2(torch.cat(y, 1))
+        return y + x if getattr(self, "add", False) else y


 class C1(nn.Module):
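The SPPF changes keep the default behaviour (three pooling iterations, so cv2 still sees c_ * 4 input channels) but expose the iteration count n and an optional residual shortcut that only activates when c1 == c2; the getattr defaults in forward keep older checkpoints without the new attributes working. A stand-alone sketch of the channel math using plain torch layers rather than the Ultralytics Conv class:

import torch
import torch.nn as nn

# Sketch of the updated SPPF pooling scheme (not the Ultralytics class itself):
# cv1 halves channels, n max-pool iterations are concatenated with the pooling
# chain's input, cv2 maps c_ * (n + 1) channels to c2, and an optional residual
# is added when c1 == c2.
c1, c2, k, n = 64, 64, 5, 3
c_ = c1 // 2
cv1 = nn.Conv2d(c1, c_, 1)
cv2 = nn.Conv2d(c_ * (n + 1), c2, 1)
m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

x = torch.randn(1, c1, 32, 32)
y = [cv1(x)]
y.extend(m(y[-1]) for _ in range(n))
out = cv2(torch.cat(y, 1))
out = out + x  # shortcut branch, valid here because c1 == c2
print(out.shape)  # torch.Size([1, 64, 32, 32])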
@@ -1065,7 +1070,15 @@ class C3k2(C2f):
     """Faster Implementation of CSP Bottleneck with 2 convolutions."""

     def __init__(
-        self, c1: int, c2: int, n: int = 1, c3k: bool = False, e: float = 0.5, g: int = 1, shortcut: bool = True
+        self,
+        c1: int,
+        c2: int,
+        n: int = 1,
+        c3k: bool = False,
+        e: float = 0.5,
+        attn: bool = False,
+        g: int = 1,
+        shortcut: bool = True,
     ):
         """Initialize C3k2 module.

@@ -1075,12 +1088,21 @@ class C3k2(C2f):
             n (int): Number of blocks.
             c3k (bool): Whether to use C3k blocks.
             e (float): Expansion ratio.
+            attn (bool): Whether to use attention blocks.
             g (int): Groups for convolutions.
             shortcut (bool): Whether to use shortcut connections.
         """
         super().__init__(c1, c2, n, shortcut, g, e)
         self.m = nn.ModuleList(
-            C3k(self.c, self.c, 2, shortcut, g) if c3k else Bottleneck(self.c, self.c, shortcut, g) for _ in range(n)
+            nn.Sequential(
+                Bottleneck(self.c, self.c, shortcut, g),
+                PSABlock(self.c, attn_ratio=0.5, num_heads=max(self.c // 64, 1)),
+            )
+            if attn
+            else C3k(self.c, self.c, 2, shortcut, g)
+            if c3k
+            else Bottleneck(self.c, self.c, shortcut, g)
+            for _ in range(n)
         )

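The new attn flag swaps each repeated block for a Bottleneck followed by a PSABlock, with the attention head count floored at 1 for narrow channels; otherwise the previous c3k/Bottleneck choice is kept. A hedged sketch of just the selection logic, with toy stand-ins for the real Bottleneck, C3k, and PSABlock modules:

import torch.nn as nn

# Toy stand-ins so the selection logic below runs without ultralytics installed.
def Bottleneck(c): return nn.Conv2d(c, c, 3, padding=1)
def C3k(c): return nn.Conv2d(c, c, 3, padding=1)
def PSABlock(c, num_heads): return nn.Identity()

def build_blocks(c: int, n: int, c3k: bool, attn: bool) -> nn.ModuleList:
    # Mirrors the conditional generator in the updated C3k2.__init__:
    # attn -> Bottleneck + attention block, else c3k -> C3k, else plain Bottleneck.
    return nn.ModuleList(
        nn.Sequential(Bottleneck(c), PSABlock(c, num_heads=max(c // 64, 1)))
        if attn
        else C3k(c)
        if c3k
        else Bottleneck(c)
        for _ in range(n)
    )

print(build_blocks(32, n=2, c3k=False, attn=True))  # attention variant, num_heads floored at 1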
@@ -1148,6 +1170,8 @@ class RepVGGDW(torch.nn.Module):

         This method fuses the convolutional layers and updates the weights and biases accordingly.
         """
+        if not hasattr(self, "conv1"):
+            return  # already fused
         conv = fuse_conv_and_bn(self.conv.conv, self.conv.bn)
         conv1 = fuse_conv_and_bn(self.conv1.conv, self.conv1.bn)

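The added guard makes RepVGGDW.fuse() idempotent: once the conv1 branch has been folded away, a second call returns immediately instead of failing on the missing attribute. A minimal sketch of the same pattern on a hypothetical two-branch module:

import torch.nn as nn

class ToyRepBranch(nn.Module):
    """Hypothetical two-branch module that folds conv1 into conv on fuse()."""
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(8, 8, 3, padding=1)
        self.conv1 = nn.Conv2d(8, 8, 1)

    def fuse(self):
        if not hasattr(self, "conv1"):
            return  # already fused, nothing to do
        # Fold the 1x1 branch into the 3x3 kernel by zero-padding it to 3x3.
        self.conv.weight.data += nn.functional.pad(self.conv1.weight.data, [1, 1, 1, 1])
        self.conv.bias.data += self.conv1.bias.data
        del self.conv1

m = ToyRepBranch()
m.fuse()
m.fuse()  # safe: second call hits the guard and returns immediately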
@@ -1391,7 +1415,7 @@ class PSA(nn.Module):
         self.cv1 = Conv(c1, 2 * self.c, 1, 1)
         self.cv2 = Conv(2 * self.c, c1, 1)

-        self.attn = Attention(self.c, attn_ratio=0.5, num_heads=self.c // 64)
+        self.attn = Attention(self.c, attn_ratio=0.5, num_heads=max(self.c // 64, 1))
         self.ffn = nn.Sequential(Conv(self.c, self.c * 2, 1), Conv(self.c * 2, self.c, 1, act=False))

     def forward(self, x: torch.Tensor) -> torch.Tensor:
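Flooring the head count at 1 matters when the hidden channel count self.c is below 64, where integer division previously produced zero attention heads:

# Hidden channels below 64 previously yielded num_heads == 0; the floor keeps at least one head.
for c in (32, 64, 128, 256):
    print(c, c // 64, max(c // 64, 1))
# 32 0 1
# 64 1 1
# 128 2 2
# 256 4 4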
@@ -1945,3 +1969,99 @@ class SAVPE(nn.Module):
         aggregated = score.transpose(-2, -3) @ x.reshape(B, self.c, C // self.c, -1).transpose(-1, -2)

         return F.normalize(aggregated.transpose(-2, -3).reshape(B, Q, -1), dim=-1, p=2)
+
+
+class Proto26(Proto):
+    """Ultralytics YOLO26 models mask Proto module for segmentation models."""
+
+    def __init__(self, ch: tuple = (), c_: int = 256, c2: int = 32, nc: int = 80):
+        """Initialize the Ultralytics YOLO models mask Proto module with specified number of protos and masks.
+
+        Args:
+            ch (tuple): Tuple of channel sizes from backbone feature maps.
+            c_ (int): Intermediate channels.
+            c2 (int): Output channels (number of protos).
+            nc (int): Number of classes for semantic segmentation.
+        """
+        super().__init__(c_, c_, c2)
+        self.feat_refine = nn.ModuleList(Conv(x, ch[0], k=1) for x in ch[1:])
+        self.feat_fuse = Conv(ch[0], c_, k=3)
+        self.semseg = nn.Sequential(Conv(ch[0], c_, k=3), Conv(c_, c_, k=3), nn.Conv2d(c_, nc, 1))
+
+    def forward(self, x: torch.Tensor, return_semseg: bool = True) -> torch.Tensor:
+        """Perform a forward pass through layers using an upsampled input image."""
+        feat = x[0]
+        for i, f in enumerate(self.feat_refine):
+            up_feat = f(x[i + 1])
+            up_feat = F.interpolate(up_feat, size=feat.shape[2:], mode="nearest")
+            feat = feat + up_feat
+        p = super().forward(self.feat_fuse(feat))
+        if self.training and return_semseg:
+            semseg = self.semseg(feat)
+            return (p, semseg)
+        return p
+
+    def fuse(self):
+        """Fuse the model for inference by removing the semantic segmentation head."""
+        self.semseg = None
+
+
+class RealNVP(nn.Module):
+    """RealNVP: a flow-based generative model.
+
+    References:
+        https://arxiv.org/abs/1605.08803
+        https://github.com/open-mmlab/mmpose/blob/main/mmpose/models/utils/realnvp.py
+    """
+
+    @staticmethod
+    def nets():
+        """Get the scale model in a single invertable mapping."""
+        return nn.Sequential(nn.Linear(2, 64), nn.SiLU(), nn.Linear(64, 64), nn.SiLU(), nn.Linear(64, 2), nn.Tanh())
+
+    @staticmethod
+    def nett():
+        """Get the translation model in a single invertable mapping."""
+        return nn.Sequential(nn.Linear(2, 64), nn.SiLU(), nn.Linear(64, 64), nn.SiLU(), nn.Linear(64, 2))
+
+    @property
+    def prior(self):
+        """The prior distribution."""
+        return torch.distributions.MultivariateNormal(self.loc, self.cov)
+
+    def __init__(self):
+        super().__init__()
+
+        self.register_buffer("loc", torch.zeros(2))
+        self.register_buffer("cov", torch.eye(2))
+        self.register_buffer("mask", torch.tensor([[0, 1], [1, 0]] * 3, dtype=torch.float32))
+
+        self.s = torch.nn.ModuleList([self.nets() for _ in range(len(self.mask))])
+        self.t = torch.nn.ModuleList([self.nett() for _ in range(len(self.mask))])
+        self.init_weights()
+
+    def init_weights(self):
+        """Initialization model weights."""
+        for m in self.modules():
+            if isinstance(m, nn.Linear):
+                nn.init.xavier_uniform_(m.weight, gain=0.01)
+
+    def backward_p(self, x):
+        """Apply mapping form the data space to the latent space and calculate the log determinant of the Jacobian
+        matrix.
+        """
+        log_det_jacob, z = x.new_zeros(x.shape[0]), x
+        for i in reversed(range(len(self.t))):
+            z_ = self.mask[i] * z
+            s = self.s[i](z_) * (1 - self.mask[i])
+            t = self.t[i](z_) * (1 - self.mask[i])
+            z = (1 - self.mask[i]) * (z - t) * torch.exp(-s) + z_
+            log_det_jacob -= s.sum(dim=1)
+        return z, log_det_jacob
+
+    def log_prob(self, x):
+        """Calculate the log probability of given sample in data space."""
+        if x.dtype == torch.float32 and self.s[0][0].weight.dtype != torch.float32:
+            self.float()
+        z, log_det = self.backward_p(x)
+        return self.prior.log_prob(z) + log_det