ultralytics-opencv-headless 8.3.253__py3-none-any.whl → 8.4.1__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. tests/__init__.py +2 -2
  2. tests/conftest.py +1 -1
  3. tests/test_cuda.py +8 -2
  4. tests/test_engine.py +8 -8
  5. tests/test_exports.py +13 -4
  6. tests/test_integrations.py +9 -9
  7. tests/test_python.py +14 -14
  8. tests/test_solutions.py +3 -3
  9. ultralytics/__init__.py +1 -1
  10. ultralytics/cfg/__init__.py +6 -6
  11. ultralytics/cfg/default.yaml +3 -1
  12. ultralytics/cfg/models/26/yolo26-cls.yaml +33 -0
  13. ultralytics/cfg/models/26/yolo26-obb.yaml +52 -0
  14. ultralytics/cfg/models/26/yolo26-p2.yaml +60 -0
  15. ultralytics/cfg/models/26/yolo26-p6.yaml +62 -0
  16. ultralytics/cfg/models/26/yolo26-pose.yaml +53 -0
  17. ultralytics/cfg/models/26/yolo26-seg.yaml +52 -0
  18. ultralytics/cfg/models/26/yolo26.yaml +52 -0
  19. ultralytics/cfg/models/26/yoloe-26-seg.yaml +53 -0
  20. ultralytics/cfg/models/26/yoloe-26.yaml +53 -0
  21. ultralytics/data/augment.py +7 -0
  22. ultralytics/data/dataset.py +1 -1
  23. ultralytics/engine/exporter.py +9 -4
  24. ultralytics/engine/model.py +1 -1
  25. ultralytics/engine/trainer.py +40 -15
  26. ultralytics/engine/tuner.py +15 -7
  27. ultralytics/models/fastsam/predict.py +1 -1
  28. ultralytics/models/yolo/detect/train.py +3 -2
  29. ultralytics/models/yolo/detect/val.py +6 -0
  30. ultralytics/models/yolo/model.py +1 -1
  31. ultralytics/models/yolo/obb/predict.py +1 -1
  32. ultralytics/models/yolo/obb/train.py +1 -1
  33. ultralytics/models/yolo/pose/train.py +1 -1
  34. ultralytics/models/yolo/segment/predict.py +1 -1
  35. ultralytics/models/yolo/segment/train.py +1 -1
  36. ultralytics/models/yolo/segment/val.py +3 -1
  37. ultralytics/models/yolo/yoloe/train.py +6 -1
  38. ultralytics/models/yolo/yoloe/train_seg.py +6 -1
  39. ultralytics/nn/autobackend.py +3 -3
  40. ultralytics/nn/modules/__init__.py +8 -0
  41. ultralytics/nn/modules/block.py +128 -8
  42. ultralytics/nn/modules/head.py +789 -204
  43. ultralytics/nn/tasks.py +74 -29
  44. ultralytics/nn/text_model.py +5 -2
  45. ultralytics/optim/__init__.py +5 -0
  46. ultralytics/optim/muon.py +338 -0
  47. ultralytics/utils/callbacks/platform.py +9 -7
  48. ultralytics/utils/downloads.py +3 -1
  49. ultralytics/utils/export/engine.py +19 -10
  50. ultralytics/utils/export/imx.py +22 -11
  51. ultralytics/utils/export/tensorflow.py +21 -21
  52. ultralytics/utils/loss.py +587 -203
  53. ultralytics/utils/metrics.py +1 -0
  54. ultralytics/utils/ops.py +11 -2
  55. ultralytics/utils/tal.py +98 -19
  56. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.1.dist-info}/METADATA +31 -39
  57. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.1.dist-info}/RECORD +61 -50
  58. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.1.dist-info}/WHEEL +0 -0
  59. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.1.dist-info}/entry_points.txt +0 -0
  60. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.1.dist-info}/licenses/LICENSE +0 -0
  61. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.1.dist-info}/top_level.txt +0 -0
@@ -147,7 +147,12 @@ class YOLOEPETrainer(DetectionTrainer):
147
147
  model.model[-1].cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
148
148
  model.model[-1].cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
149
149
  model.model[-1].cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
150
- del model.pe
150
+
151
+ if getattr(model.model[-1], "one2one_cv3", None) is not None:
152
+ model.model[-1].one2one_cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
153
+ model.model[-1].one2one_cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
154
+ model.model[-1].one2one_cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
155
+
151
156
  model.train()
152
157
 
153
158
  return model
@@ -104,7 +104,12 @@ class YOLOEPESegTrainer(SegmentationTrainer):
104
104
  model.model[-1].cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
105
105
  model.model[-1].cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
106
106
  model.model[-1].cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
107
- del model.pe
107
+
108
+ if getattr(model.model[-1], "one2one_cv3", None) is not None:
109
+ model.model[-1].one2one_cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
110
+ model.model[-1].one2one_cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
111
+ model.model[-1].one2one_cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
112
+
108
113
  model.train()
109
114
 
110
115
  return model
@@ -221,6 +221,7 @@ class AutoBackend(nn.Module):
221
221
  for p in model.parameters():
222
222
  p.requires_grad = False
223
223
  self.model = model # explicitly assign for to(), cpu(), cuda(), half()
224
+ end2end = getattr(model, "end2end", False)
224
225
 
225
226
  # TorchScript
226
227
  elif jit:
@@ -545,8 +546,7 @@ class AutoBackend(nn.Module):
545
546
  # NCNN
546
547
  elif ncnn:
547
548
  LOGGER.info(f"Loading {w} for NCNN inference...")
548
- # use git source for ARM64 due to broken PyPI packages https://github.com/Tencent/ncnn/issues/6509
549
- check_requirements("git+https://github.com/Tencent/ncnn.git" if ARM64 else "ncnn", cmds="--no-deps")
549
+ check_requirements("ncnn", cmds="--no-deps")
550
550
  import ncnn as pyncnn
551
551
 
552
552
  net = pyncnn.Net()
@@ -657,7 +657,7 @@ class AutoBackend(nn.Module):
657
657
  names = metadata["names"]
658
658
  kpt_shape = metadata.get("kpt_shape")
659
659
  kpt_names = metadata.get("kpt_names")
660
- end2end = metadata.get("args", {}).get("nms", False)
660
+ end2end = metadata.get("end2end", False) or metadata.get("args", {}).get("nms", False)
661
661
  dynamic = metadata.get("args", {}).get("dynamic", dynamic)
662
662
  ch = metadata.get("channels", 3)
663
663
  elif not (pt or triton or nn_module):
@@ -78,15 +78,19 @@ from .conv import (
78
78
  )
79
79
  from .head import (
80
80
  OBB,
81
+ OBB26,
81
82
  Classify,
82
83
  Detect,
83
84
  LRPCHead,
84
85
  Pose,
86
+ Pose26,
85
87
  RTDETRDecoder,
86
88
  Segment,
89
+ Segment26,
87
90
  WorldDetect,
88
91
  YOLOEDetect,
89
92
  YOLOESegment,
93
+ YOLOESegment26,
90
94
  v10Detect,
91
95
  )
92
96
  from .transformer import (
@@ -115,6 +119,7 @@ __all__ = (
115
119
  "ELAN1",
116
120
  "MLP",
117
121
  "OBB",
122
+ "OBB26",
118
123
  "PSA",
119
124
  "SPP",
120
125
  "SPPELAN",
@@ -161,6 +166,7 @@ __all__ = (
161
166
  "MSDeformAttn",
162
167
  "MaxSigmoidAttnBlock",
163
168
  "Pose",
169
+ "Pose26",
164
170
  "Proto",
165
171
  "RTDETRDecoder",
166
172
  "RepC3",
@@ -170,6 +176,7 @@ __all__ = (
170
176
  "ResNetLayer",
171
177
  "SCDown",
172
178
  "Segment",
179
+ "Segment26",
173
180
  "SpatialAttention",
174
181
  "TorchVision",
175
182
  "TransformerBlock",
@@ -178,5 +185,6 @@ __all__ = (
178
185
  "WorldDetect",
179
186
  "YOLOEDetect",
180
187
  "YOLOESegment",
188
+ "YOLOESegment26",
181
189
  "v10Detect",
182
190
  )
@@ -208,28 +208,33 @@ class SPP(nn.Module):
208
208
  class SPPF(nn.Module):
209
209
  """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""
210
210
 
211
- def __init__(self, c1: int, c2: int, k: int = 5):
211
+ def __init__(self, c1: int, c2: int, k: int = 5, n: int = 3, shortcut: bool = False):
212
212
  """Initialize the SPPF layer with given input/output channels and kernel size.
213
213
 
214
214
  Args:
215
215
  c1 (int): Input channels.
216
216
  c2 (int): Output channels.
217
217
  k (int): Kernel size.
218
+ n (int): Number of pooling iterations.
219
+ shortcut (bool): Whether to use shortcut connection.
218
220
 
219
221
  Notes:
220
222
  This module is equivalent to SPP(k=(5, 9, 13)).
221
223
  """
222
224
  super().__init__()
223
225
  c_ = c1 // 2 # hidden channels
224
- self.cv1 = Conv(c1, c_, 1, 1)
225
- self.cv2 = Conv(c_ * 4, c2, 1, 1)
226
+ self.cv1 = Conv(c1, c_, 1, 1, act=False)
227
+ self.cv2 = Conv(c_ * (n + 1), c2, 1, 1)
226
228
  self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
229
+ self.n = n
230
+ self.add = shortcut and c1 == c2
227
231
 
228
232
  def forward(self, x: torch.Tensor) -> torch.Tensor:
229
233
  """Apply sequential pooling operations to input and return concatenated feature maps."""
230
234
  y = [self.cv1(x)]
231
- y.extend(self.m(y[-1]) for _ in range(3))
232
- return self.cv2(torch.cat(y, 1))
235
+ y.extend(self.m(y[-1]) for _ in range(getattr(self, "n", 3)))
236
+ y = self.cv2(torch.cat(y, 1))
237
+ return y + x if getattr(self, "add", False) else y
233
238
 
234
239
 
235
240
  class C1(nn.Module):
@@ -1065,7 +1070,15 @@ class C3k2(C2f):
1065
1070
  """Faster Implementation of CSP Bottleneck with 2 convolutions."""
1066
1071
 
1067
1072
  def __init__(
1068
- self, c1: int, c2: int, n: int = 1, c3k: bool = False, e: float = 0.5, g: int = 1, shortcut: bool = True
1073
+ self,
1074
+ c1: int,
1075
+ c2: int,
1076
+ n: int = 1,
1077
+ c3k: bool = False,
1078
+ e: float = 0.5,
1079
+ attn: bool = False,
1080
+ g: int = 1,
1081
+ shortcut: bool = True,
1069
1082
  ):
1070
1083
  """Initialize C3k2 module.
1071
1084
 
@@ -1075,12 +1088,21 @@ class C3k2(C2f):
1075
1088
  n (int): Number of blocks.
1076
1089
  c3k (bool): Whether to use C3k blocks.
1077
1090
  e (float): Expansion ratio.
1091
+ attn (bool): Whether to use attention blocks.
1078
1092
  g (int): Groups for convolutions.
1079
1093
  shortcut (bool): Whether to use shortcut connections.
1080
1094
  """
1081
1095
  super().__init__(c1, c2, n, shortcut, g, e)
1082
1096
  self.m = nn.ModuleList(
1083
- C3k(self.c, self.c, 2, shortcut, g) if c3k else Bottleneck(self.c, self.c, shortcut, g) for _ in range(n)
1097
+ nn.Sequential(
1098
+ Bottleneck(self.c, self.c, shortcut, g),
1099
+ PSABlock(self.c, attn_ratio=0.5, num_heads=max(self.c // 64, 1)),
1100
+ )
1101
+ if attn
1102
+ else C3k(self.c, self.c, 2, shortcut, g)
1103
+ if c3k
1104
+ else Bottleneck(self.c, self.c, shortcut, g)
1105
+ for _ in range(n)
1084
1106
  )
1085
1107
 
1086
1108
 
@@ -1148,6 +1170,8 @@ class RepVGGDW(torch.nn.Module):
1148
1170
 
1149
1171
  This method fuses the convolutional layers and updates the weights and biases accordingly.
1150
1172
  """
1173
+ if not hasattr(self, "conv1"):
1174
+ return # already fused
1151
1175
  conv = fuse_conv_and_bn(self.conv.conv, self.conv.bn)
1152
1176
  conv1 = fuse_conv_and_bn(self.conv1.conv, self.conv1.bn)
1153
1177
 
@@ -1391,7 +1415,7 @@ class PSA(nn.Module):
1391
1415
  self.cv1 = Conv(c1, 2 * self.c, 1, 1)
1392
1416
  self.cv2 = Conv(2 * self.c, c1, 1)
1393
1417
 
1394
- self.attn = Attention(self.c, attn_ratio=0.5, num_heads=self.c // 64)
1418
+ self.attn = Attention(self.c, attn_ratio=0.5, num_heads=max(self.c // 64, 1))
1395
1419
  self.ffn = nn.Sequential(Conv(self.c, self.c * 2, 1), Conv(self.c * 2, self.c, 1, act=False))
1396
1420
 
1397
1421
  def forward(self, x: torch.Tensor) -> torch.Tensor:
@@ -1945,3 +1969,99 @@ class SAVPE(nn.Module):
1945
1969
  aggregated = score.transpose(-2, -3) @ x.reshape(B, self.c, C // self.c, -1).transpose(-1, -2)
1946
1970
 
1947
1971
  return F.normalize(aggregated.transpose(-2, -3).reshape(B, Q, -1), dim=-1, p=2)
1972
+
1973
+
1974
+ class Proto26(Proto):
1975
+ """Ultralytics YOLO26 models mask Proto module for segmentation models."""
1976
+
1977
+ def __init__(self, ch: tuple = (), c_: int = 256, c2: int = 32, nc: int = 80):
1978
+ """Initialize the Ultralytics YOLO models mask Proto module with specified number of protos and masks.
1979
+
1980
+ Args:
1981
+ ch (tuple): Tuple of channel sizes from backbone feature maps.
1982
+ c_ (int): Intermediate channels.
1983
+ c2 (int): Output channels (number of protos).
1984
+ nc (int): Number of classes for semantic segmentation.
1985
+ """
1986
+ super().__init__(c_, c_, c2)
1987
+ self.feat_refine = nn.ModuleList(Conv(x, ch[0], k=1) for x in ch[1:])
1988
+ self.feat_fuse = Conv(ch[0], c_, k=3)
1989
+ self.semseg = nn.Sequential(Conv(ch[0], c_, k=3), Conv(c_, c_, k=3), nn.Conv2d(c_, nc, 1))
1990
+
1991
+ def forward(self, x: torch.Tensor, return_semseg: bool = True) -> torch.Tensor:
1992
+ """Perform a forward pass through layers using an upsampled input image."""
1993
+ feat = x[0]
1994
+ for i, f in enumerate(self.feat_refine):
1995
+ up_feat = f(x[i + 1])
1996
+ up_feat = F.interpolate(up_feat, size=feat.shape[2:], mode="nearest")
1997
+ feat = feat + up_feat
1998
+ p = super().forward(self.feat_fuse(feat))
1999
+ if self.training and return_semseg:
2000
+ semseg = self.semseg(feat)
2001
+ return (p, semseg)
2002
+ return p
2003
+
2004
+ def fuse(self):
2005
+ """Fuse the model for inference by removing the semantic segmentation head."""
2006
+ self.semseg = None
2007
+
2008
+
2009
+ class RealNVP(nn.Module):
2010
+ """RealNVP: a flow-based generative model.
2011
+
2012
+ References:
2013
+ https://arxiv.org/abs/1605.08803
2014
+ https://github.com/open-mmlab/mmpose/blob/main/mmpose/models/utils/realnvp.py
2015
+ """
2016
+
2017
+ @staticmethod
2018
+ def nets():
2019
+ """Get the scale model in a single invertable mapping."""
2020
+ return nn.Sequential(nn.Linear(2, 64), nn.SiLU(), nn.Linear(64, 64), nn.SiLU(), nn.Linear(64, 2), nn.Tanh())
2021
+
2022
+ @staticmethod
2023
+ def nett():
2024
+ """Get the translation model in a single invertable mapping."""
2025
+ return nn.Sequential(nn.Linear(2, 64), nn.SiLU(), nn.Linear(64, 64), nn.SiLU(), nn.Linear(64, 2))
2026
+
2027
+ @property
2028
+ def prior(self):
2029
+ """The prior distribution."""
2030
+ return torch.distributions.MultivariateNormal(self.loc, self.cov)
2031
+
2032
+ def __init__(self):
2033
+ super().__init__()
2034
+
2035
+ self.register_buffer("loc", torch.zeros(2))
2036
+ self.register_buffer("cov", torch.eye(2))
2037
+ self.register_buffer("mask", torch.tensor([[0, 1], [1, 0]] * 3, dtype=torch.float32))
2038
+
2039
+ self.s = torch.nn.ModuleList([self.nets() for _ in range(len(self.mask))])
2040
+ self.t = torch.nn.ModuleList([self.nett() for _ in range(len(self.mask))])
2041
+ self.init_weights()
2042
+
2043
+ def init_weights(self):
2044
+ """Initialization model weights."""
2045
+ for m in self.modules():
2046
+ if isinstance(m, nn.Linear):
2047
+ nn.init.xavier_uniform_(m.weight, gain=0.01)
2048
+
2049
+ def backward_p(self, x):
2050
+ """Apply mapping form the data space to the latent space and calculate the log determinant of the Jacobian
2051
+ matrix.
2052
+ """
2053
+ log_det_jacob, z = x.new_zeros(x.shape[0]), x
2054
+ for i in reversed(range(len(self.t))):
2055
+ z_ = self.mask[i] * z
2056
+ s = self.s[i](z_) * (1 - self.mask[i])
2057
+ t = self.t[i](z_) * (1 - self.mask[i])
2058
+ z = (1 - self.mask[i]) * (z - t) * torch.exp(-s) + z_
2059
+ log_det_jacob -= s.sum(dim=1)
2060
+ return z, log_det_jacob
2061
+
2062
+ def log_prob(self, x):
2063
+ """Calculate the log probability of given sample in data space."""
2064
+ if x.dtype == torch.float32 and self.s[0][0].weight.dtype != torch.float32:
2065
+ self.float()
2066
+ z, log_det = self.backward_p(x)
2067
+ return self.prior.log_prob(z) + log_det