ultralytics-opencv-headless 8.3.251__py3-none-any.whl → 8.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. tests/__init__.py +2 -2
  2. tests/conftest.py +1 -1
  3. tests/test_cuda.py +8 -2
  4. tests/test_engine.py +8 -8
  5. tests/test_exports.py +13 -4
  6. tests/test_integrations.py +9 -9
  7. tests/test_python.py +14 -14
  8. tests/test_solutions.py +3 -3
  9. ultralytics/__init__.py +1 -1
  10. ultralytics/cfg/__init__.py +6 -6
  11. ultralytics/cfg/default.yaml +3 -1
  12. ultralytics/cfg/models/26/yolo26-cls.yaml +33 -0
  13. ultralytics/cfg/models/26/yolo26-obb.yaml +52 -0
  14. ultralytics/cfg/models/26/yolo26-p2.yaml +60 -0
  15. ultralytics/cfg/models/26/yolo26-p6.yaml +62 -0
  16. ultralytics/cfg/models/26/yolo26-pose.yaml +53 -0
  17. ultralytics/cfg/models/26/yolo26-seg.yaml +52 -0
  18. ultralytics/cfg/models/26/yolo26.yaml +52 -0
  19. ultralytics/cfg/models/26/yoloe-26-seg.yaml +53 -0
  20. ultralytics/cfg/models/26/yoloe-26.yaml +53 -0
  21. ultralytics/data/augment.py +7 -0
  22. ultralytics/data/dataset.py +1 -1
  23. ultralytics/engine/exporter.py +11 -4
  24. ultralytics/engine/model.py +1 -1
  25. ultralytics/engine/trainer.py +40 -15
  26. ultralytics/engine/tuner.py +15 -7
  27. ultralytics/models/fastsam/predict.py +1 -1
  28. ultralytics/models/yolo/detect/train.py +3 -2
  29. ultralytics/models/yolo/detect/val.py +6 -0
  30. ultralytics/models/yolo/model.py +1 -1
  31. ultralytics/models/yolo/obb/predict.py +1 -1
  32. ultralytics/models/yolo/obb/train.py +1 -1
  33. ultralytics/models/yolo/pose/train.py +1 -1
  34. ultralytics/models/yolo/segment/predict.py +1 -1
  35. ultralytics/models/yolo/segment/train.py +1 -1
  36. ultralytics/models/yolo/segment/val.py +3 -1
  37. ultralytics/models/yolo/yoloe/train.py +6 -1
  38. ultralytics/models/yolo/yoloe/train_seg.py +6 -1
  39. ultralytics/nn/autobackend.py +11 -5
  40. ultralytics/nn/modules/__init__.py +8 -0
  41. ultralytics/nn/modules/block.py +128 -8
  42. ultralytics/nn/modules/head.py +789 -204
  43. ultralytics/nn/tasks.py +74 -29
  44. ultralytics/nn/text_model.py +5 -2
  45. ultralytics/optim/__init__.py +5 -0
  46. ultralytics/optim/muon.py +338 -0
  47. ultralytics/utils/callbacks/platform.py +30 -11
  48. ultralytics/utils/downloads.py +3 -1
  49. ultralytics/utils/export/engine.py +19 -10
  50. ultralytics/utils/export/imx.py +23 -12
  51. ultralytics/utils/export/tensorflow.py +21 -21
  52. ultralytics/utils/loss.py +587 -203
  53. ultralytics/utils/metrics.py +1 -0
  54. ultralytics/utils/ops.py +11 -2
  55. ultralytics/utils/tal.py +100 -20
  56. ultralytics/utils/torch_utils.py +1 -1
  57. ultralytics/utils/tqdm.py +4 -1
  58. {ultralytics_opencv_headless-8.3.251.dist-info → ultralytics_opencv_headless-8.4.1.dist-info}/METADATA +31 -39
  59. {ultralytics_opencv_headless-8.3.251.dist-info → ultralytics_opencv_headless-8.4.1.dist-info}/RECORD +63 -52
  60. {ultralytics_opencv_headless-8.3.251.dist-info → ultralytics_opencv_headless-8.4.1.dist-info}/WHEEL +0 -0
  61. {ultralytics_opencv_headless-8.3.251.dist-info → ultralytics_opencv_headless-8.4.1.dist-info}/entry_points.txt +0 -0
  62. {ultralytics_opencv_headless-8.3.251.dist-info → ultralytics_opencv_headless-8.4.1.dist-info}/licenses/LICENSE +0 -0
  63. {ultralytics_opencv_headless-8.3.251.dist-info → ultralytics_opencv_headless-8.4.1.dist-info}/top_level.txt +0 -0
@@ -147,7 +147,12 @@ class YOLOEPETrainer(DetectionTrainer):
147
147
  model.model[-1].cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
148
148
  model.model[-1].cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
149
149
  model.model[-1].cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
150
- del model.pe
150
+
151
+ if getattr(model.model[-1], "one2one_cv3", None) is not None:
152
+ model.model[-1].one2one_cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
153
+ model.model[-1].one2one_cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
154
+ model.model[-1].one2one_cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
155
+
151
156
  model.train()
152
157
 
153
158
  return model
@@ -104,7 +104,12 @@ class YOLOEPESegTrainer(SegmentationTrainer):
104
104
  model.model[-1].cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
105
105
  model.model[-1].cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
106
106
  model.model[-1].cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
107
- del model.pe
107
+
108
+ if getattr(model.model[-1], "one2one_cv3", None) is not None:
109
+ model.model[-1].one2one_cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
110
+ model.model[-1].one2one_cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
111
+ model.model[-1].one2one_cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
112
+
108
113
  model.train()
109
114
 
110
115
  return model
@@ -221,6 +221,7 @@ class AutoBackend(nn.Module):
221
221
  for p in model.parameters():
222
222
  p.requires_grad = False
223
223
  self.model = model # explicitly assign for to(), cpu(), cuda(), half()
224
+ end2end = getattr(model, "end2end", False)
224
225
 
225
226
  # TorchScript
226
227
  elif jit:
@@ -497,11 +498,11 @@ class AutoBackend(nn.Module):
497
498
  elif paddle:
498
499
  LOGGER.info(f"Loading {w} for PaddlePaddle inference...")
499
500
  check_requirements(
500
- "paddlepaddle-gpu"
501
+ "paddlepaddle-gpu>=3.0.0,!=3.3.0" # exclude 3.3.0 https://github.com/PaddlePaddle/Paddle/issues/77340
501
502
  if torch.cuda.is_available()
502
503
  else "paddlepaddle==3.0.0" # pin 3.0.0 for ARM64
503
504
  if ARM64
504
- else "paddlepaddle>=3.0.0"
505
+ else "paddlepaddle>=3.0.0,!=3.3.0" # exclude 3.3.0 https://github.com/PaddlePaddle/Paddle/issues/77340
505
506
  )
506
507
  import paddle.inference as pdi
507
508
 
@@ -545,11 +546,16 @@ class AutoBackend(nn.Module):
545
546
  # NCNN
546
547
  elif ncnn:
547
548
  LOGGER.info(f"Loading {w} for NCNN inference...")
548
- check_requirements("git+https://github.com/Tencent/ncnn.git" if ARM64 else "ncnn", cmds="--no-deps")
549
+ check_requirements("ncnn", cmds="--no-deps")
549
550
  import ncnn as pyncnn
550
551
 
551
552
  net = pyncnn.Net()
552
- net.opt.use_vulkan_compute = cuda
553
+ if isinstance(cuda, torch.device):
554
+ net.opt.use_vulkan_compute = cuda
555
+ elif isinstance(device, str) and device.startswith("vulkan"):
556
+ net.opt.use_vulkan_compute = True
557
+ net.set_vulkan_device(int(device.split(":")[1]))
558
+ device = torch.device("cpu")
553
559
  w = Path(w)
554
560
  if not w.is_file(): # if not *.param
555
561
  w = next(w.glob("*.param")) # get *.param file from *_ncnn_model dir
@@ -651,7 +657,7 @@ class AutoBackend(nn.Module):
651
657
  names = metadata["names"]
652
658
  kpt_shape = metadata.get("kpt_shape")
653
659
  kpt_names = metadata.get("kpt_names")
654
- end2end = metadata.get("args", {}).get("nms", False)
660
+ end2end = metadata.get("end2end", False) or metadata.get("args", {}).get("nms", False)
655
661
  dynamic = metadata.get("args", {}).get("dynamic", dynamic)
656
662
  ch = metadata.get("channels", 3)
657
663
  elif not (pt or triton or nn_module):
@@ -78,15 +78,19 @@ from .conv import (
78
78
  )
79
79
  from .head import (
80
80
  OBB,
81
+ OBB26,
81
82
  Classify,
82
83
  Detect,
83
84
  LRPCHead,
84
85
  Pose,
86
+ Pose26,
85
87
  RTDETRDecoder,
86
88
  Segment,
89
+ Segment26,
87
90
  WorldDetect,
88
91
  YOLOEDetect,
89
92
  YOLOESegment,
93
+ YOLOESegment26,
90
94
  v10Detect,
91
95
  )
92
96
  from .transformer import (
@@ -115,6 +119,7 @@ __all__ = (
115
119
  "ELAN1",
116
120
  "MLP",
117
121
  "OBB",
122
+ "OBB26",
118
123
  "PSA",
119
124
  "SPP",
120
125
  "SPPELAN",
@@ -161,6 +166,7 @@ __all__ = (
161
166
  "MSDeformAttn",
162
167
  "MaxSigmoidAttnBlock",
163
168
  "Pose",
169
+ "Pose26",
164
170
  "Proto",
165
171
  "RTDETRDecoder",
166
172
  "RepC3",
@@ -170,6 +176,7 @@ __all__ = (
170
176
  "ResNetLayer",
171
177
  "SCDown",
172
178
  "Segment",
179
+ "Segment26",
173
180
  "SpatialAttention",
174
181
  "TorchVision",
175
182
  "TransformerBlock",
@@ -178,5 +185,6 @@ __all__ = (
178
185
  "WorldDetect",
179
186
  "YOLOEDetect",
180
187
  "YOLOESegment",
188
+ "YOLOESegment26",
181
189
  "v10Detect",
182
190
  )
@@ -208,28 +208,33 @@ class SPP(nn.Module):
208
208
  class SPPF(nn.Module):
209
209
  """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""
210
210
 
211
- def __init__(self, c1: int, c2: int, k: int = 5):
211
+ def __init__(self, c1: int, c2: int, k: int = 5, n: int = 3, shortcut: bool = False):
212
212
  """Initialize the SPPF layer with given input/output channels and kernel size.
213
213
 
214
214
  Args:
215
215
  c1 (int): Input channels.
216
216
  c2 (int): Output channels.
217
217
  k (int): Kernel size.
218
+ n (int): Number of pooling iterations.
219
+ shortcut (bool): Whether to use shortcut connection.
218
220
 
219
221
  Notes:
220
222
  This module is equivalent to SPP(k=(5, 9, 13)).
221
223
  """
222
224
  super().__init__()
223
225
  c_ = c1 // 2 # hidden channels
224
- self.cv1 = Conv(c1, c_, 1, 1)
225
- self.cv2 = Conv(c_ * 4, c2, 1, 1)
226
+ self.cv1 = Conv(c1, c_, 1, 1, act=False)
227
+ self.cv2 = Conv(c_ * (n + 1), c2, 1, 1)
226
228
  self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
229
+ self.n = n
230
+ self.add = shortcut and c1 == c2
227
231
 
228
232
  def forward(self, x: torch.Tensor) -> torch.Tensor:
229
233
  """Apply sequential pooling operations to input and return concatenated feature maps."""
230
234
  y = [self.cv1(x)]
231
- y.extend(self.m(y[-1]) for _ in range(3))
232
- return self.cv2(torch.cat(y, 1))
235
+ y.extend(self.m(y[-1]) for _ in range(getattr(self, "n", 3)))
236
+ y = self.cv2(torch.cat(y, 1))
237
+ return y + x if getattr(self, "add", False) else y
233
238
 
234
239
 
235
240
  class C1(nn.Module):
@@ -1065,7 +1070,15 @@ class C3k2(C2f):
1065
1070
  """Faster Implementation of CSP Bottleneck with 2 convolutions."""
1066
1071
 
1067
1072
  def __init__(
1068
- self, c1: int, c2: int, n: int = 1, c3k: bool = False, e: float = 0.5, g: int = 1, shortcut: bool = True
1073
+ self,
1074
+ c1: int,
1075
+ c2: int,
1076
+ n: int = 1,
1077
+ c3k: bool = False,
1078
+ e: float = 0.5,
1079
+ attn: bool = False,
1080
+ g: int = 1,
1081
+ shortcut: bool = True,
1069
1082
  ):
1070
1083
  """Initialize C3k2 module.
1071
1084
 
@@ -1075,12 +1088,21 @@ class C3k2(C2f):
1075
1088
  n (int): Number of blocks.
1076
1089
  c3k (bool): Whether to use C3k blocks.
1077
1090
  e (float): Expansion ratio.
1091
+ attn (bool): Whether to use attention blocks.
1078
1092
  g (int): Groups for convolutions.
1079
1093
  shortcut (bool): Whether to use shortcut connections.
1080
1094
  """
1081
1095
  super().__init__(c1, c2, n, shortcut, g, e)
1082
1096
  self.m = nn.ModuleList(
1083
- C3k(self.c, self.c, 2, shortcut, g) if c3k else Bottleneck(self.c, self.c, shortcut, g) for _ in range(n)
1097
+ nn.Sequential(
1098
+ Bottleneck(self.c, self.c, shortcut, g),
1099
+ PSABlock(self.c, attn_ratio=0.5, num_heads=max(self.c // 64, 1)),
1100
+ )
1101
+ if attn
1102
+ else C3k(self.c, self.c, 2, shortcut, g)
1103
+ if c3k
1104
+ else Bottleneck(self.c, self.c, shortcut, g)
1105
+ for _ in range(n)
1084
1106
  )
1085
1107
 
1086
1108
 
@@ -1148,6 +1170,8 @@ class RepVGGDW(torch.nn.Module):
1148
1170
 
1149
1171
  This method fuses the convolutional layers and updates the weights and biases accordingly.
1150
1172
  """
1173
+ if not hasattr(self, "conv1"):
1174
+ return # already fused
1151
1175
  conv = fuse_conv_and_bn(self.conv.conv, self.conv.bn)
1152
1176
  conv1 = fuse_conv_and_bn(self.conv1.conv, self.conv1.bn)
1153
1177
 
@@ -1391,7 +1415,7 @@ class PSA(nn.Module):
1391
1415
  self.cv1 = Conv(c1, 2 * self.c, 1, 1)
1392
1416
  self.cv2 = Conv(2 * self.c, c1, 1)
1393
1417
 
1394
- self.attn = Attention(self.c, attn_ratio=0.5, num_heads=self.c // 64)
1418
+ self.attn = Attention(self.c, attn_ratio=0.5, num_heads=max(self.c // 64, 1))
1395
1419
  self.ffn = nn.Sequential(Conv(self.c, self.c * 2, 1), Conv(self.c * 2, self.c, 1, act=False))
1396
1420
 
1397
1421
  def forward(self, x: torch.Tensor) -> torch.Tensor:
@@ -1945,3 +1969,99 @@ class SAVPE(nn.Module):
1945
1969
  aggregated = score.transpose(-2, -3) @ x.reshape(B, self.c, C // self.c, -1).transpose(-1, -2)
1946
1970
 
1947
1971
  return F.normalize(aggregated.transpose(-2, -3).reshape(B, Q, -1), dim=-1, p=2)
1972
+
1973
+
1974
+ class Proto26(Proto):
1975
+ """Ultralytics YOLO26 models mask Proto module for segmentation models."""
1976
+
1977
+ def __init__(self, ch: tuple = (), c_: int = 256, c2: int = 32, nc: int = 80):
1978
+ """Initialize the Ultralytics YOLO models mask Proto module with specified number of protos and masks.
1979
+
1980
+ Args:
1981
+ ch (tuple): Tuple of channel sizes from backbone feature maps.
1982
+ c_ (int): Intermediate channels.
1983
+ c2 (int): Output channels (number of protos).
1984
+ nc (int): Number of classes for semantic segmentation.
1985
+ """
1986
+ super().__init__(c_, c_, c2)
1987
+ self.feat_refine = nn.ModuleList(Conv(x, ch[0], k=1) for x in ch[1:])
1988
+ self.feat_fuse = Conv(ch[0], c_, k=3)
1989
+ self.semseg = nn.Sequential(Conv(ch[0], c_, k=3), Conv(c_, c_, k=3), nn.Conv2d(c_, nc, 1))
1990
+
1991
+ def forward(self, x: torch.Tensor, return_semseg: bool = True) -> torch.Tensor:
1992
+ """Perform a forward pass through layers using an upsampled input image."""
1993
+ feat = x[0]
1994
+ for i, f in enumerate(self.feat_refine):
1995
+ up_feat = f(x[i + 1])
1996
+ up_feat = F.interpolate(up_feat, size=feat.shape[2:], mode="nearest")
1997
+ feat = feat + up_feat
1998
+ p = super().forward(self.feat_fuse(feat))
1999
+ if self.training and return_semseg:
2000
+ semseg = self.semseg(feat)
2001
+ return (p, semseg)
2002
+ return p
2003
+
2004
+ def fuse(self):
2005
+ """Fuse the model for inference by removing the semantic segmentation head."""
2006
+ self.semseg = None
2007
+
2008
+
2009
+ class RealNVP(nn.Module):
2010
+ """RealNVP: a flow-based generative model.
2011
+
2012
+ References:
2013
+ https://arxiv.org/abs/1605.08803
2014
+ https://github.com/open-mmlab/mmpose/blob/main/mmpose/models/utils/realnvp.py
2015
+ """
2016
+
2017
+ @staticmethod
2018
+ def nets():
2019
 + """Get the scale model in a single invertible mapping."""
2020
+ return nn.Sequential(nn.Linear(2, 64), nn.SiLU(), nn.Linear(64, 64), nn.SiLU(), nn.Linear(64, 2), nn.Tanh())
2021
+
2022
+ @staticmethod
2023
+ def nett():
2024
 + """Get the translation model in a single invertible mapping."""
2025
+ return nn.Sequential(nn.Linear(2, 64), nn.SiLU(), nn.Linear(64, 64), nn.SiLU(), nn.Linear(64, 2))
2026
+
2027
+ @property
2028
+ def prior(self):
2029
+ """The prior distribution."""
2030
+ return torch.distributions.MultivariateNormal(self.loc, self.cov)
2031
+
2032
+ def __init__(self):
2033
+ super().__init__()
2034
+
2035
+ self.register_buffer("loc", torch.zeros(2))
2036
+ self.register_buffer("cov", torch.eye(2))
2037
+ self.register_buffer("mask", torch.tensor([[0, 1], [1, 0]] * 3, dtype=torch.float32))
2038
+
2039
+ self.s = torch.nn.ModuleList([self.nets() for _ in range(len(self.mask))])
2040
+ self.t = torch.nn.ModuleList([self.nett() for _ in range(len(self.mask))])
2041
+ self.init_weights()
2042
+
2043
+ def init_weights(self):
2044
 + """Initialize model weights."""
2045
+ for m in self.modules():
2046
+ if isinstance(m, nn.Linear):
2047
+ nn.init.xavier_uniform_(m.weight, gain=0.01)
2048
+
2049
+ def backward_p(self, x):
2050
 + """Apply mapping from the data space to the latent space and calculate the log determinant of the Jacobian
2051
+ matrix.
2052
+ """
2053
+ log_det_jacob, z = x.new_zeros(x.shape[0]), x
2054
+ for i in reversed(range(len(self.t))):
2055
+ z_ = self.mask[i] * z
2056
+ s = self.s[i](z_) * (1 - self.mask[i])
2057
+ t = self.t[i](z_) * (1 - self.mask[i])
2058
+ z = (1 - self.mask[i]) * (z - t) * torch.exp(-s) + z_
2059
+ log_det_jacob -= s.sum(dim=1)
2060
+ return z, log_det_jacob
2061
+
2062
+ def log_prob(self, x):
2063
+ """Calculate the log probability of given sample in data space."""
2064
+ if x.dtype == torch.float32 and self.s[0][0].weight.dtype != torch.float32:
2065
+ self.float()
2066
+ z, log_det = self.backward_p(x)
2067
+ return self.prior.log_prob(z) + log_det