ultralytics 8.1.29__py3-none-any.whl → 8.3.62__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247)
  1. tests/__init__.py +22 -0
  2. tests/conftest.py +83 -0
  3. tests/test_cli.py +122 -0
  4. tests/test_cuda.py +155 -0
  5. tests/test_engine.py +131 -0
  6. tests/test_exports.py +216 -0
  7. tests/test_integrations.py +150 -0
  8. tests/test_python.py +615 -0
  9. tests/test_solutions.py +94 -0
  10. ultralytics/__init__.py +11 -8
  11. ultralytics/cfg/__init__.py +569 -131
  12. ultralytics/cfg/datasets/Argoverse.yaml +2 -1
  13. ultralytics/cfg/datasets/DOTAv1.5.yaml +3 -2
  14. ultralytics/cfg/datasets/DOTAv1.yaml +3 -2
  15. ultralytics/cfg/datasets/GlobalWheat2020.yaml +3 -2
  16. ultralytics/cfg/datasets/ImageNet.yaml +2 -1
  17. ultralytics/cfg/datasets/Objects365.yaml +5 -4
  18. ultralytics/cfg/datasets/SKU-110K.yaml +2 -1
  19. ultralytics/cfg/datasets/VOC.yaml +3 -2
  20. ultralytics/cfg/datasets/VisDrone.yaml +6 -5
  21. ultralytics/cfg/datasets/african-wildlife.yaml +25 -0
  22. ultralytics/cfg/datasets/brain-tumor.yaml +23 -0
  23. ultralytics/cfg/datasets/carparts-seg.yaml +3 -2
  24. ultralytics/cfg/datasets/coco-pose.yaml +7 -6
  25. ultralytics/cfg/datasets/coco.yaml +3 -2
  26. ultralytics/cfg/datasets/coco128-seg.yaml +4 -3
  27. ultralytics/cfg/datasets/coco128.yaml +4 -3
  28. ultralytics/cfg/datasets/coco8-pose.yaml +3 -2
  29. ultralytics/cfg/datasets/coco8-seg.yaml +3 -2
  30. ultralytics/cfg/datasets/coco8.yaml +3 -2
  31. ultralytics/cfg/datasets/crack-seg.yaml +3 -2
  32. ultralytics/cfg/datasets/dog-pose.yaml +24 -0
  33. ultralytics/cfg/datasets/dota8.yaml +3 -2
  34. ultralytics/cfg/datasets/hand-keypoints.yaml +26 -0
  35. ultralytics/cfg/datasets/lvis.yaml +1236 -0
  36. ultralytics/cfg/datasets/medical-pills.yaml +22 -0
  37. ultralytics/cfg/datasets/open-images-v7.yaml +2 -1
  38. ultralytics/cfg/datasets/package-seg.yaml +5 -4
  39. ultralytics/cfg/datasets/signature.yaml +21 -0
  40. ultralytics/cfg/datasets/tiger-pose.yaml +3 -2
  41. ultralytics/cfg/datasets/xView.yaml +2 -1
  42. ultralytics/cfg/default.yaml +14 -11
  43. ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml +24 -0
  44. ultralytics/cfg/models/11/yolo11-cls.yaml +33 -0
  45. ultralytics/cfg/models/11/yolo11-obb.yaml +50 -0
  46. ultralytics/cfg/models/11/yolo11-pose.yaml +51 -0
  47. ultralytics/cfg/models/11/yolo11-seg.yaml +50 -0
  48. ultralytics/cfg/models/11/yolo11.yaml +50 -0
  49. ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +5 -2
  50. ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +5 -2
  51. ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +5 -2
  52. ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +5 -2
  53. ultralytics/cfg/models/v10/yolov10b.yaml +45 -0
  54. ultralytics/cfg/models/v10/yolov10l.yaml +45 -0
  55. ultralytics/cfg/models/v10/yolov10m.yaml +45 -0
  56. ultralytics/cfg/models/v10/yolov10n.yaml +45 -0
  57. ultralytics/cfg/models/v10/yolov10s.yaml +45 -0
  58. ultralytics/cfg/models/v10/yolov10x.yaml +45 -0
  59. ultralytics/cfg/models/v3/yolov3-spp.yaml +5 -2
  60. ultralytics/cfg/models/v3/yolov3-tiny.yaml +5 -2
  61. ultralytics/cfg/models/v3/yolov3.yaml +5 -2
  62. ultralytics/cfg/models/v5/yolov5-p6.yaml +5 -2
  63. ultralytics/cfg/models/v5/yolov5.yaml +5 -2
  64. ultralytics/cfg/models/v6/yolov6.yaml +5 -2
  65. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +5 -2
  66. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +5 -2
  67. ultralytics/cfg/models/v8/yolov8-cls.yaml +5 -2
  68. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +6 -2
  69. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +6 -2
  70. ultralytics/cfg/models/v8/yolov8-ghost.yaml +5 -2
  71. ultralytics/cfg/models/v8/yolov8-obb.yaml +5 -2
  72. ultralytics/cfg/models/v8/yolov8-p2.yaml +5 -2
  73. ultralytics/cfg/models/v8/yolov8-p6.yaml +10 -7
  74. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +5 -2
  75. ultralytics/cfg/models/v8/yolov8-pose.yaml +5 -2
  76. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +5 -2
  77. ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +5 -2
  78. ultralytics/cfg/models/v8/yolov8-seg.yaml +5 -2
  79. ultralytics/cfg/models/v8/yolov8-world.yaml +5 -2
  80. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +5 -2
  81. ultralytics/cfg/models/v8/yolov8.yaml +5 -2
  82. ultralytics/cfg/models/v9/yolov9c-seg.yaml +41 -0
  83. ultralytics/cfg/models/v9/yolov9c.yaml +30 -25
  84. ultralytics/cfg/models/v9/yolov9e-seg.yaml +64 -0
  85. ultralytics/cfg/models/v9/yolov9e.yaml +46 -42
  86. ultralytics/cfg/models/v9/yolov9m.yaml +41 -0
  87. ultralytics/cfg/models/v9/yolov9s.yaml +41 -0
  88. ultralytics/cfg/models/v9/yolov9t.yaml +41 -0
  89. ultralytics/cfg/solutions/default.yaml +24 -0
  90. ultralytics/cfg/trackers/botsort.yaml +8 -5
  91. ultralytics/cfg/trackers/bytetrack.yaml +8 -5
  92. ultralytics/data/__init__.py +14 -3
  93. ultralytics/data/annotator.py +37 -15
  94. ultralytics/data/augment.py +1783 -289
  95. ultralytics/data/base.py +62 -27
  96. ultralytics/data/build.py +36 -8
  97. ultralytics/data/converter.py +196 -36
  98. ultralytics/data/dataset.py +233 -94
  99. ultralytics/data/loaders.py +199 -96
  100. ultralytics/data/split_dota.py +39 -29
  101. ultralytics/data/utils.py +110 -40
  102. ultralytics/engine/__init__.py +1 -1
  103. ultralytics/engine/exporter.py +569 -242
  104. ultralytics/engine/model.py +604 -252
  105. ultralytics/engine/predictor.py +22 -11
  106. ultralytics/engine/results.py +1228 -218
  107. ultralytics/engine/trainer.py +190 -129
  108. ultralytics/engine/tuner.py +18 -18
  109. ultralytics/engine/validator.py +18 -15
  110. ultralytics/hub/__init__.py +31 -13
  111. ultralytics/hub/auth.py +11 -7
  112. ultralytics/hub/google/__init__.py +159 -0
  113. ultralytics/hub/session.py +128 -94
  114. ultralytics/hub/utils.py +20 -21
  115. ultralytics/models/__init__.py +4 -2
  116. ultralytics/models/fastsam/__init__.py +2 -3
  117. ultralytics/models/fastsam/model.py +26 -4
  118. ultralytics/models/fastsam/predict.py +127 -63
  119. ultralytics/models/fastsam/utils.py +1 -44
  120. ultralytics/models/fastsam/val.py +1 -1
  121. ultralytics/models/nas/__init__.py +1 -1
  122. ultralytics/models/nas/model.py +21 -10
  123. ultralytics/models/nas/predict.py +3 -6
  124. ultralytics/models/nas/val.py +4 -4
  125. ultralytics/models/rtdetr/__init__.py +1 -1
  126. ultralytics/models/rtdetr/model.py +1 -1
  127. ultralytics/models/rtdetr/predict.py +6 -8
  128. ultralytics/models/rtdetr/train.py +6 -2
  129. ultralytics/models/rtdetr/val.py +3 -3
  130. ultralytics/models/sam/__init__.py +3 -3
  131. ultralytics/models/sam/amg.py +29 -23
  132. ultralytics/models/sam/build.py +211 -13
  133. ultralytics/models/sam/model.py +91 -30
  134. ultralytics/models/sam/modules/__init__.py +1 -1
  135. ultralytics/models/sam/modules/blocks.py +1129 -0
  136. ultralytics/models/sam/modules/decoders.py +381 -53
  137. ultralytics/models/sam/modules/encoders.py +515 -324
  138. ultralytics/models/sam/modules/memory_attention.py +237 -0
  139. ultralytics/models/sam/modules/sam.py +969 -21
  140. ultralytics/models/sam/modules/tiny_encoder.py +425 -154
  141. ultralytics/models/sam/modules/transformer.py +159 -60
  142. ultralytics/models/sam/modules/utils.py +293 -0
  143. ultralytics/models/sam/predict.py +1263 -132
  144. ultralytics/models/utils/__init__.py +1 -1
  145. ultralytics/models/utils/loss.py +36 -24
  146. ultralytics/models/utils/ops.py +3 -7
  147. ultralytics/models/yolo/__init__.py +3 -3
  148. ultralytics/models/yolo/classify/__init__.py +1 -1
  149. ultralytics/models/yolo/classify/predict.py +7 -8
  150. ultralytics/models/yolo/classify/train.py +17 -22
  151. ultralytics/models/yolo/classify/val.py +8 -4
  152. ultralytics/models/yolo/detect/__init__.py +1 -1
  153. ultralytics/models/yolo/detect/predict.py +3 -5
  154. ultralytics/models/yolo/detect/train.py +11 -4
  155. ultralytics/models/yolo/detect/val.py +90 -52
  156. ultralytics/models/yolo/model.py +14 -9
  157. ultralytics/models/yolo/obb/__init__.py +1 -1
  158. ultralytics/models/yolo/obb/predict.py +2 -2
  159. ultralytics/models/yolo/obb/train.py +5 -3
  160. ultralytics/models/yolo/obb/val.py +41 -23
  161. ultralytics/models/yolo/pose/__init__.py +1 -1
  162. ultralytics/models/yolo/pose/predict.py +3 -5
  163. ultralytics/models/yolo/pose/train.py +2 -2
  164. ultralytics/models/yolo/pose/val.py +51 -17
  165. ultralytics/models/yolo/segment/__init__.py +1 -1
  166. ultralytics/models/yolo/segment/predict.py +3 -5
  167. ultralytics/models/yolo/segment/train.py +2 -2
  168. ultralytics/models/yolo/segment/val.py +60 -19
  169. ultralytics/models/yolo/world/__init__.py +5 -0
  170. ultralytics/models/yolo/world/train.py +92 -0
  171. ultralytics/models/yolo/world/train_world.py +109 -0
  172. ultralytics/nn/__init__.py +1 -1
  173. ultralytics/nn/autobackend.py +228 -93
  174. ultralytics/nn/modules/__init__.py +39 -14
  175. ultralytics/nn/modules/activation.py +21 -0
  176. ultralytics/nn/modules/block.py +526 -66
  177. ultralytics/nn/modules/conv.py +24 -7
  178. ultralytics/nn/modules/head.py +177 -34
  179. ultralytics/nn/modules/transformer.py +6 -5
  180. ultralytics/nn/modules/utils.py +1 -2
  181. ultralytics/nn/tasks.py +225 -77
  182. ultralytics/solutions/__init__.py +30 -1
  183. ultralytics/solutions/ai_gym.py +96 -143
  184. ultralytics/solutions/analytics.py +247 -0
  185. ultralytics/solutions/distance_calculation.py +78 -135
  186. ultralytics/solutions/heatmap.py +93 -247
  187. ultralytics/solutions/object_counter.py +184 -259
  188. ultralytics/solutions/parking_management.py +246 -0
  189. ultralytics/solutions/queue_management.py +112 -0
  190. ultralytics/solutions/region_counter.py +116 -0
  191. ultralytics/solutions/security_alarm.py +144 -0
  192. ultralytics/solutions/solutions.py +178 -0
  193. ultralytics/solutions/speed_estimation.py +86 -174
  194. ultralytics/solutions/streamlit_inference.py +190 -0
  195. ultralytics/solutions/trackzone.py +68 -0
  196. ultralytics/trackers/__init__.py +1 -1
  197. ultralytics/trackers/basetrack.py +32 -13
  198. ultralytics/trackers/bot_sort.py +61 -28
  199. ultralytics/trackers/byte_tracker.py +83 -51
  200. ultralytics/trackers/track.py +21 -6
  201. ultralytics/trackers/utils/__init__.py +1 -1
  202. ultralytics/trackers/utils/gmc.py +62 -48
  203. ultralytics/trackers/utils/kalman_filter.py +166 -35
  204. ultralytics/trackers/utils/matching.py +40 -21
  205. ultralytics/utils/__init__.py +511 -239
  206. ultralytics/utils/autobatch.py +40 -22
  207. ultralytics/utils/benchmarks.py +266 -85
  208. ultralytics/utils/callbacks/__init__.py +1 -1
  209. ultralytics/utils/callbacks/base.py +1 -3
  210. ultralytics/utils/callbacks/clearml.py +7 -6
  211. ultralytics/utils/callbacks/comet.py +39 -17
  212. ultralytics/utils/callbacks/dvc.py +1 -1
  213. ultralytics/utils/callbacks/hub.py +16 -16
  214. ultralytics/utils/callbacks/mlflow.py +28 -24
  215. ultralytics/utils/callbacks/neptune.py +6 -2
  216. ultralytics/utils/callbacks/raytune.py +3 -4
  217. ultralytics/utils/callbacks/tensorboard.py +18 -18
  218. ultralytics/utils/callbacks/wb.py +27 -20
  219. ultralytics/utils/checks.py +160 -100
  220. ultralytics/utils/dist.py +2 -1
  221. ultralytics/utils/downloads.py +40 -34
  222. ultralytics/utils/errors.py +1 -1
  223. ultralytics/utils/files.py +72 -38
  224. ultralytics/utils/instance.py +41 -19
  225. ultralytics/utils/loss.py +83 -55
  226. ultralytics/utils/metrics.py +61 -56
  227. ultralytics/utils/ops.py +94 -89
  228. ultralytics/utils/patches.py +30 -14
  229. ultralytics/utils/plotting.py +600 -269
  230. ultralytics/utils/tal.py +67 -26
  231. ultralytics/utils/torch_utils.py +302 -102
  232. ultralytics/utils/triton.py +2 -1
  233. ultralytics/utils/tuner.py +21 -12
  234. ultralytics-8.3.62.dist-info/METADATA +370 -0
  235. ultralytics-8.3.62.dist-info/RECORD +241 -0
  236. {ultralytics-8.1.29.dist-info → ultralytics-8.3.62.dist-info}/WHEEL +1 -1
  237. ultralytics/data/explorer/__init__.py +0 -5
  238. ultralytics/data/explorer/explorer.py +0 -472
  239. ultralytics/data/explorer/gui/__init__.py +0 -1
  240. ultralytics/data/explorer/gui/dash.py +0 -268
  241. ultralytics/data/explorer/utils.py +0 -166
  242. ultralytics/models/fastsam/prompt.py +0 -357
  243. ultralytics-8.1.29.dist-info/METADATA +0 -373
  244. ultralytics-8.1.29.dist-info/RECORD +0 -197
  245. {ultralytics-8.1.29.dist-info → ultralytics-8.3.62.dist-info}/LICENSE +0 -0
  246. {ultralytics-8.1.29.dist-info → ultralytics-8.3.62.dist-info}/entry_points.txt +0 -0
  247. {ultralytics-8.1.29.dist-info → ultralytics-8.3.62.dist-info}/top_level.txt +0 -0
ultralytics/nn/modules/conv.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Convolution modules."""
 
 import math
@@ -21,6 +21,7 @@ __all__ = (
     "CBAM",
     "Concat",
     "RepConv",
+    "Index",
 )
 
 
@@ -50,7 +51,7 @@ class Conv(nn.Module):
         return self.act(self.bn(self.conv(x)))
 
     def forward_fuse(self, x):
-        """Perform transposed convolution of 2D data."""
+        """Apply convolution and activation without batch normalization."""
         return self.act(self.conv(x))
 
 
@@ -158,9 +159,7 @@ class GhostConv(nn.Module):
     """Ghost Convolution https://github.com/huawei-noah/ghostnet."""
 
     def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
-        """Initializes the GhostConv object with input channels, output channels, kernel size, stride, groups and
-        activation.
-        """
+        """Initializes Ghost Convolution module with primary and cheap operations for efficient feature learning."""
         super().__init__()
         c_ = c2 // 2  # hidden channels
         self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
@@ -211,7 +210,8 @@ class RepConv(nn.Module):
         kernelid, biasid = self._fuse_bn_tensor(self.bn)
         return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
 
-    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
+    @staticmethod
+    def _pad_1x1_to_3x3_tensor(kernel1x1):
         """Pads a 1x1 tensor to a 3x3 tensor."""
         if kernel1x1 is None:
             return 0
@@ -296,7 +296,7 @@ class SpatialAttention(nn.Module):
     def __init__(self, kernel_size=7):
         """Initialize Spatial-attention module with kernel size argument."""
         super().__init__()
-        assert kernel_size in (3, 7), "kernel size must be 3 or 7"
+        assert kernel_size in {3, 7}, "kernel size must be 3 or 7"
         padding = 3 if kernel_size == 7 else 1
         self.cv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
         self.act = nn.Sigmoid()
@@ -331,3 +331,20 @@ class Concat(nn.Module):
     def forward(self, x):
         """Forward pass for the YOLOv8 mask Proto module."""
         return torch.cat(x, self.d)
+
+
+class Index(nn.Module):
+    """Returns a particular index of the input."""
+
+    def __init__(self, c1, c2, index=0):
+        """Returns a particular index of the input."""
+        super().__init__()
+        self.index = index
+
+    def forward(self, x):
+        """
+        Forward pass.
+
+        Expects a list of tensors as input.
+        """
+        return x[self.index]
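
Note: the new Index module simply selects one tensor from a list of tensors; the c1/c2 arguments appear to exist only for the YAML model parser's channel bookkeeping and are unused at runtime. A minimal sketch of its behavior:

    import torch
    from ultralytics.nn.modules.conv import Index  # added in this release

    feats = [torch.randn(1, c, 32, 32) for c in (64, 128, 256)]  # e.g. multi-scale feature maps
    layer = Index(c1=128, c2=128, index=1)  # only `index` affects the forward pass
    print(layer(feats).shape)  # torch.Size([1, 128, 32, 32])
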
ultralytics/nn/modules/head.py
@@ -1,6 +1,7 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Model head modules."""
 
+import copy
 import math
 
 import torch
@@ -8,25 +9,30 @@ import torch.nn as nn
 from torch.nn.init import constant_, xavier_uniform_
 
 from ultralytics.utils.tal import TORCH_1_10, dist2bbox, dist2rbox, make_anchors
-from .block import DFL, Proto, ContrastiveHead, BNContrastiveHead
-from .conv import Conv
+
+from .block import DFL, BNContrastiveHead, ContrastiveHead, Proto
+from .conv import Conv, DWConv
 from .transformer import MLP, DeformableTransformerDecoder, DeformableTransformerDecoderLayer
 from .utils import bias_init_with_prob, linear_init
 
-__all__ = "Detect", "Segment", "Pose", "Classify", "OBB", "RTDETRDecoder"
+__all__ = "Detect", "Segment", "Pose", "Classify", "OBB", "RTDETRDecoder", "v10Detect"
 
 
 class Detect(nn.Module):
-    """YOLOv8 Detect head for detection models."""
+    """YOLO Detect head for detection models."""
 
     dynamic = False  # force grid reconstruction
     export = False  # export mode
+    format = None  # export format
+    end2end = False  # end2end
+    max_det = 300  # max_det
     shape = None
     anchors = torch.empty(0)  # init
     strides = torch.empty(0)  # init
+    legacy = False  # backward compatibility for v3/v5/v8/v9 models
 
     def __init__(self, nc=80, ch=()):
-        """Initializes the YOLOv8 detection layer with specified number of classes and channels."""
+        """Initializes the YOLO detection layer with specified number of classes and channels."""
         super().__init__()
         self.nc = nc  # number of classes
         self.nl = len(ch)  # number of detection layers
@@ -37,30 +43,76 @@ class Detect(nn.Module):
         self.cv2 = nn.ModuleList(
             nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch
         )
-        self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
+        self.cv3 = (
+            nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
+            if self.legacy
+            else nn.ModuleList(
+                nn.Sequential(
+                    nn.Sequential(DWConv(x, x, 3), Conv(x, c3, 1)),
+                    nn.Sequential(DWConv(c3, c3, 3), Conv(c3, c3, 1)),
+                    nn.Conv2d(c3, self.nc, 1),
+                )
+                for x in ch
+            )
+        )
         self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()
 
+        if self.end2end:
+            self.one2one_cv2 = copy.deepcopy(self.cv2)
+            self.one2one_cv3 = copy.deepcopy(self.cv3)
+
     def forward(self, x):
         """Concatenates and returns predicted bounding boxes and class probabilities."""
+        if self.end2end:
+            return self.forward_end2end(x)
+
         for i in range(self.nl):
             x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
         if self.training:  # Training path
             return x
+        y = self._inference(x)
+        return y if self.export else (y, x)
+
+    def forward_end2end(self, x):
+        """
+        Performs forward pass of the v10Detect module.
+
+        Args:
+            x (tensor): Input tensor.
+
+        Returns:
+            (dict, tensor): If not in training mode, returns a dictionary containing the outputs of both one2many and one2one detections.
+                If in training mode, returns a dictionary containing the outputs of one2many and one2one detections separately.
+        """
+        x_detach = [xi.detach() for xi in x]
+        one2one = [
+            torch.cat((self.one2one_cv2[i](x_detach[i]), self.one2one_cv3[i](x_detach[i])), 1) for i in range(self.nl)
+        ]
+        for i in range(self.nl):
+            x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
+        if self.training:  # Training path
+            return {"one2many": x, "one2one": one2one}
+
+        y = self._inference(one2one)
+        y = self.postprocess(y.permute(0, 2, 1), self.max_det, self.nc)
+        return y if self.export else (y, {"one2many": x, "one2one": one2one})
 
+    def _inference(self, x):
+        """Decode predicted bounding boxes and class probabilities based on multiple-level feature maps."""
         # Inference path
         shape = x[0].shape  # BCHW
         x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
-        if self.dynamic or self.shape != shape:
+        if self.format != "imx" and (self.dynamic or self.shape != shape):
             self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
             self.shape = shape
 
-        if self.export and self.format in ("saved_model", "pb", "tflite", "edgetpu", "tfjs"):  # avoid TF FlexSplitV ops
+        if self.export and self.format in {"saved_model", "pb", "tflite", "edgetpu", "tfjs"}:  # avoid TF FlexSplitV ops
            box = x_cat[:, : self.reg_max * 4]
            cls = x_cat[:, self.reg_max * 4 :]
        else:
            box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
 
-        if self.export and self.format in ("tflite", "edgetpu"):
+        if self.export and self.format in {"tflite", "edgetpu"}:
             # Precompute normalization factor to increase numerical stability
             # See https://github.com/ultralytics/ultralytics/issues/7371
             grid_h = shape[2]
@@ -68,11 +120,15 @@ class Detect(nn.Module):
             grid_size = torch.tensor([grid_w, grid_h, grid_w, grid_h], device=box.device).reshape(1, 4, 1)
             norm = self.strides / (self.stride[0] * grid_size)
             dbox = self.decode_bboxes(self.dfl(box) * norm, self.anchors.unsqueeze(0) * norm[:, :2])
+        elif self.export and self.format == "imx":
+            dbox = self.decode_bboxes(
+                self.dfl(box) * self.strides, self.anchors.unsqueeze(0) * self.strides, xywh=False
+            )
+            return dbox.transpose(1, 2), cls.sigmoid().permute(0, 2, 1)
         else:
             dbox = self.decode_bboxes(self.dfl(box), self.anchors.unsqueeze(0)) * self.strides
 
-        y = torch.cat((dbox, cls.sigmoid()), 1)
-        return y if self.export else (y, x)
+        return torch.cat((dbox, cls.sigmoid()), 1)
 
     def bias_init(self):
         """Initialize Detect() biases, WARNING: requires stride availability."""
@@ -82,14 +138,42 @@ class Detect(nn.Module):
         for a, b, s in zip(m.cv2, m.cv3, m.stride):  # from
             a[-1].bias.data[:] = 1.0  # box
             b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
+        if self.end2end:
+            for a, b, s in zip(m.one2one_cv2, m.one2one_cv3, m.stride):  # from
+                a[-1].bias.data[:] = 1.0  # box
+                b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
 
-    def decode_bboxes(self, bboxes, anchors):
+    def decode_bboxes(self, bboxes, anchors, xywh=True):
         """Decode bounding boxes."""
-        return dist2bbox(bboxes, anchors, xywh=True, dim=1)
+        return dist2bbox(bboxes, anchors, xywh=xywh and (not self.end2end), dim=1)
+
+    @staticmethod
+    def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80):
+        """
+        Post-processes YOLO model predictions.
+
+        Args:
+            preds (torch.Tensor): Raw predictions with shape (batch_size, num_anchors, 4 + nc) with last dimension
+                format [x, y, w, h, class_probs].
+            max_det (int): Maximum detections per image.
+            nc (int, optional): Number of classes. Default: 80.
+
+        Returns:
+            (torch.Tensor): Processed predictions with shape (batch_size, min(max_det, num_anchors), 6) and last
+                dimension format [x, y, w, h, max_class_prob, class_index].
+        """
+        batch_size, anchors, _ = preds.shape  # i.e. shape(16,8400,84)
+        boxes, scores = preds.split([4, nc], dim=-1)
+        index = scores.amax(dim=-1).topk(min(max_det, anchors))[1].unsqueeze(-1)
+        boxes = boxes.gather(dim=1, index=index.repeat(1, 1, 4))
+        scores = scores.gather(dim=1, index=index.repeat(1, 1, nc))
+        scores, index = scores.flatten(1).topk(min(max_det, anchors))
+        i = torch.arange(batch_size)[..., None]  # batch indices
+        return torch.cat([boxes[i, index // nc], scores[..., None], (index % nc)[..., None].float()], dim=-1)
 
 
 class Segment(Detect):
-    """YOLOv8 Segment head for segmentation models."""
+    """YOLO Segment head for segmentation models."""
 
     def __init__(self, nc=80, nm=32, npr=256, ch=()):
         """Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers."""
@@ -97,7 +181,6 @@ class Segment(Detect):
         self.nm = nm  # number of masks
         self.npr = npr  # number of protos
         self.proto = Proto(ch[0], self.npr, self.nm)  # protos
-        self.detect = Detect.forward
 
         c4 = max(ch[0] // 4, self.nm)
         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch)
@@ -108,20 +191,19 @@
         bs = p.shape[0]  # batch size
 
         mc = torch.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2)  # mask coefficients
-        x = self.detect(self, x)
+        x = Detect.forward(self, x)
         if self.training:
             return x, mc, p
         return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))
 
 
 class OBB(Detect):
-    """YOLOv8 OBB detection head for detection with rotation models."""
+    """YOLO OBB detection head for detection with rotation models."""
 
     def __init__(self, nc=80, ne=1, ch=()):
         """Initialize OBB with number of classes `nc` and layer channels `ch`."""
         super().__init__(nc, ch)
         self.ne = ne  # number of extra parameters
-        self.detect = Detect.forward
 
         c4 = max(ch[0] // 4, self.ne)
         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.ne, 1)) for x in ch)
@@ -135,7 +217,7 @@ class OBB(Detect):
         # angle = angle.sigmoid() * math.pi / 2  # [0, pi/2]
         if not self.training:
             self.angle = angle
-        x = self.detect(self, x)
+        x = Detect.forward(self, x)
         if self.training:
             return x, angle
         return torch.cat([x, angle], 1) if self.export else (torch.cat([x[0], angle], 1), (x[1], angle))
@@ -146,14 +228,13 @@
 
 
 class Pose(Detect):
-    """YOLOv8 Pose head for keypoints models."""
+    """YOLO Pose head for keypoints models."""
 
     def __init__(self, nc=80, kpt_shape=(17, 3), ch=()):
         """Initialize YOLO network with default parameters and Convolutional Layers."""
         super().__init__(nc, ch)
         self.kpt_shape = kpt_shape  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
         self.nk = kpt_shape[0] * kpt_shape[1]  # number of keypoints total
-        self.detect = Detect.forward
 
         c4 = max(ch[0] // 4, self.nk)
         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch)
@@ -162,7 +243,7 @@
         """Perform forward pass through YOLO model and return predictions."""
         bs = x[0].shape[0]  # batch size
         kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1)  # (bs, 17*3, h*w)
-        x = self.detect(self, x)
+        x = Detect.forward(self, x)
         if self.training:
             return x, kpt
         pred_kpt = self.kpts_decode(bs, kpt)
@@ -171,9 +252,21 @@
     def kpts_decode(self, bs, kpts):
         """Decodes keypoints."""
         ndim = self.kpt_shape[1]
-        if self.export:  # required for TFLite export to avoid 'PLACEHOLDER_FOR_GREATER_OP_CODES' bug
-            y = kpts.view(bs, *self.kpt_shape, -1)
-            a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
+        if self.export:
+            if self.format in {
+                "tflite",
+                "edgetpu",
+            }:  # required for TFLite export to avoid 'PLACEHOLDER_FOR_GREATER_OP_CODES' bug
+                # Precompute normalization factor to increase numerical stability
+                y = kpts.view(bs, *self.kpt_shape, -1)
+                grid_h, grid_w = self.shape[2], self.shape[3]
+                grid_size = torch.tensor([grid_w, grid_h], device=y.device).reshape(1, 2, 1)
+                norm = self.strides / (self.stride[0] * grid_size)
+                a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * norm
+            else:
+                # NCNN fix
+                y = kpts.view(bs, *self.kpt_shape, -1)
+                a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
         if ndim == 3:
             a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2)
         return a.view(bs, self.nk, -1)
@@ -187,12 +280,12 @@
 
 
 class Classify(nn.Module):
-    """YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)."""
+    """YOLO classification head, i.e. x(b,c1,20,20) to x(b,c2)."""
+
+    export = False  # export mode
 
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1):
-        """Initializes YOLOv8 classification head with specified input and output channels, kernel size, stride,
-        padding, and groups.
-        """
+        """Initializes YOLO classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape."""
         super().__init__()
         c_ = 1280  # efficientnet_b0 size
         self.conv = Conv(c1, c_, k, s, p, g)
@@ -205,12 +298,17 @@
         if isinstance(x, list):
             x = torch.cat(x, 1)
         x = self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
-        return x if self.training else x.softmax(1)
+        if self.training:
+            return x
+        y = x.softmax(1)  # get final output
+        return y if self.export else (y, x)
 
 
 class WorldDetect(Detect):
+    """Head for integrating YOLO detection models with semantic understanding from text embeddings."""
+
    def __init__(self, nc=80, embed=512, with_bn=False, ch=()):
-        """Initialize YOLOv8 detection layer with nc classes and layer channels ch."""
+        """Initialize YOLO detection layer with nc classes and layer channels ch."""
         super().__init__(nc, ch)
         c3 = max(ch[0], min(self.nc, 100))
         self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, embed, 1)) for x in ch)
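
Note on the Classify change above: outside training the head now returns a (softmax, logits) tuple rather than the softmax alone, with the new export flag restoring the single-tensor output for exported models. A minimal sketch with hypothetical channel sizes:

    import torch
    from ultralytics.nn.modules.head import Classify

    head = Classify(c1=64, c2=10).eval()
    probs, logits = head(torch.randn(1, 64, 20, 20))  # eval mode: (softmax, raw logits)
    print(probs.shape, probs.sum(1))  # torch.Size([1, 10]); probabilities sum to 1
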
@@ -230,13 +328,13 @@ class WorldDetect(Detect):
             self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
             self.shape = shape
 
-        if self.export and self.format in ("saved_model", "pb", "tflite", "edgetpu", "tfjs"):  # avoid TF FlexSplitV ops
+        if self.export and self.format in {"saved_model", "pb", "tflite", "edgetpu", "tfjs"}:  # avoid TF FlexSplitV ops
             box = x_cat[:, : self.reg_max * 4]
             cls = x_cat[:, self.reg_max * 4 :]
         else:
             box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
 
-        if self.export and self.format in ("tflite", "edgetpu"):
+        if self.export and self.format in {"tflite", "edgetpu"}:
             # Precompute normalization factor to increase numerical stability
             # See https://github.com/ultralytics/ultralytics/issues/7371
             grid_h = shape[2]
@@ -250,6 +348,15 @@
         y = torch.cat((dbox, cls.sigmoid()), 1)
         return y if self.export else (y, x)
 
+    def bias_init(self):
+        """Initialize Detect() biases, WARNING: requires stride availability."""
+        m = self  # self.model[-1]  # Detect() module
+        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1
+        # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # nominal class frequency
+        for a, b, s in zip(m.cv2, m.cv3, m.stride):  # from
+            a[-1].bias.data[:] = 1.0  # box
+            # b[-1].bias.data[:] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
+
 
 class RTDETRDecoder(nn.Module):
     """
@@ -480,3 +587,39 @@
         xavier_uniform_(self.query_pos_head.layers[1].weight)
         for layer in self.input_proj:
             xavier_uniform_(layer[0].weight)
+
+
+class v10Detect(Detect):
+    """
+    v10 Detection head from https://arxiv.org/pdf/2405.14458.
+
+    Args:
+        nc (int): Number of classes.
+        ch (tuple): Tuple of channel sizes.
+
+    Attributes:
+        max_det (int): Maximum number of detections.
+
+    Methods:
+        __init__(self, nc=80, ch=()): Initializes the v10Detect object.
+        forward(self, x): Performs forward pass of the v10Detect module.
+        bias_init(self): Initializes biases of the Detect module.
+
+    """
+
+    end2end = True
+
+    def __init__(self, nc=80, ch=()):
+        """Initializes the v10Detect object with the specified number of classes and input channels."""
+        super().__init__(nc, ch)
+        c3 = max(ch[0], min(self.nc, 100))  # channels
+        # Light cls head
+        self.cv3 = nn.ModuleList(
+            nn.Sequential(
+                nn.Sequential(Conv(x, x, 3, g=x), Conv(x, c3, 1)),
+                nn.Sequential(Conv(c3, c3, 3, g=c3), Conv(c3, c3, 1)),
+                nn.Conv2d(c3, self.nc, 1),
+            )
+            for x in ch
+        )
+        self.one2one_cv3 = copy.deepcopy(self.cv3)
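
Note: with end2end = True, forward() routes through forward_end2end() and, outside training, returns already-postprocessed NMS-free detections. A minimal standalone sketch of v10Detect; assigning stride by hand is an assumption for illustration (a real model sets it during build):

    import torch
    from ultralytics.nn.modules.head import v10Detect

    head = v10Detect(nc=80, ch=(64, 128, 256)).eval()
    head.stride = torch.tensor([8.0, 16.0, 32.0])  # normally computed by the parent model
    feats = [torch.randn(1, c, s, s) for c, s in zip((64, 128, 256), (80, 40, 20))]
    preds, raw = head(feats)  # (top-k detections, {"one2many": ..., "one2one": ...})
    print(preds.shape)  # torch.Size([1, 300, 6]) -> max_det NMS-free detections
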
ultralytics/nn/modules/transformer.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Transformer modules."""
 
 import math
@@ -174,18 +174,20 @@ class MLPBlock(nn.Module):
 class MLP(nn.Module):
     """Implements a simple multi-layer perceptron (also called FFN)."""
 
-    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
+    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, act=nn.ReLU, sigmoid=False):
         """Initialize the MLP with specified input, hidden, output dimensions and number of layers."""
         super().__init__()
         self.num_layers = num_layers
         h = [hidden_dim] * (num_layers - 1)
         self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
+        self.sigmoid = sigmoid
+        self.act = act()
 
     def forward(self, x):
         """Forward pass for the entire MLP."""
         for i, layer in enumerate(self.layers):
-            x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
-        return x
+            x = getattr(self, "act", nn.ReLU())(layer(x)) if i < self.num_layers - 1 else layer(x)
+        return x.sigmoid() if getattr(self, "sigmoid", False) else x
 
 
 class LayerNorm2d(nn.Module):
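
Note on the MLP change above: the activation is now pluggable and an optional sigmoid squashes the output; the getattr(...) guards presumably keep checkpoints pickled before these attributes existed loadable. A minimal sketch:

    import torch
    import torch.nn as nn
    from ultralytics.nn.modules.transformer import MLP

    # 2-layer MLP: 256 -> 128 -> 4, GELU between layers, sigmoid on the output
    mlp = MLP(input_dim=256, hidden_dim=128, output_dim=4, num_layers=2, act=nn.GELU, sigmoid=True)
    out = mlp(torch.randn(8, 256))
    print(out.shape)  # torch.Size([8, 4]); values squashed into [0, 1] by sigmoid=True
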
@@ -350,7 +352,6 @@ class DeformableTransformerDecoderLayer(nn.Module):
 
     def forward(self, embed, refer_bbox, feats, shapes, padding_mask=None, attn_mask=None, query_pos=None):
         """Perform the forward pass through the entire decoder layer."""
-
         # Self attention
         q = k = self.with_pos_embed(embed, query_pos)
         tgt = self.self_attn(q.transpose(0, 1), k.transpose(0, 1), embed.transpose(0, 1), attn_mask=attn_mask)[
ultralytics/nn/modules/utils.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Module utils."""
 
 import copy
@@ -50,7 +50,6 @@ def multi_scale_deformable_attn_pytorch(
 
     https://github.com/IDEA-Research/detrex/blob/main/detrex/layers/multi_scale_deform_attn.py
     """
-
     bs, _, num_heads, embed_dims = value.shape
     _, num_queries, num_heads, num_levels, num_points, _ = sampling_locations.shape
     value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1)