ultralytics 8.1.29__py3-none-any.whl → 8.3.62__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/__init__.py +22 -0
- tests/conftest.py +83 -0
- tests/test_cli.py +122 -0
- tests/test_cuda.py +155 -0
- tests/test_engine.py +131 -0
- tests/test_exports.py +216 -0
- tests/test_integrations.py +150 -0
- tests/test_python.py +615 -0
- tests/test_solutions.py +94 -0
- ultralytics/__init__.py +11 -8
- ultralytics/cfg/__init__.py +569 -131
- ultralytics/cfg/datasets/Argoverse.yaml +2 -1
- ultralytics/cfg/datasets/DOTAv1.5.yaml +3 -2
- ultralytics/cfg/datasets/DOTAv1.yaml +3 -2
- ultralytics/cfg/datasets/GlobalWheat2020.yaml +3 -2
- ultralytics/cfg/datasets/ImageNet.yaml +2 -1
- ultralytics/cfg/datasets/Objects365.yaml +5 -4
- ultralytics/cfg/datasets/SKU-110K.yaml +2 -1
- ultralytics/cfg/datasets/VOC.yaml +3 -2
- ultralytics/cfg/datasets/VisDrone.yaml +6 -5
- ultralytics/cfg/datasets/african-wildlife.yaml +25 -0
- ultralytics/cfg/datasets/brain-tumor.yaml +23 -0
- ultralytics/cfg/datasets/carparts-seg.yaml +3 -2
- ultralytics/cfg/datasets/coco-pose.yaml +7 -6
- ultralytics/cfg/datasets/coco.yaml +3 -2
- ultralytics/cfg/datasets/coco128-seg.yaml +4 -3
- ultralytics/cfg/datasets/coco128.yaml +4 -3
- ultralytics/cfg/datasets/coco8-pose.yaml +3 -2
- ultralytics/cfg/datasets/coco8-seg.yaml +3 -2
- ultralytics/cfg/datasets/coco8.yaml +3 -2
- ultralytics/cfg/datasets/crack-seg.yaml +3 -2
- ultralytics/cfg/datasets/dog-pose.yaml +24 -0
- ultralytics/cfg/datasets/dota8.yaml +3 -2
- ultralytics/cfg/datasets/hand-keypoints.yaml +26 -0
- ultralytics/cfg/datasets/lvis.yaml +1236 -0
- ultralytics/cfg/datasets/medical-pills.yaml +22 -0
- ultralytics/cfg/datasets/open-images-v7.yaml +2 -1
- ultralytics/cfg/datasets/package-seg.yaml +5 -4
- ultralytics/cfg/datasets/signature.yaml +21 -0
- ultralytics/cfg/datasets/tiger-pose.yaml +3 -2
- ultralytics/cfg/datasets/xView.yaml +2 -1
- ultralytics/cfg/default.yaml +14 -11
- ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml +24 -0
- ultralytics/cfg/models/11/yolo11-cls.yaml +33 -0
- ultralytics/cfg/models/11/yolo11-obb.yaml +50 -0
- ultralytics/cfg/models/11/yolo11-pose.yaml +51 -0
- ultralytics/cfg/models/11/yolo11-seg.yaml +50 -0
- ultralytics/cfg/models/11/yolo11.yaml +50 -0
- ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +5 -2
- ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +5 -2
- ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +5 -2
- ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +5 -2
- ultralytics/cfg/models/v10/yolov10b.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10l.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10m.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10n.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10s.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10x.yaml +45 -0
- ultralytics/cfg/models/v3/yolov3-spp.yaml +5 -2
- ultralytics/cfg/models/v3/yolov3-tiny.yaml +5 -2
- ultralytics/cfg/models/v3/yolov3.yaml +5 -2
- ultralytics/cfg/models/v5/yolov5-p6.yaml +5 -2
- ultralytics/cfg/models/v5/yolov5.yaml +5 -2
- ultralytics/cfg/models/v6/yolov6.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-cls.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +6 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +6 -2
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-obb.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-p2.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-p6.yaml +10 -7
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-pose.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-seg.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-world.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8.yaml +5 -2
- ultralytics/cfg/models/v9/yolov9c-seg.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9c.yaml +30 -25
- ultralytics/cfg/models/v9/yolov9e-seg.yaml +64 -0
- ultralytics/cfg/models/v9/yolov9e.yaml +46 -42
- ultralytics/cfg/models/v9/yolov9m.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9s.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9t.yaml +41 -0
- ultralytics/cfg/solutions/default.yaml +24 -0
- ultralytics/cfg/trackers/botsort.yaml +8 -5
- ultralytics/cfg/trackers/bytetrack.yaml +8 -5
- ultralytics/data/__init__.py +14 -3
- ultralytics/data/annotator.py +37 -15
- ultralytics/data/augment.py +1783 -289
- ultralytics/data/base.py +62 -27
- ultralytics/data/build.py +36 -8
- ultralytics/data/converter.py +196 -36
- ultralytics/data/dataset.py +233 -94
- ultralytics/data/loaders.py +199 -96
- ultralytics/data/split_dota.py +39 -29
- ultralytics/data/utils.py +110 -40
- ultralytics/engine/__init__.py +1 -1
- ultralytics/engine/exporter.py +569 -242
- ultralytics/engine/model.py +604 -252
- ultralytics/engine/predictor.py +22 -11
- ultralytics/engine/results.py +1228 -218
- ultralytics/engine/trainer.py +190 -129
- ultralytics/engine/tuner.py +18 -18
- ultralytics/engine/validator.py +18 -15
- ultralytics/hub/__init__.py +31 -13
- ultralytics/hub/auth.py +11 -7
- ultralytics/hub/google/__init__.py +159 -0
- ultralytics/hub/session.py +128 -94
- ultralytics/hub/utils.py +20 -21
- ultralytics/models/__init__.py +4 -2
- ultralytics/models/fastsam/__init__.py +2 -3
- ultralytics/models/fastsam/model.py +26 -4
- ultralytics/models/fastsam/predict.py +127 -63
- ultralytics/models/fastsam/utils.py +1 -44
- ultralytics/models/fastsam/val.py +1 -1
- ultralytics/models/nas/__init__.py +1 -1
- ultralytics/models/nas/model.py +21 -10
- ultralytics/models/nas/predict.py +3 -6
- ultralytics/models/nas/val.py +4 -4
- ultralytics/models/rtdetr/__init__.py +1 -1
- ultralytics/models/rtdetr/model.py +1 -1
- ultralytics/models/rtdetr/predict.py +6 -8
- ultralytics/models/rtdetr/train.py +6 -2
- ultralytics/models/rtdetr/val.py +3 -3
- ultralytics/models/sam/__init__.py +3 -3
- ultralytics/models/sam/amg.py +29 -23
- ultralytics/models/sam/build.py +211 -13
- ultralytics/models/sam/model.py +91 -30
- ultralytics/models/sam/modules/__init__.py +1 -1
- ultralytics/models/sam/modules/blocks.py +1129 -0
- ultralytics/models/sam/modules/decoders.py +381 -53
- ultralytics/models/sam/modules/encoders.py +515 -324
- ultralytics/models/sam/modules/memory_attention.py +237 -0
- ultralytics/models/sam/modules/sam.py +969 -21
- ultralytics/models/sam/modules/tiny_encoder.py +425 -154
- ultralytics/models/sam/modules/transformer.py +159 -60
- ultralytics/models/sam/modules/utils.py +293 -0
- ultralytics/models/sam/predict.py +1263 -132
- ultralytics/models/utils/__init__.py +1 -1
- ultralytics/models/utils/loss.py +36 -24
- ultralytics/models/utils/ops.py +3 -7
- ultralytics/models/yolo/__init__.py +3 -3
- ultralytics/models/yolo/classify/__init__.py +1 -1
- ultralytics/models/yolo/classify/predict.py +7 -8
- ultralytics/models/yolo/classify/train.py +17 -22
- ultralytics/models/yolo/classify/val.py +8 -4
- ultralytics/models/yolo/detect/__init__.py +1 -1
- ultralytics/models/yolo/detect/predict.py +3 -5
- ultralytics/models/yolo/detect/train.py +11 -4
- ultralytics/models/yolo/detect/val.py +90 -52
- ultralytics/models/yolo/model.py +14 -9
- ultralytics/models/yolo/obb/__init__.py +1 -1
- ultralytics/models/yolo/obb/predict.py +2 -2
- ultralytics/models/yolo/obb/train.py +5 -3
- ultralytics/models/yolo/obb/val.py +41 -23
- ultralytics/models/yolo/pose/__init__.py +1 -1
- ultralytics/models/yolo/pose/predict.py +3 -5
- ultralytics/models/yolo/pose/train.py +2 -2
- ultralytics/models/yolo/pose/val.py +51 -17
- ultralytics/models/yolo/segment/__init__.py +1 -1
- ultralytics/models/yolo/segment/predict.py +3 -5
- ultralytics/models/yolo/segment/train.py +2 -2
- ultralytics/models/yolo/segment/val.py +60 -19
- ultralytics/models/yolo/world/__init__.py +5 -0
- ultralytics/models/yolo/world/train.py +92 -0
- ultralytics/models/yolo/world/train_world.py +109 -0
- ultralytics/nn/__init__.py +1 -1
- ultralytics/nn/autobackend.py +228 -93
- ultralytics/nn/modules/__init__.py +39 -14
- ultralytics/nn/modules/activation.py +21 -0
- ultralytics/nn/modules/block.py +526 -66
- ultralytics/nn/modules/conv.py +24 -7
- ultralytics/nn/modules/head.py +177 -34
- ultralytics/nn/modules/transformer.py +6 -5
- ultralytics/nn/modules/utils.py +1 -2
- ultralytics/nn/tasks.py +225 -77
- ultralytics/solutions/__init__.py +30 -1
- ultralytics/solutions/ai_gym.py +96 -143
- ultralytics/solutions/analytics.py +247 -0
- ultralytics/solutions/distance_calculation.py +78 -135
- ultralytics/solutions/heatmap.py +93 -247
- ultralytics/solutions/object_counter.py +184 -259
- ultralytics/solutions/parking_management.py +246 -0
- ultralytics/solutions/queue_management.py +112 -0
- ultralytics/solutions/region_counter.py +116 -0
- ultralytics/solutions/security_alarm.py +144 -0
- ultralytics/solutions/solutions.py +178 -0
- ultralytics/solutions/speed_estimation.py +86 -174
- ultralytics/solutions/streamlit_inference.py +190 -0
- ultralytics/solutions/trackzone.py +68 -0
- ultralytics/trackers/__init__.py +1 -1
- ultralytics/trackers/basetrack.py +32 -13
- ultralytics/trackers/bot_sort.py +61 -28
- ultralytics/trackers/byte_tracker.py +83 -51
- ultralytics/trackers/track.py +21 -6
- ultralytics/trackers/utils/__init__.py +1 -1
- ultralytics/trackers/utils/gmc.py +62 -48
- ultralytics/trackers/utils/kalman_filter.py +166 -35
- ultralytics/trackers/utils/matching.py +40 -21
- ultralytics/utils/__init__.py +511 -239
- ultralytics/utils/autobatch.py +40 -22
- ultralytics/utils/benchmarks.py +266 -85
- ultralytics/utils/callbacks/__init__.py +1 -1
- ultralytics/utils/callbacks/base.py +1 -3
- ultralytics/utils/callbacks/clearml.py +7 -6
- ultralytics/utils/callbacks/comet.py +39 -17
- ultralytics/utils/callbacks/dvc.py +1 -1
- ultralytics/utils/callbacks/hub.py +16 -16
- ultralytics/utils/callbacks/mlflow.py +28 -24
- ultralytics/utils/callbacks/neptune.py +6 -2
- ultralytics/utils/callbacks/raytune.py +3 -4
- ultralytics/utils/callbacks/tensorboard.py +18 -18
- ultralytics/utils/callbacks/wb.py +27 -20
- ultralytics/utils/checks.py +160 -100
- ultralytics/utils/dist.py +2 -1
- ultralytics/utils/downloads.py +40 -34
- ultralytics/utils/errors.py +1 -1
- ultralytics/utils/files.py +72 -38
- ultralytics/utils/instance.py +41 -19
- ultralytics/utils/loss.py +83 -55
- ultralytics/utils/metrics.py +61 -56
- ultralytics/utils/ops.py +94 -89
- ultralytics/utils/patches.py +30 -14
- ultralytics/utils/plotting.py +600 -269
- ultralytics/utils/tal.py +67 -26
- ultralytics/utils/torch_utils.py +302 -102
- ultralytics/utils/triton.py +2 -1
- ultralytics/utils/tuner.py +21 -12
- ultralytics-8.3.62.dist-info/METADATA +370 -0
- ultralytics-8.3.62.dist-info/RECORD +241 -0
- {ultralytics-8.1.29.dist-info → ultralytics-8.3.62.dist-info}/WHEEL +1 -1
- ultralytics/data/explorer/__init__.py +0 -5
- ultralytics/data/explorer/explorer.py +0 -472
- ultralytics/data/explorer/gui/__init__.py +0 -1
- ultralytics/data/explorer/gui/dash.py +0 -268
- ultralytics/data/explorer/utils.py +0 -166
- ultralytics/models/fastsam/prompt.py +0 -357
- ultralytics-8.1.29.dist-info/METADATA +0 -373
- ultralytics-8.1.29.dist-info/RECORD +0 -197
- {ultralytics-8.1.29.dist-info → ultralytics-8.3.62.dist-info}/LICENSE +0 -0
- {ultralytics-8.1.29.dist-info → ultralytics-8.3.62.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.1.29.dist-info → ultralytics-8.3.62.dist-info}/top_level.txt +0 -0
ultralytics/nn/modules/conv.py
CHANGED
@@ -1,4 +1,4 @@
-# Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Convolution modules."""
 
 import math
@@ -21,6 +21,7 @@ __all__ = (
     "CBAM",
     "Concat",
     "RepConv",
+    "Index",
 )
 
 
@@ -50,7 +51,7 @@ class Conv(nn.Module):
         return self.act(self.bn(self.conv(x)))
 
     def forward_fuse(self, x):
-        """
+        """Apply convolution and activation without batch normalization."""
         return self.act(self.conv(x))
 
 
@@ -158,9 +159,7 @@ class GhostConv(nn.Module):
     """Ghost Convolution https://github.com/huawei-noah/ghostnet."""
 
     def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
-        """Initializes
-        activation.
-        """
+        """Initializes Ghost Convolution module with primary and cheap operations for efficient feature learning."""
         super().__init__()
         c_ = c2 // 2  # hidden channels
         self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
@@ -211,7 +210,8 @@ class RepConv(nn.Module):
         kernelid, biasid = self._fuse_bn_tensor(self.bn)
         return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
 
-    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
+    @staticmethod
+    def _pad_1x1_to_3x3_tensor(kernel1x1):
         """Pads a 1x1 tensor to a 3x3 tensor."""
         if kernel1x1 is None:
             return 0
@@ -296,7 +296,7 @@ class SpatialAttention(nn.Module):
     def __init__(self, kernel_size=7):
         """Initialize Spatial-attention module with kernel size argument."""
         super().__init__()
-        assert kernel_size in (3, 7), "kernel size must be 3 or 7"
+        assert kernel_size in {3, 7}, "kernel size must be 3 or 7"
        padding = 3 if kernel_size == 7 else 1
         self.cv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
         self.act = nn.Sigmoid()
@@ -331,3 +331,20 @@ class Concat(nn.Module):
     def forward(self, x):
         """Forward pass for the YOLOv8 mask Proto module."""
         return torch.cat(x, self.d)
+
+
+class Index(nn.Module):
+    """Returns a particular index of the input."""
+
+    def __init__(self, c1, c2, index=0):
+        """Returns a particular index of the input."""
+        super().__init__()
+        self.index = index
+
+    def forward(self, x):
+        """
+        Forward pass.
+
+        Expects a list of tensors as input.
+        """
+        return x[self.index]
ultralytics/nn/modules/head.py
CHANGED
@@ -1,6 +1,7 @@
-# Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Model head modules."""
 
+import copy
 import math
 
 import torch
@@ -8,25 +9,30 @@ import torch.nn as nn
 from torch.nn.init import constant_, xavier_uniform_
 
 from ultralytics.utils.tal import TORCH_1_10, dist2bbox, dist2rbox, make_anchors
-
-from .
+
+from .block import DFL, BNContrastiveHead, ContrastiveHead, Proto
+from .conv import Conv, DWConv
 from .transformer import MLP, DeformableTransformerDecoder, DeformableTransformerDecoderLayer
 from .utils import bias_init_with_prob, linear_init
 
-__all__ = "Detect", "Segment", "Pose", "Classify", "OBB", "RTDETRDecoder"
+__all__ = "Detect", "Segment", "Pose", "Classify", "OBB", "RTDETRDecoder", "v10Detect"
 
 
 class Detect(nn.Module):
-    """
+    """YOLO Detect head for detection models."""
 
     dynamic = False  # force grid reconstruction
     export = False  # export mode
+    format = None  # export format
+    end2end = False  # end2end
+    max_det = 300  # max_det
     shape = None
     anchors = torch.empty(0)  # init
     strides = torch.empty(0)  # init
+    legacy = False  # backward compatibility for v3/v5/v8/v9 models
 
     def __init__(self, nc=80, ch=()):
-        """Initializes the
+        """Initializes the YOLO detection layer with specified number of classes and channels."""
         super().__init__()
         self.nc = nc  # number of classes
         self.nl = len(ch)  # number of detection layers
@@ -37,30 +43,76 @@ class Detect(nn.Module):
         self.cv2 = nn.ModuleList(
             nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch
         )
-        self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
+        self.cv3 = (
+            nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
+            if self.legacy
+            else nn.ModuleList(
+                nn.Sequential(
+                    nn.Sequential(DWConv(x, x, 3), Conv(x, c3, 1)),
+                    nn.Sequential(DWConv(c3, c3, 3), Conv(c3, c3, 1)),
+                    nn.Conv2d(c3, self.nc, 1),
+                )
+                for x in ch
+            )
+        )
         self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()
 
+        if self.end2end:
+            self.one2one_cv2 = copy.deepcopy(self.cv2)
+            self.one2one_cv3 = copy.deepcopy(self.cv3)
+
     def forward(self, x):
         """Concatenates and returns predicted bounding boxes and class probabilities."""
+        if self.end2end:
+            return self.forward_end2end(x)
+
         for i in range(self.nl):
             x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
         if self.training:  # Training path
             return x
+        y = self._inference(x)
+        return y if self.export else (y, x)
+
+    def forward_end2end(self, x):
+        """
+        Performs forward pass of the v10Detect module.
+
+        Args:
+            x (tensor): Input tensor.
+
+        Returns:
+            (dict, tensor): If not in training mode, returns a dictionary containing the outputs of both one2many and one2one detections.
+                If in training mode, returns a dictionary containing the outputs of one2many and one2one detections separately.
+        """
+        x_detach = [xi.detach() for xi in x]
+        one2one = [
+            torch.cat((self.one2one_cv2[i](x_detach[i]), self.one2one_cv3[i](x_detach[i])), 1) for i in range(self.nl)
+        ]
+        for i in range(self.nl):
+            x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
+        if self.training:  # Training path
+            return {"one2many": x, "one2one": one2one}
+
+        y = self._inference(one2one)
+        y = self.postprocess(y.permute(0, 2, 1), self.max_det, self.nc)
+        return y if self.export else (y, {"one2many": x, "one2one": one2one})
 
+    def _inference(self, x):
+        """Decode predicted bounding boxes and class probabilities based on multiple-level feature maps."""
         # Inference path
         shape = x[0].shape  # BCHW
         x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
-        if self.dynamic or self.shape != shape:
+        if self.format != "imx" and (self.dynamic or self.shape != shape):
             self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
             self.shape = shape
 
-        if self.export and self.format in ("saved_model", "pb", "tflite", "edgetpu", "tfjs"):  # avoid TF FlexSplitV ops
+        if self.export and self.format in {"saved_model", "pb", "tflite", "edgetpu", "tfjs"}:  # avoid TF FlexSplitV ops
             box = x_cat[:, : self.reg_max * 4]
             cls = x_cat[:, self.reg_max * 4 :]
         else:
             box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
 
-        if self.export and self.format in ("tflite", "edgetpu"):
+        if self.export and self.format in {"tflite", "edgetpu"}:
             # Precompute normalization factor to increase numerical stability
             # See https://github.com/ultralytics/ultralytics/issues/7371
             grid_h = shape[2]
@@ -68,11 +120,15 @@ class Detect(nn.Module):
             grid_size = torch.tensor([grid_w, grid_h, grid_w, grid_h], device=box.device).reshape(1, 4, 1)
             norm = self.strides / (self.stride[0] * grid_size)
             dbox = self.decode_bboxes(self.dfl(box) * norm, self.anchors.unsqueeze(0) * norm[:, :2])
+        elif self.export and self.format == "imx":
+            dbox = self.decode_bboxes(
+                self.dfl(box) * self.strides, self.anchors.unsqueeze(0) * self.strides, xywh=False
+            )
+            return dbox.transpose(1, 2), cls.sigmoid().permute(0, 2, 1)
         else:
             dbox = self.decode_bboxes(self.dfl(box), self.anchors.unsqueeze(0)) * self.strides
 
-        y = torch.cat((dbox, cls.sigmoid()), 1)
-        return y if self.export else (y, x)
+        return torch.cat((dbox, cls.sigmoid()), 1)
 
     def bias_init(self):
         """Initialize Detect() biases, WARNING: requires stride availability."""
@@ -82,14 +138,42 @@ class Detect(nn.Module):
         for a, b, s in zip(m.cv2, m.cv3, m.stride):  # from
             a[-1].bias.data[:] = 1.0  # box
             b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
+        if self.end2end:
+            for a, b, s in zip(m.one2one_cv2, m.one2one_cv3, m.stride):  # from
+                a[-1].bias.data[:] = 1.0  # box
+                b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
 
-    def decode_bboxes(self, bboxes, anchors):
+    def decode_bboxes(self, bboxes, anchors, xywh=True):
         """Decode bounding boxes."""
-        return dist2bbox(bboxes, anchors, xywh=
+        return dist2bbox(bboxes, anchors, xywh=xywh and (not self.end2end), dim=1)
+
+    @staticmethod
+    def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80):
+        """
+        Post-processes YOLO model predictions.
+
+        Args:
+            preds (torch.Tensor): Raw predictions with shape (batch_size, num_anchors, 4 + nc) with last dimension
+                format [x, y, w, h, class_probs].
+            max_det (int): Maximum detections per image.
+            nc (int, optional): Number of classes. Default: 80.
+
+        Returns:
+            (torch.Tensor): Processed predictions with shape (batch_size, min(max_det, num_anchors), 6) and last
+                dimension format [x, y, w, h, max_class_prob, class_index].
+        """
+        batch_size, anchors, _ = preds.shape  # i.e. shape(16,8400,84)
+        boxes, scores = preds.split([4, nc], dim=-1)
+        index = scores.amax(dim=-1).topk(min(max_det, anchors))[1].unsqueeze(-1)
+        boxes = boxes.gather(dim=1, index=index.repeat(1, 1, 4))
+        scores = scores.gather(dim=1, index=index.repeat(1, 1, nc))
+        scores, index = scores.flatten(1).topk(min(max_det, anchors))
+        i = torch.arange(batch_size)[..., None]  # batch indices
+        return torch.cat([boxes[i, index // nc], scores[..., None], (index % nc)[..., None].float()], dim=-1)
 
 
 class Segment(Detect):
-    """
+    """YOLO Segment head for segmentation models."""
 
     def __init__(self, nc=80, nm=32, npr=256, ch=()):
         """Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers."""
@@ -97,7 +181,6 @@ class Segment(Detect):
         self.nm = nm  # number of masks
         self.npr = npr  # number of protos
         self.proto = Proto(ch[0], self.npr, self.nm)  # protos
-        self.detect = Detect.forward
 
         c4 = max(ch[0] // 4, self.nm)
         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch)
@@ -108,20 +191,19 @@ class Segment(Detect):
         bs = p.shape[0]  # batch size
 
         mc = torch.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2)  # mask coefficients
-        x = self.detect(self, x)
+        x = Detect.forward(self, x)
         if self.training:
             return x, mc, p
         return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))
 
 
 class OBB(Detect):
-    """
+    """YOLO OBB detection head for detection with rotation models."""
 
     def __init__(self, nc=80, ne=1, ch=()):
         """Initialize OBB with number of classes `nc` and layer channels `ch`."""
         super().__init__(nc, ch)
         self.ne = ne  # number of extra parameters
-        self.detect = Detect.forward
 
         c4 = max(ch[0] // 4, self.ne)
         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.ne, 1)) for x in ch)
@@ -135,7 +217,7 @@ class OBB(Detect):
         # angle = angle.sigmoid() * math.pi / 2  # [0, pi/2]
         if not self.training:
             self.angle = angle
-        x = self.detect(self, x)
+        x = Detect.forward(self, x)
         if self.training:
             return x, angle
         return torch.cat([x, angle], 1) if self.export else (torch.cat([x[0], angle], 1), (x[1], angle))
@@ -146,14 +228,13 @@ class OBB(Detect):
 
 
 class Pose(Detect):
-    """
+    """YOLO Pose head for keypoints models."""
 
     def __init__(self, nc=80, kpt_shape=(17, 3), ch=()):
         """Initialize YOLO network with default parameters and Convolutional Layers."""
         super().__init__(nc, ch)
         self.kpt_shape = kpt_shape  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
         self.nk = kpt_shape[0] * kpt_shape[1]  # number of keypoints total
-        self.detect = Detect.forward
 
         c4 = max(ch[0] // 4, self.nk)
         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch)
@@ -162,7 +243,7 @@ class Pose(Detect):
         """Perform forward pass through YOLO model and return predictions."""
         bs = x[0].shape[0]  # batch size
         kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1)  # (bs, 17*3, h*w)
-        x = self.detect(self, x)
+        x = Detect.forward(self, x)
         if self.training:
             return x, kpt
         pred_kpt = self.kpts_decode(bs, kpt)
@@ -171,9 +252,21 @@ class Pose(Detect):
     def kpts_decode(self, bs, kpts):
         """Decodes keypoints."""
         ndim = self.kpt_shape[1]
-        if self.export:
-            y = kpts.view(bs, *self.kpt_shape, -1)
-            a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
+        if self.export:
+            if self.format in {
+                "tflite",
+                "edgetpu",
+            }:  # required for TFLite export to avoid 'PLACEHOLDER_FOR_GREATER_OP_CODES' bug
+                # Precompute normalization factor to increase numerical stability
+                y = kpts.view(bs, *self.kpt_shape, -1)
+                grid_h, grid_w = self.shape[2], self.shape[3]
+                grid_size = torch.tensor([grid_w, grid_h], device=y.device).reshape(1, 2, 1)
+                norm = self.strides / (self.stride[0] * grid_size)
+                a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * norm
+            else:
+                # NCNN fix
+                y = kpts.view(bs, *self.kpt_shape, -1)
+                a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
         if ndim == 3:
             a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2)
         return a.view(bs, self.nk, -1)
@@ -187,12 +280,12 @@ class Pose(Detect):
 
 
 class Classify(nn.Module):
-    """
+    """YOLO classification head, i.e. x(b,c1,20,20) to x(b,c2)."""
+
+    export = False  # export mode
 
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1):
-        """Initializes
-        padding, and groups.
-        """
+        """Initializes YOLO classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape."""
         super().__init__()
         c_ = 1280  # efficientnet_b0 size
         self.conv = Conv(c1, c_, k, s, p, g)
@@ -205,12 +298,17 @@ class Classify(nn.Module):
         if isinstance(x, list):
             x = torch.cat(x, 1)
         x = self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
-        return x if self.training else x.softmax(1)
+        if self.training:
+            return x
+        y = x.softmax(1)  # get final output
+        return y if self.export else (y, x)
 
 
 class WorldDetect(Detect):
+    """Head for integrating YOLO detection models with semantic understanding from text embeddings."""
+
     def __init__(self, nc=80, embed=512, with_bn=False, ch=()):
-        """Initialize
+        """Initialize YOLO detection layer with nc classes and layer channels ch."""
         super().__init__(nc, ch)
         c3 = max(ch[0], min(self.nc, 100))
         self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, embed, 1)) for x in ch)
@@ -230,13 +328,13 @@ class WorldDetect(Detect):
             self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
             self.shape = shape
 
-        if self.export and self.format in ("saved_model", "pb", "tflite", "edgetpu", "tfjs"):  # avoid TF FlexSplitV ops
+        if self.export and self.format in {"saved_model", "pb", "tflite", "edgetpu", "tfjs"}:  # avoid TF FlexSplitV ops
             box = x_cat[:, : self.reg_max * 4]
             cls = x_cat[:, self.reg_max * 4 :]
         else:
             box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
 
-        if self.export and self.format in ("tflite", "edgetpu"):
+        if self.export and self.format in {"tflite", "edgetpu"}:
             # Precompute normalization factor to increase numerical stability
             # See https://github.com/ultralytics/ultralytics/issues/7371
             grid_h = shape[2]
@@ -250,6 +348,15 @@ class WorldDetect(Detect):
         y = torch.cat((dbox, cls.sigmoid()), 1)
         return y if self.export else (y, x)
 
+    def bias_init(self):
+        """Initialize Detect() biases, WARNING: requires stride availability."""
+        m = self  # self.model[-1]  # Detect() module
+        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1
+        # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # nominal class frequency
+        for a, b, s in zip(m.cv2, m.cv3, m.stride):  # from
+            a[-1].bias.data[:] = 1.0  # box
+            # b[-1].bias.data[:] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
+
 
 class RTDETRDecoder(nn.Module):
     """
@@ -480,3 +587,39 @@ class RTDETRDecoder(nn.Module):
         xavier_uniform_(self.query_pos_head.layers[1].weight)
         for layer in self.input_proj:
             xavier_uniform_(layer[0].weight)
+
+
+class v10Detect(Detect):
+    """
+    v10 Detection head from https://arxiv.org/pdf/2405.14458.
+
+    Args:
+        nc (int): Number of classes.
+        ch (tuple): Tuple of channel sizes.
+
+    Attributes:
+        max_det (int): Maximum number of detections.
+
+    Methods:
+        __init__(self, nc=80, ch=()): Initializes the v10Detect object.
+        forward(self, x): Performs forward pass of the v10Detect module.
+        bias_init(self): Initializes biases of the Detect module.
+    """
+
+    end2end = True
+
+    def __init__(self, nc=80, ch=()):
+        """Initializes the v10Detect object with the specified number of classes and input channels."""
+        super().__init__(nc, ch)
+        c3 = max(ch[0], min(self.nc, 100))  # channels
+        # Light cls head
+        self.cv3 = nn.ModuleList(
+            nn.Sequential(
+                nn.Sequential(Conv(x, x, 3, g=x), Conv(x, c3, 1)),
+                nn.Sequential(Conv(c3, c3, 3, g=c3), Conv(c3, c3, 1)),
+                nn.Conv2d(c3, self.nc, 1),
+            )
+            for x in ch
+        )
+        self.one2one_cv3 = copy.deepcopy(self.cv3)
ultralytics/nn/modules/transformer.py
CHANGED

@@ -1,4 +1,4 @@
-# Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Transformer modules."""
 
 import math
@@ -174,18 +174,20 @@ class MLPBlock(nn.Module):
 class MLP(nn.Module):
     """Implements a simple multi-layer perceptron (also called FFN)."""
 
-    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
+    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, act=nn.ReLU, sigmoid=False):
         """Initialize the MLP with specified input, hidden, output dimensions and number of layers."""
         super().__init__()
         self.num_layers = num_layers
         h = [hidden_dim] * (num_layers - 1)
         self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
+        self.sigmoid = sigmoid
+        self.act = act()
 
     def forward(self, x):
         """Forward pass for the entire MLP."""
         for i, layer in enumerate(self.layers):
-            x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
-        return x
+            x = getattr(self, "act", nn.ReLU())(layer(x)) if i < self.num_layers - 1 else layer(x)
+        return x.sigmoid() if getattr(self, "sigmoid", False) else x
 
 
 class LayerNorm2d(nn.Module):
@@ -350,7 +352,6 @@ class DeformableTransformerDecoderLayer(nn.Module):
 
     def forward(self, embed, refer_bbox, feats, shapes, padding_mask=None, attn_mask=None, query_pos=None):
         """Perform the forward pass through the entire decoder layer."""
-
         # Self attention
         q = k = self.with_pos_embed(embed, query_pos)
         tgt = self.self_attn(q.transpose(0, 1), k.transpose(0, 1), embed.transpose(0, 1), attn_mask=attn_mask)[
ultralytics/nn/modules/utils.py
CHANGED
@@ -1,4 +1,4 @@
-# Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Module utils."""
 
 import copy
@@ -50,7 +50,6 @@ def multi_scale_deformable_attn_pytorch(
 
     https://github.com/IDEA-Research/detrex/blob/main/detrex/layers/multi_scale_deform_attn.py
     """
-
     bs, _, num_heads, embed_dims = value.shape
     _, num_queries, num_heads, num_levels, num_points, _ = sampling_locations.shape
     value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1)