ultralytics 8.3.98__py3-none-any.whl → 8.3.99__py3-none-any.whl
This diff compares the contents of two publicly released versions of this package as they appear in their respective public registries. It is provided for informational purposes only.
- tests/test_python.py +56 -0
- ultralytics/__init__.py +3 -2
- ultralytics/cfg/models/11/yoloe-11-seg.yaml +48 -0
- ultralytics/cfg/models/11/yoloe-11.yaml +48 -0
- ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +45 -0
- ultralytics/cfg/models/v8/yoloe-v8.yaml +45 -0
- ultralytics/data/augment.py +101 -5
- ultralytics/data/dataset.py +165 -12
- ultralytics/engine/exporter.py +4 -3
- ultralytics/engine/trainer.py +16 -7
- ultralytics/models/__init__.py +2 -2
- ultralytics/models/yolo/__init__.py +3 -3
- ultralytics/models/yolo/detect/val.py +6 -1
- ultralytics/models/yolo/model.py +182 -3
- ultralytics/models/yolo/segment/val.py +43 -16
- ultralytics/models/yolo/yoloe/__init__.py +21 -0
- ultralytics/models/yolo/yoloe/predict.py +170 -0
- ultralytics/models/yolo/yoloe/train.py +355 -0
- ultralytics/models/yolo/yoloe/train_seg.py +141 -0
- ultralytics/models/yolo/yoloe/val.py +187 -0
- ultralytics/nn/autobackend.py +3 -2
- ultralytics/nn/modules/__init__.py +18 -1
- ultralytics/nn/modules/block.py +17 -1
- ultralytics/nn/modules/head.py +359 -22
- ultralytics/nn/tasks.py +276 -10
- ultralytics/nn/text_model.py +193 -0
- ultralytics/utils/callbacks/comet.py +3 -6
- ultralytics/utils/downloads.py +6 -2
- ultralytics/utils/loss.py +67 -6
- ultralytics/utils/plotting.py +1 -1
- ultralytics/utils/tal.py +1 -1
- {ultralytics-8.3.98.dist-info → ultralytics-8.3.99.dist-info}/METADATA +10 -10
- {ultralytics-8.3.98.dist-info → ultralytics-8.3.99.dist-info}/RECORD +37 -27
- {ultralytics-8.3.98.dist-info → ultralytics-8.3.99.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.98.dist-info → ultralytics-8.3.99.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.98.dist-info → ultralytics-8.3.99.dist-info}/licenses/LICENSE +0 -0
- {ultralytics-8.3.98.dist-info → ultralytics-8.3.99.dist-info}/top_level.txt +0 -0
tests/test_python.py
CHANGED
```diff
@@ -608,6 +608,62 @@ def test_yolo_world():
     )
 
 
+@pytest.mark.skipif(checks.IS_PYTHON_3_12 or not TORCH_1_9, reason="YOLOE with CLIP is not supported in Python 3.12")
+def test_yoloe():
+    """Test YOLOE models with MobileClip support."""
+    # Predict
+    # text-prompts
+    model = YOLO(WEIGHTS_DIR / "yoloe-11s-seg.pt")
+    names = ["person", "bus"]
+    model.set_classes(names, model.get_text_pe(names))
+    model(SOURCE, conf=0.01)
+
+    import numpy as np
+
+    from ultralytics import YOLOE
+    from ultralytics.models.yolo.yoloe import YOLOEVPSegPredictor
+
+    # visual-prompts
+    visuals = dict(
+        bboxes=np.array(
+            [[221.52, 405.8, 344.98, 857.54], [120, 425, 160, 445]],
+        ),
+        cls=np.array([0, 1]),
+    )
+    model.predict(
+        SOURCE,
+        visual_prompts=visuals,
+        predictor=YOLOEVPSegPredictor,
+    )
+
+    # Val
+    model = YOLOE(WEIGHTS_DIR / "yoloe-11s-seg.pt")
+    # text prompts
+    model.val(data="coco128-seg.yaml", imgsz=32)
+    # visual prompts
+    model.val(data="coco128-seg.yaml", load_vp=True, imgsz=32)
+
+    # Train, fine-tune
+    from ultralytics.models.yolo.yoloe import YOLOEPESegTrainer
+
+    model = YOLOE("yoloe-11s-seg.pt")
+    model.train(
+        data="coco128-seg.yaml",
+        epochs=1,
+        close_mosaic=1,
+        trainer=YOLOEPESegTrainer,
+        imgsz=32,
+    )
+
+    # prompt-free
+    # predict
+    model = YOLOE(WEIGHTS_DIR / "yoloe-11s-seg-pf.pt")
+    model.predict(SOURCE)
+    # val
+    model = YOLOE("yoloe-11s-seg.pt")  # or select yoloe-m/l-seg.pt for different sizes
+    model.val(data="coco128-seg.yaml", imgsz=32)
+
+
 def test_yolov10():
     """Test YOLOv10 model training, validation, and prediction functionality."""
     model = YOLO("yolov10n.yaml")
```
ultralytics/__init__.py
CHANGED
```diff
@@ -1,6 +1,6 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
-__version__ = "8.3.98"
+__version__ = "8.3.99"
 
 import os
 
@@ -8,7 +8,7 @@ import os
 if not os.environ.get("OMP_NUM_THREADS"):
     os.environ["OMP_NUM_THREADS"] = "1"  # default for reduced CPU utilization during training
 
-from ultralytics.models import NAS, RTDETR, SAM, YOLO, FastSAM, YOLOWorld
+from ultralytics.models import NAS, RTDETR, SAM, YOLO, YOLOE, FastSAM, YOLOWorld
 from ultralytics.utils import ASSETS, SETTINGS
 from ultralytics.utils.checks import check_yolo as checks
 from ultralytics.utils.downloads import download
@@ -19,6 +19,7 @@ __all__ = (
     "ASSETS",
     "YOLO",
     "YOLOWorld",
+    "YOLOE",
     "NAS",
     "SAM",
     "FastSAM",
```
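A quick sanity check that the new top-level export works as the diff intends (a sketch; only the import surface is shown here):

```python
import ultralytics

assert "YOLOE" in ultralytics.__all__  # added in 8.3.99
from ultralytics import YOLOE  # re-exported from ultralytics.models
```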
ultralytics/cfg/models/11/yoloe-11-seg.yaml
ADDED
```diff
@@ -0,0 +1,48 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# YOLO11-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment
+
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolo11n-seg.yaml' will call yolo11-seg.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.50, 0.25, 1024] # summary: 355 layers, 2876848 parameters, 2876832 gradients, 10.5 GFLOPs
+  s: [0.50, 0.50, 1024] # summary: 355 layers, 10113248 parameters, 10113232 gradients, 35.8 GFLOPs
+  m: [0.50, 1.00, 512] # summary: 445 layers, 22420896 parameters, 22420880 gradients, 123.9 GFLOPs
+  l: [1.00, 1.00, 512] # summary: 667 layers, 27678368 parameters, 27678352 gradients, 143.0 GFLOPs
+  x: [1.00, 1.50, 512] # summary: 667 layers, 62142656 parameters, 62142640 gradients, 320.2 GFLOPs
+
+# YOLO11n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 2, C3k2, [256, False, 0.25]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 2, C3k2, [512, False, 0.25]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 2, C3k2, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 2, C3k2, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 2, C2PSA, [1024]] # 10
+
+# YOLO11n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 2, C3k2, [512, False]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 2, C3k2, [256, False]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 2, C3k2, [512, False]] # 19 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 2, C3k2, [1024, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, YOLOESegment, [nc, 32, 256, 512, True]] # Detect(P3, P4, P5)
```
ultralytics/cfg/models/11/yoloe-11.yaml
ADDED
```diff
@@ -0,0 +1,48 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# YOLO11 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolo11n.yaml' will call yolo11.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.50, 0.25, 1024] # summary: 319 layers, 2624080 parameters, 2624064 gradients, 6.6 GFLOPs
+  s: [0.50, 0.50, 1024] # summary: 319 layers, 9458752 parameters, 9458736 gradients, 21.7 GFLOPs
+  m: [0.50, 1.00, 512] # summary: 409 layers, 20114688 parameters, 20114672 gradients, 68.5 GFLOPs
+  l: [1.00, 1.00, 512] # summary: 631 layers, 25372160 parameters, 25372144 gradients, 87.6 GFLOPs
+  x: [1.00, 1.50, 512] # summary: 631 layers, 56966176 parameters, 56966160 gradients, 196.0 GFLOPs
+
+# YOLO11n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 2, C3k2, [256, False, 0.25]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 2, C3k2, [512, False, 0.25]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 2, C3k2, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 2, C3k2, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 2, C2PSA, [1024]] # 10
+
+# YOLO11n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 2, C3k2, [512, False]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 2, C3k2, [256, False]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 2, C3k2, [512, False]] # 19 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 2, C3k2, [1024, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, YOLOEDetect, [nc, 512, True]] # Detect(P3, P4, P5)
```
ultralytics/cfg/models/v8/yoloe-v8-seg.yaml
ADDED
```diff
@@ -0,0 +1,45 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024] # YOLOv8n-world summary: 161 layers, 4204111 parameters, 4204095 gradients, 39.6 GFLOPs
+  s: [0.33, 0.50, 1024] # YOLOv8s-world summary: 161 layers, 13383496 parameters, 13383480 gradients, 71.5 GFLOPs
+  m: [0.67, 0.75, 768] # YOLOv8m-world summary: 201 layers, 29065310 parameters, 29065294 gradients, 131.4 GFLOPs
+  l: [1.00, 1.00, 512] # YOLOv8l-world summary: 241 layers, 47553970 parameters, 47553954 gradients, 225.6 GFLOPs
+  x: [1.00, 1.25, 512] # YOLOv8x-world summary: 241 layers, 73690217 parameters, 73690201 gradients, 330.8 GFLOPs
+
+# YOLOv8.0n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2f, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2f, [512]] # 12
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 15 (P3/8-small)
+
+  - [15, 1, Conv, [256, 3, 2]]
+  - [[-1, 12], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 9], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
+
+  - [[15, 18, 21], 1, YOLOESegment, [nc, 32, 256, 512, True]] # Segment(P3, P4, P5)
```
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
2
|
+
|
3
|
+
# Parameters
|
4
|
+
nc: 80 # number of classes
|
5
|
+
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
|
6
|
+
# [depth, width, max_channels]
|
7
|
+
n: [0.33, 0.25, 1024] # YOLOv8n-worldv2 summary: 148 layers, 3695183 parameters, 3695167 gradients, 19.5 GFLOPS
|
8
|
+
s: [0.33, 0.50, 1024] # YOLOv8s-worldv2 summary: 148 layers, 12759880 parameters, 12759864 gradients, 51.0 GFLOPS
|
9
|
+
m: [0.67, 0.75, 768] # YOLOv8m-worldv2 summary: 188 layers, 28376158 parameters, 28376142 gradients, 110.5 GFLOPS
|
10
|
+
l: [1.00, 1.00, 512] # YOLOv8l-worldv2 summary: 228 layers, 46832050 parameters, 46832034 gradients, 204.5 GFLOPS
|
11
|
+
x: [1.00, 1.25, 512] # YOLOv8x-worldv2 summary: 228 layers, 72886377 parameters, 72886361 gradients, 309.3 GFLOPS
|
12
|
+
|
13
|
+
# YOLOv8.0n backbone
|
14
|
+
backbone:
|
15
|
+
# [from, repeats, module, args]
|
16
|
+
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
|
17
|
+
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
|
18
|
+
- [-1, 3, C2f, [128, True]]
|
19
|
+
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
|
20
|
+
- [-1, 6, C2f, [256, True]]
|
21
|
+
- [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
|
22
|
+
- [-1, 6, C2f, [512, True]]
|
23
|
+
- [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
|
24
|
+
- [-1, 3, C2f, [1024, True]]
|
25
|
+
- [-1, 1, SPPF, [1024, 5]] # 9
|
26
|
+
|
27
|
+
# YOLOv8.0n head
|
28
|
+
head:
|
29
|
+
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
|
30
|
+
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
|
31
|
+
- [-1, 3, C2f, [512]] # 12
|
32
|
+
|
33
|
+
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
|
34
|
+
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
|
35
|
+
- [-1, 3, C2f, [256]] # 15 (P3/8-small)
|
36
|
+
|
37
|
+
- [15, 1, Conv, [256, 3, 2]]
|
38
|
+
- [[-1, 12], 1, Concat, [1]] # cat head P4
|
39
|
+
- [-1, 3, C2f, [512]] # 18 (P4/16-medium)
|
40
|
+
|
41
|
+
- [-1, 1, Conv, [512, 3, 2]]
|
42
|
+
- [[-1, 9], 1, Concat, [1]] # cat head P5
|
43
|
+
- [-1, 3, C2f, [1024]] # 21 (P5/32-large)
|
44
|
+
|
45
|
+
- [[15, 18, 21], 1, YOLOEDetect, [nc, 512, True]] # Detect(P3, P4, P5)
|
ultralytics/data/augment.py
CHANGED
```diff
@@ -3,19 +3,20 @@
 import math
 import random
 from copy import deepcopy
-from typing import Tuple, Union
+from typing import List, Tuple, Union
 
 import cv2
 import numpy as np
 import torch
 from PIL import Image
+from torch.nn import functional as F
 
 from ultralytics.data.utils import polygons2masks, polygons2masks_overlap
 from ultralytics.utils import LOGGER, colorstr
 from ultralytics.utils.checks import check_version
 from ultralytics.utils.instance import Instances
 from ultralytics.utils.metrics import bbox_ioa
-from ultralytics.utils.ops import segment2box, xyxyxyxy2xywhr
+from ultralytics.utils.ops import segment2box, xywh2xyxy, xyxyxyxy2xywhr
 from ultralytics.utils.torch_utils import TORCHVISION_0_10, TORCHVISION_0_11, TORCHVISION_0_13
 
 DEFAULT_MEAN = (0.0, 0.0, 0.0)
```
```diff
@@ -2140,6 +2141,99 @@ class Format:
         return masks, instances, cls
 
 
+class LoadVisualPrompt:
+    """Creates visual prompts from bounding boxes or masks for model input."""
+
+    def __init__(self, scale_factor=1 / 8):
+        """
+        Initialize the LoadVisualPrompt with a scale factor.
+
+        Args:
+            scale_factor (float): Factor to scale the input image dimensions.
+        """
+        self.scale_factor = scale_factor
+
+    def make_mask(self, boxes, h, w):
+        """
+        Create binary masks from bounding boxes.
+
+        Args:
+            boxes (torch.Tensor): Bounding boxes in xyxy format, shape: (N, 4).
+            h (int): Height of the mask.
+            w (int): Width of the mask.
+
+        Returns:
+            (torch.Tensor): Binary masks with shape (N, h, w).
+        """
+        x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # x1 shape(n,1,1)
+        r = torch.arange(w)[None, None, :]  # rows shape(1,1,w)
+        c = torch.arange(h)[None, :, None]  # cols shape(1,h,1)
+
+        return (r >= x1) * (r < x2) * (c >= y1) * (c < y2)
+
+    def __call__(self, labels):
+        """
+        Process labels to create visual prompts.
+
+        Args:
+            labels (dict): Dictionary containing image data and annotations.
+
+        Returns:
+            (dict): Updated labels with visual prompts added.
+        """
+        imgsz = labels["img"].shape[1:]
+        bboxes, masks = None, None
+        if "bboxes" in labels:
+            bboxes = labels["bboxes"]
+            bboxes = xywh2xyxy(bboxes) * torch.tensor(imgsz)[[1, 0, 1, 0]]  # denormalize boxes
+
+        cls = labels["cls"].squeeze(-1).to(torch.int)
+        visuals = self.get_visuals(cls, imgsz, bboxes=bboxes, masks=masks)
+        labels["visuals"] = visuals
+        return labels
+
+    def get_visuals(self, category, shape, bboxes=None, masks=None):
+        """
+        Generate visual masks based on bounding boxes or masks.
+
+        Args:
+            category (int | np.ndarray | torch.Tensor): The category labels for the objects.
+            shape (tuple): The shape of the image (height, width).
+            bboxes (np.ndarray | torch.Tensor, optional): Bounding boxes for the objects, xyxy format. Defaults to None.
+            masks (np.ndarray | torch.Tensor, optional): Masks for the objects. Defaults to None.
+
+        Returns:
+            (torch.Tensor): A tensor containing the visual masks for each category.
+
+        Raises:
+            ValueError: If neither bboxes nor masks are provided.
+        """
+        masksz = (int(shape[0] * self.scale_factor), int(shape[1] * self.scale_factor))
+        if bboxes is not None:
+            if isinstance(bboxes, np.ndarray):
+                bboxes = torch.from_numpy(bboxes)
+            bboxes *= self.scale_factor
+            masks = self.make_mask(bboxes, *masksz).float()
+        elif masks is not None:
+            if isinstance(masks, np.ndarray):
+                masks = torch.from_numpy(masks)  # (N, H, W)
+            masks = F.interpolate(masks.unsqueeze(1), masksz, mode="nearest").squeeze(1).float()
+        else:
+            raise ValueError("LoadVisualPrompt must have bboxes or masks in the label")
+        if not isinstance(category, torch.Tensor):
+            category = torch.tensor(category, dtype=torch.int)
+        cls_unique, inverse_indices = torch.unique(category, sorted=True, return_inverse=True)
+        # NOTE: `cls` indices from RandomLoadText should be continuous.
+        # if len(cls_unique):
+        #     assert len(cls_unique) == cls_unique[-1] + 1, (
+        #         f"Expected a continuous range of class indices, but got {cls_unique}"
+        #     )
+        visuals = torch.zeros(len(cls_unique), *masksz)
+        for idx, mask in zip(inverse_indices, masks):
+            visuals[idx] = torch.logical_or(visuals[idx], mask)
+        return visuals
+
+
 class RandomLoadText:
     """
     Randomly samples positive and negative texts and updates class indices accordingly.
```
```diff
@@ -2172,7 +2266,7 @@ class RandomLoadText:
         neg_samples: Tuple[int, int] = (80, 80),
         max_samples: int = 80,
         padding: bool = False,
-        padding_value: str = "",
+        padding_value: List[str] = [""],
     ) -> None:
         """
         Initializes the RandomLoadText class for randomly sampling positive and negative texts.
```
```diff
@@ -2246,7 +2340,8 @@ class RandomLoadText:
             neg_labels = random.sample(neg_labels, k=neg_samples)
 
         sampled_labels = pos_labels + neg_labels
-
+        # Randomness
+        # random.shuffle(sampled_labels)
 
         label2ids = {label: i for i, label in enumerate(sampled_labels)}
         valid_idx = np.zeros(len(labels["instances"]), dtype=bool)
@@ -2271,8 +2366,9 @@ class RandomLoadText:
         valid_labels = len(pos_labels) + len(neg_labels)
         num_padding = self.max_samples - valid_labels
         if num_padding > 0:
-            texts += [self.padding_value] * num_padding
+            texts += random.choices(self.padding_value, k=num_padding)
 
+        assert len(texts) == self.max_samples
         labels["texts"] = texts
         return labels
```
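Two behavioral changes fall out of this last hunk: `padding_value` is now a list, so padding entries are drawn from a pool via `random.choices` (sampling with replacement) rather than repeating one fixed string, and the new assert guarantees that exactly `max_samples` texts leave the transform. The padding step in isolation (values are illustrative):

```python
import random

max_samples = 80
texts = ["person", "bus"]  # prompts already sampled for this image
padding_pool = [""]  # default pool; callers may supply several padding strings
num_padding = max_samples - len(texts)
if num_padding > 0:
    texts += random.choices(padding_pool, k=num_padding)  # sample with replacement
assert len(texts) == max_samples
```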