ultralytics-opencv-headless 8.4.4__py3-none-any.whl → 8.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Files changed (41)
  1. tests/test_cli.py +10 -3
  2. tests/test_exports.py +64 -43
  3. tests/test_python.py +40 -11
  4. ultralytics/__init__.py +1 -1
  5. ultralytics/cfg/__init__.py +5 -4
  6. ultralytics/cfg/datasets/coco12-formats.yaml +101 -0
  7. ultralytics/cfg/default.yaml +2 -1
  8. ultralytics/data/augment.py +8 -0
  9. ultralytics/data/converter.py +32 -9
  10. ultralytics/data/utils.py +2 -2
  11. ultralytics/engine/exporter.py +10 -6
  12. ultralytics/engine/predictor.py +5 -0
  13. ultralytics/engine/results.py +8 -3
  14. ultralytics/engine/trainer.py +6 -4
  15. ultralytics/engine/tuner.py +2 -2
  16. ultralytics/engine/validator.py +5 -0
  17. ultralytics/models/sam/predict.py +2 -2
  18. ultralytics/models/yolo/classify/train.py +14 -1
  19. ultralytics/models/yolo/detect/train.py +8 -4
  20. ultralytics/models/yolo/pose/train.py +2 -1
  21. ultralytics/models/yolo/world/train_world.py +21 -1
  22. ultralytics/models/yolo/yoloe/train.py +1 -2
  23. ultralytics/nn/autobackend.py +1 -1
  24. ultralytics/nn/modules/head.py +13 -2
  25. ultralytics/nn/tasks.py +18 -0
  26. ultralytics/solutions/security_alarm.py +1 -1
  27. ultralytics/trackers/byte_tracker.py +7 -7
  28. ultralytics/utils/benchmarks.py +3 -9
  29. ultralytics/utils/callbacks/platform.py +2 -1
  30. ultralytics/utils/callbacks/wb.py +6 -1
  31. ultralytics/utils/dist.py +1 -0
  32. ultralytics/utils/loss.py +18 -9
  33. ultralytics/utils/patches.py +42 -0
  34. ultralytics/utils/tal.py +15 -5
  35. ultralytics/utils/torch_utils.py +1 -1
  36. {ultralytics_opencv_headless-8.4.4.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/METADATA +4 -5
  37. {ultralytics_opencv_headless-8.4.4.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/RECORD +41 -40
  38. {ultralytics_opencv_headless-8.4.4.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/WHEEL +1 -1
  39. {ultralytics_opencv_headless-8.4.4.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/entry_points.txt +0 -0
  40. {ultralytics_opencv_headless-8.4.4.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/licenses/LICENSE +0 -0
  41. {ultralytics_opencv_headless-8.4.4.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/top_level.txt +0 -0
ultralytics/engine/results.py CHANGED
@@ -803,12 +803,17 @@ class Results(SimpleClass, DataExportMixin):
                     "y": (self.masks.xy[i][:, 1] / h).round(decimals).tolist(),
                 }
             if self.keypoints is not None:
-                x, y, visible = self.keypoints[i].data[0].cpu().unbind(dim=1)  # torch Tensor
+                kpt = self.keypoints[i]
+                if kpt.has_visible:
+                    x, y, visible = kpt.data[0].cpu().unbind(dim=1)
+                else:
+                    x, y = kpt.data[0].cpu().unbind(dim=1)
                 result["keypoints"] = {
-                    "x": (x / w).numpy().round(decimals).tolist(),  # decimals named argument required
+                    "x": (x / w).numpy().round(decimals).tolist(),
                     "y": (y / h).numpy().round(decimals).tolist(),
-                    "visible": visible.numpy().round(decimals).tolist(),
                 }
+                if kpt.has_visible:
+                    result["keypoints"]["visible"] = visible.numpy().round(decimals).tolist()
             results.append(result)

         return results
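
With this change, "visible" is emitted only when the keypoints actually carry visibility scores, so consumers of summary() should treat the key as optional. A minimal consumption sketch (the model path and image are placeholders, assuming a pose model loaded through the public YOLO API):

    from ultralytics import YOLO

    model = YOLO("yolo11n-pose.pt")  # placeholder pose checkpoint
    r = model.predict("bus.jpg")[0]
    for obj in r.summary(normalize=True, decimals=3):
        kpts = obj.get("keypoints")
        if kpts:
            visible = kpts.get("visible")  # may be absent after this change
            print(len(kpts["x"]), "keypoints,", "with visibility" if visible else "no visibility scores")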
ultralytics/engine/trainer.py CHANGED
@@ -948,7 +948,7 @@ class BaseTrainer:
             )
             nc = self.data.get("nc", 10)  # number of classes
             lr_fit = round(0.002 * 5 / (4 + nc), 6)  # lr0 fit equation to 6 decimal places
-            name, lr, momentum = ("MuSGD", 0.01 if iterations > 10000 else lr_fit, 0.9)
+            name, lr, momentum = ("MuSGD", 0.01, 0.9) if iterations > 10000 else ("AdamW", lr_fit, 0.9)
             self.args.warmup_bias_lr = 0.0  # no higher than 0.01 for Adam

         use_muon = name == "MuSGD"
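
Note the behavioral change: the old line always picked MuSGD and only varied the learning rate, while the new line falls back to AdamW with the fitted lr for short runs. A standalone restatement of the rule (the helper function is illustrative, not part of the package):

    def pick_optimizer(iterations: int, nc: int = 10) -> tuple[str, float, float]:
        """(name, lr0, momentum) per the auto-optimizer rule above."""
        lr_fit = round(0.002 * 5 / (4 + nc), 6)  # nc=10 -> round(0.01 / 14, 6) = 0.000714
        return ("MuSGD", 0.01, 0.9) if iterations > 10000 else ("AdamW", lr_fit, 0.9)

    print(pick_optimizer(20000))  # ('MuSGD', 0.01, 0.9)
    print(pick_optimizer(500))  # ('AdamW', 0.000714, 0.9)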
@@ -981,16 +981,18 @@ class BaseTrainer:
                 "Request support for addition optimizers at https://github.com/ultralytics/ultralytics."
             )

+        num_params = [len(g[0]), len(g[1]), len(g[2])]  # number of param groups
         g[2] = {"params": g[2], **optim_args, "param_group": "bias"}
         g[0] = {"params": g[0], **optim_args, "weight_decay": decay, "param_group": "weight"}
         g[1] = {"params": g[1], **optim_args, "weight_decay": 0.0, "param_group": "bn"}
-        muon, sgd = (0.1, 1.0) if iterations > 10000 else (0.5, 0.5)  # scale factor for MuSGD
+        muon, sgd = (0.2, 1.0)
         if use_muon:
+            num_params[0] = len(g[3])  # update number of params
             g[3] = {"params": g[3], **optim_args, "weight_decay": decay, "use_muon": True, "param_group": "muon"}
             import re

             # higher lr for certain parameters in MuSGD when funetuning
-            pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg|flow_model")
+            pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg")
             g_ = []  # new param groups
             for x in g:
                 p = x.pop("params")
@@ -1002,6 +1004,6 @@ class BaseTrainer:

         LOGGER.info(
             f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}, momentum={momentum}) with parameter groups "
-            f"{len(g[1]['params'])} weight(decay=0.0), {len(g[0]['params']) if len(g[0]) else len(g[3]['params'])} weight(decay={decay}), {len(g[2]['params'])} bias(decay=0.0)"
+            f"{num_params[1]} weight(decay=0.0), {num_params[0]} weight(decay={decay}), {num_params[2]} bias(decay=0.0)"
         )
         return optimizer
ultralytics/engine/tuner.py CHANGED
@@ -26,7 +26,7 @@ from datetime import datetime
 import numpy as np
 import torch

-from ultralytics.cfg import get_cfg, get_save_dir
+from ultralytics.cfg import CFG_INT_KEYS, get_cfg, get_save_dir
 from ultralytics.utils import DEFAULT_CFG, LOGGER, YAML, callbacks, colorstr, remove_colorstr
 from ultralytics.utils.checks import check_requirements
 from ultralytics.utils.patches import torch_load
@@ -448,7 +448,7 @@ class Tuner:
                 f"{self.prefix}Best fitness model is {best_save_dir}"
             )
             LOGGER.info("\n" + header)
-            data = {k: float(x[best_idx, i + 1]) for i, k in enumerate(self.space.keys())}
+            data = {k: int(v) if k in CFG_INT_KEYS else float(v) for k, v in zip(self.space.keys(), x[best_idx, 1:])}
             YAML.save(
                 self.tune_dir / "best_hyperparameters.yaml",
                 data=data,
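
Casting with CFG_INT_KEYS keeps integer hyperparameters from being written to the best-hyperparameters YAML as floats. A toy restatement with a hypothetical key subset:

    CFG_INT_KEYS = {"close_mosaic", "max_det"}  # illustrative subset, not the real constant
    space_keys = ["lr0", "close_mosaic"]
    best_row = [0.95, 0.00125, 7.8]  # column 0 is fitness, columns 1.. are hyperparameters

    data = {k: int(v) if k in CFG_INT_KEYS else float(v) for k, v in zip(space_keys, best_row[1:])}
    print(data)  # {'lr0': 0.00125, 'close_mosaic': 7}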
ultralytics/engine/validator.py CHANGED
@@ -156,6 +156,11 @@ class BaseValidator:
         if str(self.args.model).endswith(".yaml") and model is None:
             LOGGER.warning("validating an untrained model YAML will result in 0 mAP.")
         callbacks.add_integration_callbacks(self)
+        if hasattr(model, "end2end"):
+            if self.args.end2end is not None:
+                model.end2end = self.args.end2end
+            if model.end2end:
+                model.set_head_attr(max_det=self.args.max_det, agnostic_nms=self.args.agnostic_nms)
         model = AutoBackend(
             model=model or self.args.model,
             device=select_device(self.args.device) if RANK == -1 else torch.device("cuda", RANK),
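
This wires the end2end, max_det and agnostic_nms validation arguments through to the detection head before AutoBackend wraps the model. A hedged usage sketch (assumes an NMS-free checkpoint; the model name is a placeholder):

    from ultralytics import YOLO

    model = YOLO("yolo26n.pt")  # placeholder end-to-end (NMS-free) checkpoint
    metrics = model.val(data="coco8.yaml", end2end=True, max_det=100, agnostic_nms=True)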
ultralytics/models/sam/predict.py CHANGED
@@ -2619,6 +2619,7 @@ class SAM3VideoSemanticPredictor(SAM3SemanticPredictor):
         if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
             orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

+        names = []
         if len(curr_obj_ids) == 0:
             pred_masks, pred_boxes = None, torch.zeros((0, 7), device=self.device)
         else:
@@ -2656,9 +2657,8 @@ class SAM3VideoSemanticPredictor(SAM3SemanticPredictor):
                     background_value=0,
                 ).squeeze(1)
             ) > 0
+            names = self.model.names or dict(enumerate(str(i) for i in range(pred_boxes[:, 6].int().max())))

-        # names = getattr(self.model, "names", [str(i) for i in range(pred_scores.shape[0])])
-        names = dict(enumerate(str(i) for i in range(pred_boxes.shape[0])))
         results = []
         for masks, boxes, orig_img, img_path in zip([pred_masks], [pred_boxes], orig_imgs, self.batch[0]):
             results.append(Results(orig_img, path=img_path, names=names, masks=masks, boxes=boxes))
ultralytics/models/yolo/classify/train.py CHANGED
@@ -11,7 +11,7 @@ from ultralytics.data import ClassificationDataset, build_dataloader
 from ultralytics.engine.trainer import BaseTrainer
 from ultralytics.models import yolo
 from ultralytics.nn.tasks import ClassificationModel
-from ultralytics.utils import DEFAULT_CFG, RANK
+from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
 from ultralytics.utils.plotting import plot_images
 from ultralytics.utils.torch_utils import is_parallel, torch_distributed_zero_first
 
@@ -138,6 +138,19 @@ class ClassificationTrainer(BaseTrainer):
         with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
             dataset = self.build_dataset(dataset_path, mode)

+        # Filter out samples with class indices >= nc (prevents CUDA assertion errors)
+        nc = self.data.get("nc", 0)
+        dataset_nc = len(dataset.base.classes)
+        if nc and dataset_nc > nc:
+            extra_classes = dataset.base.classes[nc:]
+            original_count = len(dataset.samples)
+            dataset.samples = [s for s in dataset.samples if s[1] < nc]
+            skipped = original_count - len(dataset.samples)
+            LOGGER.warning(
+                f"{mode} split has {dataset_nc} classes but model expects {nc}. "
+                f"Skipping {skipped} samples from extra classes: {extra_classes}"
+            )
+
         loader = build_dataloader(dataset, batch_size, self.args.workers, rank=rank, drop_last=self.args.compile)
         # Attach inference transforms
         if mode != "train":
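
An out-of-range class index reaches the loss as an invalid target and trips a device-side CUDA assertion, so the guard drops such samples up front. A toy restatement of the filter:

    # (path, class_index) pairs as produced by an ImageFolder-style dataset (illustrative)
    samples = [("a.jpg", 0), ("b.jpg", 1), ("c.jpg", 2), ("d.jpg", 2)]
    nc = 2  # model expects classes 0..1

    kept = [s for s in samples if s[1] < nc]
    print(f"skipped {len(samples) - len(kept)} samples")  # skipped 2 samples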
ultralytics/models/yolo/detect/train.py CHANGED
@@ -92,7 +92,7 @@ class DetectionTrainer(BaseTrainer):
         with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
             dataset = self.build_dataset(dataset_path, mode, batch_size)
         shuffle = mode == "train"
-        if getattr(dataset, "rect", False) and shuffle:
+        if getattr(dataset, "rect", False) and shuffle and not np.all(dataset.batch_shapes == dataset.batch_shapes[0]):
             LOGGER.warning("'rect=True' is incompatible with DataLoader shuffle, setting shuffle=False")
             shuffle = False
         return build_dataloader(
@@ -117,11 +117,13 @@ class DetectionTrainer(BaseTrainer):
             if isinstance(v, torch.Tensor):
                 batch[k] = v.to(self.device, non_blocking=self.device.type == "cuda")
         batch["img"] = batch["img"].float() / 255
-        multi_scale = self.args.multi_scale
-        if random.random() < multi_scale:
+        if self.args.multi_scale > 0.0:
             imgs = batch["img"]
             sz = (
-                random.randrange(int(self.args.imgsz * 0.5), int(self.args.imgsz * 1 + self.stride))
+                random.randrange(
+                    int(self.args.imgsz * (1.0 - self.args.multi_scale)),
+                    int(self.args.imgsz * (1.0 + self.args.multi_scale) + self.stride),
+                )
                 // self.stride
                 * self.stride
             )  # size
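
multi_scale now defines a symmetric scale range around imgsz instead of acting as a probability. A worked sketch of the size computation (imgsz=640, multi_scale=0.5, stride=32 are example values):

    import random

    imgsz, multi_scale, stride = 640, 0.5, 32
    lo = int(imgsz * (1.0 - multi_scale))  # 320
    hi = int(imgsz * (1.0 + multi_scale) + stride)  # 992
    sz = random.randrange(lo, hi) // stride * stride  # stride-aligned size in [320, 960]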
@@ -143,6 +145,8 @@
         self.model.nc = self.data["nc"]  # attach number of classes to model
         self.model.names = self.data["names"]  # attach class names to model
         self.model.args = self.args  # attach hyperparameters to model
+        if getattr(self.model, "end2end"):
+            self.model.set_head_attr(max_det=self.args.max_det)
         # TODO: self.model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc

     def get_model(self, cfg: str | None = None, weights: str | None = None, verbose: bool = True):
ultralytics/models/yolo/pose/train.py CHANGED
@@ -9,6 +9,7 @@ from typing import Any
 from ultralytics.models import yolo
 from ultralytics.nn.tasks import PoseModel
 from ultralytics.utils import DEFAULT_CFG
+from ultralytics.utils.torch_utils import unwrap_model


 class PoseTrainer(yolo.detect.DetectionTrainer):
@@ -91,7 +92,7 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
     def get_validator(self):
         """Return an instance of the PoseValidator class for validation."""
         self.loss_names = "box_loss", "pose_loss", "kobj_loss", "cls_loss", "dfl_loss"
-        if getattr(self.model.model[-1], "flow_model", None) is not None:
+        if getattr(unwrap_model(self.model).model[-1], "flow_model", None) is not None:
             self.loss_names += ("rle_loss",)
         return yolo.pose.PoseValidator(
             self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
ultralytics/models/yolo/world/train_world.py CHANGED
@@ -1,11 +1,14 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

+from __future__ import annotations
+
 from pathlib import Path

 from ultralytics.data import YOLOConcatDataset, build_grounding, build_yolo_dataset
 from ultralytics.data.utils import check_det_dataset
 from ultralytics.models.yolo.world import WorldTrainer
 from ultralytics.utils import DATASETS_DIR, DEFAULT_CFG, LOGGER
+from ultralytics.utils.checks import check_file
 from ultralytics.utils.torch_utils import unwrap_model


@@ -100,6 +103,23 @@ class WorldTrainerFromScratch(WorldTrainer):
         self.set_text_embeddings(datasets, batch)  # cache text embeddings to accelerate training
         return YOLOConcatDataset(datasets) if len(datasets) > 1 else datasets[0]

+    @staticmethod
+    def check_data_config(data: dict | str | Path) -> dict:
+        """Check and load the data configuration from a YAML file or dictionary.
+
+        Args:
+            data (dict | str | Path): Data configuration as a dictionary or path to a YAML file.
+
+        Returns:
+            (dict): Data configuration dictionary loaded from YAML file or passed directly.
+        """
+        # If string, load from YAML file
+        if not isinstance(data, dict):
+            from ultralytics.utils import YAML
+
+            return YAML.load(check_file(data))
+        return data
+
     def get_dataset(self):
         """Get train and validation paths from data dictionary.

@@ -114,7 +134,7 @@
         AssertionError: If train or validation datasets are not found, or if validation has multiple datasets.
         """
         final_data = {}
-        data_yaml = self.args.data
+        self.args.data = data_yaml = self.check_data_config(self.args.data)
         assert data_yaml.get("train", False), "train dataset not found"  # object365.yaml
         assert data_yaml.get("val", False), "validation dataset not found"  # lvis.yaml
         data = {k: [check_det_dataset(d) for d in v.get("yolo_data", [])] for k, v in data_yaml.items()}
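
The new static method lets get_dataset accept either an in-memory dict or a YAML path. A usage sketch (the YAML filename is a placeholder):

    from ultralytics.models.yolo.world.train_world import WorldTrainerFromScratch

    cfg = WorldTrainerFromScratch.check_data_config("my_world_data.yaml")  # placeholder path
    cfg = WorldTrainerFromScratch.check_data_config(cfg)  # dicts pass through unchanged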
ultralytics/models/yolo/yoloe/train.py CHANGED
@@ -196,7 +196,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
         Returns:
             (dict): Dictionary mapping text samples to their embeddings.
         """
-        model = "mobileclip:blt"
+        model = unwrap_model(self.model).text_model
         cache_path = cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"
         if cache_path.exists():
             LOGGER.info(f"Reading existed cache from '{cache_path}'")
@@ -204,7 +204,6 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
             if sorted(txt_map.keys()) == sorted(texts):
                 return txt_map
         LOGGER.info(f"Caching text embeddings to '{cache_path}'")
-        assert self.model is not None
         txt_feats = unwrap_model(self.model).get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
         txt_map = dict(zip(texts, txt_feats.squeeze(0)))
         torch.save(txt_map, cache_path)
ultralytics/nn/autobackend.py CHANGED
@@ -648,7 +648,7 @@ class AutoBackend(nn.Module):
         for k, v in metadata.items():
             if k in {"stride", "batch", "channels"}:
                 metadata[k] = int(v)
-            elif k in {"imgsz", "names", "kpt_shape", "kpt_names", "args"} and isinstance(v, str):
+            elif k in {"imgsz", "names", "kpt_shape", "kpt_names", "args", "end2end"} and isinstance(v, str):
                 metadata[k] = ast.literal_eval(v)
         stride = metadata["stride"]
         task = metadata["task"]
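
Exported formats serialize metadata values as strings, so the new "end2end" key must be parsed back into a Python literal like the others. A small stdlib sketch with an illustrative metadata dict:

    import ast

    metadata = {"stride": "32", "imgsz": "[640, 640]", "end2end": "True"}  # as read from an export
    metadata["stride"] = int(metadata["stride"])
    for k in ("imgsz", "end2end"):
        metadata[k] = ast.literal_eval(metadata[k])
    print(metadata)  # {'stride': 32, 'imgsz': [640, 640], 'end2end': True}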
ultralytics/nn/modules/head.py CHANGED
@@ -69,6 +69,7 @@ class Detect(nn.Module):
     export = False  # export mode
     format = None  # export format
     max_det = 300  # max_det
+    agnostic_nms = False
     shape = None
     anchors = torch.empty(0)  # init
     strides = torch.empty(0)  # init
@@ -125,7 +126,12 @@
     @property
     def end2end(self):
         """Checks if the model has one2one for v5/v8/v9/11 backward compatibility."""
-        return hasattr(self, "one2one")
+        return getattr(self, "_end2end", True) and hasattr(self, "one2one")
+
+    @end2end.setter
+    def end2end(self, value):
+        """Override the end-to-end detection mode."""
+        self._end2end = value

     def forward_head(
         self, x: list[torch.Tensor], box_head: torch.nn.Module = None, cls_head: torch.nn.Module = None
@@ -230,6 +236,11 @@
         # Use max_det directly during export for TensorRT compatibility (requires k to be constant),
         # otherwise use min(max_det, anchors) for safety with small inputs during Python inference
         k = max_det if self.export else min(max_det, anchors)
+        if self.agnostic_nms:
+            scores, labels = scores.max(dim=-1, keepdim=True)
+            scores, indices = scores.topk(k, dim=1)
+            labels = labels.gather(1, indices)
+            return scores, labels, indices
         ori_index = scores.max(dim=-1)[0].topk(k)[1].unsqueeze(-1)
         scores = scores.gather(dim=1, index=ori_index.repeat(1, 1, nc))
         scores, index = scores.flatten(1).topk(k)
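
The agnostic branch keeps the top-k anchors ranked by their best class score regardless of class, which is what class-agnostic selection means in an NMS-free head. A self-contained torch sketch with random scores:

    import torch

    scores = torch.rand(2, 100, 80)  # (batch, anchors, classes)
    k = 10
    best, labels = scores.max(dim=-1, keepdim=True)  # best class score and id per anchor
    topk, idx = best.topk(k, dim=1)  # keep the k highest-scoring anchors overall
    labels = labels.gather(1, idx)  # class ids of the kept anchors
    print(topk.shape, labels.shape)  # torch.Size([2, 10, 1]) torch.Size([2, 10, 1])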
@@ -1098,7 +1109,7 @@ class YOLOEDetect(Detect):
         boxes, scores, index = [], [], []
         bs = x[0].shape[0]
         cv2 = self.cv2 if not self.end2end else self.one2one_cv2
-        cv3 = self.cv3 if not self.end2end else self.one2one_cv2
+        cv3 = self.cv3 if not self.end2end else self.one2one_cv3
         for i in range(self.nl):
             cls_feat = cv3[i](x[i])
             loc_feat = cv2[i](x[i])
ultralytics/nn/tasks.py CHANGED
@@ -425,6 +425,24 @@ class DetectionModel(BaseModel):
         """Return whether the model uses end-to-end NMS-free detection."""
         return getattr(self.model[-1], "end2end", False)

+    @end2end.setter
+    def end2end(self, value):
+        """Override the end-to-end detection mode."""
+        self.set_head_attr(end2end=value)
+
+    def set_head_attr(self, **kwargs):
+        """Set attributes of the model head (last layer).
+
+        Args:
+            **kwargs: Arbitrary keyword arguments representing attributes to set.
+        """
+        head = self.model[-1]
+        for k, v in kwargs.items():
+            if not hasattr(head, k):
+                LOGGER.warning(f"Head has no attribute '{k}'.")
+                continue
+            setattr(head, k, v)
+
     def _predict_augment(self, x):
         """Perform augmentations on input image x and return augmented inference and train outputs.
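
Together with the Detect.end2end setter above, this gives one entry point for pushing head attributes from trainer/validator code. A hedged usage sketch (the checkpoint name is a placeholder):

    from ultralytics import YOLO

    det_model = YOLO("yolo26n.pt").model  # placeholder checkpoint; det_model is a DetectionModel
    det_model.end2end = False  # routed through set_head_attr(end2end=False)
    det_model.set_head_attr(max_det=100, agnostic_nms=True)
    det_model.set_head_attr(bogus=1)  # only warns: "Head has no attribute 'bogus'."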
 
ultralytics/solutions/security_alarm.py CHANGED
@@ -62,7 +62,7 @@ class SecurityAlarm(BaseSolution):
         """
         import smtplib

-        self.server = smtplib.SMTP("smtp.gmail.com: 587")
+        self.server = smtplib.SMTP("smtp.gmail.com", 587)
         self.server.starttls()
         self.server.login(from_email, password)
         self.to_email = to_email
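
smtplib does accept a single "host:port" string, but the embedded space in "smtp.gmail.com: 587" relies on lenient port parsing; passing host and port as separate arguments is the documented, unambiguous form. The corrected stdlib pattern (credentials are placeholders):

    import smtplib

    server = smtplib.SMTP("smtp.gmail.com", 587)  # host and port as separate parameters
    server.starttls()  # upgrade the connection to TLS before authenticating
    server.login("sender@example.com", "app-password")  # placeholder credentials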
ultralytics/trackers/byte_tracker.py CHANGED
@@ -270,9 +270,9 @@ class BYTETracker:
             args (Namespace): Command-line arguments containing tracking parameters.
             frame_rate (int): Frame rate of the video sequence.
         """
-        self.tracked_stracks = []  # type: list[STrack]
-        self.lost_stracks = []  # type: list[STrack]
-        self.removed_stracks = []  # type: list[STrack]
+        self.tracked_stracks: list[STrack] = []
+        self.lost_stracks: list[STrack] = []
+        self.removed_stracks: list[STrack] = []

         self.frame_id = 0
         self.args = args
@@ -304,7 +304,7 @@ class BYTETracker:
         detections = self.init_track(results, feats_keep)
         # Add newly detected tracklets to tracked_stracks
         unconfirmed = []
-        tracked_stracks = []  # type: list[STrack]
+        tracked_stracks: list[STrack] = []
         for track in self.tracked_stracks:
             if not track.is_activated:
                 unconfirmed.append(track)
@@ -423,9 +423,9 @@ class BYTETracker:

     def reset(self):
         """Reset the tracker by clearing all tracked, lost, and removed tracks and reinitializing the Kalman filter."""
-        self.tracked_stracks = []  # type: list[STrack]
-        self.lost_stracks = []  # type: list[STrack]
-        self.removed_stracks = []  # type: list[STrack]
+        self.tracked_stracks: list[STrack] = []
+        self.lost_stracks: list[STrack] = []
+        self.removed_stracks: list[STrack] = []
         self.frame_id = 0
         self.kalman_filter = self.get_kalmanfilter()
         self.reset_id()
ultralytics/utils/benchmarks.py CHANGED
@@ -36,6 +36,7 @@ import platform
 import re
 import shutil
 import time
+from copy import deepcopy
 from pathlib import Path

 import numpy as np
@@ -101,7 +102,6 @@ def benchmark(
     device = select_device(device, verbose=False)
     if isinstance(model, (str, Path)):
         model = YOLO(model)
-    is_end2end = getattr(model.model.model[-1], "end2end", False)
     data = data or TASK2DATA[model.task]  # task to dataset, i.e. coco8.yaml for task=detect
     key = TASK2METRIC[model.task]  # task to metric, i.e. metrics/mAP50-95(B) for task=detect

@@ -135,14 +135,12 @@ def benchmark(
            if format == "paddle":
                assert not isinstance(model, YOLOWorld), "YOLOWorldv2 Paddle exports not supported yet"
                assert model.task != "obb", "Paddle OBB bug https://github.com/PaddlePaddle/Paddle/issues/72024"
-                assert not is_end2end, "End-to-end models not supported by PaddlePaddle yet"
                assert (LINUX and not IS_JETSON) or MACOS, "Windows and Jetson Paddle exports not supported yet"
            if format == "mnn":
                assert not isinstance(model, YOLOWorld), "YOLOWorldv2 MNN exports not supported yet"
            if format == "ncnn":
                assert not isinstance(model, YOLOWorld), "YOLOWorldv2 NCNN exports not supported yet"
            if format == "imx":
-                assert not is_end2end
                assert not isinstance(model, YOLOWorld), "YOLOWorldv2 IMX exports not supported"
                assert model.task in {"detect", "classify", "pose"}, (
                    "IMX export is only supported for detection, classification and pose estimation tasks"
@@ -150,25 +148,21 @@ def benchmark(
                assert "C2f" in model.__str__(), "IMX only supported for YOLOv8n and YOLO11n"
            if format == "rknn":
                assert not isinstance(model, YOLOWorld), "YOLOWorldv2 RKNN exports not supported yet"
-                assert not is_end2end, "End-to-end models not supported by RKNN yet"
                assert LINUX, "RKNN only supported on Linux"
                assert not is_rockchip(), "RKNN Inference only supported on Rockchip devices"
            if format == "executorch":
                assert not isinstance(model, YOLOWorld), "YOLOWorldv2 ExecuTorch exports not supported yet"
-                assert not is_end2end, "End-to-end models not supported by ExecuTorch yet"
            if "cpu" in device.type:
                assert cpu, "inference not supported on CPU"
            if "cuda" in device.type:
                assert gpu, "inference not supported on GPU"
-            if format == "ncnn":
-                assert not is_end2end, "End-to-end torch.topk operation is not supported for NCNN prediction yet"

            # Export
            if format == "-":
                filename = model.pt_path or model.ckpt_path or model.model_name
-                exported_model = model  # PyTorch format
+                exported_model = deepcopy(model)  # PyTorch format
            else:
-                filename = model.export(
+                filename = deepcopy(model).export(
                    imgsz=imgsz, format=format, half=half, int8=int8, data=data, device=device, verbose=False, **kwargs
                )
                exported_model = YOLO(filename, task=model.task)
ultralytics/utils/callbacks/platform.py CHANGED
@@ -89,7 +89,8 @@ def resolve_platform_uri(uri, hard=True):
         raise ValueError(f"Invalid platform URI: {uri}. Use ul://user/datasets/name or ul://user/project/model")

     try:
-        r = requests.head(url, headers=headers, allow_redirects=False, timeout=30)
+        timeout = 3600 if "/datasets/" in url else 90  # NDJSON generation can be slow for large datasets
+        r = requests.head(url, headers=headers, allow_redirects=False, timeout=timeout)

         # Handle redirect responses (301, 302, 303, 307, 308)
         if 300 <= r.status_code < 400 and "location" in r.headers:
ultralytics/utils/callbacks/wb.py CHANGED
@@ -128,10 +128,15 @@ def _log_plots(plots, step):
 def on_pretrain_routine_start(trainer):
     """Initialize and start wandb project if module is present."""
     if not wb.run:
+        from datetime import datetime
+
+        name = str(trainer.args.name).replace("/", "-").replace(" ", "_")
         wb.init(
             project=str(trainer.args.project).replace("/", "-") if trainer.args.project else "Ultralytics",
-            name=str(trainer.args.name).replace("/", "-"),
+            name=name,
             config=vars(trainer.args),
+            id=f"{name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}",  # add unique id
+            dir=str(trainer.save_dir),
         )

 
ultralytics/utils/dist.py CHANGED
@@ -49,6 +49,7 @@ def generate_ddp_file(trainer):

     content = f"""
# Ultralytics Multi-GPU training temp file (should be automatically deleted after use)
+from pathlib import Path, PosixPath  # For model arguments stored as Path instead of str
overrides = {vars(trainer.args)}

if __name__ == "__main__":
ultralytics/utils/loss.py CHANGED
@@ -512,9 +512,19 @@ class v8SegmentationLoss(v8DetectionLoss):
             )
             if pred_semseg is not None:
                 sem_masks = batch["sem_masks"].to(self.device)  # NxHxW
-                mask_zero = sem_masks == 0  # NxHxW
                 sem_masks = F.one_hot(sem_masks.long(), num_classes=self.nc).permute(0, 3, 1, 2).float()  # NxCxHxW
-                sem_masks[mask_zero.unsqueeze(1).expand_as(sem_masks)] = 0
+
+                if self.overlap:
+                    mask_zero = masks == 0  # NxHxW
+                    sem_masks[mask_zero.unsqueeze(1).expand_as(sem_masks)] = 0
+                else:
+                    batch_idx = batch["batch_idx"].view(-1)  # [total_instances]
+                    for i in range(batch_size):
+                        instance_mask_i = masks[batch_idx == i]  # [num_instances_i, H, W]
+                        if len(instance_mask_i) == 0:
+                            continue
+                        sem_masks[i, :, instance_mask_i.sum(dim=0) == 0] = 0
+
                 loss[4] = self.bcedice_loss(pred_semseg, sem_masks)
                 loss[4] *= self.hyp.box  # seg gain
 
@@ -798,7 +808,7 @@ class PoseLoss26(v8PoseLoss):
         loss[0], loss[3], loss[4] = det_loss[0], det_loss[1], det_loss[2]

         batch_size = pred_kpts.shape[0]
-        imgsz = torch.tensor(batch["resized_shape"][0], device=self.device, dtype=pred_kpts.dtype)  # image size (h,w)
+        imgsz = torch.tensor(preds["feats"][0].shape[2:], device=self.device, dtype=pred_kpts.dtype) * self.stride[0]

         pred_kpts = pred_kpts.view(batch_size, -1, *self.kpt_shape)  # (b, h*w, 17, 3)
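Deriving the target image size from the feature map shape times the stride keeps the loss consistent with what the head actually predicted, even when the batch was resized after loading (e.g. multi-scale training). A worked sketch with example shapes:

    import torch

    feats0 = torch.zeros(8, 64, 80, 80)  # (b, c, h, w) of the stride-8 feature level
    imgsz = torch.tensor(feats0.shape[2:], dtype=torch.float32) * 8.0
    print(imgsz)  # tensor([640., 640.]) -> the (h, w) the predictions are scaled against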
 
@@ -992,7 +1002,7 @@ class v8OBBLoss(v8DetectionLoss):
         batch_size = pred_angle.shape[0]  # batch size, number of masks, mask height, mask width

         dtype = pred_scores.dtype
-        imgsz = torch.tensor(batch["resized_shape"][0], device=self.device, dtype=dtype)  # image size (h,w)
+        imgsz = torch.tensor(preds["feats"][0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]

         # targets
         try:
@@ -1095,7 +1105,7 @@ class v8OBBLoss(v8DetectionLoss):
         pred_theta = pred_bboxes[..., 4]
         target_theta = target_bboxes[..., 4]

-        log_ar = torch.log(w_gt / h_gt)
+        log_ar = torch.log((w_gt + 1e-9) / (h_gt + 1e-9))
         scale_weight = torch.exp(-(log_ar**2) / (lambda_val**2))

         delta_theta = pred_theta - target_theta
@@ -1164,9 +1174,9 @@ class E2ELoss:
 class TVPDetectLoss:
     """Criterion class for computing training losses for text-visual prompt detection."""

-    def __init__(self, model, tal_topk=10):
+    def __init__(self, model, tal_topk=10, tal_topk2: int | None = None):
         """Initialize TVPDetectLoss with task-prompt and visual-prompt criteria using the provided model."""
-        self.vp_criterion = v8DetectionLoss(model, tal_topk)
+        self.vp_criterion = v8DetectionLoss(model, tal_topk, tal_topk2)
         # NOTE: store following info as it's changeable in __call__
         self.hyp = self.vp_criterion.hyp
         self.ori_nc = self.vp_criterion.nc
@@ -1196,8 +1206,7 @@ class TVPDetectLoss:

     def _get_vp_features(self, preds: dict[str, torch.Tensor]) -> list[torch.Tensor]:
         """Extract visual-prompt features from the model output."""
-        # NOTE: remove empty placeholder
-        scores = preds["scores"][:, self.ori_nc :, :]
+        scores = preds["scores"]
         vnc = scores.shape[1]

         self.vp_criterion.nc = vnc
ultralytics/utils/patches.py CHANGED
@@ -40,9 +40,51 @@ def imread(filename: str, flags: int = cv2.IMREAD_COLOR) -> np.ndarray | None:
             return None
     else:
         im = cv2.imdecode(file_bytes, flags)
+        # Fallback for formats OpenCV imdecode may not support (AVIF, HEIC)
+        if im is None and filename.lower().endswith((".avif", ".heic")):
+            im = _imread_pil(filename, flags)
     return im[..., None] if im is not None and im.ndim == 2 else im  # Always ensure 3 dimensions


+_pil_plugins_registered = False
+
+
+def _imread_pil(filename: str, flags: int = cv2.IMREAD_COLOR) -> np.ndarray | None:
+    """Read an image using PIL as fallback for formats not supported by OpenCV.
+
+    Args:
+        filename (str): Path to the file to read.
+        flags (int, optional): OpenCV imread flags (used to determine grayscale conversion).
+
+    Returns:
+        (np.ndarray | None): The read image array in BGR format, or None if reading fails.
+    """
+    global _pil_plugins_registered
+    try:
+        from PIL import Image
+
+        # Register HEIF/AVIF plugins once
+        if not _pil_plugins_registered:
+            try:
+                import pillow_heif
+
+                pillow_heif.register_heif_opener()
+            except ImportError:
+                pass
+            try:
+                import pillow_avif  # noqa: F401
+            except ImportError:
+                pass
+            _pil_plugins_registered = True
+
+        with Image.open(filename) as img:
+            if flags == cv2.IMREAD_GRAYSCALE:
+                return np.asarray(img.convert("L"))
+            return cv2.cvtColor(np.asarray(img.convert("RGB")), cv2.COLOR_RGB2BGR)
+    except Exception:
+        return None
+
+
 def imwrite(filename: str, img: np.ndarray, params: list[int] | None = None) -> bool:
     """Write an image to a file with multilanguage filename support.
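
With the fallback in place, AVIF/HEIC files load through the patched imread whenever a PIL plugin (pillow-heif or pillow-avif-plugin) is installed. A hedged usage sketch (the file path is a placeholder):

    import cv2

    from ultralytics.utils.patches import imread

    im = imread("photo.heic")  # placeholder path; needs pillow-heif installed for HEIC
    if im is not None:
        print(im.shape)  # (H, W, 3) BGR array, same convention as cv2.imread
    gray = imread("photo.heic", flags=cv2.IMREAD_GRAYSCALE)  # (H, W, 1) after dim expansion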