PyPI - ultralytics - Versions diffs - 8.3.142__py3-none-any.whl → 8.3.144__py3-none-any.whl - Mend

ultralytics-8.3.142-py3-none-any.whl → ultralytics-8.3.144-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (148)

tests/conftest.py +7 -24
tests/test_cli.py +1 -1
tests/test_cuda.py +7 -2
tests/test_engine.py +7 -8
tests/test_exports.py +16 -16
tests/test_integrations.py +1 -1
tests/test_solutions.py +12 -12
ultralytics/__init__.py +1 -1
ultralytics/cfg/__init__.py +16 -13
ultralytics/data/annotator.py +6 -5
ultralytics/data/augment.py +127 -126
ultralytics/data/base.py +54 -51
ultralytics/data/build.py +47 -23
ultralytics/data/converter.py +47 -43
ultralytics/data/dataset.py +51 -50
ultralytics/data/loaders.py +77 -44
ultralytics/data/split.py +22 -9
ultralytics/data/split_dota.py +63 -39
ultralytics/data/utils.py +59 -39
ultralytics/engine/exporter.py +79 -27
ultralytics/engine/model.py +39 -39
ultralytics/engine/predictor.py +37 -28
ultralytics/engine/results.py +187 -157
ultralytics/engine/trainer.py +36 -19
ultralytics/engine/tuner.py +12 -9
ultralytics/engine/validator.py +7 -9
ultralytics/hub/__init__.py +11 -13
ultralytics/hub/auth.py +22 -2
ultralytics/hub/google/__init__.py +19 -19
ultralytics/hub/session.py +37 -51
ultralytics/hub/utils.py +19 -5
ultralytics/models/fastsam/model.py +30 -12
ultralytics/models/fastsam/predict.py +5 -6
ultralytics/models/fastsam/utils.py +3 -3
ultralytics/models/fastsam/val.py +10 -6
ultralytics/models/nas/model.py +9 -5
ultralytics/models/nas/predict.py +6 -6
ultralytics/models/nas/val.py +3 -3
ultralytics/models/rtdetr/model.py +7 -6
ultralytics/models/rtdetr/predict.py +14 -7
ultralytics/models/rtdetr/train.py +10 -4
ultralytics/models/rtdetr/val.py +36 -9
ultralytics/models/sam/amg.py +30 -12
ultralytics/models/sam/build.py +22 -22
ultralytics/models/sam/model.py +10 -9
ultralytics/models/sam/modules/blocks.py +76 -80
ultralytics/models/sam/modules/decoders.py +6 -8
ultralytics/models/sam/modules/encoders.py +23 -26
ultralytics/models/sam/modules/memory_attention.py +13 -1
ultralytics/models/sam/modules/sam.py +57 -26
ultralytics/models/sam/modules/tiny_encoder.py +232 -237
ultralytics/models/sam/modules/transformer.py +13 -13
ultralytics/models/sam/modules/utils.py +11 -19
ultralytics/models/sam/predict.py +114 -101
ultralytics/models/utils/loss.py +98 -77
ultralytics/models/utils/ops.py +116 -67
ultralytics/models/yolo/classify/predict.py +5 -5
ultralytics/models/yolo/classify/train.py +32 -28
ultralytics/models/yolo/classify/val.py +7 -8
ultralytics/models/yolo/detect/predict.py +1 -0
ultralytics/models/yolo/detect/train.py +15 -14
ultralytics/models/yolo/detect/val.py +37 -36
ultralytics/models/yolo/model.py +106 -23
ultralytics/models/yolo/obb/predict.py +3 -4
ultralytics/models/yolo/obb/train.py +14 -6
ultralytics/models/yolo/obb/val.py +29 -23
ultralytics/models/yolo/pose/predict.py +9 -8
ultralytics/models/yolo/pose/train.py +24 -16
ultralytics/models/yolo/pose/val.py +44 -26
ultralytics/models/yolo/segment/predict.py +5 -5
ultralytics/models/yolo/segment/train.py +11 -7
ultralytics/models/yolo/segment/val.py +2 -2
ultralytics/models/yolo/world/train.py +33 -23
ultralytics/models/yolo/world/train_world.py +11 -3
ultralytics/models/yolo/yoloe/predict.py +11 -11
ultralytics/models/yolo/yoloe/train.py +73 -21
ultralytics/models/yolo/yoloe/train_seg.py +10 -7
ultralytics/models/yolo/yoloe/val.py +42 -18
ultralytics/nn/autobackend.py +59 -15
ultralytics/nn/modules/__init__.py +4 -4
ultralytics/nn/modules/activation.py +4 -1
ultralytics/nn/modules/block.py +178 -111
ultralytics/nn/modules/conv.py +6 -5
ultralytics/nn/modules/head.py +469 -121
ultralytics/nn/modules/transformer.py +147 -58
ultralytics/nn/tasks.py +227 -20
ultralytics/nn/text_model.py +30 -33
ultralytics/solutions/ai_gym.py +1 -1
ultralytics/solutions/analytics.py +7 -4
ultralytics/solutions/config.py +10 -10
ultralytics/solutions/distance_calculation.py +11 -10
ultralytics/solutions/heatmap.py +1 -1
ultralytics/solutions/instance_segmentation.py +6 -3
ultralytics/solutions/object_blurrer.py +3 -3
ultralytics/solutions/object_counter.py +16 -8
ultralytics/solutions/object_cropper.py +12 -5
ultralytics/solutions/parking_management.py +29 -28
ultralytics/solutions/queue_management.py +6 -6
ultralytics/solutions/region_counter.py +10 -3
ultralytics/solutions/security_alarm.py +3 -3
ultralytics/solutions/similarity_search.py +85 -24
ultralytics/solutions/solutions.py +215 -85
ultralytics/solutions/speed_estimation.py +28 -22
ultralytics/solutions/streamlit_inference.py +17 -12
ultralytics/solutions/trackzone.py +4 -4
ultralytics/trackers/basetrack.py +16 -23
ultralytics/trackers/bot_sort.py +30 -20
ultralytics/trackers/byte_tracker.py +70 -64
ultralytics/trackers/track.py +4 -8
ultralytics/trackers/utils/gmc.py +31 -58
ultralytics/trackers/utils/kalman_filter.py +37 -37
ultralytics/trackers/utils/matching.py +1 -1
ultralytics/utils/__init__.py +105 -89
ultralytics/utils/autobatch.py +16 -3
ultralytics/utils/autodevice.py +54 -24
ultralytics/utils/benchmarks.py +42 -28
ultralytics/utils/callbacks/base.py +3 -3
ultralytics/utils/callbacks/clearml.py +9 -9
ultralytics/utils/callbacks/comet.py +67 -25
ultralytics/utils/callbacks/dvc.py +7 -10
ultralytics/utils/callbacks/mlflow.py +2 -5
ultralytics/utils/callbacks/neptune.py +7 -13
ultralytics/utils/callbacks/raytune.py +1 -1
ultralytics/utils/callbacks/tensorboard.py +5 -6
ultralytics/utils/callbacks/wb.py +14 -14
ultralytics/utils/checks.py +14 -13
ultralytics/utils/dist.py +5 -5
ultralytics/utils/downloads.py +94 -67
ultralytics/utils/errors.py +5 -5
ultralytics/utils/export.py +61 -47
ultralytics/utils/files.py +23 -22
ultralytics/utils/instance.py +48 -52
ultralytics/utils/loss.py +78 -40
ultralytics/utils/metrics.py +186 -130
ultralytics/utils/ops.py +186 -190
ultralytics/utils/patches.py +15 -17
ultralytics/utils/plotting.py +71 -27
ultralytics/utils/tal.py +21 -15
ultralytics/utils/torch_utils.py +53 -50
ultralytics/utils/triton.py +5 -4
ultralytics/utils/tuner.py +5 -5
{ultralytics-8.3.142.dist-info → ultralytics-8.3.144.dist-info}/METADATA +1 -1
ultralytics-8.3.144.dist-info/RECORD +272 -0
ultralytics-8.3.142.dist-info/RECORD +0 -272
{ultralytics-8.3.142.dist-info → ultralytics-8.3.144.dist-info}/WHEEL +0 -0
{ultralytics-8.3.142.dist-info → ultralytics-8.3.144.dist-info}/entry_points.txt +0 -0
{ultralytics-8.3.142.dist-info → ultralytics-8.3.144.dist-info}/licenses/LICENSE +0 -0
{ultralytics-8.3.142.dist-info → ultralytics-8.3.144.dist-info}/top_level.txt +0 -0

ultralytics/utils/ops.py CHANGED

@@ -4,6 +4,7 @@ import contextlib
 import math
 import re
 import time
+from typing import Optional
 import cv2
 import numpy as np
@@ -16,27 +17,35 @@ from ultralytics.utils.metrics import batch_probiou
 class Profile(contextlib.ContextDecorator):
     """
-    YOLOv8 Profile class. Use as a decorator with @Profile() or as a context manager with 'with Profile():'.
+    Ultralytics Profile class for timing code execution.
+    Use as a decorator with @Profile() or as a context manager with 'with Profile():'. Provides accurate timing
+    measurements with CUDA synchronization support for GPU operations.
     Attributes:
-        t (float): Accumulated time.
+        t (float): Accumulated time in seconds.
         device (torch.device): Device used for model inference.
-        cuda (bool): Whether CUDA is being used.
+        cuda (bool): Whether CUDA is being used for timing synchronization.
     Examples:
-        >>> from ultralytics.utils.ops import Profile
+        Use as a context manager to time code execution
         >>> with Profile(device=device) as dt:
         ...     pass  # slow operation here
         >>> print(dt)  # prints "Elapsed time is 9.5367431640625e-07 s"
+        Use as a decorator to time function execution
+        >>> @Profile()
+        ... def slow_function():
+        ...     time.sleep(0.1)
     """
-    def __init__(self, t=0.0, device: torch.device = None):
+    def __init__(self, t: float = 0.0, device: Optional[torch.device] = None):
         """
         Initialize the Profile class.
         Args:
-            t (float): Initial time.
-            device (torch.device): Device used for model inference.
+            t (float): Initial accumulated time in seconds.
+            device (torch.device, optional): Device used for model inference to enable CUDA synchronization.
         """
         self.t = t
         self.device = device
@@ -53,30 +62,33 @@ class Profile(contextlib.ContextDecorator):
         self.t += self.dt  # accumulate dt
     def __str__(self):
-        """Returns a human-readable string representing the accumulated elapsed time in the profiler."""
+        """Return a human-readable string representing the accumulated elapsed time."""
         return f"Elapsed time is {self.t} s"
     def time(self):
-        """Get current time."""
+        """Get current time with CUDA synchronization if applicable."""
         if self.cuda:
             torch.cuda.synchronize(self.device)
         return time.perf_counter()
-def segment2box(segment, width=640, height=640):
+def segment2box(segment, width: int = 640, height: int = 640):
     """
-    Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy).
+    Convert segment coordinates to bounding box coordinates.
+    Converts a single segment label to a box label by finding the minimum and maximum x and y coordinates.
+    Applies inside-image constraint and clips coordinates when necessary.
     Args:
-        segment (torch.Tensor): The segment label.
-        width (int): The width of the image.
-        height (int): The height of the image.
+        segment (torch.Tensor): Segment coordinates in format (N, 2) where N is number of points.
+        width (int): Width of the image in pixels.
+        height (int): Height of the image in pixels.
     Returns:
-        (np.ndarray): The minimum and maximum x and y values of the segment.
+        (np.ndarray): Bounding box coordinates in xyxy format [x1, y1, x2, y2].
     """
     x, y = segment.T  # segment xy
-    # any 3 out of 4 sides are outside the image, clip coordinates first, https://github.com/ultralytics/ultralytics/pull/18294
+    # Clip coordinates if 3 out of 4 sides are outside the image
     if np.array([x.min() < 0, y.min() < 0, x.max() > width, y.max() > height]).sum() >= 3:
         x = x.clip(0, width)
         y = y.clip(0, height)
@@ -90,22 +102,23 @@ def segment2box(segment, width=640, height=640):
     )  # xyxy
-def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True, xywh=False):
+def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding: bool = True, xywh: bool = False):
     """
-    Rescale bounding boxes from img1_shape to img0_shape.
+    Rescale bounding boxes from one image shape to another.
+    Rescales bounding boxes from img1_shape to img0_shape, accounting for padding and aspect ratio changes.
+    Supports both xyxy and xywh box formats.
     Args:
-        img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width).
-        boxes (torch.Tensor): The bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2).
-        img0_shape (tuple): The shape of the target image, in the format of (height, width).
-        ratio_pad (tuple): A tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be
-            calculated based on the size difference between the two images.
-        padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
-            rescaling.
-        xywh (bool): The box format is xywh or not.
+        img1_shape (tuple): Shape of the source image (height, width).
+        boxes (torch.Tensor): Bounding boxes to rescale in format (N, 4).
+        img0_shape (tuple): Shape of the target image (height, width).
+        ratio_pad (tuple, optional): Tuple of (ratio, pad) for scaling. If None, calculated from image shapes.
+        padding (bool): Whether boxes are based on YOLO-style augmented images with padding.
+        xywh (bool): Whether box format is xywh (True) or xyxy (False).
     Returns:
-        (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2).
+        (torch.Tensor): Rescaled bounding boxes in the same format as input.
     """
     if ratio_pad is None:  # calculate from img0_shape
         gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
@@ -127,9 +140,9 @@ def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True, xyw
     return clip_boxes(boxes, img0_shape)
-def make_divisible(x, divisor):
+def make_divisible(x: int, divisor):
     """
-    Returns the nearest number that is divisible by the given divisor.
+    Return the nearest number that is divisible by the given divisor.
     Args:
         x (int): The number to make divisible.
@@ -143,16 +156,15 @@ def make_divisible(x, divisor):
     return math.ceil(x / divisor) * divisor
-def nms_rotated(boxes, scores, threshold=0.45, use_triu=True):
+def nms_rotated(boxes, scores, threshold: float = 0.45, use_triu: bool = True):
     """
-    NMS for oriented bounding boxes using probiou and fast-nms.
+    Perform NMS on oriented bounding boxes using probiou and fast-nms.
     Args:
-        boxes (torch.Tensor): Rotated bounding boxes, shape (N, 5), format xywhr.
-        scores (torch.Tensor): Confidence scores, shape (N,).
-        threshold (float): IoU threshold.
-        use_triu (bool): Whether to use `torch.triu` operator. It'd be useful for disable it
-            when exporting obb models to some formats that do not support `torch.triu`.
+        boxes (torch.Tensor): Rotated bounding boxes with shape (N, 5) in xywhr format.
+        scores (torch.Tensor): Confidence scores with shape (N,).
+        threshold (float): IoU threshold for NMS.
+        use_triu (bool): Whether to use torch.triu operator for upper triangular matrix operations.
     Returns:
         (torch.Tensor): Indices of boxes to keep after NMS.
@@ -162,7 +174,6 @@ def nms_rotated(boxes, scores, threshold=0.45, use_triu=True):
     ious = batch_probiou(boxes, boxes)
     if use_triu:
         ious = ious.triu_(diagonal=1)
-        # pick = torch.nonzero(ious.max(dim=0)[0] < threshold).squeeze_(-1)
         # NOTE: handle the case when len(boxes) hence exportable by eliminating if-else condition
         pick = torch.nonzero((ious >= threshold).sum(0) <= 0).squeeze_(-1)
     else:
@@ -180,54 +191,51 @@ def nms_rotated(boxes, scores, threshold=0.45, use_triu=True):
 def non_max_suppression(
     prediction,
-    conf_thres=0.25,
-    iou_thres=0.45,
+    conf_thres: float = 0.25,
+    iou_thres: float = 0.45,
     classes=None,
-    agnostic=False,
-    multi_label=False,
+    agnostic: bool = False,
+    multi_label: bool = False,
     labels=(),
-    max_det=300,
-    nc=0,  # number of classes (optional)
-    max_time_img=0.05,
-    max_nms=30000,
-    max_wh=7680,
-    in_place=True,
-    rotated=False,
-    end2end=False,
-    return_idxs=False,
+    max_det: int = 300,
+    nc: int = 0,  # number of classes (optional)
+    max_time_img: float = 0.05,
+    max_nms: int = 30000,
+    max_wh: int = 7680,
+    in_place: bool = True,
+    rotated: bool = False,
+    end2end: bool = False,
+    return_idxs: bool = False,
 ):
     """
-    Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.
+    Perform non-maximum suppression (NMS) on prediction results.
+    Applies NMS to filter overlapping bounding boxes based on confidence and IoU thresholds. Supports multiple
+    detection formats including standard boxes, rotated boxes, and masks.
     Args:
-        prediction (torch.Tensor): A tensor of shape (batch_size, num_classes + 4 + num_masks, num_boxes)
-            containing the predicted boxes, classes, and masks. The tensor should be in the format
-            output by a model, such as YOLO.
-        conf_thres (float): The confidence threshold below which boxes will be filtered out.
-            Valid values are between 0.0 and 1.0.
-        iou_thres (float): The IoU threshold below which boxes will be filtered out during NMS.
-            Valid values are between 0.0 and 1.0.
-        classes (List[int]): A list of class indices to consider. If None, all classes will be considered.
-        agnostic (bool): If True, the model is agnostic to the number of classes, and all
-            classes will be considered as one.
-        multi_label (bool): If True, each box may have multiple labels.
-        labels (List[List[Union[int, float, torch.Tensor]]]): A list of lists, where each inner
-            list contains the apriori labels for a given image. The list should be in the format
-            output by a dataloader, with each label being a tuple of (class_index, x, y, w, h).
-        max_det (int): The maximum number of boxes to keep after NMS.
-        nc (int): The number of classes output by the model. Any indices after this will be considered masks.
-        max_time_img (float): The maximum time (seconds) for processing one image.
-        max_nms (int): The maximum number of boxes into torchvision.ops.nms().
-        max_wh (int): The maximum box width and height in pixels.
-        in_place (bool): If True, the input prediction tensor will be modified in place.
-        rotated (bool): If Oriented Bounding Boxes (OBB) are being passed for NMS.
-        end2end (bool): If the model doesn't require NMS.
-        return_idxs (bool): Return the indices of the detections that were kept.
+        prediction (torch.Tensor): Predictions with shape (batch_size, num_classes + 4 + num_masks, num_boxes)
+            containing boxes, classes, and optional masks.
+        conf_thres (float): Confidence threshold for filtering detections. Valid values are between 0.0 and 1.0.
+        iou_thres (float): IoU threshold for NMS filtering. Valid values are between 0.0 and 1.0.
+        classes (List[int], optional): List of class indices to consider. If None, all classes are considered.
+        agnostic (bool): Whether to perform class-agnostic NMS.
+        multi_label (bool): Whether each box can have multiple labels.
+        labels (List[List[Union[int, float, torch.Tensor]]]): A priori labels for each image.
+        max_det (int): Maximum number of detections to keep per image.
+        nc (int): Number of classes. Indices after this are considered masks.
+        max_time_img (float): Maximum time in seconds for processing one image.
+        max_nms (int): Maximum number of boxes for torchvision.ops.nms().
+        max_wh (int): Maximum box width and height in pixels.
+        in_place (bool): Whether to modify the input prediction tensor in place.
+        rotated (bool): Whether to handle Oriented Bounding Boxes (OBB).
+        end2end (bool): Whether the model is end-to-end and doesn't require NMS.
+        return_idxs (bool): Whether to return the indices of kept detections.
     Returns:
-        (List[torch.Tensor]): A list of length batch_size, where each element is a tensor of
-            shape (num_boxes, 6 + num_masks) containing the kept boxes, with columns
-            (x1, y1, x2, y2, confidence, class, mask1, mask2, ...).
+        output (List[torch.Tensor]): List of detections per image with shape (num_boxes, 6 + num_masks)
+            containing (x1, y1, x2, y2, confidence, class, mask1, mask2, ...).
+        keepi (List[torch.Tensor]): Indices of kept detections if return_idxs=True.
     """
     import torchvision  # scope for faster 'import ultralytics'
@@ -322,18 +330,6 @@ def non_max_suppression(
             i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
         i = i[:max_det]  # limit detections
-        # # Experimental
-        # merge = False  # use merge-NMS
-        # if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
-        #     # Update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
-        #     from .metrics import box_iou
-        #     iou = box_iou(boxes[i], boxes) > iou_thres  # IoU matrix
-        #     weights = iou * scores[None]  # box weights
-        #     x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
-        #     redundant = True  # require redundant detections
-        #     if redundant:
-        #         i = i[iou.sum(1) > 1]  # require redundancy
         output[xi], keepi[xi] = x[i], xk[i].reshape(-1)
         if (time.time() - t) > time_limit:
             LOGGER.warning(f"NMS time limit {time_limit:.3f}s exceeded")
@@ -344,14 +340,14 @@ def non_max_suppression(
 def clip_boxes(boxes, shape):
     """
-    Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape.
+    Clip bounding boxes to image boundaries.
     Args:
-        boxes (torch.Tensor | numpy.ndarray): The bounding boxes to clip.
-        shape (tuple): The shape of the image.
+        boxes (torch.Tensor | numpy.ndarray): Bounding boxes to clip.
+        shape (tuple): Image shape as (height, width).
     Returns:
-        (torch.Tensor | numpy.ndarray): The clipped boxes.
+        (torch.Tensor | numpy.ndarray): Clipped bounding boxes.
     """
     if isinstance(boxes, torch.Tensor):  # faster individually (WARNING: inplace .clamp_() Apple MPS bug)
         boxes[..., 0] = boxes[..., 0].clamp(0, shape[1])  # x1
@@ -366,11 +362,11 @@ def clip_boxes(boxes, shape):
 def clip_coords(coords, shape):
     """
-    Clip line coordinates to the image boundaries.
+    Clip line coordinates to image boundaries.
     Args:
-        coords (torch.Tensor | numpy.ndarray): A list of line coordinates.
-        shape (tuple): A tuple of integers representing the size of the image in the format (height, width).
+        coords (torch.Tensor | numpy.ndarray): Line coordinates to clip.
+        shape (tuple): Image shape as (height, width).
     Returns:
         (torch.Tensor | numpy.ndarray): Clipped coordinates.
@@ -386,15 +382,18 @@ def clip_coords(coords, shape):
 def scale_image(masks, im0_shape, ratio_pad=None):
     """
-    Takes a mask, and resizes it to the original image size.
+    Rescale masks to original image size.
+    Takes resized and padded masks and rescales them back to the original image dimensions, removing any padding
+    that was applied during preprocessing.
     Args:
-        masks (np.ndarray): Resized and padded masks/images, [h, w, num]/[h, w, 3].
-        im0_shape (tuple): The original image shape.
-        ratio_pad (tuple): The ratio of the padding to the original image.
+        masks (np.ndarray): Resized and padded masks with shape [H, W, N] or [H, W, 3].
+        im0_shape (tuple): Original image shape as (height, width).
+        ratio_pad (tuple, optional): Ratio and padding values as ((ratio_h, ratio_w), (pad_h, pad_w)).
     Returns:
-        masks (np.ndarray): The masks that are being returned with shape [h, w, num].
+        (np.ndarray): Rescaled masks with shape [H, W, N] matching original image dimensions.
     """
     # Rescale coordinates (xyxy) from im1_shape to im0_shape
     im1_shape = masks.shape
@@ -404,7 +403,6 @@ def scale_image(masks, im0_shape, ratio_pad=None):
         gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])  # gain  = old / new
         pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2  # wh padding
     else:
-        # gain = ratio_pad[0][0]
         pad = ratio_pad[1]
     top, left = int(pad[1]), int(pad[0])  # y, x
     bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0])
@@ -425,10 +423,10 @@ def xyxy2xywh(x):
     top-left corner and (x2, y2) is the bottom-right corner.
     Args:
-        x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format.
+        x (np.ndarray | torch.Tensor): Input bounding box coordinates in (x1, y1, x2, y2) format.
     Returns:
-        y (np.ndarray | torch.Tensor): The bounding box coordinates in (x, y, width, height) format.
+        (np.ndarray | torch.Tensor): Bounding box coordinates in (x, y, width, height) format.
     """
     assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
     y = empty_like(x)  # faster than clone/copy
@@ -445,10 +443,10 @@ def xywh2xyxy(x):
     top-left corner and (x2, y2) is the bottom-right corner. Note: ops per 2 channels faster than per channel.
     Args:
-        x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x, y, width, height) format.
+        x (np.ndarray | torch.Tensor): Input bounding box coordinates in (x, y, width, height) format.
     Returns:
-        y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
+        (np.ndarray | torch.Tensor): Bounding box coordinates in (x1, y1, x2, y2) format.
     """
     assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
     y = empty_like(x)  # faster than clone/copy
@@ -459,16 +457,16 @@ def xywh2xyxy(x):
     return y
-def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
+def xywhn2xyxy(x, w: int = 640, h: int = 640, padw: int = 0, padh: int = 0):
     """
     Convert normalized bounding box coordinates to pixel coordinates.
     Args:
-        x (np.ndarray | torch.Tensor): The bounding box coordinates.
-        w (int): Width of the image.
-        h (int): Height of the image.
-        padw (int): Padding width.
-        padh (int): Padding height.
+        x (np.ndarray | torch.Tensor): Normalized bounding box coordinates in (x, y, w, h) format.
+        w (int): Image width in pixels.
+        h (int): Image height in pixels.
+        padw (int): Padding width in pixels.
+        padh (int): Padding height in pixels.
     Returns:
         y (np.ndarray | torch.Tensor): The coordinates of the bounding box in the format [x1, y1, x2, y2] where
@@ -483,20 +481,20 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
     return y
-def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
+def xyxy2xywhn(x, w: int = 640, h: int = 640, clip: bool = False, eps: float = 0.0):
     """
     Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format. x, y,
     width and height are normalized to image dimensions.
     Args:
-        x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format.
-        w (int): The width of the image.
-        h (int): The height of the image.
-        clip (bool): If True, the boxes will be clipped to the image boundaries.
-        eps (float): The minimum value of the box's width and height.
+        x (np.ndarray | torch.Tensor): Input bounding box coordinates in (x1, y1, x2, y2) format.
+        w (int): Image width in pixels.
+        h (int): Image height in pixels.
+        clip (bool): Whether to clip boxes to image boundaries.
+        eps (float): Minimum value for box width and height.
     Returns:
-        y (np.ndarray | torch.Tensor): The bounding box coordinates in (x, y, width, height, normalized) format
+        (np.ndarray | torch.Tensor): Normalized bounding box coordinates in (x, y, width, height) format.
     """
     if clip:
         x = clip_boxes(x, (h - eps, w - eps))
@@ -511,13 +509,13 @@ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
 def xywh2ltwh(x):
     """
-    Convert the bounding box format from [x, y, w, h] to [x1, y1, w, h], where x1, y1 are the top-left coordinates.
+    Convert bounding box format from [x, y, w, h] to [x1, y1, w, h] where x1, y1 are top-left coordinates.
     Args:
-        x (np.ndarray | torch.Tensor): The input tensor with the bounding box coordinates in the xywh format
+        x (np.ndarray | torch.Tensor): Input bounding box coordinates in xywh format.
     Returns:
-        y (np.ndarray | torch.Tensor): The bounding box coordinates in the xyltwh format
+        (np.ndarray | torch.Tensor): Bounding box coordinates in xyltwh format.
     """
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
     y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
@@ -527,13 +525,13 @@ def xywh2ltwh(x):
 def xyxy2ltwh(x):
     """
-    Convert nx4 bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h], where xy1=top-left, xy2=bottom-right.
+    Convert bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h] format.
     Args:
-        x (np.ndarray | torch.Tensor): The input tensor with the bounding boxes coordinates in the xyxy format
+        x (np.ndarray | torch.Tensor): Input bounding box coordinates in xyxy format.
     Returns:
-        y (np.ndarray | torch.Tensor): The bounding box coordinates in the xyltwh format.
+        (np.ndarray | torch.Tensor): Bounding box coordinates in xyltwh format.
     """
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
     y[..., 2] = x[..., 2] - x[..., 0]  # width
@@ -543,13 +541,13 @@ def xyxy2ltwh(x):
 def ltwh2xywh(x):
     """
-    Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center.
+    Convert bounding boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center.
     Args:
-        x (torch.Tensor): the input tensor
+        x (torch.Tensor): Input bounding box coordinates.
     Returns:
-        y (np.ndarray | torch.Tensor): The bounding box coordinates in the xywh format.
+        (np.ndarray | torch.Tensor): Bounding box coordinates in xywh format.
     """
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
     y[..., 0] = x[..., 0] + x[..., 2] / 2  # center x
@@ -559,14 +557,14 @@ def ltwh2xywh(x):
 def xyxyxyxy2xywhr(x):
     """
-    Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh, rotation]. Rotation values are
-    returned in radians from 0 to pi/2.
+    Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh, rotation] format.
     Args:
-        x (numpy.ndarray | torch.Tensor): Input box corners [xy1, xy2, xy3, xy4] of shape (n, 8).
+        x (numpy.ndarray | torch.Tensor): Input box corners with shape (N, 8) in [xy1, xy2, xy3, xy4] format.
     Returns:
-        (numpy.ndarray | torch.Tensor): Converted data in [cx, cy, w, h, rotation] format of shape (n, 5).
+        (numpy.ndarray | torch.Tensor): Converted data in [cx, cy, w, h, rotation] format with shape (N, 5).
+            Rotation values are in radians from 0 to pi/2.
     """
     is_torch = isinstance(x, torch.Tensor)
     points = x.cpu().numpy() if is_torch else x
@@ -582,14 +580,14 @@ def xyxyxyxy2xywhr(x):
 def xywhr2xyxyxyxy(x):
     """
-    Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, xy3, xy4]. Rotation values should
-    be in radians from 0 to pi/2.
+    Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, xy3, xy4] format.
     Args:
-        x (numpy.ndarray | torch.Tensor): Boxes in [cx, cy, w, h, rotation] format of shape (n, 5) or (b, n, 5).
+        x (numpy.ndarray | torch.Tensor): Boxes in [cx, cy, w, h, rotation] format with shape (N, 5) or (B, N, 5).
+            Rotation values should be in radians from 0 to pi/2.
     Returns:
-        (numpy.ndarray | torch.Tensor): Converted corner points of shape (n, 4, 2) or (b, n, 4, 2).
+        (numpy.ndarray | torch.Tensor): Converted corner points with shape (N, 4, 2) or (B, N, 4, 2).
     """
     cos, sin, cat, stack = (
         (torch.cos, torch.sin, torch.cat, torch.stack)
@@ -616,10 +614,10 @@ def ltwh2xyxy(x):
     Convert bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.
     Args:
-        x (np.ndarray | torch.Tensor): The input image.
+        x (np.ndarray | torch.Tensor): Input bounding box coordinates.
     Returns:
-        (np.ndarray | torch.Tensor): The xyxy coordinates of the bounding boxes.
+        (np.ndarray | torch.Tensor): Bounding box coordinates in xyxy format.
     """
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
     y[..., 2] = x[..., 2] + x[..., 0]  # width
@@ -632,10 +630,10 @@ def segments2boxes(segments):
     Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh).
     Args:
-        segments (list): List of segments, each segment is a list of points, each point is a list of x, y coordinates.
+        segments (list): List of segments where each segment is a list of points, each point is [x, y] coordinates.
     Returns:
-        (np.ndarray): The xywh coordinates of the bounding boxes.
+        (np.ndarray): Bounding box coordinates in xywh format.
     """
     boxes = []
     for s in segments:
@@ -644,16 +642,16 @@ def segments2boxes(segments):
     return xyxy2xywh(np.array(boxes))  # cls, xywh
-def resample_segments(segments, n=1000):
+def resample_segments(segments, n: int = 1000):
     """
-    Inputs a list of segments (n,2) and returns a list of segments (n,2) up-sampled to n points each.
+    Resample segments to n points each using linear interpolation.
     Args:
-        segments (list): A list of (n,2) arrays, where n is the number of points in the segment.
-        n (int): Number of points to resample the segment to.
+        segments (list): List of (N, 2) arrays where N is the number of points in each segment.
+        n (int): Number of points to resample each segment to.
     Returns:
-        segments (list): The resampled segments.
+        (list): Resampled segments with n points each.
     """
     for i, s in enumerate(segments):
         if len(s) == n:
@@ -670,11 +668,11 @@ def resample_segments(segments, n=1000):
 def crop_mask(masks, boxes):
     """
-    Crop masks to bounding boxes.
+    Crop masks to bounding box regions.
     Args:
-        masks (torch.Tensor): [n, h, w] tensor of masks.
-        boxes (torch.Tensor): [n, 4] tensor of bbox coordinates in relative point form.
+        masks (torch.Tensor): Masks with shape (N, H, W).
+        boxes (torch.Tensor): Bounding box coordinates with shape (N, 4) in relative point form.
     Returns:
         (torch.Tensor): Cropped masks.
@@ -687,16 +685,16 @@ def crop_mask(masks, boxes):
     return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
-def process_mask(protos, masks_in, bboxes, shape, upsample=False):
+def process_mask(protos, masks_in, bboxes, shape, upsample: bool = False):
     """
-    Apply masks to bounding boxes using the output of the mask head.
+    Apply masks to bounding boxes using mask head output.
     Args:
-        protos (torch.Tensor): A tensor of shape [mask_dim, mask_h, mask_w].
-        masks_in (torch.Tensor): A tensor of shape [n, mask_dim], where n is the number of masks after NMS.
-        bboxes (torch.Tensor): A tensor of shape [n, 4], where n is the number of masks after NMS.
-        shape (tuple): A tuple of integers representing the size of the input image in the format (h, w).
-        upsample (bool): A flag to indicate whether to upsample the mask to the original image size.
+        protos (torch.Tensor): Mask prototypes with shape (mask_dim, mask_h, mask_w).
+        masks_in (torch.Tensor): Mask coefficients with shape (N, mask_dim) where N is number of masks after NMS.
+        bboxes (torch.Tensor): Bounding boxes with shape (N, 4) where N is number of masks after NMS.
+        shape (tuple): Input image size as (height, width).
+        upsample (bool): Whether to upsample masks to original image size.
     Returns:
         (torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w
@@ -722,16 +720,16 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False):
 def process_mask_native(protos, masks_in, bboxes, shape):
     """
-    Apply masks to bounding boxes using the output of the mask head with native upsampling.
+    Apply masks to bounding boxes using mask head output with native upsampling.
     Args:
-        protos (torch.Tensor): [mask_dim, mask_h, mask_w].
-        masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms.
-        bboxes (torch.Tensor): [n, 4], n is number of masks after nms.
-        shape (tuple): The size of the input image (h,w).
+        protos (torch.Tensor): Mask prototypes with shape (mask_dim, mask_h, mask_w).
+        masks_in (torch.Tensor): Mask coefficients with shape (N, mask_dim) where N is number of masks after NMS.
+        bboxes (torch.Tensor): Bounding boxes with shape (N, 4) where N is number of masks after NMS.
+        shape (tuple): Input image size as (height, width).
     Returns:
-        (torch.Tensor): The returned masks with dimensions [h, w, n].
+        (torch.Tensor): Binary mask tensor with shape (H, W, N).
     """
     c, mh, mw = protos.shape  # CHW
     masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw)
@@ -740,15 +738,14 @@ def process_mask_native(protos, masks_in, bboxes, shape):
     return masks.gt_(0.0)
-def scale_masks(masks, shape, padding=True):
+def scale_masks(masks, shape, padding: bool = True):
     """
-    Rescale segment masks to shape.
+    Rescale segment masks to target shape.
     Args:
-        masks (torch.Tensor): (N, C, H, W).
-        shape (tuple): Height and width.
-        padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
-            rescaling.
+        masks (torch.Tensor): Masks with shape (N, C, H, W).
+        shape (tuple): Target height and width as (height, width).
+        padding (bool): Whether masks are based on YOLO-style augmented images with padding.
     Returns:
         (torch.Tensor): Rescaled masks.
@@ -767,21 +764,20 @@ def scale_masks(masks, shape, padding=True):
     return masks
-def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize=False, padding=True):
+def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize: bool = False, padding: bool = True):
     """
-    Rescale segment coordinates (xy) from img1_shape to img0_shape.
+    Rescale segment coordinates from img1_shape to img0_shape.
     Args:
-        img1_shape (tuple): The shape of the image that the coords are from.
-        coords (torch.Tensor): The coords to be scaled of shape n,2.
-        img0_shape (tuple): The shape of the image that the segmentation is being applied to.
-        ratio_pad (tuple): The ratio of the image size to the padded image size.
-        normalize (bool): If True, the coordinates will be normalized to the range [0, 1].
-        padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
-            rescaling.
+        img1_shape (tuple): Shape of the source image.
+        coords (torch.Tensor): Coordinates to scale with shape (N, 2).
+        img0_shape (tuple): Shape of the target image.
+        ratio_pad (tuple, optional): Ratio and padding values as ((ratio_h, ratio_w), (pad_h, pad_w)).
+        normalize (bool): Whether to normalize coordinates to range [0, 1].
+        padding (bool): Whether coordinates are based on YOLO-style augmented images with padding.
     Returns:
-        coords (torch.Tensor): The scaled coordinates.
+        (torch.Tensor): Scaled coordinates.
     """
     if ratio_pad is None:  # calculate from img0_shape
         gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
@@ -804,13 +800,13 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize=False
 def regularize_rboxes(rboxes):
     """
-    Regularize rotated boxes in range [0, pi/2].
+    Regularize rotated bounding boxes to range [0, pi/2].
     Args:
-        rboxes (torch.Tensor): Input boxes of shape(N, 5) in xywhr format.
+        rboxes (torch.Tensor): Input rotated boxes with shape (N, 5) in xywhr format.
     Returns:
-        (torch.Tensor): The regularized boxes.
+        (torch.Tensor): Regularized rotated boxes.
     """
     x, y, w, h, t = rboxes.unbind(dim=-1)
     # Swap edge if t >= pi/2 while not being symmetrically opposite
@@ -821,16 +817,16 @@ def regularize_rboxes(rboxes):
     return torch.stack([x, y, w_, h_, t], dim=-1)  # regularized boxes
-def masks2segments(masks, strategy="all"):
+def masks2segments(masks, strategy: str = "all"):
     """
-    Convert masks to segments.
+    Convert masks to segments using contour detection.
     Args:
-        masks (torch.Tensor): The output of the model, which is a tensor of shape (batch_size, 160, 160).
-        strategy (str): 'all' or 'largest'.
+        masks (torch.Tensor): Binary masks with shape (batch_size, 160, 160).
+        strategy (str): Segmentation strategy, either 'all' or 'largest'.
     Returns:
-        (list): List of segment masks.
+        (list): List of segment masks as float32 arrays.
     """
     from ultralytics.data.converter import merge_multi_segment
@@ -854,20 +850,20 @@ def masks2segments(masks, strategy="all"):
 def convert_torch2numpy_batch(batch: torch.Tensor) -> np.ndarray:
     """
-    Convert a batch of FP32 torch tensors (0.0-1.0) to a NumPy uint8 array (0-255), changing from BCHW to BHWC layout.
+    Convert a batch of FP32 torch tensors to NumPy uint8 arrays, changing from BCHW to BHWC layout.
     Args:
-        batch (torch.Tensor): Input tensor batch of shape (Batch, Channels, Height, Width) and dtype torch.float32.
+        batch (torch.Tensor): Input tensor batch with shape (Batch, Channels, Height, Width) and dtype torch.float32.
     Returns:
-        (np.ndarray): Output NumPy array batch of shape (Batch, Height, Width, Channels) and dtype uint8.
+        (np.ndarray): Output NumPy array batch with shape (Batch, Height, Width, Channels) and dtype uint8.
     """
     return (batch.permute(0, 2, 3, 1).contiguous() * 255).clamp(0, 255).to(torch.uint8).cpu().numpy()
 def clean_str(s):
     """
-    Cleans a string by replacing special characters with '_' character.
+    Clean a string by replacing special characters with '_' character.
     Args:
         s (str): A string needing special characters replaced.
@@ -879,7 +875,7 @@ def clean_str(s):
 def empty_like(x):
-    """Creates empty torch.Tensor or np.ndarray with same shape as input and float32 dtype."""
+    """Create empty torch.Tensor or np.ndarray with same shape as input and float32 dtype."""
     return (
         torch.empty_like(x, dtype=torch.float32) if isinstance(x, torch.Tensor) else np.empty_like(x, dtype=np.float32)
     )

ultralytics 8.3.142__py3-none-any.whl → 8.3.144__py3-none-any.whl

ultralytics 8.3.142__py3-none-any.whl → 8.3.144__py3-none-any.whl