ultralytics_opencv_headless-8.3.242-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/__init__.py +23 -0
- tests/conftest.py +59 -0
- tests/test_cli.py +131 -0
- tests/test_cuda.py +216 -0
- tests/test_engine.py +157 -0
- tests/test_exports.py +309 -0
- tests/test_integrations.py +151 -0
- tests/test_python.py +777 -0
- tests/test_solutions.py +371 -0
- ultralytics/__init__.py +48 -0
- ultralytics/assets/bus.jpg +0 -0
- ultralytics/assets/zidane.jpg +0 -0
- ultralytics/cfg/__init__.py +1026 -0
- ultralytics/cfg/datasets/Argoverse.yaml +78 -0
- ultralytics/cfg/datasets/DOTAv1.5.yaml +37 -0
- ultralytics/cfg/datasets/DOTAv1.yaml +36 -0
- ultralytics/cfg/datasets/GlobalWheat2020.yaml +68 -0
- ultralytics/cfg/datasets/HomeObjects-3K.yaml +32 -0
- ultralytics/cfg/datasets/ImageNet.yaml +2025 -0
- ultralytics/cfg/datasets/Objects365.yaml +447 -0
- ultralytics/cfg/datasets/SKU-110K.yaml +58 -0
- ultralytics/cfg/datasets/VOC.yaml +102 -0
- ultralytics/cfg/datasets/VisDrone.yaml +87 -0
- ultralytics/cfg/datasets/african-wildlife.yaml +25 -0
- ultralytics/cfg/datasets/brain-tumor.yaml +22 -0
- ultralytics/cfg/datasets/carparts-seg.yaml +44 -0
- ultralytics/cfg/datasets/coco-pose.yaml +64 -0
- ultralytics/cfg/datasets/coco.yaml +118 -0
- ultralytics/cfg/datasets/coco128-seg.yaml +101 -0
- ultralytics/cfg/datasets/coco128.yaml +101 -0
- ultralytics/cfg/datasets/coco8-grayscale.yaml +103 -0
- ultralytics/cfg/datasets/coco8-multispectral.yaml +104 -0
- ultralytics/cfg/datasets/coco8-pose.yaml +47 -0
- ultralytics/cfg/datasets/coco8-seg.yaml +101 -0
- ultralytics/cfg/datasets/coco8.yaml +101 -0
- ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
- ultralytics/cfg/datasets/crack-seg.yaml +22 -0
- ultralytics/cfg/datasets/dog-pose.yaml +52 -0
- ultralytics/cfg/datasets/dota8-multispectral.yaml +38 -0
- ultralytics/cfg/datasets/dota8.yaml +35 -0
- ultralytics/cfg/datasets/hand-keypoints.yaml +50 -0
- ultralytics/cfg/datasets/kitti.yaml +27 -0
- ultralytics/cfg/datasets/lvis.yaml +1240 -0
- ultralytics/cfg/datasets/medical-pills.yaml +21 -0
- ultralytics/cfg/datasets/open-images-v7.yaml +663 -0
- ultralytics/cfg/datasets/package-seg.yaml +22 -0
- ultralytics/cfg/datasets/signature.yaml +21 -0
- ultralytics/cfg/datasets/tiger-pose.yaml +41 -0
- ultralytics/cfg/datasets/xView.yaml +155 -0
- ultralytics/cfg/default.yaml +130 -0
- ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml +17 -0
- ultralytics/cfg/models/11/yolo11-cls.yaml +33 -0
- ultralytics/cfg/models/11/yolo11-obb.yaml +50 -0
- ultralytics/cfg/models/11/yolo11-pose.yaml +51 -0
- ultralytics/cfg/models/11/yolo11-seg.yaml +50 -0
- ultralytics/cfg/models/11/yolo11.yaml +50 -0
- ultralytics/cfg/models/11/yoloe-11-seg.yaml +48 -0
- ultralytics/cfg/models/11/yoloe-11.yaml +48 -0
- ultralytics/cfg/models/12/yolo12-cls.yaml +32 -0
- ultralytics/cfg/models/12/yolo12-obb.yaml +48 -0
- ultralytics/cfg/models/12/yolo12-pose.yaml +49 -0
- ultralytics/cfg/models/12/yolo12-seg.yaml +48 -0
- ultralytics/cfg/models/12/yolo12.yaml +48 -0
- ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +53 -0
- ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +45 -0
- ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +45 -0
- ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +57 -0
- ultralytics/cfg/models/v10/yolov10b.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10l.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10m.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10n.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10s.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10x.yaml +45 -0
- ultralytics/cfg/models/v3/yolov3-spp.yaml +49 -0
- ultralytics/cfg/models/v3/yolov3-tiny.yaml +40 -0
- ultralytics/cfg/models/v3/yolov3.yaml +49 -0
- ultralytics/cfg/models/v5/yolov5-p6.yaml +62 -0
- ultralytics/cfg/models/v5/yolov5.yaml +51 -0
- ultralytics/cfg/models/v6/yolov6.yaml +56 -0
- ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +48 -0
- ultralytics/cfg/models/v8/yoloe-v8.yaml +48 -0
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +28 -0
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +28 -0
- ultralytics/cfg/models/v8/yolov8-cls.yaml +32 -0
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +58 -0
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +60 -0
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +50 -0
- ultralytics/cfg/models/v8/yolov8-obb.yaml +49 -0
- ultralytics/cfg/models/v8/yolov8-p2.yaml +57 -0
- ultralytics/cfg/models/v8/yolov8-p6.yaml +59 -0
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +60 -0
- ultralytics/cfg/models/v8/yolov8-pose.yaml +50 -0
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +49 -0
- ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +59 -0
- ultralytics/cfg/models/v8/yolov8-seg.yaml +49 -0
- ultralytics/cfg/models/v8/yolov8-world.yaml +51 -0
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +49 -0
- ultralytics/cfg/models/v8/yolov8.yaml +49 -0
- ultralytics/cfg/models/v9/yolov9c-seg.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9c.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9e-seg.yaml +64 -0
- ultralytics/cfg/models/v9/yolov9e.yaml +64 -0
- ultralytics/cfg/models/v9/yolov9m.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9s.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9t.yaml +41 -0
- ultralytics/cfg/trackers/botsort.yaml +21 -0
- ultralytics/cfg/trackers/bytetrack.yaml +12 -0
- ultralytics/data/__init__.py +26 -0
- ultralytics/data/annotator.py +66 -0
- ultralytics/data/augment.py +2801 -0
- ultralytics/data/base.py +435 -0
- ultralytics/data/build.py +437 -0
- ultralytics/data/converter.py +855 -0
- ultralytics/data/dataset.py +834 -0
- ultralytics/data/loaders.py +704 -0
- ultralytics/data/scripts/download_weights.sh +18 -0
- ultralytics/data/scripts/get_coco.sh +61 -0
- ultralytics/data/scripts/get_coco128.sh +18 -0
- ultralytics/data/scripts/get_imagenet.sh +52 -0
- ultralytics/data/split.py +138 -0
- ultralytics/data/split_dota.py +344 -0
- ultralytics/data/utils.py +798 -0
- ultralytics/engine/__init__.py +1 -0
- ultralytics/engine/exporter.py +1574 -0
- ultralytics/engine/model.py +1124 -0
- ultralytics/engine/predictor.py +508 -0
- ultralytics/engine/results.py +1522 -0
- ultralytics/engine/trainer.py +974 -0
- ultralytics/engine/tuner.py +448 -0
- ultralytics/engine/validator.py +384 -0
- ultralytics/hub/__init__.py +166 -0
- ultralytics/hub/auth.py +151 -0
- ultralytics/hub/google/__init__.py +174 -0
- ultralytics/hub/session.py +422 -0
- ultralytics/hub/utils.py +162 -0
- ultralytics/models/__init__.py +9 -0
- ultralytics/models/fastsam/__init__.py +7 -0
- ultralytics/models/fastsam/model.py +79 -0
- ultralytics/models/fastsam/predict.py +169 -0
- ultralytics/models/fastsam/utils.py +23 -0
- ultralytics/models/fastsam/val.py +38 -0
- ultralytics/models/nas/__init__.py +7 -0
- ultralytics/models/nas/model.py +98 -0
- ultralytics/models/nas/predict.py +56 -0
- ultralytics/models/nas/val.py +38 -0
- ultralytics/models/rtdetr/__init__.py +7 -0
- ultralytics/models/rtdetr/model.py +63 -0
- ultralytics/models/rtdetr/predict.py +88 -0
- ultralytics/models/rtdetr/train.py +89 -0
- ultralytics/models/rtdetr/val.py +216 -0
- ultralytics/models/sam/__init__.py +25 -0
- ultralytics/models/sam/amg.py +275 -0
- ultralytics/models/sam/build.py +365 -0
- ultralytics/models/sam/build_sam3.py +377 -0
- ultralytics/models/sam/model.py +169 -0
- ultralytics/models/sam/modules/__init__.py +1 -0
- ultralytics/models/sam/modules/blocks.py +1067 -0
- ultralytics/models/sam/modules/decoders.py +495 -0
- ultralytics/models/sam/modules/encoders.py +794 -0
- ultralytics/models/sam/modules/memory_attention.py +298 -0
- ultralytics/models/sam/modules/sam.py +1160 -0
- ultralytics/models/sam/modules/tiny_encoder.py +979 -0
- ultralytics/models/sam/modules/transformer.py +344 -0
- ultralytics/models/sam/modules/utils.py +512 -0
- ultralytics/models/sam/predict.py +3940 -0
- ultralytics/models/sam/sam3/__init__.py +3 -0
- ultralytics/models/sam/sam3/decoder.py +546 -0
- ultralytics/models/sam/sam3/encoder.py +529 -0
- ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
- ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
- ultralytics/models/sam/sam3/model_misc.py +199 -0
- ultralytics/models/sam/sam3/necks.py +129 -0
- ultralytics/models/sam/sam3/sam3_image.py +339 -0
- ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
- ultralytics/models/sam/sam3/vitdet.py +547 -0
- ultralytics/models/sam/sam3/vl_combiner.py +160 -0
- ultralytics/models/utils/__init__.py +1 -0
- ultralytics/models/utils/loss.py +466 -0
- ultralytics/models/utils/ops.py +315 -0
- ultralytics/models/yolo/__init__.py +7 -0
- ultralytics/models/yolo/classify/__init__.py +7 -0
- ultralytics/models/yolo/classify/predict.py +90 -0
- ultralytics/models/yolo/classify/train.py +202 -0
- ultralytics/models/yolo/classify/val.py +216 -0
- ultralytics/models/yolo/detect/__init__.py +7 -0
- ultralytics/models/yolo/detect/predict.py +122 -0
- ultralytics/models/yolo/detect/train.py +227 -0
- ultralytics/models/yolo/detect/val.py +507 -0
- ultralytics/models/yolo/model.py +430 -0
- ultralytics/models/yolo/obb/__init__.py +7 -0
- ultralytics/models/yolo/obb/predict.py +56 -0
- ultralytics/models/yolo/obb/train.py +79 -0
- ultralytics/models/yolo/obb/val.py +302 -0
- ultralytics/models/yolo/pose/__init__.py +7 -0
- ultralytics/models/yolo/pose/predict.py +65 -0
- ultralytics/models/yolo/pose/train.py +110 -0
- ultralytics/models/yolo/pose/val.py +248 -0
- ultralytics/models/yolo/segment/__init__.py +7 -0
- ultralytics/models/yolo/segment/predict.py +109 -0
- ultralytics/models/yolo/segment/train.py +69 -0
- ultralytics/models/yolo/segment/val.py +307 -0
- ultralytics/models/yolo/world/__init__.py +5 -0
- ultralytics/models/yolo/world/train.py +173 -0
- ultralytics/models/yolo/world/train_world.py +178 -0
- ultralytics/models/yolo/yoloe/__init__.py +22 -0
- ultralytics/models/yolo/yoloe/predict.py +162 -0
- ultralytics/models/yolo/yoloe/train.py +287 -0
- ultralytics/models/yolo/yoloe/train_seg.py +122 -0
- ultralytics/models/yolo/yoloe/val.py +206 -0
- ultralytics/nn/__init__.py +27 -0
- ultralytics/nn/autobackend.py +958 -0
- ultralytics/nn/modules/__init__.py +182 -0
- ultralytics/nn/modules/activation.py +54 -0
- ultralytics/nn/modules/block.py +1947 -0
- ultralytics/nn/modules/conv.py +669 -0
- ultralytics/nn/modules/head.py +1183 -0
- ultralytics/nn/modules/transformer.py +793 -0
- ultralytics/nn/modules/utils.py +159 -0
- ultralytics/nn/tasks.py +1768 -0
- ultralytics/nn/text_model.py +356 -0
- ultralytics/py.typed +1 -0
- ultralytics/solutions/__init__.py +41 -0
- ultralytics/solutions/ai_gym.py +108 -0
- ultralytics/solutions/analytics.py +264 -0
- ultralytics/solutions/config.py +107 -0
- ultralytics/solutions/distance_calculation.py +123 -0
- ultralytics/solutions/heatmap.py +125 -0
- ultralytics/solutions/instance_segmentation.py +86 -0
- ultralytics/solutions/object_blurrer.py +89 -0
- ultralytics/solutions/object_counter.py +190 -0
- ultralytics/solutions/object_cropper.py +87 -0
- ultralytics/solutions/parking_management.py +280 -0
- ultralytics/solutions/queue_management.py +93 -0
- ultralytics/solutions/region_counter.py +133 -0
- ultralytics/solutions/security_alarm.py +151 -0
- ultralytics/solutions/similarity_search.py +219 -0
- ultralytics/solutions/solutions.py +828 -0
- ultralytics/solutions/speed_estimation.py +114 -0
- ultralytics/solutions/streamlit_inference.py +260 -0
- ultralytics/solutions/templates/similarity-search.html +156 -0
- ultralytics/solutions/trackzone.py +88 -0
- ultralytics/solutions/vision_eye.py +67 -0
- ultralytics/trackers/__init__.py +7 -0
- ultralytics/trackers/basetrack.py +115 -0
- ultralytics/trackers/bot_sort.py +257 -0
- ultralytics/trackers/byte_tracker.py +469 -0
- ultralytics/trackers/track.py +116 -0
- ultralytics/trackers/utils/__init__.py +1 -0
- ultralytics/trackers/utils/gmc.py +339 -0
- ultralytics/trackers/utils/kalman_filter.py +482 -0
- ultralytics/trackers/utils/matching.py +154 -0
- ultralytics/utils/__init__.py +1450 -0
- ultralytics/utils/autobatch.py +118 -0
- ultralytics/utils/autodevice.py +205 -0
- ultralytics/utils/benchmarks.py +728 -0
- ultralytics/utils/callbacks/__init__.py +5 -0
- ultralytics/utils/callbacks/base.py +233 -0
- ultralytics/utils/callbacks/clearml.py +146 -0
- ultralytics/utils/callbacks/comet.py +625 -0
- ultralytics/utils/callbacks/dvc.py +197 -0
- ultralytics/utils/callbacks/hub.py +110 -0
- ultralytics/utils/callbacks/mlflow.py +134 -0
- ultralytics/utils/callbacks/neptune.py +126 -0
- ultralytics/utils/callbacks/platform.py +73 -0
- ultralytics/utils/callbacks/raytune.py +42 -0
- ultralytics/utils/callbacks/tensorboard.py +123 -0
- ultralytics/utils/callbacks/wb.py +188 -0
- ultralytics/utils/checks.py +998 -0
- ultralytics/utils/cpu.py +85 -0
- ultralytics/utils/dist.py +123 -0
- ultralytics/utils/downloads.py +529 -0
- ultralytics/utils/errors.py +35 -0
- ultralytics/utils/events.py +113 -0
- ultralytics/utils/export/__init__.py +7 -0
- ultralytics/utils/export/engine.py +237 -0
- ultralytics/utils/export/imx.py +315 -0
- ultralytics/utils/export/tensorflow.py +231 -0
- ultralytics/utils/files.py +219 -0
- ultralytics/utils/git.py +137 -0
- ultralytics/utils/instance.py +484 -0
- ultralytics/utils/logger.py +444 -0
- ultralytics/utils/loss.py +849 -0
- ultralytics/utils/metrics.py +1560 -0
- ultralytics/utils/nms.py +337 -0
- ultralytics/utils/ops.py +664 -0
- ultralytics/utils/patches.py +201 -0
- ultralytics/utils/plotting.py +1045 -0
- ultralytics/utils/tal.py +403 -0
- ultralytics/utils/torch_utils.py +984 -0
- ultralytics/utils/tqdm.py +440 -0
- ultralytics/utils/triton.py +112 -0
- ultralytics/utils/tuner.py +160 -0
- ultralytics_opencv_headless-8.3.242.dist-info/METADATA +374 -0
- ultralytics_opencv_headless-8.3.242.dist-info/RECORD +298 -0
- ultralytics_opencv_headless-8.3.242.dist-info/WHEEL +5 -0
- ultralytics_opencv_headless-8.3.242.dist-info/entry_points.txt +3 -0
- ultralytics_opencv_headless-8.3.242.dist-info/licenses/LICENSE +661 -0
- ultralytics_opencv_headless-8.3.242.dist-info/top_level.txt +1 -0
ultralytics/utils/ops.py
ADDED
@@ -0,0 +1,664 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

from __future__ import annotations

import contextlib
import math
import re
import time

import cv2
import numpy as np
import torch
import torch.nn.functional as F

from ultralytics.utils import NOT_MACOS14


class Profile(contextlib.ContextDecorator):
    """Ultralytics Profile class for timing code execution.

    Use as a decorator with @Profile() or as a context manager with 'with Profile():'. Provides accurate timing
    measurements with CUDA synchronization support for GPU operations.

    Attributes:
        t (float): Accumulated time in seconds.
        device (torch.device): Device used for model inference.
        cuda (bool): Whether CUDA is being used for timing synchronization.

    Examples:
        Use as a context manager to time code execution
        >>> with Profile(device=device) as dt:
        ...     pass  # slow operation here
        >>> print(dt)  # prints "Elapsed time is 9.5367431640625e-07 s"

        Use as a decorator to time function execution
        >>> @Profile()
        ... def slow_function():
        ...     time.sleep(0.1)
    """

    def __init__(self, t: float = 0.0, device: torch.device | None = None):
        """Initialize the Profile class.

        Args:
            t (float): Initial accumulated time in seconds.
            device (torch.device, optional): Device used for model inference to enable CUDA synchronization.
        """
        self.t = t
        self.device = device
        self.cuda = bool(device and str(device).startswith("cuda"))

    def __enter__(self):
        """Start timing."""
        self.start = self.time()
        return self

    def __exit__(self, type, value, traceback):
        """Stop timing."""
        self.dt = self.time() - self.start  # delta-time
        self.t += self.dt  # accumulate dt

    def __str__(self):
        """Return a human-readable string representing the accumulated elapsed time."""
        return f"Elapsed time is {self.t} s"

    def time(self):
        """Get current time with CUDA synchronization if applicable."""
        if self.cuda:
            torch.cuda.synchronize(self.device)
        return time.perf_counter()
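

# --- Illustrative usage sketch (editor addition, not part of the package file) ---
# Minimal Profile timing on CPU (device=None, so no CUDA synchronization runs);
# the sleep below is a hypothetical stand-in for a slow operation.
profile_example = Profile()
with profile_example:
    time.sleep(0.01)
print(profile_example)  # e.g. "Elapsed time is 0.0101... s"; .t accumulates across uses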


def segment2box(segment, width: int = 640, height: int = 640):
    """Convert segment coordinates to bounding box coordinates.

    Converts a single segment label to a box label by finding the minimum and maximum x and y coordinates. Applies
    inside-image constraint and clips coordinates when necessary.

    Args:
        segment (torch.Tensor): Segment coordinates in format (N, 2) where N is number of points.
        width (int): Width of the image in pixels.
        height (int): Height of the image in pixels.

    Returns:
        (np.ndarray): Bounding box coordinates in xyxy format [x1, y1, x2, y2].
    """
    x, y = segment.T  # segment xy
    # Clip coordinates if 3 out of 4 sides are outside the image
    if np.array([x.min() < 0, y.min() < 0, x.max() > width, y.max() > height]).sum() >= 3:
        x = x.clip(0, width)
        y = y.clip(0, height)
    inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
    x = x[inside]
    y = y[inside]
    return (
        np.array([x.min(), y.min(), x.max(), y.max()], dtype=segment.dtype)
        if any(x)
        else np.zeros(4, dtype=segment.dtype)
    )  # xyxy


def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding: bool = True, xywh: bool = False):
    """Rescale bounding boxes from one image shape to another.

    Rescales bounding boxes from img1_shape to img0_shape, accounting for padding and aspect ratio changes. Supports
    both xyxy and xywh box formats.

    Args:
        img1_shape (tuple): Shape of the source image (height, width).
        boxes (torch.Tensor): Bounding boxes to rescale in format (N, 4).
        img0_shape (tuple): Shape of the target image (height, width).
        ratio_pad (tuple, optional): Tuple of (ratio, pad) for scaling. If None, calculated from image shapes.
        padding (bool): Whether boxes are based on YOLO-style augmented images with padding.
        xywh (bool): Whether box format is xywh (True) or xyxy (False).

    Returns:
        (torch.Tensor): Rescaled bounding boxes in the same format as input.
    """
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad_x = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1)
        pad_y = round((img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1)
    else:
        gain = ratio_pad[0][0]
        pad_x, pad_y = ratio_pad[1]

    if padding:
        boxes[..., 0] -= pad_x  # x padding
        boxes[..., 1] -= pad_y  # y padding
        if not xywh:
            boxes[..., 2] -= pad_x  # x padding
            boxes[..., 3] -= pad_y  # y padding
    boxes[..., :4] /= gain
    return boxes if xywh else clip_boxes(boxes, img0_shape)
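

# --- Illustrative usage sketch (editor addition, not part of the package; assumes the module is fully imported) ---
# Map one xyxy box from a 640x640 letterboxed input back to a hypothetical 480x640
# original. gain = min(640/480, 640/640) = 1.0, so only the vertical padding of
# (640 - 480) / 2 = 80 px is removed. Note scale_boxes modifies boxes in place,
# hence the clone().
letterboxed_box = torch.tensor([[100.0, 180.0, 200.0, 280.0]])
restored_box = scale_boxes((640, 640), letterboxed_box.clone(), (480, 640))
# restored_box -> tensor([[100., 100., 200., 200.]])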


def make_divisible(x: int, divisor):
    """Return the nearest number that is divisible by the given divisor.

    Args:
        x (int): The number to make divisible.
        divisor (int | torch.Tensor): The divisor.

    Returns:
        (int): The nearest number divisible by the divisor.
    """
    if isinstance(divisor, torch.Tensor):
        divisor = int(divisor.max())  # to int
    return math.ceil(x / divisor) * divisor


def clip_boxes(boxes, shape):
    """Clip bounding boxes to image boundaries.

    Args:
        boxes (torch.Tensor | np.ndarray): Bounding boxes to clip.
        shape (tuple): Image shape as HWC or HW (supports both).

    Returns:
        (torch.Tensor | np.ndarray): Clipped bounding boxes.
    """
    h, w = shape[:2]  # supports both HWC or HW shapes
    if isinstance(boxes, torch.Tensor):  # faster individually
        if NOT_MACOS14:
            boxes[..., 0].clamp_(0, w)  # x1
            boxes[..., 1].clamp_(0, h)  # y1
            boxes[..., 2].clamp_(0, w)  # x2
            boxes[..., 3].clamp_(0, h)  # y2
        else:  # Apple macOS14 MPS bug https://github.com/ultralytics/ultralytics/pull/21878
            boxes[..., 0] = boxes[..., 0].clamp(0, w)
            boxes[..., 1] = boxes[..., 1].clamp(0, h)
            boxes[..., 2] = boxes[..., 2].clamp(0, w)
            boxes[..., 3] = boxes[..., 3].clamp(0, h)
    else:  # np.array (faster grouped)
        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, w)  # x1, x2
        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, h)  # y1, y2
    return boxes


def clip_coords(coords, shape):
    """Clip line coordinates to image boundaries.

    Args:
        coords (torch.Tensor | np.ndarray): Line coordinates to clip.
        shape (tuple): Image shape as HWC or HW (supports both).

    Returns:
        (torch.Tensor | np.ndarray): Clipped coordinates.
    """
    h, w = shape[:2]  # supports both HWC or HW shapes
    if isinstance(coords, torch.Tensor):
        if NOT_MACOS14:
            coords[..., 0].clamp_(0, w)  # x
            coords[..., 1].clamp_(0, h)  # y
        else:  # Apple macOS14 MPS bug https://github.com/ultralytics/ultralytics/pull/21878
            coords[..., 0] = coords[..., 0].clamp(0, w)
            coords[..., 1] = coords[..., 1].clamp(0, h)
    else:  # np.array
        coords[..., 0] = coords[..., 0].clip(0, w)  # x
        coords[..., 1] = coords[..., 1].clip(0, h)  # y
    return coords


def xyxy2xywh(x):
    """Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format where (x1, y1)
    is the top-left corner and (x2, y2) is the bottom-right corner.

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates in (x1, y1, x2, y2) format.

    Returns:
        (np.ndarray | torch.Tensor): Bounding box coordinates in (x, y, width, height) format.
    """
    assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
    y = empty_like(x)  # faster than clone/copy
    x1, y1, x2, y2 = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
    y[..., 0] = (x1 + x2) / 2  # x center
    y[..., 1] = (y1 + y2) / 2  # y center
    y[..., 2] = x2 - x1  # width
    y[..., 3] = y2 - y1  # height
    return y


def xywh2xyxy(x):
    """Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1)
    is the top-left corner and (x2, y2) is the bottom-right corner. Note: ops per 2 channels faster than per channel.

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates in (x, y, width, height) format.

    Returns:
        (np.ndarray | torch.Tensor): Bounding box coordinates in (x1, y1, x2, y2) format.
    """
    assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
    y = empty_like(x)  # faster than clone/copy
    xy = x[..., :2]  # centers
    wh = x[..., 2:] / 2  # half width-height
    y[..., :2] = xy - wh  # top left xy
    y[..., 2:] = xy + wh  # bottom right xy
    return y
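

# --- Illustrative usage sketch (editor addition, not part of the package; assumes the module is fully imported) ---
# xyxy <-> xywh round trip: the box (10, 20, 50, 80) has center (30, 50) and
# size (40, 60); converting back recovers the original corners exactly.
corners = np.array([[10.0, 20.0, 50.0, 80.0]])
centered = xyxy2xywh(corners)  # [[30., 50., 40., 60.]]
assert np.allclose(xywh2xyxy(centered), corners)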


def xywhn2xyxy(x, w: int = 640, h: int = 640, padw: int = 0, padh: int = 0):
    """Convert normalized bounding box coordinates to pixel coordinates.

    Args:
        x (np.ndarray | torch.Tensor): Normalized bounding box coordinates in (x, y, w, h) format.
        w (int): Image width in pixels.
        h (int): Image height in pixels.
        padw (int): Padding width in pixels.
        padh (int): Padding height in pixels.

    Returns:
        y (np.ndarray | torch.Tensor): The coordinates of the bounding box in the format [x1, y1, x2, y2] where
            x1,y1 is the top-left corner and x2,y2 is the bottom-right corner of the bounding box.
    """
    assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
    y = empty_like(x)  # faster than clone/copy
    xc, yc, xw, xh = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
    half_w, half_h = xw / 2, xh / 2
    y[..., 0] = w * (xc - half_w) + padw  # top left x
    y[..., 1] = h * (yc - half_h) + padh  # top left y
    y[..., 2] = w * (xc + half_w) + padw  # bottom right x
    y[..., 3] = h * (yc + half_h) + padh  # bottom right y
    return y


def xyxy2xywhn(x, w: int = 640, h: int = 640, clip: bool = False, eps: float = 0.0):
    """Convert bounding box coordinates from (x1, y1, x2, y2) format to normalized (x, y, width, height) format,
    where x, y, width and height are normalized to image dimensions.

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates in (x1, y1, x2, y2) format.
        w (int): Image width in pixels.
        h (int): Image height in pixels.
        clip (bool): Whether to clip boxes to image boundaries.
        eps (float): Minimum value for box width and height.

    Returns:
        (np.ndarray | torch.Tensor): Normalized bounding box coordinates in (x, y, width, height) format.
    """
    if clip:
        x = clip_boxes(x, (h - eps, w - eps))
    assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
    y = empty_like(x)  # faster than clone/copy
    x1, y1, x2, y2 = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
    y[..., 0] = ((x1 + x2) / 2) / w  # x center
    y[..., 1] = ((y1 + y2) / 2) / h  # y center
    y[..., 2] = (x2 - x1) / w  # width
    y[..., 3] = (y2 - y1) / h  # height
    return y


def xywh2ltwh(x):
    """Convert bounding box format from [x, y, w, h] to [x1, y1, w, h] where x1, y1 are top-left coordinates.

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates in xywh format.

    Returns:
        (np.ndarray | torch.Tensor): Bounding box coordinates in ltwh format.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
    return y


def xyxy2ltwh(x):
    """Convert bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h] format.

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates in xyxy format.

    Returns:
        (np.ndarray | torch.Tensor): Bounding box coordinates in ltwh format.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 2] = x[..., 2] - x[..., 0]  # width
    y[..., 3] = x[..., 3] - x[..., 1]  # height
    return y


def ltwh2xywh(x):
    """Convert bounding boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center.

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates.

    Returns:
        (np.ndarray | torch.Tensor): Bounding box coordinates in xywh format.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = x[..., 0] + x[..., 2] / 2  # center x
    y[..., 1] = x[..., 1] + x[..., 3] / 2  # center y
    return y


def xyxyxyxy2xywhr(x):
    """Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh, rotation] format.

    Args:
        x (np.ndarray | torch.Tensor): Input box corners with shape (N, 8) in [xy1, xy2, xy3, xy4] format.

    Returns:
        (np.ndarray | torch.Tensor): Converted data in [cx, cy, w, h, rotation] format with shape (N, 5). Rotation
            values are in radians from 0 to pi/2.
    """
    is_torch = isinstance(x, torch.Tensor)
    points = x.cpu().numpy() if is_torch else x
    points = points.reshape(len(x), -1, 2)
    rboxes = []
    for pts in points:
        # NOTE: Use cv2.minAreaRect to get accurate xywhr,
        # especially when some objects are cut off by augmentations in the dataloader.
        (cx, cy), (w, h), angle = cv2.minAreaRect(pts)
        rboxes.append([cx, cy, w, h, angle / 180 * np.pi])
    return torch.tensor(rboxes, device=x.device, dtype=x.dtype) if is_torch else np.asarray(rboxes)


def xywhr2xyxyxyxy(x):
    """Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, xy3, xy4] format.

    Args:
        x (np.ndarray | torch.Tensor): Boxes in [cx, cy, w, h, rotation] format with shape (N, 5) or (B, N, 5).
            Rotation values should be in radians from 0 to pi/2.

    Returns:
        (np.ndarray | torch.Tensor): Converted corner points with shape (N, 4, 2) or (B, N, 4, 2).
    """
    cos, sin, cat, stack = (
        (torch.cos, torch.sin, torch.cat, torch.stack)
        if isinstance(x, torch.Tensor)
        else (np.cos, np.sin, np.concatenate, np.stack)
    )

    ctr = x[..., :2]
    w, h, angle = (x[..., i : i + 1] for i in range(2, 5))
    cos_value, sin_value = cos(angle), sin(angle)
    vec1 = [w / 2 * cos_value, w / 2 * sin_value]
    vec2 = [-h / 2 * sin_value, h / 2 * cos_value]
    vec1 = cat(vec1, -1)
    vec2 = cat(vec2, -1)
    pt1 = ctr + vec1 + vec2
    pt2 = ctr + vec1 - vec2
    pt3 = ctr - vec1 - vec2
    pt4 = ctr - vec1 + vec2
    return stack([pt1, pt2, pt3, pt4], -2)
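

# --- Illustrative usage sketch (editor addition, not part of the package; assumes the module is fully imported) ---
# Expand one rotated box to its four corners; rotation=0 is chosen so the result
# is easy to verify by hand: half-extents (10, 5) around center (50, 50).
rbox = torch.tensor([[50.0, 50.0, 20.0, 10.0, 0.0]])  # (cx, cy, w, h, r)
obb_corners = xywhr2xyxyxyxy(rbox)  # shape (1, 4, 2)
# obb_corners -> [[[60., 55.], [60., 45.], [40., 45.], [40., 55.]]]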


def ltwh2xyxy(x):
    """Convert bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates.

    Returns:
        (np.ndarray | torch.Tensor): Bounding box coordinates in xyxy format.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 2] = x[..., 2] + x[..., 0]  # x2
    y[..., 3] = x[..., 3] + x[..., 1]  # y2
    return y


def segments2boxes(segments):
    """Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh).

    Args:
        segments (list): List of segments where each segment is a list of points, each point is [x, y] coordinates.

    Returns:
        (np.ndarray): Bounding box coordinates in xywh format.
    """
    boxes = []
    for s in segments:
        x, y = s.T  # segment xy
        boxes.append([x.min(), y.min(), x.max(), y.max()])  # cls, xyxy
    return xyxy2xywh(np.array(boxes))  # cls, xywh


def resample_segments(segments, n: int = 1000):
    """Resample segments to n points each using linear interpolation.

    Args:
        segments (list): List of (N, 2) arrays where N is the number of points in each segment.
        n (int): Number of points to resample each segment to.

    Returns:
        (list): Resampled segments with n points each.
    """
    for i, s in enumerate(segments):
        if len(s) == n:
            continue
        s = np.concatenate((s, s[0:1, :]), axis=0)
        x = np.linspace(0, len(s) - 1, n - len(s) if len(s) < n else n)
        xp = np.arange(len(s))
        x = np.insert(x, np.searchsorted(x, xp), xp) if len(s) < n else x
        segments[i] = (
            np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)], dtype=np.float32).reshape(2, -1).T
        )  # segment xy
    return segments
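

# --- Illustrative usage sketch (editor addition, not part of the package; assumes the module is fully imported) ---
# Densify a 4-point square contour to 8 points; the segment is closed by appending
# its first point before interpolation, so outputs stay on the square's outline.
square = [np.array([[0.0, 0.0], [4.0, 0.0], [4.0, 4.0], [0.0, 4.0]], dtype=np.float32)]
dense = resample_segments(square, n=8)
# dense[0].shape -> (8, 2)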


def crop_mask(masks: torch.Tensor, boxes: torch.Tensor) -> torch.Tensor:
    """Crop masks to bounding box regions.

    Args:
        masks (torch.Tensor): Masks with shape (N, H, W).
        boxes (torch.Tensor): Bounding box coordinates with shape (N, 4) in relative point form.

    Returns:
        (torch.Tensor): Cropped masks.
    """
    if boxes.device != masks.device:
        boxes = boxes.to(masks.device)
    n, h, w = masks.shape
    if n < 50 and not masks.is_cuda:  # faster for fewer masks (predict)
        for i, (x1, y1, x2, y2) in enumerate(boxes.round().int()):
            masks[i, :y1] = 0
            masks[i, y2:] = 0
            masks[i, :, :x1] = 0
            masks[i, :, x2:] = 0
        return masks
    else:  # faster for more masks (val)
        x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # x1 shape(n,1,1)
        r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # x coordinates, shape(1,1,w)
        c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # y coordinates, shape(1,h,1)
        return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))


def process_mask(protos, masks_in, bboxes, shape, upsample: bool = False):
    """Apply masks to bounding boxes using mask head output.

    Args:
        protos (torch.Tensor): Mask prototypes with shape (mask_dim, mask_h, mask_w).
        masks_in (torch.Tensor): Mask coefficients with shape (N, mask_dim) where N is number of masks after NMS.
        bboxes (torch.Tensor): Bounding boxes with shape (N, 4) where N is number of masks after NMS.
        shape (tuple): Input image size as (height, width).
        upsample (bool): Whether to upsample masks to original image size.

    Returns:
        (torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and
            w are the height and width of the input image. The mask is applied to the bounding boxes.
    """
    c, mh, mw = protos.shape  # CHW
    masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw)  # CHW

    width_ratio = mw / shape[1]
    height_ratio = mh / shape[0]
    ratios = torch.tensor([[width_ratio, height_ratio, width_ratio, height_ratio]], device=bboxes.device)

    masks = crop_mask(masks, boxes=bboxes * ratios)  # CHW
    if upsample:
        masks = F.interpolate(masks[None], shape, mode="bilinear")[0]  # CHW
    return masks.gt_(0.0).byte()


def process_mask_native(protos, masks_in, bboxes, shape):
    """Apply masks to bounding boxes using mask head output with native upsampling.

    Args:
        protos (torch.Tensor): Mask prototypes with shape (mask_dim, mask_h, mask_w).
        masks_in (torch.Tensor): Mask coefficients with shape (N, mask_dim) where N is number of masks after NMS.
        bboxes (torch.Tensor): Bounding boxes with shape (N, 4) where N is number of masks after NMS.
        shape (tuple): Input image size as (height, width).

    Returns:
        (torch.Tensor): Binary mask tensor with shape (N, H, W).
    """
    c, mh, mw = protos.shape  # CHW
    masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw)
    masks = scale_masks(masks[None], shape)[0]  # CHW
    masks = crop_mask(masks, bboxes)  # CHW
    return masks.gt_(0.0).byte()
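

# --- Illustrative usage sketch (editor addition, not part of the package; assumes the module is fully imported) ---
# Zero out everything outside a 2x2 box in a single all-ones 4x4 mask. With one
# CPU mask the per-mask slicing branch runs; the vectorized branch is equivalent.
ones_mask = torch.ones(1, 4, 4)
box = torch.tensor([[1.0, 1.0, 3.0, 3.0]])  # xyxy in mask pixels
cropped = crop_mask(ones_mask, box)
# cropped[0] is 1 only at rows 1-2, cols 1-2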


def scale_masks(
    masks: torch.Tensor,
    shape: tuple[int, int],
    ratio_pad: tuple[tuple[int, int], tuple[int, int]] | None = None,
    padding: bool = True,
) -> torch.Tensor:
    """Rescale segment masks to target shape.

    Args:
        masks (torch.Tensor): Masks with shape (N, C, H, W).
        shape (tuple[int, int]): Target height and width as (height, width).
        ratio_pad (tuple, optional): Ratio and padding values as ((ratio_h, ratio_w), (pad_h, pad_w)).
        padding (bool): Whether masks are based on YOLO-style augmented images with padding.

    Returns:
        (torch.Tensor): Rescaled masks.
    """
    im1_h, im1_w = masks.shape[2:]
    im0_h, im0_w = shape[:2]
    if im1_h == im0_h and im1_w == im0_w:
        return masks

    if ratio_pad is None:  # calculate from im0_shape
        gain = min(im1_h / im0_h, im1_w / im0_w)  # gain = old / new
        pad_w, pad_h = (im1_w - im0_w * gain), (im1_h - im0_h * gain)  # wh padding
        if padding:
            pad_w /= 2
            pad_h /= 2
    else:
        pad_w, pad_h = ratio_pad[1]
    top, left = (round(pad_h - 0.1), round(pad_w - 0.1)) if padding else (0, 0)
    bottom = im1_h - round(pad_h + 0.1)
    right = im1_w - round(pad_w + 0.1)
    return F.interpolate(masks[..., top:bottom, left:right].float(), shape, mode="bilinear")  # NCHW masks


def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize: bool = False, padding: bool = True):
    """Rescale segment coordinates from img1_shape to img0_shape.

    Args:
        img1_shape (tuple): Source image shape as HWC or HW (supports both).
        coords (torch.Tensor): Coordinates to scale with shape (N, 2).
        img0_shape (tuple): Image 0 shape as HWC or HW (supports both).
        ratio_pad (tuple, optional): Ratio and padding values as ((ratio_h, ratio_w), (pad_h, pad_w)).
        normalize (bool): Whether to normalize coordinates to range [0, 1].
        padding (bool): Whether coordinates are based on YOLO-style augmented images with padding.

    Returns:
        (torch.Tensor): Scaled coordinates.
    """
    img0_h, img0_w = img0_shape[:2]  # supports both HWC or HW shapes
    if ratio_pad is None:  # calculate from img0_shape
        img1_h, img1_w = img1_shape[:2]  # supports both HWC or HW shapes
        gain = min(img1_h / img0_h, img1_w / img0_w)  # gain = old / new
        pad = (img1_w - img0_w * gain) / 2, (img1_h - img0_h * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    if padding:
        coords[..., 0] -= pad[0]  # x padding
        coords[..., 1] -= pad[1]  # y padding
    coords[..., 0] /= gain
    coords[..., 1] /= gain
    coords = clip_coords(coords, img0_shape)
    if normalize:
        coords[..., 0] /= img0_w  # width
        coords[..., 1] /= img0_h  # height
    return coords


def regularize_rboxes(rboxes):
    """Regularize rotated bounding boxes to range [0, pi/2].

    Args:
        rboxes (torch.Tensor): Input rotated boxes with shape (N, 5) in xywhr format.

    Returns:
        (torch.Tensor): Regularized rotated boxes.
    """
    x, y, w, h, t = rboxes.unbind(dim=-1)
    # Swap edge if t >= pi/2 while not being symmetrically opposite
    swap = t % math.pi >= math.pi / 2
    w_ = torch.where(swap, h, w)
    h_ = torch.where(swap, w, h)
    t = t % (math.pi / 2)
    return torch.stack([x, y, w_, h_, t], dim=-1)  # regularized boxes
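

# --- Illustrative usage sketch (editor addition, not part of the package; assumes the module is fully imported) ---
# Rescale two keypoints from a 640x640 letterboxed input to a hypothetical 320x640
# original: gain is 1.0 and the vertical pad of 160 px is subtracted (in place,
# hence the clone()).
keypoints = torch.tensor([[320.0, 320.0], [100.0, 200.0]])  # (N, 2) xy
rescaled = scale_coords((640, 640), keypoints.clone(), (320, 640))
# rescaled -> tensor([[320., 160.], [100., 40.]])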


def masks2segments(masks: np.ndarray | torch.Tensor, strategy: str = "all") -> list[np.ndarray]:
    """Convert masks to segments using contour detection.

    Args:
        masks (np.ndarray | torch.Tensor): Binary masks with shape (batch_size, 160, 160).
        strategy (str): Segmentation strategy, either 'all' or 'largest'.

    Returns:
        (list): List of segment masks as float32 arrays.
    """
    from ultralytics.data.converter import merge_multi_segment

    masks = masks.astype("uint8") if isinstance(masks, np.ndarray) else masks.byte().cpu().numpy()
    segments = []
    for x in np.ascontiguousarray(masks):
        c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
        if c:
            if strategy == "all":  # merge and concatenate all segments
                c = (
                    np.concatenate(merge_multi_segment([x.reshape(-1, 2) for x in c]))
                    if len(c) > 1
                    else c[0].reshape(-1, 2)
                )
            elif strategy == "largest":  # select largest segment
                c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)
        else:
            c = np.zeros((0, 2))  # no segments found
        segments.append(c.astype("float32"))
    return segments


def convert_torch2numpy_batch(batch: torch.Tensor) -> np.ndarray:
    """Convert a batch of FP32 torch tensors to NumPy uint8 arrays, changing from BCHW to BHWC layout.

    Args:
        batch (torch.Tensor): Input tensor batch with shape (Batch, Channels, Height, Width) and dtype torch.float32.

    Returns:
        (np.ndarray): Output NumPy array batch with shape (Batch, Height, Width, Channels) and dtype uint8.
    """
    return (batch.permute(0, 2, 3, 1).contiguous() * 255).clamp(0, 255).byte().cpu().numpy()


def clean_str(s):
    """Clean a string by replacing special characters with '_' character.

    Args:
        s (str): A string needing special characters replaced.

    Returns:
        (str): A string with special characters replaced by an underscore _.
    """
    return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨`><+]", repl="_", string=s)


def empty_like(x):
    """Create an empty torch.Tensor or np.ndarray with the same shape and dtype as the input."""
    return torch.empty_like(x, dtype=x.dtype) if isinstance(x, torch.Tensor) else np.empty_like(x, dtype=x.dtype)
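

# --- Illustrative usage sketch (editor addition, not part of the package; assumes the module is fully imported) ---
# Trace a filled rectangle back out of a binary mask. With a single contour per
# mask, strategy="largest" simply returns that contour as an (N, 2) xy polygon.
demo_mask = np.zeros((1, 160, 160), dtype=np.uint8)
demo_mask[0, 40:120, 30:100] = 1
polygon = masks2segments(demo_mask, strategy="largest")[0]
# polygon -> float32 (N, 2) array of contour points around the rectangle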