ultralytics 8.3.89__py3-none-any.whl → 8.3.91__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/conftest.py +2 -2
- tests/test_cli.py +13 -11
- tests/test_cuda.py +10 -1
- tests/test_exports.py +2 -2
- tests/test_integrations.py +1 -5
- tests/test_python.py +16 -16
- tests/test_solutions.py +9 -9
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +3 -1
- ultralytics/cfg/models/11/yolo11-cls.yaml +5 -5
- ultralytics/cfg/models/11/yolo11-obb.yaml +5 -5
- ultralytics/cfg/models/11/yolo11-pose.yaml +5 -5
- ultralytics/cfg/models/11/yolo11-seg.yaml +5 -5
- ultralytics/cfg/models/11/yolo11.yaml +5 -5
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +5 -5
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +5 -5
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +5 -5
- ultralytics/cfg/models/v8/yolov8-obb.yaml +5 -5
- ultralytics/cfg/models/v8/yolov8-p6.yaml +5 -5
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +5 -5
- ultralytics/cfg/models/v8/yolov8-world.yaml +5 -5
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +5 -5
- ultralytics/cfg/models/v8/yolov8.yaml +5 -5
- ultralytics/cfg/models/v9/yolov9c-seg.yaml +1 -1
- ultralytics/cfg/models/v9/yolov9c.yaml +1 -1
- ultralytics/cfg/models/v9/yolov9e-seg.yaml +1 -1
- ultralytics/cfg/models/v9/yolov9e.yaml +1 -1
- ultralytics/cfg/models/v9/yolov9m.yaml +1 -1
- ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
- ultralytics/cfg/models/v9/yolov9t.yaml +1 -1
- ultralytics/data/annotator.py +9 -14
- ultralytics/data/base.py +118 -30
- ultralytics/data/build.py +63 -24
- ultralytics/data/converter.py +5 -5
- ultralytics/data/dataset.py +207 -53
- ultralytics/data/loaders.py +1 -0
- ultralytics/data/split_dota.py +39 -12
- ultralytics/data/utils.py +15 -19
- ultralytics/engine/exporter.py +24 -23
- ultralytics/engine/model.py +67 -88
- ultralytics/engine/predictor.py +106 -21
- ultralytics/engine/trainer.py +32 -23
- ultralytics/engine/tuner.py +21 -18
- ultralytics/engine/validator.py +75 -41
- ultralytics/hub/__init__.py +12 -13
- ultralytics/hub/auth.py +9 -12
- ultralytics/hub/session.py +76 -21
- ultralytics/hub/utils.py +19 -17
- ultralytics/models/fastsam/model.py +20 -11
- ultralytics/models/fastsam/predict.py +36 -16
- ultralytics/models/fastsam/utils.py +5 -5
- ultralytics/models/fastsam/val.py +6 -6
- ultralytics/models/nas/model.py +22 -11
- ultralytics/models/nas/predict.py +9 -4
- ultralytics/models/nas/val.py +5 -5
- ultralytics/models/rtdetr/model.py +20 -11
- ultralytics/models/rtdetr/predict.py +18 -15
- ultralytics/models/rtdetr/train.py +20 -16
- ultralytics/models/rtdetr/val.py +42 -6
- ultralytics/models/sam/__init__.py +1 -1
- ultralytics/models/sam/amg.py +50 -4
- ultralytics/models/sam/model.py +8 -14
- ultralytics/models/sam/modules/decoders.py +18 -21
- ultralytics/models/sam/modules/encoders.py +25 -46
- ultralytics/models/sam/modules/memory_attention.py +19 -15
- ultralytics/models/sam/modules/sam.py +18 -25
- ultralytics/models/sam/modules/tiny_encoder.py +19 -29
- ultralytics/models/sam/modules/transformer.py +35 -57
- ultralytics/models/sam/modules/utils.py +15 -15
- ultralytics/models/sam/predict.py +0 -3
- ultralytics/models/utils/loss.py +87 -36
- ultralytics/models/utils/ops.py +26 -31
- ultralytics/models/yolo/classify/predict.py +24 -3
- ultralytics/models/yolo/classify/train.py +77 -10
- ultralytics/models/yolo/classify/val.py +40 -15
- ultralytics/models/yolo/detect/predict.py +23 -10
- ultralytics/models/yolo/detect/train.py +85 -15
- ultralytics/models/yolo/detect/val.py +145 -21
- ultralytics/models/yolo/model.py +1 -2
- ultralytics/models/yolo/obb/predict.py +12 -4
- ultralytics/models/yolo/obb/train.py +7 -0
- ultralytics/models/yolo/obb/val.py +25 -7
- ultralytics/models/yolo/pose/predict.py +22 -6
- ultralytics/models/yolo/pose/train.py +17 -1
- ultralytics/models/yolo/pose/val.py +46 -21
- ultralytics/models/yolo/segment/predict.py +22 -8
- ultralytics/models/yolo/segment/train.py +6 -0
- ultralytics/models/yolo/segment/val.py +100 -14
- ultralytics/models/yolo/world/train.py +38 -8
- ultralytics/models/yolo/world/train_world.py +39 -10
- ultralytics/nn/autobackend.py +28 -14
- ultralytics/nn/modules/__init__.py +3 -0
- ultralytics/nn/modules/activation.py +12 -3
- ultralytics/nn/modules/block.py +587 -84
- ultralytics/nn/modules/conv.py +418 -54
- ultralytics/nn/modules/head.py +3 -4
- ultralytics/nn/modules/transformer.py +320 -34
- ultralytics/nn/modules/utils.py +17 -3
- ultralytics/nn/tasks.py +221 -69
- ultralytics/solutions/ai_gym.py +2 -2
- ultralytics/solutions/analytics.py +4 -4
- ultralytics/solutions/heatmap.py +4 -4
- ultralytics/solutions/instance_segmentation.py +10 -4
- ultralytics/solutions/object_blurrer.py +2 -2
- ultralytics/solutions/object_counter.py +2 -2
- ultralytics/solutions/object_cropper.py +2 -2
- ultralytics/solutions/parking_management.py +9 -9
- ultralytics/solutions/queue_management.py +1 -1
- ultralytics/solutions/region_counter.py +2 -2
- ultralytics/solutions/security_alarm.py +7 -7
- ultralytics/solutions/solutions.py +7 -4
- ultralytics/solutions/speed_estimation.py +2 -2
- ultralytics/solutions/streamlit_inference.py +6 -6
- ultralytics/solutions/trackzone.py +9 -2
- ultralytics/solutions/vision_eye.py +4 -4
- ultralytics/trackers/basetrack.py +1 -1
- ultralytics/trackers/bot_sort.py +23 -22
- ultralytics/trackers/byte_tracker.py +4 -4
- ultralytics/trackers/track.py +2 -1
- ultralytics/trackers/utils/gmc.py +26 -27
- ultralytics/trackers/utils/kalman_filter.py +31 -29
- ultralytics/trackers/utils/matching.py +7 -7
- ultralytics/utils/__init__.py +32 -27
- ultralytics/utils/autobatch.py +5 -5
- ultralytics/utils/benchmarks.py +111 -18
- ultralytics/utils/callbacks/base.py +3 -3
- ultralytics/utils/callbacks/clearml.py +11 -11
- ultralytics/utils/callbacks/comet.py +42 -24
- ultralytics/utils/callbacks/dvc.py +11 -10
- ultralytics/utils/callbacks/hub.py +8 -8
- ultralytics/utils/callbacks/mlflow.py +1 -1
- ultralytics/utils/callbacks/neptune.py +12 -10
- ultralytics/utils/callbacks/raytune.py +1 -1
- ultralytics/utils/callbacks/tensorboard.py +6 -6
- ultralytics/utils/callbacks/wb.py +16 -16
- ultralytics/utils/checks.py +116 -35
- ultralytics/utils/dist.py +15 -2
- ultralytics/utils/downloads.py +13 -9
- ultralytics/utils/files.py +12 -13
- ultralytics/utils/instance.py +112 -45
- ultralytics/utils/loss.py +28 -33
- ultralytics/utils/metrics.py +246 -181
- ultralytics/utils/ops.py +61 -53
- ultralytics/utils/patches.py +8 -6
- ultralytics/utils/plotting.py +65 -45
- ultralytics/utils/tal.py +88 -57
- ultralytics/utils/torch_utils.py +181 -33
- ultralytics/utils/triton.py +13 -3
- ultralytics/utils/tuner.py +8 -16
- {ultralytics-8.3.89.dist-info → ultralytics-8.3.91.dist-info}/METADATA +1 -1
- ultralytics-8.3.91.dist-info/RECORD +250 -0
- ultralytics-8.3.89.dist-info/RECORD +0 -250
- {ultralytics-8.3.89.dist-info → ultralytics-8.3.91.dist-info}/LICENSE +0 -0
- {ultralytics-8.3.89.dist-info → ultralytics-8.3.91.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.89.dist-info → ultralytics-8.3.91.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.89.dist-info → ultralytics-8.3.91.dist-info}/top_level.txt +0 -0
ultralytics/utils/ops.py
CHANGED
@@ -18,6 +18,11 @@ class Profile(contextlib.ContextDecorator):
     """
     YOLOv8 Profile class. Use as a decorator with @Profile() or as a context manager with 'with Profile():'.
 
+    Attributes:
+        t (float): Accumulated time.
+        device (torch.device): Device used for model inference.
+        cuda (bool): Whether CUDA is being used.
+
     Examples:
         >>> from ultralytics.utils.ops import Profile
         >>> with Profile(device=device) as dt:
@@ -30,8 +35,8 @@ class Profile(contextlib.ContextDecorator):
         Initialize the Profile class.
 
         Args:
-            t (float): Initial time.
-            device (torch.device):
+            t (float): Initial time.
+            device (torch.device): Device used for model inference.
         """
         self.t = t
         self.device = device
@@ -63,12 +68,12 @@ def segment2box(segment, width=640, height=640):
     Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy).
 
     Args:
-        segment (torch.Tensor):
-        width (int):
-        height (int): The height of the image.
+        segment (torch.Tensor): The segment label.
+        width (int): The width of the image.
+        height (int): The height of the image.
 
     Returns:
-        (np.ndarray):
+        (np.ndarray): The minimum and maximum x and y values of the segment.
     """
     x, y = segment.T  # segment xy
     # any 3 out of 4 sides are outside the image, clip coordinates first, https://github.com/ultralytics/ultralytics/pull/18294
@@ -87,21 +92,20 @@ def segment2box(segment, width=640, height=640):
 
 def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True, xywh=False):
     """
-
-    specified in (img1_shape) to the shape of a different image (img0_shape).
+    Rescale bounding boxes from img1_shape to img0_shape.
 
     Args:
         img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width).
-        boxes (torch.Tensor):
-        img0_shape (tuple):
-        ratio_pad (tuple):
+        boxes (torch.Tensor): The bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2).
+        img0_shape (tuple): The shape of the target image, in the format of (height, width).
+        ratio_pad (tuple): A tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be
            calculated based on the size difference between the two images.
        padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
            rescaling.
-        xywh (bool): The box format is xywh or not
+        xywh (bool): The box format is xywh or not.
 
     Returns:
-
+        (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2).
     """
     if ratio_pad is None:  # calculate from img0_shape
         gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
@@ -146,8 +150,8 @@ def nms_rotated(boxes, scores, threshold=0.45, use_triu=True):
     Args:
         boxes (torch.Tensor): Rotated bounding boxes, shape (N, 5), format xywhr.
         scores (torch.Tensor): Confidence scores, shape (N,).
-        threshold (float
-        use_triu (bool
+        threshold (float): IoU threshold.
+        use_triu (bool): Whether to use `torch.triu` operator. It'd be useful for disable it
            when exporting obb models to some formats that do not support `torch.triu`.
 
     Returns:
@@ -210,7 +214,7 @@ def non_max_suppression(
            list contains the apriori labels for a given image. The list should be in the format
            output by a dataloader, with each label being a tuple of (class_index, x1, y1, x2, y2).
        max_det (int): The maximum number of boxes to keep after NMS.
-        nc (int
+        nc (int): The number of classes output by the model. Any indices after this will be considered masks.
        max_time_img (float): The maximum time (seconds) for processing one image.
        max_nms (int): The maximum number of boxes into torchvision.ops.nms().
        max_wh (int): The maximum box width and height in pixels.
@@ -333,7 +337,7 @@ def clip_boxes(boxes, shape):
     Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape.
 
     Args:
-        boxes (torch.Tensor): The bounding boxes to clip.
+        boxes (torch.Tensor | numpy.ndarray): The bounding boxes to clip.
        shape (tuple): The shape of the image.
 
     Returns:
@@ -359,7 +363,7 @@ def clip_coords(coords, shape):
        shape (tuple): A tuple of integers representing the size of the image in the format (height, width).
 
     Returns:
-        (torch.Tensor | numpy.ndarray): Clipped coordinates
+        (torch.Tensor | numpy.ndarray): Clipped coordinates.
     """
     if isinstance(coords, torch.Tensor):  # faster individually (WARNING: inplace .clamp_() Apple MPS bug)
        coords[..., 0] = coords[..., 0].clamp(0, shape[1])  # x
@@ -451,10 +455,11 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
 
     Args:
        x (np.ndarray | torch.Tensor): The bounding box coordinates.
-        w (int): Width of the image.
-        h (int): Height of the image.
-        padw (int): Padding width.
-        padh (int): Padding height.
+        w (int): Width of the image.
+        h (int): Height of the image.
+        padw (int): Padding width.
+        padh (int): Padding height.
+
     Returns:
        y (np.ndarray | torch.Tensor): The coordinates of the bounding box in the format [x1, y1, x2, y2] where
            x1,y1 is the top-left corner, x2,y2 is the bottom-right corner of the bounding box.
@@ -475,10 +480,10 @@ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
 
     Args:
        x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format.
-        w (int): The width of the image.
-        h (int): The height of the image.
-        clip (bool): If True, the boxes will be clipped to the image boundaries.
-        eps (float): The minimum value of the box's width and height.
+        w (int): The width of the image.
+        h (int): The height of the image.
+        clip (bool): If True, the boxes will be clipped to the image boundaries.
+        eps (float): The minimum value of the box's width and height.
 
     Returns:
        y (np.ndarray | torch.Tensor): The bounding box coordinates in (x, y, width, height, normalized) format
@@ -598,13 +603,13 @@ def xywhr2xyxyxyxy(x):
 
 def ltwh2xyxy(x):
     """
-
+    Convert bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.
 
     Args:
-        x (np.ndarray | torch.Tensor):
+        x (np.ndarray | torch.Tensor): The input image.
 
     Returns:
-
+        (np.ndarray | torch.Tensor): The xyxy coordinates of the bounding boxes.
     """
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
     y[..., 2] = x[..., 2] + x[..., 0]  # width
@@ -614,13 +619,13 @@ def ltwh2xyxy(x):
 
 def segments2boxes(segments):
     """
-
+    Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh).
 
     Args:
-        segments (
+        segments (List): List of segments, each segment is a list of points, each point is a list of x, y coordinates.
 
     Returns:
-        (np.ndarray):
+        (np.ndarray): The xywh coordinates of the bounding boxes.
     """
     boxes = []
     for s in segments:
@@ -634,11 +639,11 @@ def resample_segments(segments, n=1000):
     Inputs a list of segments (n,2) and returns a list of segments (n,2) up-sampled to n points each.
 
     Args:
-        segments (
-        n (int):
+        segments (List): A list of (n,2) arrays, where n is the number of points in the segment.
+        n (int): Number of points to resample the segment to.
 
     Returns:
-        segments (
+        segments (List): The resampled segments.
     """
     for i, s in enumerate(segments):
        if len(s) == n:
@@ -655,14 +660,14 @@ def resample_segments(segments, n=1000):
 
 def crop_mask(masks, boxes):
     """
-
+    Crop masks to bounding boxes.
 
     Args:
-        masks (torch.Tensor): [n, h, w] tensor of masks
-        boxes (torch.Tensor): [n, 4] tensor of bbox coordinates in relative point form
+        masks (torch.Tensor): [n, h, w] tensor of masks.
+        boxes (torch.Tensor): [n, 4] tensor of bbox coordinates in relative point form.
 
     Returns:
-        (torch.Tensor):
+        (torch.Tensor): Cropped masks.
     """
     _, h, w = masks.shape
     x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # x1 shape(n,1,1)
@@ -681,7 +686,7 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False):
        masks_in (torch.Tensor): A tensor of shape [n, mask_dim], where n is the number of masks after NMS.
        bboxes (torch.Tensor): A tensor of shape [n, 4], where n is the number of masks after NMS.
        shape (tuple): A tuple of integers representing the size of the input image in the format (h, w).
-        upsample (bool): A flag to indicate whether to upsample the mask to the original image size.
+        upsample (bool): A flag to indicate whether to upsample the mask to the original image size.
 
     Returns:
        (torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w
@@ -707,16 +712,16 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False):
 
 def process_mask_native(protos, masks_in, bboxes, shape):
     """
-
+    Apply masks to bounding boxes using the output of the mask head with native upsampling.
 
     Args:
-        protos (torch.Tensor): [mask_dim, mask_h, mask_w]
+        protos (torch.Tensor): [mask_dim, mask_h, mask_w].
        masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms.
        bboxes (torch.Tensor): [n, 4], n is number of masks after nms.
        shape (tuple): The size of the input image (h,w).
 
     Returns:
-
+        (torch.Tensor): The returned masks with dimensions [h, w, n].
     """
     c, mh, mw = protos.shape  # CHW
     masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw)
@@ -734,6 +739,9 @@ def scale_masks(masks, shape, padding=True):
        shape (tuple): Height and width.
        padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
            rescaling.
+
+    Returns:
+        (torch.Tensor): Rescaled masks.
     """
     mh, mw = masks.shape[2:]
     gain = min(mh / shape[0], mw / shape[1])  # gain = old / new
@@ -755,10 +763,10 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize=False
 
     Args:
        img1_shape (tuple): The shape of the image that the coords are from.
-        coords (torch.Tensor):
-        img0_shape (tuple):
-        ratio_pad (tuple):
-        normalize (bool): If True, the coordinates will be normalized to the range [0, 1].
+        coords (torch.Tensor): The coords to be scaled of shape n,2.
+        img0_shape (tuple): The shape of the image that the segmentation is being applied to.
+        ratio_pad (tuple): The ratio of the image size to the padded image size.
+        normalize (bool): If True, the coordinates will be normalized to the range [0, 1].
        padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
            rescaling.
 
@@ -805,14 +813,14 @@ def regularize_rboxes(rboxes):
 
 def masks2segments(masks, strategy="all"):
     """
-
+    Convert masks to segments.
 
     Args:
-        masks (torch.Tensor):
-        strategy (str): 'all' or 'largest'.
+        masks (torch.Tensor): The output of the model, which is a tensor of shape (batch_size, 160, 160).
+        strategy (str): 'all' or 'largest'.
 
     Returns:
-
+        (List): List of segment masks.
     """
     from ultralytics.data.converter import merge_multi_segment
 
@@ -852,10 +860,10 @@ def clean_str(s):
     Cleans a string by replacing special characters with '_' character.
 
     Args:
-        s (str):
+        s (str): A string needing special characters replaced.
 
     Returns:
-        (str):
+        (str): A string with special characters replaced by an underscore _.
     """
     return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
 
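Most of the ops.py changes above complete previously empty docstrings; the signatures themselves are unchanged. As a quick orientation, here is a minimal sketch of two of the touched APIs, Profile and scale_boxes, used as their updated docstrings describe (the tensor values and shapes below are illustrative only, not from the diff):

import torch

from ultralytics.utils.ops import Profile, scale_boxes

# Context-manager usage, as shown in the Profile docstring example above
with Profile(device=torch.device("cpu")) as dt:
    boxes = torch.tensor([[10.0, 20.0, 110.0, 220.0]])  # one xyxy box in a 640x640 letterboxed image
print(f"elapsed: {dt.t:.6f} s")  # dt.t is the accumulated-time attribute documented above

# Rescale the box from the 640x640 inference shape back to a 480x640 source image;
# with ratio_pad=None the gain and padding are derived from the two shapes.
print(scale_boxes(img1_shape=(640, 640), boxes=boxes, img0_shape=(480, 640)))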
ultralytics/utils/patches.py
CHANGED
@@ -18,7 +18,7 @@ def imread(filename: str, flags: int = cv2.IMREAD_COLOR):
 
     Args:
        filename (str): Path to the file to read.
-        flags (int, optional): Flag that can take values of cv2.IMREAD_*.
+        flags (int, optional): Flag that can take values of cv2.IMREAD_*.
 
     Returns:
        (np.ndarray): The read image.
@@ -33,7 +33,7 @@ def imwrite(filename: str, img: np.ndarray, params=None):
     Args:
        filename (str): Path to the file to write.
        img (np.ndarray): Image to write.
-        params (
+        params (List[int], optional): Additional parameters for image encoding.
 
     Returns:
        (bool): True if the file was written, False otherwise.
@@ -47,7 +47,7 @@ def imwrite(filename: str, img: np.ndarray, params=None):
 
 def imshow(winname: str, mat: np.ndarray):
     """
-
+    Display an image in the specified window.
 
     Args:
        winname (str): Name of the window.
@@ -88,11 +88,13 @@ def torch_load(*args, **kwargs):
 
 def torch_save(*args, **kwargs):
     """
-
-
+    Save PyTorch objects with retry mechanism for robustness.
+
+    This function wraps torch.save with 3 retries and exponential backoff in case of save failures, which can occur
+    due to device flushing delays or antivirus scanning.
 
     Args:
-        *args (
+        *args (Any): Positional arguments to pass to torch.save.
        **kwargs (Any): Keyword arguments to pass to torch.save.
     """
     for i in range(4):  # 3 retries
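The substantive change in patches.py is the newly documented retry behavior of torch_save. Below is a minimal sketch of the retry-with-exponential-backoff pattern the docstring describes, assuming the 0.5s/1s/2s schedule implied by the `for i in range(4):  # 3 retries` loop visible above; this illustrates the pattern rather than reproducing the package's exact code:

import time

import torch


def save_with_retry(obj, path, retries=3):
    # Retry torch.save on RuntimeError, e.g. device flushing delays or antivirus scanning.
    for attempt in range(retries + 1):
        try:
            return torch.save(obj, path)
        except RuntimeError:
            if attempt == retries:
                raise
            time.sleep((2**attempt) / 2)  # exponential backoff: 0.5s, 1s, 2s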
ultralytics/utils/plotting.py
CHANGED
@@ -25,9 +25,9 @@ class Colors:
     RGB values.
 
     Attributes:
-        palette (
+        palette (List[Tuple]): List of RGB color values.
        n (int): The number of colors in the palette.
-        pose_palette (np.ndarray): A specific color palette array with dtype np.uint8.
+        pose_palette (np.ndarray): A specific color palette array for pose estimation with dtype np.uint8.
 
     Examples:
        >>> from ultralytics.utils.plotting import Colors
@@ -142,13 +142,13 @@ class Colors:
        )
 
     def __call__(self, i, bgr=False):
-        """
+        """Convert hex color codes to RGB values."""
        c = self.palette[int(i) % self.n]
        return (c[2], c[1], c[0]) if bgr else c
 
     @staticmethod
     def hex2rgb(h):
-        """
+        """Convert hex color codes to RGB values (i.e. default PIL order)."""
        return tuple(int(h[1 + i : 1 + i + 2], 16) for i in (0, 2, 4))
 
 
@@ -160,13 +160,15 @@ class Annotator:
     Ultralytics Annotator for train/val mosaics and JPGs and predictions annotations.
 
     Attributes:
-        im (Image.Image or
+        im (Image.Image or np.ndarray): The image to annotate.
        pil (bool): Whether to use PIL or cv2 for drawing annotations.
        font (ImageFont.truetype or ImageFont.load_default): Font used for text annotations.
        lw (float): Line width for drawing.
        skeleton (List[List[int]]): Skeleton structure for keypoints.
        limb_color (List[int]): Color palette for limbs.
        kpt_color (List[int]): Color palette for keypoints.
+        dark_colors (set): Set of colors considered dark for text contrast.
+        light_colors (set): Set of colors considered light for text contrast.
 
     Examples:
        >>> from ultralytics.utils.plotting import Annotator
@@ -256,7 +258,7 @@ class Annotator:
            txt_color (tuple, optional): The color of the text (R, G, B).
 
        Returns:
-
+            (tuple): Text color for label.
 
        Examples:
            >>> from ultralytics.utils.plotting import Annotator
@@ -273,14 +275,14 @@ class Annotator:
 
     def box_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255), rotated=False):
        """
-
+        Draw a bounding box on an image with a given label.
 
        Args:
            box (tuple): The bounding box coordinates (x1, y1, x2, y2).
-            label (str): The text label to be displayed.
+            label (str, optional): The text label to be displayed.
            color (tuple, optional): The background color of the rectangle (B, G, R).
            txt_color (tuple, optional): The color of the text (R, G, B).
-            rotated (bool, optional):
+            rotated (bool, optional): Whether the task is oriented bounding box detection.
 
        Examples:
            >>> from ultralytics.utils.plotting import Annotator
@@ -340,11 +342,11 @@ class Annotator:
        Plot masks on image.
 
        Args:
-            masks (
-            colors (List[List[
-            im_gpu (
-            alpha (float): Mask transparency: 0.0 fully transparent, 1.0 opaque
-            retina_masks (bool): Whether to use high resolution masks or not.
+            masks (torch.Tensor): Predicted masks on cuda, shape: [n, h, w]
+            colors (List[List[int]]): Colors for predicted masks, [[r, g, b] * n]
+            im_gpu (torch.Tensor): Image is in cuda, shape: [3, h, w], range: [0, 1]
+            alpha (float, optional): Mask transparency: 0.0 fully transparent, 1.0 opaque.
+            retina_masks (bool, optional): Whether to use high resolution masks or not.
        """
        if self.pil:
            # Convert to numpy first
@@ -377,11 +379,11 @@ class Annotator:
 
        Args:
            kpts (torch.Tensor): Keypoints, shape [17, 3] (x, y, confidence).
-            shape (tuple, optional): Image shape (h, w).
-            radius (int, optional): Keypoint radius.
-            kpt_line (bool, optional): Draw lines between keypoints.
-            conf_thres (float, optional): Confidence threshold.
-            kpt_color (tuple, optional): Keypoint color (B, G, R).
+            shape (tuple, optional): Image shape (h, w).
+            radius (int, optional): Keypoint radius.
+            kpt_line (bool, optional): Draw lines between keypoints.
+            conf_thres (float, optional): Confidence threshold.
+            kpt_color (tuple, optional): Keypoint color (B, G, R).
 
        Note:
            - `kpt_line=True` currently only supports human pose plotting.
@@ -436,7 +438,16 @@ class Annotator:
        self.draw.rectangle(xy, fill, outline, width)
 
     def text(self, xy, text, txt_color=(255, 255, 255), anchor="top", box_style=False):
-        """
+        """
+        Add text to an image using PIL or cv2.
+
+        Args:
+            xy (List[int]): Top-left coordinates for text placement.
+            text (str): Text to be drawn.
+            txt_color (tuple, optional): Text color (R, G, B).
+            anchor (str, optional): Text anchor position ('top' or 'bottom').
+            box_style (bool, optional): Whether to draw text with a background box.
+        """
        if anchor == "bottom":  # start y from font bottom
            w, h = self.font.getsize(text)  # text width, height
            xy[1] += 1 - h
@@ -492,7 +503,7 @@ class Annotator:
     @staticmethod
     def get_bbox_dimension(bbox=None):
        """
-        Calculate the area of a bounding box.
+        Calculate the dimensions and area of a bounding box.
 
        Args:
            bbox (tuple): Bounding box coordinates in the format (x_min, y_min, x_max, y_max).
@@ -517,7 +528,16 @@ class Annotator:
 @TryExcept()  # known issue https://github.com/ultralytics/yolov5/issues/5395
 @plt_settings()
 def plot_labels(boxes, cls, names=(), save_dir=Path(""), on_plot=None):
-    """
+    """
+    Plot training labels including class histograms and box statistics.
+
+    Args:
+        boxes (np.ndarray): Bounding box coordinates in format [x, y, width, height].
+        cls (np.ndarray): Class indices.
+        names (Dict, optional): Dictionary mapping class indices to class names.
+        save_dir (Path, optional): Directory to save the plot.
+        on_plot (Callable, optional): Function to call after plot is saved.
+    """
     import pandas  # scope for faster 'import ultralytics'
     import seaborn  # scope for faster 'import ultralytics'
 
@@ -580,16 +600,16 @@ def save_one_box(xyxy, im, file=Path("im.jpg"), gain=1.02, pad=10, square=False,
 
     Args:
        xyxy (torch.Tensor or list): A tensor or list representing the bounding box in xyxy format.
-        im (
-        file (Path, optional): The path where the cropped image will be saved.
-        gain (float, optional): A multiplicative factor to increase the size of the bounding box.
-        pad (int, optional): The number of pixels to add to the width and height of the bounding box.
-        square (bool, optional): If True, the bounding box will be transformed into a square.
-        BGR (bool, optional): If True, the image will be saved in BGR format, otherwise in RGB.
-        save (bool, optional): If True, the cropped image will be saved to disk.
+        im (np.ndarray): The input image.
+        file (Path, optional): The path where the cropped image will be saved.
+        gain (float, optional): A multiplicative factor to increase the size of the bounding box.
+        pad (int, optional): The number of pixels to add to the width and height of the bounding box.
+        square (bool, optional): If True, the bounding box will be transformed into a square.
+        BGR (bool, optional): If True, the image will be saved in BGR format, otherwise in RGB.
+        save (bool, optional): If True, the cropped image will be saved to disk.
 
     Returns:
-        (
+        (np.ndarray): The cropped image.
 
     Examples:
        >>> from ultralytics.utils.plotting import save_one_box
@@ -653,7 +673,7 @@ def plot_images(
        conf_thres: Confidence threshold for displaying detections.
 
     Returns:
-        np.ndarray: Plotted image grid as a numpy array if save is False, None otherwise.
+        (np.ndarray): Plotted image grid as a numpy array if save is False, None otherwise.
 
     Note:
        This function supports both tensor and numpy array inputs. It will automatically
@@ -693,6 +713,7 @@ def plot_images(
 
     # Annotate
     fs = int((h + w) * ns * 0.01)  # font size
+    fs = max(fs, 18)  # ensure that the font size is large enough to be easily readable.
     annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names)
     for i in range(bs):
        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
@@ -789,13 +810,12 @@ def plot_results(file="path/to/results.csv", dir="", segment=False, pose=False,
     pose estimation, and classification. Plots are saved as 'results.png' in the directory where the CSV is located.
 
     Args:
-        file (str, optional): Path to the CSV file containing the training results.
-        dir (str, optional): Directory where the CSV file is located if 'file' is not provided.
-        segment (bool, optional): Flag to indicate if the data is for segmentation.
-        pose (bool, optional): Flag to indicate if the data is for pose estimation.
-        classify (bool, optional): Flag to indicate if the data is for classification.
+        file (str, optional): Path to the CSV file containing the training results.
+        dir (str, optional): Directory where the CSV file is located if 'file' is not provided.
+        segment (bool, optional): Flag to indicate if the data is for segmentation.
+        pose (bool, optional): Flag to indicate if the data is for pose estimation.
+        classify (bool, optional): Flag to indicate if the data is for classification.
        on_plot (callable, optional): Callback function to be executed after plotting. Takes filename as an argument.
-            Defaults to None.
 
     Examples:
        >>> from ultralytics.utils.plotting import plot_results
@@ -845,15 +865,15 @@ def plot_results(file="path/to/results.csv", dir="", segment=False, pose=False,
 
 def plt_color_scatter(v, f, bins=20, cmap="viridis", alpha=0.8, edgecolors="none"):
     """
-
+    Plot a scatter plot with points colored based on a 2D histogram.
 
     Args:
        v (array-like): Values for the x-axis.
        f (array-like): Values for the y-axis.
-        bins (int, optional): Number of bins for the histogram.
-        cmap (str, optional): Colormap for the scatter plot.
-        alpha (float, optional): Alpha for the scatter plot.
-        edgecolors (str, optional): Edge colors for the scatter plot.
+        bins (int, optional): Number of bins for the histogram.
+        cmap (str, optional): Colormap for the scatter plot.
+        alpha (float, optional): Alpha for the scatter plot.
+        edgecolors (str, optional): Edge colors for the scatter plot.
 
     Examples:
        >>> v = np.random.rand(100)
@@ -880,7 +900,7 @@ def plot_tune_results(csv_file="tune_results.csv"):
     in the CSV, color-coded based on fitness scores. The best-performing configurations are highlighted on the plots.
 
     Args:
-        csv_file (str, optional): Path to the CSV file containing the tuning results.
+        csv_file (str, optional): Path to the CSV file containing the tuning results.
 
     Examples:
        >>> plot_tune_results("path/to/tune_results.csv")
@@ -959,8 +979,8 @@ def feature_visualization(x, module_type, stage, n=32, save_dir=Path("runs/detec
        x (torch.Tensor): Features to be visualized.
        module_type (str): Module type.
        stage (int): Module stage within the model.
-        n (int, optional): Maximum number of feature maps to plot.
-        save_dir (Path, optional): Directory to save results.
+        n (int, optional): Maximum number of feature maps to plot.
+        save_dir (Path, optional): Directory to save results.
     """
     for m in {"Detect", "Segment", "Pose", "Classify", "OBB", "RTDETRDecoder"}:  # all model heads
        if m in module_type: