ultralytics 8.3.88__py3-none-any.whl → 8.3.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. tests/conftest.py +2 -2
  2. tests/test_cli.py +13 -11
  3. tests/test_cuda.py +10 -1
  4. tests/test_integrations.py +1 -5
  5. tests/test_python.py +16 -16
  6. tests/test_solutions.py +9 -9
  7. ultralytics/__init__.py +1 -1
  8. ultralytics/cfg/__init__.py +3 -1
  9. ultralytics/cfg/models/11/yolo11-cls.yaml +5 -5
  10. ultralytics/cfg/models/11/yolo11-obb.yaml +5 -5
  11. ultralytics/cfg/models/11/yolo11-pose.yaml +5 -5
  12. ultralytics/cfg/models/11/yolo11-seg.yaml +5 -5
  13. ultralytics/cfg/models/11/yolo11.yaml +5 -5
  14. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +5 -5
  15. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +5 -5
  16. ultralytics/cfg/models/v8/yolov8-ghost.yaml +5 -5
  17. ultralytics/cfg/models/v8/yolov8-obb.yaml +5 -5
  18. ultralytics/cfg/models/v8/yolov8-p6.yaml +5 -5
  19. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +5 -5
  20. ultralytics/cfg/models/v8/yolov8-world.yaml +5 -5
  21. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +5 -5
  22. ultralytics/cfg/models/v8/yolov8.yaml +5 -5
  23. ultralytics/cfg/models/v9/yolov9c-seg.yaml +1 -1
  24. ultralytics/cfg/models/v9/yolov9c.yaml +1 -1
  25. ultralytics/cfg/models/v9/yolov9e-seg.yaml +1 -1
  26. ultralytics/cfg/models/v9/yolov9e.yaml +1 -1
  27. ultralytics/cfg/models/v9/yolov9m.yaml +1 -1
  28. ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
  29. ultralytics/cfg/models/v9/yolov9t.yaml +1 -1
  30. ultralytics/data/annotator.py +9 -14
  31. ultralytics/data/base.py +125 -39
  32. ultralytics/data/build.py +63 -24
  33. ultralytics/data/converter.py +34 -33
  34. ultralytics/data/dataset.py +207 -53
  35. ultralytics/data/loaders.py +1 -0
  36. ultralytics/data/split_dota.py +39 -12
  37. ultralytics/data/utils.py +33 -47
  38. ultralytics/engine/exporter.py +19 -17
  39. ultralytics/engine/model.py +69 -90
  40. ultralytics/engine/predictor.py +106 -21
  41. ultralytics/engine/trainer.py +32 -23
  42. ultralytics/engine/tuner.py +31 -38
  43. ultralytics/engine/validator.py +75 -41
  44. ultralytics/hub/__init__.py +21 -26
  45. ultralytics/hub/auth.py +9 -12
  46. ultralytics/hub/session.py +76 -21
  47. ultralytics/hub/utils.py +19 -17
  48. ultralytics/models/fastsam/model.py +23 -17
  49. ultralytics/models/fastsam/predict.py +36 -16
  50. ultralytics/models/fastsam/utils.py +5 -5
  51. ultralytics/models/fastsam/val.py +6 -6
  52. ultralytics/models/nas/model.py +29 -24
  53. ultralytics/models/nas/predict.py +14 -11
  54. ultralytics/models/nas/val.py +11 -13
  55. ultralytics/models/rtdetr/model.py +20 -11
  56. ultralytics/models/rtdetr/predict.py +21 -21
  57. ultralytics/models/rtdetr/train.py +25 -24
  58. ultralytics/models/rtdetr/val.py +47 -14
  59. ultralytics/models/sam/__init__.py +1 -1
  60. ultralytics/models/sam/amg.py +50 -4
  61. ultralytics/models/sam/model.py +8 -14
  62. ultralytics/models/sam/modules/decoders.py +18 -21
  63. ultralytics/models/sam/modules/encoders.py +25 -46
  64. ultralytics/models/sam/modules/memory_attention.py +19 -15
  65. ultralytics/models/sam/modules/sam.py +18 -25
  66. ultralytics/models/sam/modules/tiny_encoder.py +19 -29
  67. ultralytics/models/sam/modules/transformer.py +35 -57
  68. ultralytics/models/sam/modules/utils.py +15 -15
  69. ultralytics/models/sam/predict.py +0 -3
  70. ultralytics/models/utils/loss.py +87 -36
  71. ultralytics/models/utils/ops.py +26 -31
  72. ultralytics/models/yolo/classify/predict.py +30 -12
  73. ultralytics/models/yolo/classify/train.py +83 -19
  74. ultralytics/models/yolo/classify/val.py +45 -23
  75. ultralytics/models/yolo/detect/predict.py +29 -19
  76. ultralytics/models/yolo/detect/train.py +90 -23
  77. ultralytics/models/yolo/detect/val.py +150 -29
  78. ultralytics/models/yolo/model.py +1 -2
  79. ultralytics/models/yolo/obb/predict.py +18 -13
  80. ultralytics/models/yolo/obb/train.py +12 -8
  81. ultralytics/models/yolo/obb/val.py +35 -22
  82. ultralytics/models/yolo/pose/predict.py +28 -15
  83. ultralytics/models/yolo/pose/train.py +21 -8
  84. ultralytics/models/yolo/pose/val.py +51 -31
  85. ultralytics/models/yolo/segment/predict.py +27 -16
  86. ultralytics/models/yolo/segment/train.py +11 -8
  87. ultralytics/models/yolo/segment/val.py +110 -29
  88. ultralytics/models/yolo/world/train.py +43 -16
  89. ultralytics/models/yolo/world/train_world.py +61 -36
  90. ultralytics/nn/autobackend.py +28 -14
  91. ultralytics/nn/modules/__init__.py +12 -12
  92. ultralytics/nn/modules/activation.py +12 -3
  93. ultralytics/nn/modules/block.py +587 -84
  94. ultralytics/nn/modules/conv.py +418 -54
  95. ultralytics/nn/modules/head.py +3 -4
  96. ultralytics/nn/modules/transformer.py +320 -34
  97. ultralytics/nn/modules/utils.py +17 -3
  98. ultralytics/nn/tasks.py +226 -79
  99. ultralytics/solutions/ai_gym.py +2 -2
  100. ultralytics/solutions/analytics.py +4 -4
  101. ultralytics/solutions/heatmap.py +4 -4
  102. ultralytics/solutions/instance_segmentation.py +10 -4
  103. ultralytics/solutions/object_blurrer.py +2 -2
  104. ultralytics/solutions/object_counter.py +2 -2
  105. ultralytics/solutions/object_cropper.py +2 -2
  106. ultralytics/solutions/parking_management.py +9 -9
  107. ultralytics/solutions/queue_management.py +1 -1
  108. ultralytics/solutions/region_counter.py +2 -2
  109. ultralytics/solutions/security_alarm.py +7 -7
  110. ultralytics/solutions/solutions.py +7 -4
  111. ultralytics/solutions/speed_estimation.py +2 -2
  112. ultralytics/solutions/streamlit_inference.py +6 -6
  113. ultralytics/solutions/trackzone.py +9 -2
  114. ultralytics/solutions/vision_eye.py +4 -4
  115. ultralytics/trackers/basetrack.py +1 -1
  116. ultralytics/trackers/bot_sort.py +23 -22
  117. ultralytics/trackers/byte_tracker.py +4 -4
  118. ultralytics/trackers/track.py +2 -1
  119. ultralytics/trackers/utils/gmc.py +26 -27
  120. ultralytics/trackers/utils/kalman_filter.py +31 -29
  121. ultralytics/trackers/utils/matching.py +7 -7
  122. ultralytics/utils/__init__.py +37 -35
  123. ultralytics/utils/autobatch.py +5 -5
  124. ultralytics/utils/benchmarks.py +111 -18
  125. ultralytics/utils/callbacks/base.py +3 -3
  126. ultralytics/utils/callbacks/clearml.py +11 -11
  127. ultralytics/utils/callbacks/comet.py +35 -22
  128. ultralytics/utils/callbacks/dvc.py +11 -10
  129. ultralytics/utils/callbacks/hub.py +8 -8
  130. ultralytics/utils/callbacks/mlflow.py +1 -1
  131. ultralytics/utils/callbacks/neptune.py +12 -10
  132. ultralytics/utils/callbacks/raytune.py +1 -1
  133. ultralytics/utils/callbacks/tensorboard.py +6 -6
  134. ultralytics/utils/callbacks/wb.py +16 -16
  135. ultralytics/utils/checks.py +139 -68
  136. ultralytics/utils/dist.py +15 -2
  137. ultralytics/utils/downloads.py +37 -56
  138. ultralytics/utils/files.py +12 -13
  139. ultralytics/utils/instance.py +117 -52
  140. ultralytics/utils/loss.py +28 -33
  141. ultralytics/utils/metrics.py +246 -181
  142. ultralytics/utils/ops.py +65 -61
  143. ultralytics/utils/patches.py +8 -6
  144. ultralytics/utils/plotting.py +72 -59
  145. ultralytics/utils/tal.py +88 -57
  146. ultralytics/utils/torch_utils.py +202 -64
  147. ultralytics/utils/triton.py +13 -3
  148. ultralytics/utils/tuner.py +13 -25
  149. {ultralytics-8.3.88.dist-info → ultralytics-8.3.90.dist-info}/METADATA +2 -2
  150. ultralytics-8.3.90.dist-info/RECORD +250 -0
  151. ultralytics-8.3.88.dist-info/RECORD +0 -250
  152. {ultralytics-8.3.88.dist-info → ultralytics-8.3.90.dist-info}/LICENSE +0 -0
  153. {ultralytics-8.3.88.dist-info → ultralytics-8.3.90.dist-info}/WHEEL +0 -0
  154. {ultralytics-8.3.88.dist-info → ultralytics-8.3.90.dist-info}/entry_points.txt +0 -0
  155. {ultralytics-8.3.88.dist-info → ultralytics-8.3.90.dist-info}/top_level.txt +0 -0
ultralytics/utils/ops.py CHANGED
@@ -18,15 +18,16 @@ class Profile(contextlib.ContextDecorator):
18
18
  """
19
19
  YOLOv8 Profile class. Use as a decorator with @Profile() or as a context manager with 'with Profile():'.
20
20
 
21
- Example:
22
- ```python
23
- from ultralytics.utils.ops import Profile
21
+ Attributes:
22
+ t (float): Accumulated time.
23
+ device (torch.device): Device used for model inference.
24
+ cuda (bool): Whether CUDA is being used.
24
25
 
25
- with Profile(device=device) as dt:
26
- pass # slow operation here
27
-
28
- print(dt) # prints "Elapsed time is 9.5367431640625e-07 s"
29
- ```
26
+ Examples:
27
+ >>> from ultralytics.utils.ops import Profile
28
+ >>> with Profile(device=device) as dt:
29
+ ... pass # slow operation here
30
+ >>> print(dt) # prints "Elapsed time is 9.5367431640625e-07 s"
30
31
  """
31
32
 
32
33
  def __init__(self, t=0.0, device: torch.device = None):
@@ -34,8 +35,8 @@ class Profile(contextlib.ContextDecorator):
34
35
  Initialize the Profile class.
35
36
 
36
37
  Args:
37
- t (float): Initial time. Defaults to 0.0.
38
- device (torch.device): Devices used for model inference. Defaults to None (cpu).
38
+ t (float): Initial time.
39
+ device (torch.device): Device used for model inference.
39
40
  """
40
41
  self.t = t
41
42
  self.device = device
@@ -67,12 +68,12 @@ def segment2box(segment, width=640, height=640):
67
68
  Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy).
68
69
 
69
70
  Args:
70
- segment (torch.Tensor): the segment label
71
- width (int): the width of the image. Defaults to 640
72
- height (int): The height of the image. Defaults to 640
71
+ segment (torch.Tensor): The segment label.
72
+ width (int): The width of the image.
73
+ height (int): The height of the image.
73
74
 
74
75
  Returns:
75
- (np.ndarray): the minimum and maximum x and y values of the segment.
76
+ (np.ndarray): The minimum and maximum x and y values of the segment.
76
77
  """
77
78
  x, y = segment.T # segment xy
78
79
  # any 3 out of 4 sides are outside the image, clip coordinates first, https://github.com/ultralytics/ultralytics/pull/18294
@@ -91,21 +92,20 @@ def segment2box(segment, width=640, height=640):
91
92
 
92
93
  def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True, xywh=False):
93
94
  """
94
- Rescales bounding boxes (in the format of xyxy by default) from the shape of the image they were originally
95
- specified in (img1_shape) to the shape of a different image (img0_shape).
95
+ Rescale bounding boxes from img1_shape to img0_shape.
96
96
 
97
97
  Args:
98
98
  img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width).
99
- boxes (torch.Tensor): the bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2)
100
- img0_shape (tuple): the shape of the target image, in the format of (height, width).
101
- ratio_pad (tuple): a tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be
99
+ boxes (torch.Tensor): The bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2).
100
+ img0_shape (tuple): The shape of the target image, in the format of (height, width).
101
+ ratio_pad (tuple): A tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be
102
102
  calculated based on the size difference between the two images.
103
103
  padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
104
104
  rescaling.
105
- xywh (bool): The box format is xywh or not, default=False.
105
+ xywh (bool): The box format is xywh or not.
106
106
 
107
107
  Returns:
108
- boxes (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2)
108
+ (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2).
109
109
  """
110
110
  if ratio_pad is None: # calculate from img0_shape
111
111
  gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
@@ -150,8 +150,8 @@ def nms_rotated(boxes, scores, threshold=0.45, use_triu=True):
150
150
  Args:
151
151
  boxes (torch.Tensor): Rotated bounding boxes, shape (N, 5), format xywhr.
152
152
  scores (torch.Tensor): Confidence scores, shape (N,).
153
- threshold (float, optional): IoU threshold. Defaults to 0.45.
154
- use_triu (bool, optional): Whether to use `torch.triu` operator. It'd be useful for disable it
153
+ threshold (float): IoU threshold.
154
+ use_triu (bool): Whether to use `torch.triu` operator. It'd be useful for disable it
155
155
  when exporting obb models to some formats that do not support `torch.triu`.
156
156
 
157
157
  Returns:
@@ -214,7 +214,7 @@ def non_max_suppression(
214
214
  list contains the apriori labels for a given image. The list should be in the format
215
215
  output by a dataloader, with each label being a tuple of (class_index, x1, y1, x2, y2).
216
216
  max_det (int): The maximum number of boxes to keep after NMS.
217
- nc (int, optional): The number of classes output by the model. Any indices after this will be considered masks.
217
+ nc (int): The number of classes output by the model. Any indices after this will be considered masks.
218
218
  max_time_img (float): The maximum time (seconds) for processing one image.
219
219
  max_nms (int): The maximum number of boxes into torchvision.ops.nms().
220
220
  max_wh (int): The maximum box width and height in pixels.
@@ -337,7 +337,7 @@ def clip_boxes(boxes, shape):
337
337
  Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape.
338
338
 
339
339
  Args:
340
- boxes (torch.Tensor): The bounding boxes to clip.
340
+ boxes (torch.Tensor | numpy.ndarray): The bounding boxes to clip.
341
341
  shape (tuple): The shape of the image.
342
342
 
343
343
  Returns:
@@ -363,7 +363,7 @@ def clip_coords(coords, shape):
363
363
  shape (tuple): A tuple of integers representing the size of the image in the format (height, width).
364
364
 
365
365
  Returns:
366
- (torch.Tensor | numpy.ndarray): Clipped coordinates
366
+ (torch.Tensor | numpy.ndarray): Clipped coordinates.
367
367
  """
368
368
  if isinstance(coords, torch.Tensor): # faster individually (WARNING: inplace .clamp_() Apple MPS bug)
369
369
  coords[..., 0] = coords[..., 0].clamp(0, shape[1]) # x
@@ -455,10 +455,11 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
455
455
 
456
456
  Args:
457
457
  x (np.ndarray | torch.Tensor): The bounding box coordinates.
458
- w (int): Width of the image. Defaults to 640
459
- h (int): Height of the image. Defaults to 640
460
- padw (int): Padding width. Defaults to 0
461
- padh (int): Padding height. Defaults to 0
458
+ w (int): Width of the image.
459
+ h (int): Height of the image.
460
+ padw (int): Padding width.
461
+ padh (int): Padding height.
462
+
462
463
  Returns:
463
464
  y (np.ndarray | torch.Tensor): The coordinates of the bounding box in the format [x1, y1, x2, y2] where
464
465
  x1,y1 is the top-left corner, x2,y2 is the bottom-right corner of the bounding box.
@@ -479,10 +480,10 @@ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
479
480
 
480
481
  Args:
481
482
  x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format.
482
- w (int): The width of the image. Defaults to 640
483
- h (int): The height of the image. Defaults to 640
484
- clip (bool): If True, the boxes will be clipped to the image boundaries. Defaults to False
485
- eps (float): The minimum value of the box's width and height. Defaults to 0.0
483
+ w (int): The width of the image.
484
+ h (int): The height of the image.
485
+ clip (bool): If True, the boxes will be clipped to the image boundaries.
486
+ eps (float): The minimum value of the box's width and height.
486
487
 
487
488
  Returns:
488
489
  y (np.ndarray | torch.Tensor): The bounding box coordinates in (x, y, width, height, normalized) format
@@ -602,13 +603,13 @@ def xywhr2xyxyxyxy(x):
602
603
 
603
604
  def ltwh2xyxy(x):
604
605
  """
605
- It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.
606
+ Convert bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.
606
607
 
607
608
  Args:
608
- x (np.ndarray | torch.Tensor): the input image
609
+ x (np.ndarray | torch.Tensor): The input image.
609
610
 
610
611
  Returns:
611
- y (np.ndarray | torch.Tensor): the xyxy coordinates of the bounding boxes.
612
+ (np.ndarray | torch.Tensor): The xyxy coordinates of the bounding boxes.
612
613
  """
613
614
  y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
614
615
  y[..., 2] = x[..., 2] + x[..., 0] # width
@@ -618,13 +619,13 @@ def ltwh2xyxy(x):
618
619
 
619
620
  def segments2boxes(segments):
620
621
  """
621
- It converts segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh).
622
+ Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh).
622
623
 
623
624
  Args:
624
- segments (list): list of segments, each segment is a list of points, each point is a list of x, y coordinates
625
+ segments (List): List of segments, each segment is a list of points, each point is a list of x, y coordinates.
625
626
 
626
627
  Returns:
627
- (np.ndarray): the xywh coordinates of the bounding boxes.
628
+ (np.ndarray): The xywh coordinates of the bounding boxes.
628
629
  """
629
630
  boxes = []
630
631
  for s in segments:
@@ -638,11 +639,11 @@ def resample_segments(segments, n=1000):
638
639
  Inputs a list of segments (n,2) and returns a list of segments (n,2) up-sampled to n points each.
639
640
 
640
641
  Args:
641
- segments (list): a list of (n,2) arrays, where n is the number of points in the segment.
642
- n (int): number of points to resample the segment to. Defaults to 1000
642
+ segments (List): A list of (n,2) arrays, where n is the number of points in the segment.
643
+ n (int): Number of points to resample the segment to.
643
644
 
644
645
  Returns:
645
- segments (list): the resampled segments.
646
+ segments (List): The resampled segments.
646
647
  """
647
648
  for i, s in enumerate(segments):
648
649
  if len(s) == n:
@@ -659,14 +660,14 @@ def resample_segments(segments, n=1000):
659
660
 
660
661
  def crop_mask(masks, boxes):
661
662
  """
662
- It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box.
663
+ Crop masks to bounding boxes.
663
664
 
664
665
  Args:
665
- masks (torch.Tensor): [n, h, w] tensor of masks
666
- boxes (torch.Tensor): [n, 4] tensor of bbox coordinates in relative point form
666
+ masks (torch.Tensor): [n, h, w] tensor of masks.
667
+ boxes (torch.Tensor): [n, 4] tensor of bbox coordinates in relative point form.
667
668
 
668
669
  Returns:
669
- (torch.Tensor): The masks are being cropped to the bounding box.
670
+ (torch.Tensor): Cropped masks.
670
671
  """
671
672
  _, h, w = masks.shape
672
673
  x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(n,1,1)
@@ -685,7 +686,7 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False):
685
686
  masks_in (torch.Tensor): A tensor of shape [n, mask_dim], where n is the number of masks after NMS.
686
687
  bboxes (torch.Tensor): A tensor of shape [n, 4], where n is the number of masks after NMS.
687
688
  shape (tuple): A tuple of integers representing the size of the input image in the format (h, w).
688
- upsample (bool): A flag to indicate whether to upsample the mask to the original image size. Default is False.
689
+ upsample (bool): A flag to indicate whether to upsample the mask to the original image size.
689
690
 
690
691
  Returns:
691
692
  (torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w
@@ -711,16 +712,16 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False):
711
712
 
712
713
  def process_mask_native(protos, masks_in, bboxes, shape):
713
714
  """
714
- It takes the output of the mask head, and crops it after upsampling to the bounding boxes.
715
+ Apply masks to bounding boxes using the output of the mask head with native upsampling.
715
716
 
716
717
  Args:
717
- protos (torch.Tensor): [mask_dim, mask_h, mask_w]
718
+ protos (torch.Tensor): [mask_dim, mask_h, mask_w].
718
719
  masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms.
719
720
  bboxes (torch.Tensor): [n, 4], n is number of masks after nms.
720
721
  shape (tuple): The size of the input image (h,w).
721
722
 
722
723
  Returns:
723
- masks (torch.Tensor): The returned masks with dimensions [h, w, n].
724
+ (torch.Tensor): The returned masks with dimensions [h, w, n].
724
725
  """
725
726
  c, mh, mw = protos.shape # CHW
726
727
  masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw)
@@ -738,6 +739,9 @@ def scale_masks(masks, shape, padding=True):
738
739
  shape (tuple): Height and width.
739
740
  padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
740
741
  rescaling.
742
+
743
+ Returns:
744
+ (torch.Tensor): Rescaled masks.
741
745
  """
742
746
  mh, mw = masks.shape[2:]
743
747
  gain = min(mh / shape[0], mw / shape[1]) # gain = old / new
@@ -759,10 +763,10 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize=False
759
763
 
760
764
  Args:
761
765
  img1_shape (tuple): The shape of the image that the coords are from.
762
- coords (torch.Tensor): the coords to be scaled of shape n,2.
763
- img0_shape (tuple): the shape of the image that the segmentation is being applied to.
764
- ratio_pad (tuple): the ratio of the image size to the padded image size.
765
- normalize (bool): If True, the coordinates will be normalized to the range [0, 1]. Defaults to False.
766
+ coords (torch.Tensor): The coords to be scaled of shape n,2.
767
+ img0_shape (tuple): The shape of the image that the segmentation is being applied to.
768
+ ratio_pad (tuple): The ratio of the image size to the padded image size.
769
+ normalize (bool): If True, the coordinates will be normalized to the range [0, 1].
766
770
  padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
767
771
  rescaling.
768
772
 
@@ -809,14 +813,14 @@ def regularize_rboxes(rboxes):
809
813
 
810
814
  def masks2segments(masks, strategy="all"):
811
815
  """
812
- It takes a list of masks(n,h,w) and returns a list of segments(n,xy).
816
+ Convert masks to segments.
813
817
 
814
818
  Args:
815
- masks (torch.Tensor): the output of the model, which is a tensor of shape (batch_size, 160, 160)
816
- strategy (str): 'all' or 'largest'. Defaults to all
819
+ masks (torch.Tensor): The output of the model, which is a tensor of shape (batch_size, 160, 160).
820
+ strategy (str): 'all' or 'largest'.
817
821
 
818
822
  Returns:
819
- segments (List): list of segment masks
823
+ (List): List of segment masks.
820
824
  """
821
825
  from ultralytics.data.converter import merge_multi_segment
822
826
 
@@ -856,10 +860,10 @@ def clean_str(s):
856
860
  Cleans a string by replacing special characters with '_' character.
857
861
 
858
862
  Args:
859
- s (str): a string needing special characters replaced
863
+ s (str): A string needing special characters replaced.
860
864
 
861
865
  Returns:
862
- (str): a string with special characters replaced by an underscore _
866
+ (str): A string with special characters replaced by an underscore _.
863
867
  """
864
868
  return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
865
869
 
@@ -18,7 +18,7 @@ def imread(filename: str, flags: int = cv2.IMREAD_COLOR):
18
18
 
19
19
  Args:
20
20
  filename (str): Path to the file to read.
21
- flags (int, optional): Flag that can take values of cv2.IMREAD_*. Defaults to cv2.IMREAD_COLOR.
21
+ flags (int, optional): Flag that can take values of cv2.IMREAD_*.
22
22
 
23
23
  Returns:
24
24
  (np.ndarray): The read image.
@@ -33,7 +33,7 @@ def imwrite(filename: str, img: np.ndarray, params=None):
33
33
  Args:
34
34
  filename (str): Path to the file to write.
35
35
  img (np.ndarray): Image to write.
36
- params (list of ints, optional): Additional parameters. See OpenCV documentation.
36
+ params (List[int], optional): Additional parameters for image encoding.
37
37
 
38
38
  Returns:
39
39
  (bool): True if the file was written, False otherwise.
@@ -47,7 +47,7 @@ def imwrite(filename: str, img: np.ndarray, params=None):
47
47
 
48
48
  def imshow(winname: str, mat: np.ndarray):
49
49
  """
50
- Displays an image in the specified window.
50
+ Display an image in the specified window.
51
51
 
52
52
  Args:
53
53
  winname (str): Name of the window.
@@ -88,11 +88,13 @@ def torch_load(*args, **kwargs):
88
88
 
89
89
  def torch_save(*args, **kwargs):
90
90
  """
91
- Optionally use dill to serialize lambda functions where pickle does not, adding robustness with 3 retries and
92
- exponential standoff in case of save failure.
91
+ Save PyTorch objects with retry mechanism for robustness.
92
+
93
+ This function wraps torch.save with 3 retries and exponential backoff in case of save failures, which can occur
94
+ due to device flushing delays or antivirus scanning.
93
95
 
94
96
  Args:
95
- *args (tuple): Positional arguments to pass to torch.save.
97
+ *args (Any): Positional arguments to pass to torch.save.
96
98
  **kwargs (Any): Keyword arguments to pass to torch.save.
97
99
  """
98
100
  for i in range(4): # 3 retries
@@ -25,9 +25,9 @@ class Colors:
25
25
  RGB values.
26
26
 
27
27
  Attributes:
28
- palette (list of tuple): List of RGB color values.
28
+ palette (List[Tuple]): List of RGB color values.
29
29
  n (int): The number of colors in the palette.
30
- pose_palette (np.ndarray): A specific color palette array with dtype np.uint8.
30
+ pose_palette (np.ndarray): A specific color palette array for pose estimation with dtype np.uint8.
31
31
 
32
32
  Examples:
33
33
  >>> from ultralytics.utils.plotting import Colors
@@ -142,13 +142,13 @@ class Colors:
142
142
  )
143
143
 
144
144
  def __call__(self, i, bgr=False):
145
- """Converts hex color codes to RGB values."""
145
+ """Convert hex color codes to RGB values."""
146
146
  c = self.palette[int(i) % self.n]
147
147
  return (c[2], c[1], c[0]) if bgr else c
148
148
 
149
149
  @staticmethod
150
150
  def hex2rgb(h):
151
- """Converts hex color codes to RGB values (i.e. default PIL order)."""
151
+ """Convert hex color codes to RGB values (i.e. default PIL order)."""
152
152
  return tuple(int(h[1 + i : 1 + i + 2], 16) for i in (0, 2, 4))
153
153
 
154
154
 
@@ -160,13 +160,15 @@ class Annotator:
160
160
  Ultralytics Annotator for train/val mosaics and JPGs and predictions annotations.
161
161
 
162
162
  Attributes:
163
- im (Image.Image or numpy array): The image to annotate.
163
+ im (Image.Image or np.ndarray): The image to annotate.
164
164
  pil (bool): Whether to use PIL or cv2 for drawing annotations.
165
165
  font (ImageFont.truetype or ImageFont.load_default): Font used for text annotations.
166
166
  lw (float): Line width for drawing.
167
167
  skeleton (List[List[int]]): Skeleton structure for keypoints.
168
168
  limb_color (List[int]): Color palette for limbs.
169
169
  kpt_color (List[int]): Color palette for keypoints.
170
+ dark_colors (set): Set of colors considered dark for text contrast.
171
+ light_colors (set): Set of colors considered light for text contrast.
170
172
 
171
173
  Examples:
172
174
  >>> from ultralytics.utils.plotting import Annotator
@@ -256,7 +258,7 @@ class Annotator:
256
258
  txt_color (tuple, optional): The color of the text (R, G, B).
257
259
 
258
260
  Returns:
259
- txt_color (tuple): Text color for label
261
+ (tuple): Text color for label.
260
262
 
261
263
  Examples:
262
264
  >>> from ultralytics.utils.plotting import Annotator
@@ -273,14 +275,14 @@ class Annotator:
273
275
 
274
276
  def box_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255), rotated=False):
275
277
  """
276
- Draws a bounding box to image with label.
278
+ Draw a bounding box on an image with a given label.
277
279
 
278
280
  Args:
279
281
  box (tuple): The bounding box coordinates (x1, y1, x2, y2).
280
- label (str): The text label to be displayed.
282
+ label (str, optional): The text label to be displayed.
281
283
  color (tuple, optional): The background color of the rectangle (B, G, R).
282
284
  txt_color (tuple, optional): The color of the text (R, G, B).
283
- rotated (bool, optional): Variable used to check if task is OBB
285
+ rotated (bool, optional): Whether the task is oriented bounding box detection.
284
286
 
285
287
  Examples:
286
288
  >>> from ultralytics.utils.plotting import Annotator
@@ -340,11 +342,11 @@ class Annotator:
340
342
  Plot masks on image.
341
343
 
342
344
  Args:
343
- masks (tensor): Predicted masks on cuda, shape: [n, h, w]
344
- colors (List[List[Int]]): Colors for predicted masks, [[r, g, b] * n]
345
- im_gpu (tensor): Image is in cuda, shape: [3, h, w], range: [0, 1]
346
- alpha (float): Mask transparency: 0.0 fully transparent, 1.0 opaque
347
- retina_masks (bool): Whether to use high resolution masks or not. Defaults to False.
345
+ masks (torch.Tensor): Predicted masks on cuda, shape: [n, h, w]
346
+ colors (List[List[int]]): Colors for predicted masks, [[r, g, b] * n]
347
+ im_gpu (torch.Tensor): Image is in cuda, shape: [3, h, w], range: [0, 1]
348
+ alpha (float, optional): Mask transparency: 0.0 fully transparent, 1.0 opaque.
349
+ retina_masks (bool, optional): Whether to use high resolution masks or not.
348
350
  """
349
351
  if self.pil:
350
352
  # Convert to numpy first
@@ -377,11 +379,11 @@ class Annotator:
377
379
 
378
380
  Args:
379
381
  kpts (torch.Tensor): Keypoints, shape [17, 3] (x, y, confidence).
380
- shape (tuple, optional): Image shape (h, w). Defaults to (640, 640).
381
- radius (int, optional): Keypoint radius. Defaults to 5.
382
- kpt_line (bool, optional): Draw lines between keypoints. Defaults to True.
383
- conf_thres (float, optional): Confidence threshold. Defaults to 0.25.
384
- kpt_color (tuple, optional): Keypoint color (B, G, R). Defaults to None.
382
+ shape (tuple, optional): Image shape (h, w).
383
+ radius (int, optional): Keypoint radius.
384
+ kpt_line (bool, optional): Draw lines between keypoints.
385
+ conf_thres (float, optional): Confidence threshold.
386
+ kpt_color (tuple, optional): Keypoint color (B, G, R).
385
387
 
386
388
  Note:
387
389
  - `kpt_line=True` currently only supports human pose plotting.
@@ -436,7 +438,16 @@ class Annotator:
436
438
  self.draw.rectangle(xy, fill, outline, width)
437
439
 
438
440
  def text(self, xy, text, txt_color=(255, 255, 255), anchor="top", box_style=False):
439
- """Adds text to an image using PIL or cv2."""
441
+ """
442
+ Add text to an image using PIL or cv2.
443
+
444
+ Args:
445
+ xy (List[int]): Top-left coordinates for text placement.
446
+ text (str): Text to be drawn.
447
+ txt_color (tuple, optional): Text color (R, G, B).
448
+ anchor (str, optional): Text anchor position ('top' or 'bottom').
449
+ box_style (bool, optional): Whether to draw text with a background box.
450
+ """
440
451
  if anchor == "bottom": # start y from font bottom
441
452
  w, h = self.font.getsize(text) # text width, height
442
453
  xy[1] += 1 - h
@@ -492,7 +503,7 @@ class Annotator:
492
503
  @staticmethod
493
504
  def get_bbox_dimension(bbox=None):
494
505
  """
495
- Calculate the area of a bounding box.
506
+ Calculate the dimensions and area of a bounding box.
496
507
 
497
508
  Args:
498
509
  bbox (tuple): Bounding box coordinates in the format (x_min, y_min, x_max, y_max).
@@ -517,7 +528,16 @@ class Annotator:
517
528
  @TryExcept() # known issue https://github.com/ultralytics/yolov5/issues/5395
518
529
  @plt_settings()
519
530
  def plot_labels(boxes, cls, names=(), save_dir=Path(""), on_plot=None):
520
- """Plot training labels including class histograms and box statistics."""
531
+ """
532
+ Plot training labels including class histograms and box statistics.
533
+
534
+ Args:
535
+ boxes (np.ndarray): Bounding box coordinates in format [x, y, width, height].
536
+ cls (np.ndarray): Class indices.
537
+ names (Dict, optional): Dictionary mapping class indices to class names.
538
+ save_dir (Path, optional): Directory to save the plot.
539
+ on_plot (Callable, optional): Function to call after plot is saved.
540
+ """
521
541
  import pandas # scope for faster 'import ultralytics'
522
542
  import seaborn # scope for faster 'import ultralytics'
523
543
 
@@ -580,25 +600,22 @@ def save_one_box(xyxy, im, file=Path("im.jpg"), gain=1.02, pad=10, square=False,
580
600
 
581
601
  Args:
582
602
  xyxy (torch.Tensor or list): A tensor or list representing the bounding box in xyxy format.
583
- im (numpy.ndarray): The input image.
584
- file (Path, optional): The path where the cropped image will be saved. Defaults to 'im.jpg'.
585
- gain (float, optional): A multiplicative factor to increase the size of the bounding box. Defaults to 1.02.
586
- pad (int, optional): The number of pixels to add to the width and height of the bounding box. Defaults to 10.
587
- square (bool, optional): If True, the bounding box will be transformed into a square. Defaults to False.
588
- BGR (bool, optional): If True, the image will be saved in BGR format, otherwise in RGB. Defaults to False.
589
- save (bool, optional): If True, the cropped image will be saved to disk. Defaults to True.
603
+ im (np.ndarray): The input image.
604
+ file (Path, optional): The path where the cropped image will be saved.
605
+ gain (float, optional): A multiplicative factor to increase the size of the bounding box.
606
+ pad (int, optional): The number of pixels to add to the width and height of the bounding box.
607
+ square (bool, optional): If True, the bounding box will be transformed into a square.
608
+ BGR (bool, optional): If True, the image will be saved in BGR format, otherwise in RGB.
609
+ save (bool, optional): If True, the cropped image will be saved to disk.
590
610
 
591
611
  Returns:
592
- (numpy.ndarray): The cropped image.
593
-
594
- Example:
595
- ```python
596
- from ultralytics.utils.plotting import save_one_box
612
+ (np.ndarray): The cropped image.
597
613
 
598
- xyxy = [50, 50, 150, 150]
599
- im = cv2.imread("image.jpg")
600
- cropped_im = save_one_box(xyxy, im, file="cropped.jpg", square=True)
601
- ```
614
+ Examples:
615
+ >>> from ultralytics.utils.plotting import save_one_box
616
+ >>> xyxy = [50, 50, 150, 150]
617
+ >>> im = cv2.imread("image.jpg")
618
+ >>> cropped_im = save_one_box(xyxy, im, file="cropped.jpg", square=True)
602
619
  """
603
620
  if not isinstance(xyxy, torch.Tensor): # may be list
604
621
  xyxy = torch.stack(xyxy)
@@ -656,7 +673,7 @@ def plot_images(
656
673
  conf_thres: Confidence threshold for displaying detections.
657
674
 
658
675
  Returns:
659
- np.ndarray: Plotted image grid as a numpy array if save is False, None otherwise.
676
+ (np.ndarray): Plotted image grid as a numpy array if save is False, None otherwise.
660
677
 
661
678
  Note:
662
679
  This function supports both tensor and numpy array inputs. It will automatically
@@ -792,20 +809,16 @@ def plot_results(file="path/to/results.csv", dir="", segment=False, pose=False,
792
809
  pose estimation, and classification. Plots are saved as 'results.png' in the directory where the CSV is located.
793
810
 
794
811
  Args:
795
- file (str, optional): Path to the CSV file containing the training results. Defaults to 'path/to/results.csv'.
796
- dir (str, optional): Directory where the CSV file is located if 'file' is not provided. Defaults to ''.
797
- segment (bool, optional): Flag to indicate if the data is for segmentation. Defaults to False.
798
- pose (bool, optional): Flag to indicate if the data is for pose estimation. Defaults to False.
799
- classify (bool, optional): Flag to indicate if the data is for classification. Defaults to False.
812
+ file (str, optional): Path to the CSV file containing the training results.
813
+ dir (str, optional): Directory where the CSV file is located if 'file' is not provided.
814
+ segment (bool, optional): Flag to indicate if the data is for segmentation.
815
+ pose (bool, optional): Flag to indicate if the data is for pose estimation.
816
+ classify (bool, optional): Flag to indicate if the data is for classification.
800
817
  on_plot (callable, optional): Callback function to be executed after plotting. Takes filename as an argument.
801
- Defaults to None.
802
-
803
- Example:
804
- ```python
805
- from ultralytics.utils.plotting import plot_results
806
818
 
807
- plot_results("path/to/results.csv", segment=True)
808
- ```
819
+ Examples:
820
+ >>> from ultralytics.utils.plotting import plot_results
821
+ >>> plot_results("path/to/results.csv", segment=True)
809
822
  """
810
823
  import pandas as pd # scope for faster 'import ultralytics'
811
824
  from scipy.ndimage import gaussian_filter1d
@@ -851,15 +864,15 @@ def plot_results(file="path/to/results.csv", dir="", segment=False, pose=False,
851
864
 
852
865
  def plt_color_scatter(v, f, bins=20, cmap="viridis", alpha=0.8, edgecolors="none"):
853
866
  """
854
- Plots a scatter plot with points colored based on a 2D histogram.
867
+ Plot a scatter plot with points colored based on a 2D histogram.
855
868
 
856
869
  Args:
857
870
  v (array-like): Values for the x-axis.
858
871
  f (array-like): Values for the y-axis.
859
- bins (int, optional): Number of bins for the histogram. Defaults to 20.
860
- cmap (str, optional): Colormap for the scatter plot. Defaults to 'viridis'.
861
- alpha (float, optional): Alpha for the scatter plot. Defaults to 0.8.
862
- edgecolors (str, optional): Edge colors for the scatter plot. Defaults to 'none'.
872
+ bins (int, optional): Number of bins for the histogram.
873
+ cmap (str, optional): Colormap for the scatter plot.
874
+ alpha (float, optional): Alpha for the scatter plot.
875
+ edgecolors (str, optional): Edge colors for the scatter plot.
863
876
 
864
877
  Examples:
865
878
  >>> v = np.random.rand(100)
@@ -886,7 +899,7 @@ def plot_tune_results(csv_file="tune_results.csv"):
886
899
  in the CSV, color-coded based on fitness scores. The best-performing configurations are highlighted on the plots.
887
900
 
888
901
  Args:
889
- csv_file (str, optional): Path to the CSV file containing the tuning results. Defaults to 'tune_results.csv'.
902
+ csv_file (str, optional): Path to the CSV file containing the tuning results.
890
903
 
891
904
  Examples:
892
905
  >>> plot_tune_results("path/to/tune_results.csv")
@@ -965,8 +978,8 @@ def feature_visualization(x, module_type, stage, n=32, save_dir=Path("runs/detec
965
978
  x (torch.Tensor): Features to be visualized.
966
979
  module_type (str): Module type.
967
980
  stage (int): Module stage within the model.
968
- n (int, optional): Maximum number of feature maps to plot. Defaults to 32.
969
- save_dir (Path, optional): Directory to save results. Defaults to Path('runs/detect/exp').
981
+ n (int, optional): Maximum number of feature maps to plot.
982
+ save_dir (Path, optional): Directory to save results.
970
983
  """
971
984
  for m in {"Detect", "Segment", "Pose", "Classify", "OBB", "RTDETRDecoder"}: # all model heads
972
985
  if m in module_type: