ultralytics 8.3.89__py3-none-any.whl → 8.3.91__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. tests/conftest.py +2 -2
  2. tests/test_cli.py +13 -11
  3. tests/test_cuda.py +10 -1
  4. tests/test_exports.py +2 -2
  5. tests/test_integrations.py +1 -5
  6. tests/test_python.py +16 -16
  7. tests/test_solutions.py +9 -9
  8. ultralytics/__init__.py +1 -1
  9. ultralytics/cfg/__init__.py +3 -1
  10. ultralytics/cfg/models/11/yolo11-cls.yaml +5 -5
  11. ultralytics/cfg/models/11/yolo11-obb.yaml +5 -5
  12. ultralytics/cfg/models/11/yolo11-pose.yaml +5 -5
  13. ultralytics/cfg/models/11/yolo11-seg.yaml +5 -5
  14. ultralytics/cfg/models/11/yolo11.yaml +5 -5
  15. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +5 -5
  16. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +5 -5
  17. ultralytics/cfg/models/v8/yolov8-ghost.yaml +5 -5
  18. ultralytics/cfg/models/v8/yolov8-obb.yaml +5 -5
  19. ultralytics/cfg/models/v8/yolov8-p6.yaml +5 -5
  20. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +5 -5
  21. ultralytics/cfg/models/v8/yolov8-world.yaml +5 -5
  22. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +5 -5
  23. ultralytics/cfg/models/v8/yolov8.yaml +5 -5
  24. ultralytics/cfg/models/v9/yolov9c-seg.yaml +1 -1
  25. ultralytics/cfg/models/v9/yolov9c.yaml +1 -1
  26. ultralytics/cfg/models/v9/yolov9e-seg.yaml +1 -1
  27. ultralytics/cfg/models/v9/yolov9e.yaml +1 -1
  28. ultralytics/cfg/models/v9/yolov9m.yaml +1 -1
  29. ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
  30. ultralytics/cfg/models/v9/yolov9t.yaml +1 -1
  31. ultralytics/data/annotator.py +9 -14
  32. ultralytics/data/base.py +118 -30
  33. ultralytics/data/build.py +63 -24
  34. ultralytics/data/converter.py +5 -5
  35. ultralytics/data/dataset.py +207 -53
  36. ultralytics/data/loaders.py +1 -0
  37. ultralytics/data/split_dota.py +39 -12
  38. ultralytics/data/utils.py +15 -19
  39. ultralytics/engine/exporter.py +24 -23
  40. ultralytics/engine/model.py +67 -88
  41. ultralytics/engine/predictor.py +106 -21
  42. ultralytics/engine/trainer.py +32 -23
  43. ultralytics/engine/tuner.py +21 -18
  44. ultralytics/engine/validator.py +75 -41
  45. ultralytics/hub/__init__.py +12 -13
  46. ultralytics/hub/auth.py +9 -12
  47. ultralytics/hub/session.py +76 -21
  48. ultralytics/hub/utils.py +19 -17
  49. ultralytics/models/fastsam/model.py +20 -11
  50. ultralytics/models/fastsam/predict.py +36 -16
  51. ultralytics/models/fastsam/utils.py +5 -5
  52. ultralytics/models/fastsam/val.py +6 -6
  53. ultralytics/models/nas/model.py +22 -11
  54. ultralytics/models/nas/predict.py +9 -4
  55. ultralytics/models/nas/val.py +5 -5
  56. ultralytics/models/rtdetr/model.py +20 -11
  57. ultralytics/models/rtdetr/predict.py +18 -15
  58. ultralytics/models/rtdetr/train.py +20 -16
  59. ultralytics/models/rtdetr/val.py +42 -6
  60. ultralytics/models/sam/__init__.py +1 -1
  61. ultralytics/models/sam/amg.py +50 -4
  62. ultralytics/models/sam/model.py +8 -14
  63. ultralytics/models/sam/modules/decoders.py +18 -21
  64. ultralytics/models/sam/modules/encoders.py +25 -46
  65. ultralytics/models/sam/modules/memory_attention.py +19 -15
  66. ultralytics/models/sam/modules/sam.py +18 -25
  67. ultralytics/models/sam/modules/tiny_encoder.py +19 -29
  68. ultralytics/models/sam/modules/transformer.py +35 -57
  69. ultralytics/models/sam/modules/utils.py +15 -15
  70. ultralytics/models/sam/predict.py +0 -3
  71. ultralytics/models/utils/loss.py +87 -36
  72. ultralytics/models/utils/ops.py +26 -31
  73. ultralytics/models/yolo/classify/predict.py +24 -3
  74. ultralytics/models/yolo/classify/train.py +77 -10
  75. ultralytics/models/yolo/classify/val.py +40 -15
  76. ultralytics/models/yolo/detect/predict.py +23 -10
  77. ultralytics/models/yolo/detect/train.py +85 -15
  78. ultralytics/models/yolo/detect/val.py +145 -21
  79. ultralytics/models/yolo/model.py +1 -2
  80. ultralytics/models/yolo/obb/predict.py +12 -4
  81. ultralytics/models/yolo/obb/train.py +7 -0
  82. ultralytics/models/yolo/obb/val.py +25 -7
  83. ultralytics/models/yolo/pose/predict.py +22 -6
  84. ultralytics/models/yolo/pose/train.py +17 -1
  85. ultralytics/models/yolo/pose/val.py +46 -21
  86. ultralytics/models/yolo/segment/predict.py +22 -8
  87. ultralytics/models/yolo/segment/train.py +6 -0
  88. ultralytics/models/yolo/segment/val.py +100 -14
  89. ultralytics/models/yolo/world/train.py +38 -8
  90. ultralytics/models/yolo/world/train_world.py +39 -10
  91. ultralytics/nn/autobackend.py +28 -14
  92. ultralytics/nn/modules/__init__.py +3 -0
  93. ultralytics/nn/modules/activation.py +12 -3
  94. ultralytics/nn/modules/block.py +587 -84
  95. ultralytics/nn/modules/conv.py +418 -54
  96. ultralytics/nn/modules/head.py +3 -4
  97. ultralytics/nn/modules/transformer.py +320 -34
  98. ultralytics/nn/modules/utils.py +17 -3
  99. ultralytics/nn/tasks.py +221 -69
  100. ultralytics/solutions/ai_gym.py +2 -2
  101. ultralytics/solutions/analytics.py +4 -4
  102. ultralytics/solutions/heatmap.py +4 -4
  103. ultralytics/solutions/instance_segmentation.py +10 -4
  104. ultralytics/solutions/object_blurrer.py +2 -2
  105. ultralytics/solutions/object_counter.py +2 -2
  106. ultralytics/solutions/object_cropper.py +2 -2
  107. ultralytics/solutions/parking_management.py +9 -9
  108. ultralytics/solutions/queue_management.py +1 -1
  109. ultralytics/solutions/region_counter.py +2 -2
  110. ultralytics/solutions/security_alarm.py +7 -7
  111. ultralytics/solutions/solutions.py +7 -4
  112. ultralytics/solutions/speed_estimation.py +2 -2
  113. ultralytics/solutions/streamlit_inference.py +6 -6
  114. ultralytics/solutions/trackzone.py +9 -2
  115. ultralytics/solutions/vision_eye.py +4 -4
  116. ultralytics/trackers/basetrack.py +1 -1
  117. ultralytics/trackers/bot_sort.py +23 -22
  118. ultralytics/trackers/byte_tracker.py +4 -4
  119. ultralytics/trackers/track.py +2 -1
  120. ultralytics/trackers/utils/gmc.py +26 -27
  121. ultralytics/trackers/utils/kalman_filter.py +31 -29
  122. ultralytics/trackers/utils/matching.py +7 -7
  123. ultralytics/utils/__init__.py +32 -27
  124. ultralytics/utils/autobatch.py +5 -5
  125. ultralytics/utils/benchmarks.py +111 -18
  126. ultralytics/utils/callbacks/base.py +3 -3
  127. ultralytics/utils/callbacks/clearml.py +11 -11
  128. ultralytics/utils/callbacks/comet.py +42 -24
  129. ultralytics/utils/callbacks/dvc.py +11 -10
  130. ultralytics/utils/callbacks/hub.py +8 -8
  131. ultralytics/utils/callbacks/mlflow.py +1 -1
  132. ultralytics/utils/callbacks/neptune.py +12 -10
  133. ultralytics/utils/callbacks/raytune.py +1 -1
  134. ultralytics/utils/callbacks/tensorboard.py +6 -6
  135. ultralytics/utils/callbacks/wb.py +16 -16
  136. ultralytics/utils/checks.py +116 -35
  137. ultralytics/utils/dist.py +15 -2
  138. ultralytics/utils/downloads.py +13 -9
  139. ultralytics/utils/files.py +12 -13
  140. ultralytics/utils/instance.py +112 -45
  141. ultralytics/utils/loss.py +28 -33
  142. ultralytics/utils/metrics.py +246 -181
  143. ultralytics/utils/ops.py +61 -53
  144. ultralytics/utils/patches.py +8 -6
  145. ultralytics/utils/plotting.py +65 -45
  146. ultralytics/utils/tal.py +88 -57
  147. ultralytics/utils/torch_utils.py +181 -33
  148. ultralytics/utils/triton.py +13 -3
  149. ultralytics/utils/tuner.py +8 -16
  150. {ultralytics-8.3.89.dist-info → ultralytics-8.3.91.dist-info}/METADATA +1 -1
  151. ultralytics-8.3.91.dist-info/RECORD +250 -0
  152. ultralytics-8.3.89.dist-info/RECORD +0 -250
  153. {ultralytics-8.3.89.dist-info → ultralytics-8.3.91.dist-info}/LICENSE +0 -0
  154. {ultralytics-8.3.89.dist-info → ultralytics-8.3.91.dist-info}/WHEEL +0 -0
  155. {ultralytics-8.3.89.dist-info → ultralytics-8.3.91.dist-info}/entry_points.txt +0 -0
  156. {ultralytics-8.3.89.dist-info → ultralytics-8.3.91.dist-info}/top_level.txt +0 -0
ultralytics/utils/ops.py CHANGED
@@ -18,6 +18,11 @@ class Profile(contextlib.ContextDecorator):
18
18
  """
19
19
  YOLOv8 Profile class. Use as a decorator with @Profile() or as a context manager with 'with Profile():'.
20
20
 
21
+ Attributes:
22
+ t (float): Accumulated time.
23
+ device (torch.device): Device used for model inference.
24
+ cuda (bool): Whether CUDA is being used.
25
+
21
26
  Examples:
22
27
  >>> from ultralytics.utils.ops import Profile
23
28
  >>> with Profile(device=device) as dt:
@@ -30,8 +35,8 @@ class Profile(contextlib.ContextDecorator):
30
35
  Initialize the Profile class.
31
36
 
32
37
  Args:
33
- t (float): Initial time. Defaults to 0.0.
34
- device (torch.device): Devices used for model inference. Defaults to None (cpu).
38
+ t (float): Initial time.
39
+ device (torch.device): Device used for model inference.
35
40
  """
36
41
  self.t = t
37
42
  self.device = device
@@ -63,12 +68,12 @@ def segment2box(segment, width=640, height=640):
63
68
  Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy).
64
69
 
65
70
  Args:
66
- segment (torch.Tensor): the segment label
67
- width (int): the width of the image. Defaults to 640
68
- height (int): The height of the image. Defaults to 640
71
+ segment (torch.Tensor): The segment label.
72
+ width (int): The width of the image.
73
+ height (int): The height of the image.
69
74
 
70
75
  Returns:
71
- (np.ndarray): the minimum and maximum x and y values of the segment.
76
+ (np.ndarray): The minimum and maximum x and y values of the segment.
72
77
  """
73
78
  x, y = segment.T # segment xy
74
79
  # any 3 out of 4 sides are outside the image, clip coordinates first, https://github.com/ultralytics/ultralytics/pull/18294
@@ -87,21 +92,20 @@ def segment2box(segment, width=640, height=640):
87
92
 
88
93
  def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True, xywh=False):
89
94
  """
90
- Rescales bounding boxes (in the format of xyxy by default) from the shape of the image they were originally
91
- specified in (img1_shape) to the shape of a different image (img0_shape).
95
+ Rescale bounding boxes from img1_shape to img0_shape.
92
96
 
93
97
  Args:
94
98
  img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width).
95
- boxes (torch.Tensor): the bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2)
96
- img0_shape (tuple): the shape of the target image, in the format of (height, width).
97
- ratio_pad (tuple): a tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be
99
+ boxes (torch.Tensor): The bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2).
100
+ img0_shape (tuple): The shape of the target image, in the format of (height, width).
101
+ ratio_pad (tuple): A tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be
98
102
  calculated based on the size difference between the two images.
99
103
  padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
100
104
  rescaling.
101
- xywh (bool): The box format is xywh or not, default=False.
105
+ xywh (bool): The box format is xywh or not.
102
106
 
103
107
  Returns:
104
- boxes (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2)
108
+ (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2).
105
109
  """
106
110
  if ratio_pad is None: # calculate from img0_shape
107
111
  gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
@@ -146,8 +150,8 @@ def nms_rotated(boxes, scores, threshold=0.45, use_triu=True):
146
150
  Args:
147
151
  boxes (torch.Tensor): Rotated bounding boxes, shape (N, 5), format xywhr.
148
152
  scores (torch.Tensor): Confidence scores, shape (N,).
149
- threshold (float, optional): IoU threshold. Defaults to 0.45.
150
- use_triu (bool, optional): Whether to use `torch.triu` operator. It'd be useful for disable it
153
+ threshold (float): IoU threshold.
154
+ use_triu (bool): Whether to use `torch.triu` operator. It'd be useful for disable it
151
155
  when exporting obb models to some formats that do not support `torch.triu`.
152
156
 
153
157
  Returns:
@@ -210,7 +214,7 @@ def non_max_suppression(
210
214
  list contains the apriori labels for a given image. The list should be in the format
211
215
  output by a dataloader, with each label being a tuple of (class_index, x1, y1, x2, y2).
212
216
  max_det (int): The maximum number of boxes to keep after NMS.
213
- nc (int, optional): The number of classes output by the model. Any indices after this will be considered masks.
217
+ nc (int): The number of classes output by the model. Any indices after this will be considered masks.
214
218
  max_time_img (float): The maximum time (seconds) for processing one image.
215
219
  max_nms (int): The maximum number of boxes into torchvision.ops.nms().
216
220
  max_wh (int): The maximum box width and height in pixels.
@@ -333,7 +337,7 @@ def clip_boxes(boxes, shape):
333
337
  Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape.
334
338
 
335
339
  Args:
336
- boxes (torch.Tensor): The bounding boxes to clip.
340
+ boxes (torch.Tensor | numpy.ndarray): The bounding boxes to clip.
337
341
  shape (tuple): The shape of the image.
338
342
 
339
343
  Returns:
@@ -359,7 +363,7 @@ def clip_coords(coords, shape):
359
363
  shape (tuple): A tuple of integers representing the size of the image in the format (height, width).
360
364
 
361
365
  Returns:
362
- (torch.Tensor | numpy.ndarray): Clipped coordinates
366
+ (torch.Tensor | numpy.ndarray): Clipped coordinates.
363
367
  """
364
368
  if isinstance(coords, torch.Tensor): # faster individually (WARNING: inplace .clamp_() Apple MPS bug)
365
369
  coords[..., 0] = coords[..., 0].clamp(0, shape[1]) # x
@@ -451,10 +455,11 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
451
455
 
452
456
  Args:
453
457
  x (np.ndarray | torch.Tensor): The bounding box coordinates.
454
- w (int): Width of the image. Defaults to 640
455
- h (int): Height of the image. Defaults to 640
456
- padw (int): Padding width. Defaults to 0
457
- padh (int): Padding height. Defaults to 0
458
+ w (int): Width of the image.
459
+ h (int): Height of the image.
460
+ padw (int): Padding width.
461
+ padh (int): Padding height.
462
+
458
463
  Returns:
459
464
  y (np.ndarray | torch.Tensor): The coordinates of the bounding box in the format [x1, y1, x2, y2] where
460
465
  x1,y1 is the top-left corner, x2,y2 is the bottom-right corner of the bounding box.
@@ -475,10 +480,10 @@ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
475
480
 
476
481
  Args:
477
482
  x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format.
478
- w (int): The width of the image. Defaults to 640
479
- h (int): The height of the image. Defaults to 640
480
- clip (bool): If True, the boxes will be clipped to the image boundaries. Defaults to False
481
- eps (float): The minimum value of the box's width and height. Defaults to 0.0
483
+ w (int): The width of the image.
484
+ h (int): The height of the image.
485
+ clip (bool): If True, the boxes will be clipped to the image boundaries.
486
+ eps (float): The minimum value of the box's width and height.
482
487
 
483
488
  Returns:
484
489
  y (np.ndarray | torch.Tensor): The bounding box coordinates in (x, y, width, height, normalized) format
@@ -598,13 +603,13 @@ def xywhr2xyxyxyxy(x):
598
603
 
599
604
  def ltwh2xyxy(x):
600
605
  """
601
- It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.
606
+ Convert bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.
602
607
 
603
608
  Args:
604
- x (np.ndarray | torch.Tensor): the input image
609
+ x (np.ndarray | torch.Tensor): The input image.
605
610
 
606
611
  Returns:
607
- y (np.ndarray | torch.Tensor): the xyxy coordinates of the bounding boxes.
612
+ (np.ndarray | torch.Tensor): The xyxy coordinates of the bounding boxes.
608
613
  """
609
614
  y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
610
615
  y[..., 2] = x[..., 2] + x[..., 0] # width
@@ -614,13 +619,13 @@ def ltwh2xyxy(x):
614
619
 
615
620
  def segments2boxes(segments):
616
621
  """
617
- It converts segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh).
622
+ Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh).
618
623
 
619
624
  Args:
620
- segments (list): list of segments, each segment is a list of points, each point is a list of x, y coordinates
625
+ segments (List): List of segments, each segment is a list of points, each point is a list of x, y coordinates.
621
626
 
622
627
  Returns:
623
- (np.ndarray): the xywh coordinates of the bounding boxes.
628
+ (np.ndarray): The xywh coordinates of the bounding boxes.
624
629
  """
625
630
  boxes = []
626
631
  for s in segments:
@@ -634,11 +639,11 @@ def resample_segments(segments, n=1000):
634
639
  Inputs a list of segments (n,2) and returns a list of segments (n,2) up-sampled to n points each.
635
640
 
636
641
  Args:
637
- segments (list): a list of (n,2) arrays, where n is the number of points in the segment.
638
- n (int): number of points to resample the segment to. Defaults to 1000
642
+ segments (List): A list of (n,2) arrays, where n is the number of points in the segment.
643
+ n (int): Number of points to resample the segment to.
639
644
 
640
645
  Returns:
641
- segments (list): the resampled segments.
646
+ segments (List): The resampled segments.
642
647
  """
643
648
  for i, s in enumerate(segments):
644
649
  if len(s) == n:
@@ -655,14 +660,14 @@ def resample_segments(segments, n=1000):
655
660
 
656
661
  def crop_mask(masks, boxes):
657
662
  """
658
- It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box.
663
+ Crop masks to bounding boxes.
659
664
 
660
665
  Args:
661
- masks (torch.Tensor): [n, h, w] tensor of masks
662
- boxes (torch.Tensor): [n, 4] tensor of bbox coordinates in relative point form
666
+ masks (torch.Tensor): [n, h, w] tensor of masks.
667
+ boxes (torch.Tensor): [n, 4] tensor of bbox coordinates in relative point form.
663
668
 
664
669
  Returns:
665
- (torch.Tensor): The masks are being cropped to the bounding box.
670
+ (torch.Tensor): Cropped masks.
666
671
  """
667
672
  _, h, w = masks.shape
668
673
  x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(n,1,1)
@@ -681,7 +686,7 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False):
681
686
  masks_in (torch.Tensor): A tensor of shape [n, mask_dim], where n is the number of masks after NMS.
682
687
  bboxes (torch.Tensor): A tensor of shape [n, 4], where n is the number of masks after NMS.
683
688
  shape (tuple): A tuple of integers representing the size of the input image in the format (h, w).
684
- upsample (bool): A flag to indicate whether to upsample the mask to the original image size. Default is False.
689
+ upsample (bool): A flag to indicate whether to upsample the mask to the original image size.
685
690
 
686
691
  Returns:
687
692
  (torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w
@@ -707,16 +712,16 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False):
707
712
 
708
713
  def process_mask_native(protos, masks_in, bboxes, shape):
709
714
  """
710
- It takes the output of the mask head, and crops it after upsampling to the bounding boxes.
715
+ Apply masks to bounding boxes using the output of the mask head with native upsampling.
711
716
 
712
717
  Args:
713
- protos (torch.Tensor): [mask_dim, mask_h, mask_w]
718
+ protos (torch.Tensor): [mask_dim, mask_h, mask_w].
714
719
  masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms.
715
720
  bboxes (torch.Tensor): [n, 4], n is number of masks after nms.
716
721
  shape (tuple): The size of the input image (h,w).
717
722
 
718
723
  Returns:
719
- masks (torch.Tensor): The returned masks with dimensions [h, w, n].
724
+ (torch.Tensor): The returned masks with dimensions [h, w, n].
720
725
  """
721
726
  c, mh, mw = protos.shape # CHW
722
727
  masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw)
@@ -734,6 +739,9 @@ def scale_masks(masks, shape, padding=True):
734
739
  shape (tuple): Height and width.
735
740
  padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
736
741
  rescaling.
742
+
743
+ Returns:
744
+ (torch.Tensor): Rescaled masks.
737
745
  """
738
746
  mh, mw = masks.shape[2:]
739
747
  gain = min(mh / shape[0], mw / shape[1]) # gain = old / new
@@ -755,10 +763,10 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize=False
755
763
 
756
764
  Args:
757
765
  img1_shape (tuple): The shape of the image that the coords are from.
758
- coords (torch.Tensor): the coords to be scaled of shape n,2.
759
- img0_shape (tuple): the shape of the image that the segmentation is being applied to.
760
- ratio_pad (tuple): the ratio of the image size to the padded image size.
761
- normalize (bool): If True, the coordinates will be normalized to the range [0, 1]. Defaults to False.
766
+ coords (torch.Tensor): The coords to be scaled of shape n,2.
767
+ img0_shape (tuple): The shape of the image that the segmentation is being applied to.
768
+ ratio_pad (tuple): The ratio of the image size to the padded image size.
769
+ normalize (bool): If True, the coordinates will be normalized to the range [0, 1].
762
770
  padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
763
771
  rescaling.
764
772
 
@@ -805,14 +813,14 @@ def regularize_rboxes(rboxes):
805
813
 
806
814
  def masks2segments(masks, strategy="all"):
807
815
  """
808
- It takes a list of masks(n,h,w) and returns a list of segments(n,xy).
816
+ Convert masks to segments.
809
817
 
810
818
  Args:
811
- masks (torch.Tensor): the output of the model, which is a tensor of shape (batch_size, 160, 160)
812
- strategy (str): 'all' or 'largest'. Defaults to all
819
+ masks (torch.Tensor): The output of the model, which is a tensor of shape (batch_size, 160, 160).
820
+ strategy (str): 'all' or 'largest'.
813
821
 
814
822
  Returns:
815
- segments (List): list of segment masks
823
+ (List): List of segment masks.
816
824
  """
817
825
  from ultralytics.data.converter import merge_multi_segment
818
826
 
@@ -852,10 +860,10 @@ def clean_str(s):
852
860
  Cleans a string by replacing special characters with '_' character.
853
861
 
854
862
  Args:
855
- s (str): a string needing special characters replaced
863
+ s (str): A string needing special characters replaced.
856
864
 
857
865
  Returns:
858
- (str): a string with special characters replaced by an underscore _
866
+ (str): A string with special characters replaced by an underscore _.
859
867
  """
860
868
  return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
861
869
 
ultralytics/utils/patches.py CHANGED
@@ -18,7 +18,7 @@ def imread(filename: str, flags: int = cv2.IMREAD_COLOR):
18
18
 
19
19
  Args:
20
20
  filename (str): Path to the file to read.
21
- flags (int, optional): Flag that can take values of cv2.IMREAD_*. Defaults to cv2.IMREAD_COLOR.
21
+ flags (int, optional): Flag that can take values of cv2.IMREAD_*.
22
22
 
23
23
  Returns:
24
24
  (np.ndarray): The read image.
@@ -33,7 +33,7 @@ def imwrite(filename: str, img: np.ndarray, params=None):
33
33
  Args:
34
34
  filename (str): Path to the file to write.
35
35
  img (np.ndarray): Image to write.
36
- params (list of ints, optional): Additional parameters. See OpenCV documentation.
36
+ params (List[int], optional): Additional parameters for image encoding.
37
37
 
38
38
  Returns:
39
39
  (bool): True if the file was written, False otherwise.
@@ -47,7 +47,7 @@ def imwrite(filename: str, img: np.ndarray, params=None):
47
47
 
48
48
  def imshow(winname: str, mat: np.ndarray):
49
49
  """
50
- Displays an image in the specified window.
50
+ Display an image in the specified window.
51
51
 
52
52
  Args:
53
53
  winname (str): Name of the window.
@@ -88,11 +88,13 @@ def torch_load(*args, **kwargs):
88
88
 
89
89
  def torch_save(*args, **kwargs):
90
90
  """
91
- Optionally use dill to serialize lambda functions where pickle does not, adding robustness with 3 retries and
92
- exponential standoff in case of save failure.
91
+ Save PyTorch objects with retry mechanism for robustness.
92
+
93
+ This function wraps torch.save with 3 retries and exponential backoff in case of save failures, which can occur
94
+ due to device flushing delays or antivirus scanning.
93
95
 
94
96
  Args:
95
- *args (tuple): Positional arguments to pass to torch.save.
97
+ *args (Any): Positional arguments to pass to torch.save.
96
98
  **kwargs (Any): Keyword arguments to pass to torch.save.
97
99
  """
98
100
  for i in range(4): # 3 retries
ultralytics/utils/plotting.py CHANGED
@@ -25,9 +25,9 @@ class Colors:
25
25
  RGB values.
26
26
 
27
27
  Attributes:
28
- palette (list of tuple): List of RGB color values.
28
+ palette (List[Tuple]): List of RGB color values.
29
29
  n (int): The number of colors in the palette.
30
- pose_palette (np.ndarray): A specific color palette array with dtype np.uint8.
30
+ pose_palette (np.ndarray): A specific color palette array for pose estimation with dtype np.uint8.
31
31
 
32
32
  Examples:
33
33
  >>> from ultralytics.utils.plotting import Colors
@@ -142,13 +142,13 @@ class Colors:
142
142
  )
143
143
 
144
144
  def __call__(self, i, bgr=False):
145
- """Converts hex color codes to RGB values."""
145
+ """Convert hex color codes to RGB values."""
146
146
  c = self.palette[int(i) % self.n]
147
147
  return (c[2], c[1], c[0]) if bgr else c
148
148
 
149
149
  @staticmethod
150
150
  def hex2rgb(h):
151
- """Converts hex color codes to RGB values (i.e. default PIL order)."""
151
+ """Convert hex color codes to RGB values (i.e. default PIL order)."""
152
152
  return tuple(int(h[1 + i : 1 + i + 2], 16) for i in (0, 2, 4))
153
153
 
154
154
 
@@ -160,13 +160,15 @@ class Annotator:
160
160
  Ultralytics Annotator for train/val mosaics and JPGs and predictions annotations.
161
161
 
162
162
  Attributes:
163
- im (Image.Image or numpy array): The image to annotate.
163
+ im (Image.Image or np.ndarray): The image to annotate.
164
164
  pil (bool): Whether to use PIL or cv2 for drawing annotations.
165
165
  font (ImageFont.truetype or ImageFont.load_default): Font used for text annotations.
166
166
  lw (float): Line width for drawing.
167
167
  skeleton (List[List[int]]): Skeleton structure for keypoints.
168
168
  limb_color (List[int]): Color palette for limbs.
169
169
  kpt_color (List[int]): Color palette for keypoints.
170
+ dark_colors (set): Set of colors considered dark for text contrast.
171
+ light_colors (set): Set of colors considered light for text contrast.
170
172
 
171
173
  Examples:
172
174
  >>> from ultralytics.utils.plotting import Annotator
@@ -256,7 +258,7 @@ class Annotator:
256
258
  txt_color (tuple, optional): The color of the text (R, G, B).
257
259
 
258
260
  Returns:
259
- txt_color (tuple): Text color for label
261
+ (tuple): Text color for label.
260
262
 
261
263
  Examples:
262
264
  >>> from ultralytics.utils.plotting import Annotator
@@ -273,14 +275,14 @@ class Annotator:
273
275
 
274
276
  def box_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255), rotated=False):
275
277
  """
276
- Draws a bounding box to image with label.
278
+ Draw a bounding box on an image with a given label.
277
279
 
278
280
  Args:
279
281
  box (tuple): The bounding box coordinates (x1, y1, x2, y2).
280
- label (str): The text label to be displayed.
282
+ label (str, optional): The text label to be displayed.
281
283
  color (tuple, optional): The background color of the rectangle (B, G, R).
282
284
  txt_color (tuple, optional): The color of the text (R, G, B).
283
- rotated (bool, optional): Variable used to check if task is OBB
285
+ rotated (bool, optional): Whether the task is oriented bounding box detection.
284
286
 
285
287
  Examples:
286
288
  >>> from ultralytics.utils.plotting import Annotator
@@ -340,11 +342,11 @@ class Annotator:
340
342
  Plot masks on image.
341
343
 
342
344
  Args:
343
- masks (tensor): Predicted masks on cuda, shape: [n, h, w]
344
- colors (List[List[Int]]): Colors for predicted masks, [[r, g, b] * n]
345
- im_gpu (tensor): Image is in cuda, shape: [3, h, w], range: [0, 1]
346
- alpha (float): Mask transparency: 0.0 fully transparent, 1.0 opaque
347
- retina_masks (bool): Whether to use high resolution masks or not. Defaults to False.
345
+ masks (torch.Tensor): Predicted masks on cuda, shape: [n, h, w]
346
+ colors (List[List[int]]): Colors for predicted masks, [[r, g, b] * n]
347
+ im_gpu (torch.Tensor): Image is in cuda, shape: [3, h, w], range: [0, 1]
348
+ alpha (float, optional): Mask transparency: 0.0 fully transparent, 1.0 opaque.
349
+ retina_masks (bool, optional): Whether to use high resolution masks or not.
348
350
  """
349
351
  if self.pil:
350
352
  # Convert to numpy first
@@ -377,11 +379,11 @@ class Annotator:
377
379
 
378
380
  Args:
379
381
  kpts (torch.Tensor): Keypoints, shape [17, 3] (x, y, confidence).
380
- shape (tuple, optional): Image shape (h, w). Defaults to (640, 640).
381
- radius (int, optional): Keypoint radius. Defaults to 5.
382
- kpt_line (bool, optional): Draw lines between keypoints. Defaults to True.
383
- conf_thres (float, optional): Confidence threshold. Defaults to 0.25.
384
- kpt_color (tuple, optional): Keypoint color (B, G, R). Defaults to None.
382
+ shape (tuple, optional): Image shape (h, w).
383
+ radius (int, optional): Keypoint radius.
384
+ kpt_line (bool, optional): Draw lines between keypoints.
385
+ conf_thres (float, optional): Confidence threshold.
386
+ kpt_color (tuple, optional): Keypoint color (B, G, R).
385
387
 
386
388
  Note:
387
389
  - `kpt_line=True` currently only supports human pose plotting.
@@ -436,7 +438,16 @@ class Annotator:
436
438
  self.draw.rectangle(xy, fill, outline, width)
437
439
 
438
440
  def text(self, xy, text, txt_color=(255, 255, 255), anchor="top", box_style=False):
439
- """Adds text to an image using PIL or cv2."""
441
+ """
442
+ Add text to an image using PIL or cv2.
443
+
444
+ Args:
445
+ xy (List[int]): Top-left coordinates for text placement.
446
+ text (str): Text to be drawn.
447
+ txt_color (tuple, optional): Text color (R, G, B).
448
+ anchor (str, optional): Text anchor position ('top' or 'bottom').
449
+ box_style (bool, optional): Whether to draw text with a background box.
450
+ """
440
451
  if anchor == "bottom": # start y from font bottom
441
452
  w, h = self.font.getsize(text) # text width, height
442
453
  xy[1] += 1 - h
@@ -492,7 +503,7 @@ class Annotator:
492
503
  @staticmethod
493
504
  def get_bbox_dimension(bbox=None):
494
505
  """
495
- Calculate the area of a bounding box.
506
+ Calculate the dimensions and area of a bounding box.
496
507
 
497
508
  Args:
498
509
  bbox (tuple): Bounding box coordinates in the format (x_min, y_min, x_max, y_max).
@@ -517,7 +528,16 @@ class Annotator:
517
528
  @TryExcept() # known issue https://github.com/ultralytics/yolov5/issues/5395
518
529
  @plt_settings()
519
530
  def plot_labels(boxes, cls, names=(), save_dir=Path(""), on_plot=None):
520
- """Plot training labels including class histograms and box statistics."""
531
+ """
532
+ Plot training labels including class histograms and box statistics.
533
+
534
+ Args:
535
+ boxes (np.ndarray): Bounding box coordinates in format [x, y, width, height].
536
+ cls (np.ndarray): Class indices.
537
+ names (Dict, optional): Dictionary mapping class indices to class names.
538
+ save_dir (Path, optional): Directory to save the plot.
539
+ on_plot (Callable, optional): Function to call after plot is saved.
540
+ """
521
541
  import pandas # scope for faster 'import ultralytics'
522
542
  import seaborn # scope for faster 'import ultralytics'
523
543
 
@@ -580,16 +600,16 @@ def save_one_box(xyxy, im, file=Path("im.jpg"), gain=1.02, pad=10, square=False,
580
600
 
581
601
  Args:
582
602
  xyxy (torch.Tensor or list): A tensor or list representing the bounding box in xyxy format.
583
- im (numpy.ndarray): The input image.
584
- file (Path, optional): The path where the cropped image will be saved. Defaults to 'im.jpg'.
585
- gain (float, optional): A multiplicative factor to increase the size of the bounding box. Defaults to 1.02.
586
- pad (int, optional): The number of pixels to add to the width and height of the bounding box. Defaults to 10.
587
- square (bool, optional): If True, the bounding box will be transformed into a square. Defaults to False.
588
- BGR (bool, optional): If True, the image will be saved in BGR format, otherwise in RGB. Defaults to False.
589
- save (bool, optional): If True, the cropped image will be saved to disk. Defaults to True.
603
+ im (np.ndarray): The input image.
604
+ file (Path, optional): The path where the cropped image will be saved.
605
+ gain (float, optional): A multiplicative factor to increase the size of the bounding box.
606
+ pad (int, optional): The number of pixels to add to the width and height of the bounding box.
607
+ square (bool, optional): If True, the bounding box will be transformed into a square.
608
+ BGR (bool, optional): If True, the image will be saved in BGR format, otherwise in RGB.
609
+ save (bool, optional): If True, the cropped image will be saved to disk.
590
610
 
591
611
  Returns:
592
- (numpy.ndarray): The cropped image.
612
+ (np.ndarray): The cropped image.
593
613
 
594
614
  Examples:
595
615
  >>> from ultralytics.utils.plotting import save_one_box
@@ -653,7 +673,7 @@ def plot_images(
653
673
  conf_thres: Confidence threshold for displaying detections.
654
674
 
655
675
  Returns:
656
- np.ndarray: Plotted image grid as a numpy array if save is False, None otherwise.
676
+ (np.ndarray): Plotted image grid as a numpy array if save is False, None otherwise.
657
677
 
658
678
  Note:
659
679
  This function supports both tensor and numpy array inputs. It will automatically
@@ -693,6 +713,7 @@ def plot_images(
693
713
 
694
714
  # Annotate
695
715
  fs = int((h + w) * ns * 0.01) # font size
716
+ fs = max(fs, 18) # ensure that the font size is large enough to be easily readable.
696
717
  annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names)
697
718
  for i in range(bs):
698
719
  x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin
@@ -789,13 +810,12 @@ def plot_results(file="path/to/results.csv", dir="", segment=False, pose=False,
789
810
  pose estimation, and classification. Plots are saved as 'results.png' in the directory where the CSV is located.
790
811
 
791
812
  Args:
792
- file (str, optional): Path to the CSV file containing the training results. Defaults to 'path/to/results.csv'.
793
- dir (str, optional): Directory where the CSV file is located if 'file' is not provided. Defaults to ''.
794
- segment (bool, optional): Flag to indicate if the data is for segmentation. Defaults to False.
795
- pose (bool, optional): Flag to indicate if the data is for pose estimation. Defaults to False.
796
- classify (bool, optional): Flag to indicate if the data is for classification. Defaults to False.
813
+ file (str, optional): Path to the CSV file containing the training results.
814
+ dir (str, optional): Directory where the CSV file is located if 'file' is not provided.
815
+ segment (bool, optional): Flag to indicate if the data is for segmentation.
816
+ pose (bool, optional): Flag to indicate if the data is for pose estimation.
817
+ classify (bool, optional): Flag to indicate if the data is for classification.
797
818
  on_plot (callable, optional): Callback function to be executed after plotting. Takes filename as an argument.
798
- Defaults to None.
799
819
 
800
820
  Examples:
801
821
  >>> from ultralytics.utils.plotting import plot_results
@@ -845,15 +865,15 @@ def plot_results(file="path/to/results.csv", dir="", segment=False, pose=False,
845
865
 
846
866
  def plt_color_scatter(v, f, bins=20, cmap="viridis", alpha=0.8, edgecolors="none"):
847
867
  """
848
- Plots a scatter plot with points colored based on a 2D histogram.
868
+ Plot a scatter plot with points colored based on a 2D histogram.
849
869
 
850
870
  Args:
851
871
  v (array-like): Values for the x-axis.
852
872
  f (array-like): Values for the y-axis.
853
- bins (int, optional): Number of bins for the histogram. Defaults to 20.
854
- cmap (str, optional): Colormap for the scatter plot. Defaults to 'viridis'.
855
- alpha (float, optional): Alpha for the scatter plot. Defaults to 0.8.
856
- edgecolors (str, optional): Edge colors for the scatter plot. Defaults to 'none'.
873
+ bins (int, optional): Number of bins for the histogram.
874
+ cmap (str, optional): Colormap for the scatter plot.
875
+ alpha (float, optional): Alpha for the scatter plot.
876
+ edgecolors (str, optional): Edge colors for the scatter plot.
857
877
 
858
878
  Examples:
859
879
  >>> v = np.random.rand(100)
@@ -880,7 +900,7 @@ def plot_tune_results(csv_file="tune_results.csv"):
880
900
  in the CSV, color-coded based on fitness scores. The best-performing configurations are highlighted on the plots.
881
901
 
882
902
  Args:
883
- csv_file (str, optional): Path to the CSV file containing the tuning results. Defaults to 'tune_results.csv'.
903
+ csv_file (str, optional): Path to the CSV file containing the tuning results.
884
904
 
885
905
  Examples:
886
906
  >>> plot_tune_results("path/to/tune_results.csv")
@@ -959,8 +979,8 @@ def feature_visualization(x, module_type, stage, n=32, save_dir=Path("runs/detec
959
979
  x (torch.Tensor): Features to be visualized.
960
980
  module_type (str): Module type.
961
981
  stage (int): Module stage within the model.
962
- n (int, optional): Maximum number of feature maps to plot. Defaults to 32.
963
- save_dir (Path, optional): Directory to save results. Defaults to Path('runs/detect/exp').
982
+ n (int, optional): Maximum number of feature maps to plot.
983
+ save_dir (Path, optional): Directory to save results.
964
984
  """
965
985
  for m in {"Detect", "Segment", "Pose", "Classify", "OBB", "RTDETRDecoder"}: # all model heads
966
986
  if m in module_type: