ultralytics 8.2.81__py3-none-any.whl → 8.2.82__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ultralytics might be problematic. Click here for more details.

Files changed (97)
  1. tests/test_solutions.py +0 -4
  2. ultralytics/__init__.py +1 -1
  3. ultralytics/cfg/__init__.py +14 -16
  4. ultralytics/data/annotator.py +1 -1
  5. ultralytics/data/augment.py +58 -58
  6. ultralytics/data/base.py +3 -3
  7. ultralytics/data/converter.py +7 -8
  8. ultralytics/data/explorer/explorer.py +7 -23
  9. ultralytics/data/loaders.py +1 -1
  10. ultralytics/data/split_dota.py +11 -3
  11. ultralytics/data/utils.py +6 -10
  12. ultralytics/engine/exporter.py +2 -4
  13. ultralytics/engine/model.py +47 -47
  14. ultralytics/engine/predictor.py +1 -1
  15. ultralytics/engine/results.py +28 -28
  16. ultralytics/engine/trainer.py +11 -8
  17. ultralytics/engine/tuner.py +7 -8
  18. ultralytics/engine/validator.py +3 -5
  19. ultralytics/hub/__init__.py +5 -5
  20. ultralytics/hub/auth.py +6 -2
  21. ultralytics/hub/session.py +3 -5
  22. ultralytics/models/fastsam/model.py +13 -10
  23. ultralytics/models/fastsam/predict.py +2 -2
  24. ultralytics/models/fastsam/utils.py +0 -1
  25. ultralytics/models/nas/model.py +4 -4
  26. ultralytics/models/nas/predict.py +1 -2
  27. ultralytics/models/nas/val.py +1 -1
  28. ultralytics/models/rtdetr/predict.py +1 -1
  29. ultralytics/models/rtdetr/train.py +1 -1
  30. ultralytics/models/rtdetr/val.py +1 -1
  31. ultralytics/models/sam/model.py +11 -11
  32. ultralytics/models/sam/modules/decoders.py +7 -4
  33. ultralytics/models/sam/modules/sam.py +9 -1
  34. ultralytics/models/sam/modules/tiny_encoder.py +1 -1
  35. ultralytics/models/sam/modules/transformer.py +0 -2
  36. ultralytics/models/sam/modules/utils.py +1 -1
  37. ultralytics/models/sam/predict.py +10 -10
  38. ultralytics/models/utils/loss.py +29 -17
  39. ultralytics/models/utils/ops.py +1 -5
  40. ultralytics/models/yolo/classify/predict.py +1 -1
  41. ultralytics/models/yolo/classify/train.py +1 -1
  42. ultralytics/models/yolo/classify/val.py +1 -1
  43. ultralytics/models/yolo/detect/predict.py +1 -1
  44. ultralytics/models/yolo/detect/train.py +1 -1
  45. ultralytics/models/yolo/detect/val.py +1 -1
  46. ultralytics/models/yolo/model.py +6 -2
  47. ultralytics/models/yolo/obb/predict.py +1 -1
  48. ultralytics/models/yolo/obb/train.py +1 -1
  49. ultralytics/models/yolo/obb/val.py +2 -2
  50. ultralytics/models/yolo/pose/predict.py +1 -1
  51. ultralytics/models/yolo/pose/train.py +1 -1
  52. ultralytics/models/yolo/pose/val.py +1 -1
  53. ultralytics/models/yolo/segment/predict.py +1 -1
  54. ultralytics/models/yolo/segment/train.py +1 -1
  55. ultralytics/models/yolo/segment/val.py +1 -1
  56. ultralytics/models/yolo/world/train.py +1 -1
  57. ultralytics/nn/autobackend.py +2 -2
  58. ultralytics/nn/modules/__init__.py +2 -2
  59. ultralytics/nn/modules/block.py +8 -20
  60. ultralytics/nn/modules/conv.py +1 -3
  61. ultralytics/nn/modules/head.py +16 -31
  62. ultralytics/nn/modules/transformer.py +0 -1
  63. ultralytics/nn/modules/utils.py +0 -1
  64. ultralytics/nn/tasks.py +11 -9
  65. ultralytics/solutions/__init__.py +1 -0
  66. ultralytics/solutions/ai_gym.py +0 -2
  67. ultralytics/solutions/analytics.py +1 -6
  68. ultralytics/solutions/heatmap.py +0 -1
  69. ultralytics/solutions/object_counter.py +0 -2
  70. ultralytics/solutions/queue_management.py +0 -2
  71. ultralytics/trackers/basetrack.py +1 -1
  72. ultralytics/trackers/byte_tracker.py +2 -2
  73. ultralytics/trackers/utils/gmc.py +5 -5
  74. ultralytics/trackers/utils/kalman_filter.py +1 -1
  75. ultralytics/trackers/utils/matching.py +1 -5
  76. ultralytics/utils/__init__.py +122 -23
  77. ultralytics/utils/autobatch.py +7 -4
  78. ultralytics/utils/benchmarks.py +6 -14
  79. ultralytics/utils/callbacks/base.py +0 -1
  80. ultralytics/utils/callbacks/comet.py +0 -1
  81. ultralytics/utils/callbacks/tensorboard.py +0 -1
  82. ultralytics/utils/checks.py +15 -18
  83. ultralytics/utils/downloads.py +6 -7
  84. ultralytics/utils/files.py +3 -4
  85. ultralytics/utils/instance.py +17 -7
  86. ultralytics/utils/metrics.py +15 -15
  87. ultralytics/utils/ops.py +8 -8
  88. ultralytics/utils/plotting.py +25 -35
  89. ultralytics/utils/tal.py +27 -18
  90. ultralytics/utils/torch_utils.py +12 -13
  91. ultralytics/utils/tuner.py +2 -3
  92. {ultralytics-8.2.81.dist-info → ultralytics-8.2.82.dist-info}/METADATA +1 -1
  93. {ultralytics-8.2.81.dist-info → ultralytics-8.2.82.dist-info}/RECORD +97 -97
  94. {ultralytics-8.2.81.dist-info → ultralytics-8.2.82.dist-info}/LICENSE +0 -0
  95. {ultralytics-8.2.81.dist-info → ultralytics-8.2.82.dist-info}/WHEEL +0 -0
  96. {ultralytics-8.2.81.dist-info → ultralytics-8.2.82.dist-info}/entry_points.txt +0 -0
  97. {ultralytics-8.2.81.dist-info → ultralytics-8.2.82.dist-info}/top_level.txt +0 -0
ultralytics/utils/downloads.py CHANGED
@@ -75,7 +75,7 @@ def delete_dsstore(path, files_to_delete=(".DS_Store", "__MACOSX")):
75
75
  ```python
76
76
  from ultralytics.utils.downloads import delete_dsstore
77
77
 
78
- delete_dsstore('path/to/dir')
78
+ delete_dsstore("path/to/dir")
79
79
  ```
80
80
 
81
81
  Note:
@@ -107,7 +107,7 @@ def zip_directory(directory, compress=True, exclude=(".DS_Store", "__MACOSX"), p
107
107
  ```python
108
108
  from ultralytics.utils.downloads import zip_directory
109
109
 
110
- file = zip_directory('path/to/dir')
110
+ file = zip_directory("path/to/dir")
111
111
  ```
112
112
  """
113
113
  from zipfile import ZIP_DEFLATED, ZIP_STORED, ZipFile
@@ -153,7 +153,7 @@ def unzip_file(file, path=None, exclude=(".DS_Store", "__MACOSX"), exist_ok=Fals
153
153
  ```python
154
154
  from ultralytics.utils.downloads import unzip_file
155
155
 
156
- dir = unzip_file('path/to/file.zip')
156
+ dir = unzip_file("path/to/file.zip")
157
157
  ```
158
158
  """
159
159
  from zipfile import BadZipFile, ZipFile, is_zipfile
@@ -392,10 +392,9 @@ def get_github_assets(repo="ultralytics/assets", version="latest", retry=False):
392
392
 
393
393
  Example:
394
394
  ```python
395
- tag, assets = get_github_assets(repo='ultralytics/assets', version='latest')
395
+ tag, assets = get_github_assets(repo="ultralytics/assets", version="latest")
396
396
  ```
397
397
  """
398
-
399
398
  if version != "latest":
400
399
  version = f"tags/{version}" # i.e. tags/v6.2
401
400
  url = f"https://api.github.com/repos/{repo}/releases/{version}"
@@ -425,7 +424,7 @@ def attempt_download_asset(file, repo="ultralytics/assets", release="v8.2.0", **
425
424
 
426
425
  Example:
427
426
  ```python
428
- file_path = attempt_download_asset('yolov8n.pt', repo='ultralytics/assets', release='latest')
427
+ file_path = attempt_download_asset("yolov8n.pt", repo="ultralytics/assets", release="latest")
429
428
  ```
430
429
  """
431
430
  from ultralytics.utils import SETTINGS # scoped for circular import
@@ -480,7 +479,7 @@ def download(url, dir=Path.cwd(), unzip=True, delete=False, curl=False, threads=
480
479
 
481
480
  Example:
482
481
  ```python
483
- download('https://ultralytics.com/assets/example.zip', dir='path/to/dir', unzip=True)
482
+ download("https://ultralytics.com/assets/example.zip", dir="path/to/dir", unzip=True)
484
483
  ```
485
484
  """
486
485
  dir = Path(dir)
ultralytics/utils/files.py CHANGED
@@ -28,13 +28,13 @@ class WorkingDirectory(contextlib.ContextDecorator):
28
28
  Examples:
29
29
  Using as a context manager:
30
30
  >>> with WorkingDirectory('/path/to/new/dir'):
31
- >>> # Perform operations in the new directory
31
+ >>> # Perform operations in the new directory
32
32
  >>> pass
33
33
 
34
34
  Using as a decorator:
35
35
  >>> @WorkingDirectory('/path/to/new/dir')
36
36
  >>> def some_function():
37
- >>> # Perform operations in the new directory
37
+ >>> # Perform operations in the new directory
38
38
  >>> pass
39
39
  """
40
40
 
@@ -69,9 +69,8 @@ def spaces_in_path(path):
69
69
  Use the context manager to handle paths with spaces:
70
70
  >>> from ultralytics.utils.files import spaces_in_path
71
71
  >>> with spaces_in_path('/path/with spaces') as new_path:
72
- >>> # Your code here
72
+ >>> # Your code here
73
73
  """
74
-
75
74
  # If path has spaces, replace them with underscores
76
75
  if " " in str(path):
77
76
  string = isinstance(path, str) # input type
ultralytics/utils/instance.py CHANGED
@@ -96,8 +96,11 @@ class Bboxes:
96
96
 
97
97
  def mul(self, scale):
98
98
  """
99
+ Multiply bounding box coordinates by scale factor(s).
100
+
99
101
  Args:
100
- scale (tuple | list | int): the scale for four coords.
102
+ scale (int | tuple | list): Scale factor(s) for four coordinates.
103
+ If int, the same scale is applied to all coordinates.
101
104
  """
102
105
  if isinstance(scale, Number):
103
106
  scale = to_4tuple(scale)
@@ -110,8 +113,11 @@ class Bboxes:
110
113
 
111
114
  def add(self, offset):
112
115
  """
116
+ Add offset to bounding box coordinates.
117
+
113
118
  Args:
114
- offset (tuple | list | int): the offset for four coords.
119
+ offset (int | tuple | list): Offset(s) for four coordinates.
120
+ If int, the same offset is applied to all coordinates.
115
121
  """
116
122
  if isinstance(offset, Number):
117
123
  offset = to_4tuple(offset)
@@ -199,7 +205,7 @@ class Instances:
199
205
  instances = Instances(
200
206
  bboxes=np.array([[10, 10, 30, 30], [20, 20, 40, 40]]),
201
207
  segments=[np.array([[5, 5], [10, 10]]), np.array([[15, 15], [20, 20]])],
202
- keypoints=np.array([[[5, 5, 1], [10, 10, 1]], [[15, 15, 1], [20, 20, 1]]])
208
+ keypoints=np.array([[[5, 5, 1], [10, 10, 1]], [[15, 15, 1], [20, 20, 1]]]),
203
209
  )
204
210
  ```
205
211
 
@@ -210,10 +216,14 @@ class Instances:
210
216
 
211
217
  def __init__(self, bboxes, segments=None, keypoints=None, bbox_format="xywh", normalized=True) -> None:
212
218
  """
219
+ Initialize the object with bounding boxes, segments, and keypoints.
220
+
213
221
  Args:
214
- bboxes (ndarray): bboxes with shape [N, 4].
215
- segments (list | ndarray): segments.
216
- keypoints (ndarray): keypoints(x, y, visible) with shape [N, 17, 3].
222
+ bboxes (np.ndarray): Bounding boxes, shape [N, 4].
223
+ segments (list | np.ndarray, optional): Segmentation masks. Defaults to None.
224
+ keypoints (np.ndarray, optional): Keypoints, shape [N, 17, 3] and format (x, y, visible). Defaults to None.
225
+ bbox_format (str, optional): Format of bboxes. Defaults to "xywh".
226
+ normalized (bool, optional): Whether the coordinates are normalized. Defaults to True.
217
227
  """
218
228
  self._bboxes = Bboxes(bboxes=bboxes, format=bbox_format)
219
229
  self.keypoints = keypoints
@@ -230,7 +240,7 @@ class Instances:
230
240
  return self._bboxes.areas()
231
241
 
232
242
  def scale(self, scale_w, scale_h, bbox_only=False):
233
- """This might be similar with denormalize func but without normalized sign."""
243
+ """Similar to denormalize func but without normalized sign."""
234
244
  self._bboxes.mul(scale=(scale_w, scale_h, scale_w, scale_h))
235
245
  if bbox_only:
236
246
  return
ultralytics/utils/metrics.py CHANGED
@@ -30,7 +30,6 @@ def bbox_ioa(box1, box2, iou=False, eps=1e-7):
30
30
  Returns:
31
31
  (np.ndarray): A numpy array of shape (n, m) representing the intersection over box2 area.
32
32
  """
33
-
34
33
  # Get the coordinates of bounding boxes
35
34
  b1_x1, b1_y1, b1_x2, b1_y2 = box1.T
36
35
  b2_x1, b2_y1, b2_x2, b2_y2 = box2.T
@@ -53,7 +52,7 @@ def bbox_ioa(box1, box2, iou=False, eps=1e-7):
53
52
  def box_iou(box1, box2, eps=1e-7):
54
53
  """
55
54
  Calculate intersection-over-union (IoU) of boxes. Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
56
- Based on https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
55
+ Based on https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py.
57
56
 
58
57
  Args:
59
58
  box1 (torch.Tensor): A tensor of shape (N, 4) representing N bounding boxes.
@@ -63,7 +62,6 @@ def box_iou(box1, box2, eps=1e-7):
63
62
  Returns:
64
63
  (torch.Tensor): An NxM tensor containing the pairwise IoU values for every element in box1 and box2.
65
64
  """
66
-
67
65
  # NOTE: Need .float() to get accurate iou values
68
66
  # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
69
67
  (a1, a2), (b1, b2) = box1.float().unsqueeze(1).chunk(2, 2), box2.float().unsqueeze(0).chunk(2, 2)
@@ -90,7 +88,6 @@ def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7
90
88
  Returns:
91
89
  (torch.Tensor): IoU, GIoU, DIoU, or CIoU values depending on the specified flags.
92
90
  """
93
-
94
91
  # Get the coordinates of bounding boxes
95
92
  if xywh: # transform from xywh to xyxy
96
93
  (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1)
@@ -195,15 +192,22 @@ def _get_covariance_matrix(boxes):
195
192
 
196
193
  def probiou(obb1, obb2, CIoU=False, eps=1e-7):
197
194
  """
198
- Calculate the prob IoU between oriented bounding boxes, https://arxiv.org/pdf/2106.06072v1.pdf.
195
+ Calculate probabilistic IoU between oriented bounding boxes.
196
+
197
+ Implements the algorithm from https://arxiv.org/pdf/2106.06072v1.pdf.
199
198
 
200
199
  Args:
201
- obb1 (torch.Tensor): A tensor of shape (N, 5) representing ground truth obbs, with xywhr format.
202
- obb2 (torch.Tensor): A tensor of shape (N, 5) representing predicted obbs, with xywhr format.
203
- eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.
200
+ obb1 (torch.Tensor): Ground truth OBBs, shape (N, 5), format xywhr.
201
+ obb2 (torch.Tensor): Predicted OBBs, shape (N, 5), format xywhr.
202
+ CIoU (bool, optional): If True, calculate CIoU. Defaults to False.
203
+ eps (float, optional): Small value to avoid division by zero. Defaults to 1e-7.
204
204
 
205
205
  Returns:
206
- (torch.Tensor): A tensor of shape (N, ) representing obb similarities.
206
+ (torch.Tensor): OBB similarities, shape (N,).
207
+
208
+ Note:
209
+ OBB format: [center_x, center_y, width, height, rotation_angle].
210
+ If CIoU is True, returns CIoU instead of IoU.
207
211
  """
208
212
  x1, y1 = obb1[..., :2].split(1, dim=-1)
209
213
  x2, y2 = obb2[..., :2].split(1, dim=-1)
@@ -507,7 +511,6 @@ def compute_ap(recall, precision):
507
511
  (np.ndarray): Precision envelope curve.
508
512
  (np.ndarray): Modified recall curve with sentinel values added at the beginning and end.
509
513
  """
510
-
511
514
  # Append sentinel values to beginning and end
512
515
  mrec = np.concatenate(([0.0], recall, [1.0]))
513
516
  mpre = np.concatenate(([1.0], precision, [0.0]))
@@ -560,7 +563,6 @@ def ap_per_class(
560
563
  x (np.ndarray): X-axis values for the curves. Shape: (1000,).
561
564
  prec_values: Precision values at mAP@0.5 for each class. Shape: (nc, 1000).
562
565
  """
563
-
564
566
  # Sort by objectness
565
567
  i = np.argsort(-conf)
566
568
  tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
@@ -792,8 +794,8 @@ class Metric(SimpleClass):
792
794
 
793
795
  class DetMetrics(SimpleClass):
794
796
  """
795
- This class is a utility class for computing detection metrics such as precision, recall, and mean average precision
796
- (mAP) of an object detection model.
797
+ Utility class for computing detection metrics such as precision, recall, and mean average precision (mAP) of an
798
+ object detection model.
797
799
 
798
800
  Args:
799
801
  save_dir (Path): A path to the directory where the output plots will be saved. Defaults to current directory.
@@ -942,7 +944,6 @@ class SegmentMetrics(SimpleClass):
942
944
  pred_cls (list): List of predicted classes.
943
945
  target_cls (list): List of target classes.
944
946
  """
945
-
946
947
  results_mask = ap_per_class(
947
948
  tp_m,
948
949
  conf,
@@ -1084,7 +1085,6 @@ class PoseMetrics(SegmentMetrics):
1084
1085
  pred_cls (list): List of predicted classes.
1085
1086
  target_cls (list): List of target classes.
1086
1087
  """
1087
-
1088
1088
  results_pose = ap_per_class(
1089
1089
  tp_p,
1090
1090
  conf,
ultralytics/utils/ops.py CHANGED
@@ -141,14 +141,15 @@ def make_divisible(x, divisor):
141
141
 
142
142
  def nms_rotated(boxes, scores, threshold=0.45):
143
143
  """
144
- NMS for obbs, powered by probiou and fast-nms.
144
+ NMS for oriented bounding boxes using probiou and fast-nms.
145
145
 
146
146
  Args:
147
- boxes (torch.Tensor): (N, 5), xywhr.
148
- scores (torch.Tensor): (N, ).
149
- threshold (float): IoU threshold.
147
+ boxes (torch.Tensor): Rotated bounding boxes, shape (N, 5), format xywhr.
148
+ scores (torch.Tensor): Confidence scores, shape (N,).
149
+ threshold (float, optional): IoU threshold. Defaults to 0.45.
150
150
 
151
151
  Returns:
152
+ (torch.Tensor): Indices of boxes to keep after NMS.
152
153
  """
153
154
  if len(boxes) == 0:
154
155
  return np.empty((0,), dtype=np.int8)
@@ -597,7 +598,7 @@ def ltwh2xyxy(x):
597
598
 
598
599
  def segments2boxes(segments):
599
600
  """
600
- It converts segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh)
601
+ It converts segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh).
601
602
 
602
603
  Args:
603
604
  segments (list): list of segments, each segment is a list of points, each point is a list of x, y coordinates
@@ -667,7 +668,6 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False):
667
668
  (torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w
668
669
  are the height and width of the input image. The mask is applied to the bounding boxes.
669
670
  """
670
-
671
671
  c, mh, mw = protos.shape # CHW
672
672
  ih, iw = shape
673
673
  masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw) # CHW
@@ -785,7 +785,7 @@ def regularize_rboxes(rboxes):
785
785
 
786
786
  def masks2segments(masks, strategy="largest"):
787
787
  """
788
- It takes a list of masks(n,h,w) and returns a list of segments(n,xy)
788
+ It takes a list of masks(n,h,w) and returns a list of segments(n,xy).
789
789
 
790
790
  Args:
791
791
  masks (torch.Tensor): the output of the model, which is a tensor of shape (batch_size, 160, 160)
@@ -823,7 +823,7 @@ def convert_torch2numpy_batch(batch: torch.Tensor) -> np.ndarray:
823
823
 
824
824
  def clean_str(s):
825
825
  """
826
- Cleans a string by replacing special characters with underscore _
826
+ Cleans a string by replacing special characters with '_' character.
827
827
 
828
828
  Args:
829
829
  s (str): a string needing special characters replaced
ultralytics/utils/plotting.py CHANGED
@@ -204,7 +204,6 @@ class Annotator:
204
204
  txt_color (tuple, optional): The color of the text (R, G, B).
205
205
  margin (int, optional): The margin between the text and the rectangle border.
206
206
  """
207
-
208
207
  # If label have more than 3 characters, skip other characters, due to circle size
209
208
  if len(label) > 3:
210
209
  print(
@@ -246,7 +245,6 @@ class Annotator:
246
245
  txt_color (tuple, optional): The color of the text (R, G, B).
247
246
  margin (int, optional): The margin between the text and the rectangle border.
248
247
  """
249
-
250
248
  # Calculate the center of the bounding box
251
249
  x_center, y_center = int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2)
252
250
  # Get the size of the text
@@ -284,7 +282,6 @@ class Annotator:
284
282
  txt_color (tuple, optional): The color of the text (R, G, B).
285
283
  rotated (bool, optional): Variable used to check if task is OBB
286
284
  """
287
-
288
285
  txt_color = self.get_txt_color(color, txt_color)
289
286
  if isinstance(box, torch.Tensor):
290
287
  box = box.tolist()
@@ -343,7 +340,6 @@ class Annotator:
343
340
  alpha (float): Mask transparency: 0.0 fully transparent, 1.0 opaque
344
341
  retina_masks (bool): Whether to use high resolution masks or not. Defaults to False.
345
342
  """
346
-
347
343
  if self.pil:
348
344
  # Convert to numpy first
349
345
  self.im = np.asarray(self.im).copy()
@@ -374,17 +370,18 @@ class Annotator:
374
370
  Plot keypoints on the image.
375
371
 
376
372
  Args:
377
- kpts (tensor): Predicted keypoints with shape [17, 3]. Each keypoint has (x, y, confidence).
378
- shape (tuple): Image shape as a tuple (h, w), where h is the height and w is the width.
379
- radius (int, optional): Radius of the drawn keypoints. Default is 5.
380
- kpt_line (bool, optional): If True, the function will draw lines connecting keypoints
381
- for human pose. Default is True.
382
- kpt_color (tuple, optional): The color of the keypoints (B, G, R).
373
+ kpts (torch.Tensor): Keypoints, shape [17, 3] (x, y, confidence).
374
+ shape (tuple, optional): Image shape (h, w). Defaults to (640, 640).
375
+ radius (int, optional): Keypoint radius. Defaults to 5.
376
+ kpt_line (bool, optional): Draw lines between keypoints. Defaults to True.
377
+ conf_thres (float, optional): Confidence threshold. Defaults to 0.25.
378
+ kpt_color (tuple, optional): Keypoint color (B, G, R). Defaults to None.
383
379
 
384
380
  Note:
385
- `kpt_line=True` currently only supports human pose plotting.
381
+ - `kpt_line=True` currently only supports human pose plotting.
382
+ - Modifies self.im in-place.
383
+ - If self.pil is True, converts image to numpy array and back to PIL.
386
384
  """
387
-
388
385
  if self.pil:
389
386
  # Convert to numpy first
390
387
  self.im = np.asarray(self.im).copy()
@@ -488,7 +485,6 @@ class Annotator:
488
485
  Returns:
489
486
  angle (degree): Degree value of angle between three points
490
487
  """
491
-
492
488
  x_min, y_min, x_max, y_max = bbox
493
489
  width = x_max - x_min
494
490
  height = y_max - y_min
@@ -503,7 +499,6 @@ class Annotator:
503
499
  color (tuple): Region Color value
504
500
  thickness (int): Region area thickness value
505
501
  """
506
-
507
502
  cv2.polylines(self.im, [np.array(reg_pts, dtype=np.int32)], isClosed=True, color=color, thickness=thickness)
508
503
 
509
504
  def draw_centroid_and_tracks(self, track, color=(255, 0, 255), track_thickness=2):
@@ -515,7 +510,6 @@ class Annotator:
515
510
  color (tuple): tracks line color
516
511
  track_thickness (int): track line thickness value
517
512
  """
518
-
519
513
  points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2))
520
514
  cv2.polylines(self.im, [points], isClosed=False, color=color, thickness=track_thickness)
521
515
  cv2.circle(self.im, (int(track[-1][0]), int(track[-1][1])), track_thickness * 2, color, -1)
@@ -530,7 +524,6 @@ class Annotator:
530
524
  region_color (RGB): queue region color
531
525
  txt_color (RGB): text display color
532
526
  """
533
-
534
527
  x_values = [point[0] for point in points]
535
528
  y_values = [point[1] for point in points]
536
529
  center_x = sum(x_values) // len(points)
@@ -574,7 +567,6 @@ class Annotator:
574
567
  y_center (float): y position center point for bounding box
575
568
  margin (int): gap between text and rectangle for better display
576
569
  """
577
-
578
570
  text_size = cv2.getTextSize(text, 0, fontScale=self.sf, thickness=self.tf)[0]
579
571
  text_x = x_center - text_size[0] // 2
580
572
  text_y = y_center + text_size[1] // 2
@@ -597,7 +589,6 @@ class Annotator:
597
589
  bg_color (bgr color): display color for text background
598
590
  margin (int): gap between text and rectangle for better display
599
591
  """
600
-
601
592
  horizontal_gap = int(im0.shape[1] * 0.02)
602
593
  vertical_gap = int(im0.shape[0] * 0.01)
603
594
  text_y_offset = 0
@@ -629,7 +620,6 @@ class Annotator:
629
620
  Returns:
630
621
  angle (degree): Degree value of angle between three points
631
622
  """
632
-
633
623
  a, b, c = np.array(a), np.array(b), np.array(c)
634
624
  radians = np.arctan2(c[1] - b[1], c[0] - b[0]) - np.arctan2(a[1] - b[1], a[0] - b[0])
635
625
  angle = np.abs(radians * 180.0 / np.pi)
@@ -642,12 +632,19 @@ class Annotator:
642
632
  Draw specific keypoints for gym steps counting.
643
633
 
644
634
  Args:
645
- keypoints (list): list of keypoints data to be plotted
646
- indices (list): keypoints ids list to be plotted
647
- shape (tuple): imgsz for model inference
648
- radius (int): Keypoint radius value
649
- """
635
+ keypoints (list): Keypoints data to be plotted.
636
+ indices (list, optional): Keypoint indices to be plotted. Defaults to [2, 5, 7].
637
+ shape (tuple, optional): Image size for model inference. Defaults to (640, 640).
638
+ radius (int, optional): Keypoint radius. Defaults to 2.
639
+ conf_thres (float, optional): Confidence threshold for keypoints. Defaults to 0.25.
640
+
641
+ Returns:
642
+ (numpy.ndarray): Image with drawn keypoints.
650
643
 
644
+ Note:
645
+ Keypoint format: [x, y] or [x, y, confidence].
646
+ Modifies self.im in-place.
647
+ """
651
648
  if indices is None:
652
649
  indices = [2, 5, 7]
653
650
  for i, k in enumerate(keypoints):
@@ -675,7 +672,6 @@ class Annotator:
675
672
  color (tuple): text background color for workout monitoring
676
673
  txt_color (tuple): text foreground color for workout monitoring
677
674
  """
678
-
679
675
  angle_text, count_text, stage_text = (f" {angle_text:.2f}", f"Steps : {count_text}", f" {stage_text}")
680
676
 
681
677
  # Draw angle
@@ -744,7 +740,6 @@ class Annotator:
744
740
  label (str): Detection label text
745
741
  txt_color (RGB): text color
746
742
  """
747
-
748
743
  cv2.polylines(self.im, [np.int32([mask])], isClosed=True, color=mask_color, thickness=2)
749
744
  text_size, _ = cv2.getTextSize(label, 0, self.sf, self.tf)
750
745
 
@@ -772,7 +767,6 @@ class Annotator:
772
767
  line_color (RGB): Distance line color.
773
768
  centroid_color (RGB): Bounding box centroid color.
774
769
  """
775
-
776
770
  (text_width_m, text_height_m), _ = cv2.getTextSize(f"Distance M: {distance_m:.2f}m", 0, self.sf, self.tf)
777
771
  cv2.rectangle(self.im, (15, 25), (15 + text_width_m + 10, 25 + text_height_m + 20), line_color, -1)
778
772
  cv2.putText(
@@ -813,7 +807,6 @@ class Annotator:
813
807
  color (tuple): object centroid and line color value
814
808
  pin_color (tuple): visioneye point color value
815
809
  """
816
-
817
810
  center_bbox = int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2)
818
811
  cv2.circle(self.im, center_point, self.tf * 2, pin_color, -1)
819
812
  cv2.circle(self.im, center_bbox, self.tf * 2, color, -1)
@@ -902,11 +895,10 @@ def save_one_box(xyxy, im, file=Path("im.jpg"), gain=1.02, pad=10, square=False,
902
895
  from ultralytics.utils.plotting import save_one_box
903
896
 
904
897
  xyxy = [50, 50, 150, 150]
905
- im = cv2.imread('image.jpg')
906
- cropped_im = save_one_box(xyxy, im, file='cropped.jpg', square=True)
898
+ im = cv2.imread("image.jpg")
899
+ cropped_im = save_one_box(xyxy, im, file="cropped.jpg", square=True)
907
900
  ```
908
901
  """
909
-
910
902
  if not isinstance(xyxy, torch.Tensor): # may be list
911
903
  xyxy = torch.stack(xyxy)
912
904
  b = ops.xyxy2xywh(xyxy.view(-1, 4)) # boxes
@@ -1109,7 +1101,7 @@ def plot_results(file="path/to/results.csv", dir="", segment=False, pose=False,
1109
1101
  ```python
1110
1102
  from ultralytics.utils.plotting import plot_results
1111
1103
 
1112
- plot_results('path/to/results.csv', segment=True)
1104
+ plot_results("path/to/results.csv", segment=True)
1113
1105
  ```
1114
1106
  """
1115
1107
  import pandas as pd # scope for faster 'import ultralytics'
@@ -1171,7 +1163,6 @@ def plt_color_scatter(v, f, bins=20, cmap="viridis", alpha=0.8, edgecolors="none
1171
1163
  >>> f = np.random.rand(100)
1172
1164
  >>> plt_color_scatter(v, f)
1173
1165
  """
1174
-
1175
1166
  # Calculate 2D histogram and corresponding colors
1176
1167
  hist, xedges, yedges = np.histogram2d(v, f, bins=bins)
1177
1168
  colors = [
@@ -1195,9 +1186,8 @@ def plot_tune_results(csv_file="tune_results.csv"):
1195
1186
  csv_file (str, optional): Path to the CSV file containing the tuning results. Defaults to 'tune_results.csv'.
1196
1187
 
1197
1188
  Examples:
1198
- >>> plot_tune_results('path/to/tune_results.csv')
1189
+ >>> plot_tune_results("path/to/tune_results.csv")
1199
1190
  """
1200
-
1201
1191
  import pandas as pd # scope for faster 'import ultralytics'
1202
1192
  from scipy.ndimage import gaussian_filter1d
1203
1193
 
ultralytics/utils/tal.py CHANGED
@@ -140,7 +140,6 @@ class TaskAlignedAssigner(nn.Module):
140
140
  Returns:
141
141
  (Tensor): A tensor of shape (b, max_num_obj, h*w) containing the selected top-k candidates.
142
142
  """
143
-
144
143
  # (b, max_num_obj, topk)
145
144
  topk_metrics, topk_idxs = torch.topk(metrics, self.topk, dim=-1, largest=largest)
146
145
  if topk_mask is None:
@@ -184,7 +183,6 @@ class TaskAlignedAssigner(nn.Module):
184
183
  for positive anchor points, where num_classes is the number
185
184
  of object classes.
186
185
  """
187
-
188
186
  # Assigned target labels, (b, 1)
189
187
  batch_ind = torch.arange(end=self.bs, dtype=torch.int64, device=gt_labels.device)[..., None]
190
188
  target_gt_idx = target_gt_idx + batch_ind * self.n_max_boxes # (b, h*w)
@@ -212,14 +210,19 @@ class TaskAlignedAssigner(nn.Module):
212
210
  @staticmethod
213
211
  def select_candidates_in_gts(xy_centers, gt_bboxes, eps=1e-9):
214
212
  """
215
- Select the positive anchor center in gt.
213
+ Select positive anchor centers within ground truth bounding boxes.
216
214
 
217
215
  Args:
218
- xy_centers (Tensor): shape(h*w, 2)
219
- gt_bboxes (Tensor): shape(b, n_boxes, 4)
216
+ xy_centers (torch.Tensor): Anchor center coordinates, shape (h*w, 2).
217
+ gt_bboxes (torch.Tensor): Ground truth bounding boxes, shape (b, n_boxes, 4).
218
+ eps (float, optional): Small value for numerical stability. Defaults to 1e-9.
220
219
 
221
220
  Returns:
222
- (Tensor): shape(b, n_boxes, h*w)
221
+ (torch.Tensor): Boolean mask of positive anchors, shape (b, n_boxes, h*w).
222
+
223
+ Note:
224
+ b: batch size, n_boxes: number of ground truth boxes, h: height, w: width.
225
+ Bounding box format: [x_min, y_min, x_max, y_max].
223
226
  """
224
227
  n_anchors = xy_centers.shape[0]
225
228
  bs, n_boxes, _ = gt_bboxes.shape
@@ -231,18 +234,22 @@ class TaskAlignedAssigner(nn.Module):
231
234
  @staticmethod
232
235
  def select_highest_overlaps(mask_pos, overlaps, n_max_boxes):
233
236
  """
234
- If an anchor box is assigned to multiple gts, the one with the highest IoU will be selected.
237
+ Select anchor boxes with highest IoU when assigned to multiple ground truths.
235
238
 
236
239
  Args:
237
- mask_pos (Tensor): shape(b, n_max_boxes, h*w)
238
- overlaps (Tensor): shape(b, n_max_boxes, h*w)
240
+ mask_pos (torch.Tensor): Positive mask, shape (b, n_max_boxes, h*w).
241
+ overlaps (torch.Tensor): IoU overlaps, shape (b, n_max_boxes, h*w).
242
+ n_max_boxes (int): Maximum number of ground truth boxes.
239
243
 
240
244
  Returns:
241
- target_gt_idx (Tensor): shape(b, h*w)
242
- fg_mask (Tensor): shape(b, h*w)
243
- mask_pos (Tensor): shape(b, n_max_boxes, h*w)
245
+ target_gt_idx (torch.Tensor): Indices of assigned ground truths, shape (b, h*w).
246
+ fg_mask (torch.Tensor): Foreground mask, shape (b, h*w).
247
+ mask_pos (torch.Tensor): Updated positive mask, shape (b, n_max_boxes, h*w).
248
+
249
+ Note:
250
+ b: batch size, h: height, w: width.
244
251
  """
245
- # (b, n_max_boxes, h*w) -> (b, h*w)
252
+ # Convert (b, n_max_boxes, h*w) -> (b, h*w)
246
253
  fg_mask = mask_pos.sum(-2)
247
254
  if fg_mask.max() > 1: # one anchor is assigned to multiple gt_bboxes
248
255
  mask_multi_gts = (fg_mask.unsqueeze(1) > 1).expand(-1, n_max_boxes, -1) # (b, n_max_boxes, h*w)
@@ -328,14 +335,16 @@ def bbox2dist(anchor_points, bbox, reg_max):
328
335
 
329
336
  def dist2rbox(pred_dist, pred_angle, anchor_points, dim=-1):
330
337
  """
331
- Decode predicted object bounding box coordinates from anchor points and distribution.
338
+ Decode predicted rotated bounding box coordinates from anchor points and distribution.
332
339
 
333
340
  Args:
334
- pred_dist (torch.Tensor): Predicted rotated distance, (bs, h*w, 4).
335
- pred_angle (torch.Tensor): Predicted angle, (bs, h*w, 1).
336
- anchor_points (torch.Tensor): Anchor points, (h*w, 2).
341
+ pred_dist (torch.Tensor): Predicted rotated distance, shape (bs, h*w, 4).
342
+ pred_angle (torch.Tensor): Predicted angle, shape (bs, h*w, 1).
343
+ anchor_points (torch.Tensor): Anchor points, shape (h*w, 2).
344
+ dim (int, optional): Dimension along which to split. Defaults to -1.
345
+
337
346
  Returns:
338
- (torch.Tensor): Predicted rotated bounding boxes, (bs, h*w, 4).
347
+ (torch.Tensor): Predicted rotated bounding boxes, shape (bs, h*w, 4).
339
348
  """
340
349
  lt, rb = pred_dist.split(2, dim=dim)
341
350
  cos, sin = torch.cos(pred_angle), torch.sin(pred_angle)