ultralytics 8.3.89__py3-none-any.whl → 8.3.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. tests/conftest.py +2 -2
  2. tests/test_cli.py +13 -11
  3. tests/test_cuda.py +10 -1
  4. tests/test_integrations.py +1 -5
  5. tests/test_python.py +16 -16
  6. tests/test_solutions.py +9 -9
  7. ultralytics/__init__.py +1 -1
  8. ultralytics/cfg/__init__.py +3 -1
  9. ultralytics/cfg/models/11/yolo11-cls.yaml +5 -5
  10. ultralytics/cfg/models/11/yolo11-obb.yaml +5 -5
  11. ultralytics/cfg/models/11/yolo11-pose.yaml +5 -5
  12. ultralytics/cfg/models/11/yolo11-seg.yaml +5 -5
  13. ultralytics/cfg/models/11/yolo11.yaml +5 -5
  14. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +5 -5
  15. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +5 -5
  16. ultralytics/cfg/models/v8/yolov8-ghost.yaml +5 -5
  17. ultralytics/cfg/models/v8/yolov8-obb.yaml +5 -5
  18. ultralytics/cfg/models/v8/yolov8-p6.yaml +5 -5
  19. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +5 -5
  20. ultralytics/cfg/models/v8/yolov8-world.yaml +5 -5
  21. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +5 -5
  22. ultralytics/cfg/models/v8/yolov8.yaml +5 -5
  23. ultralytics/cfg/models/v9/yolov9c-seg.yaml +1 -1
  24. ultralytics/cfg/models/v9/yolov9c.yaml +1 -1
  25. ultralytics/cfg/models/v9/yolov9e-seg.yaml +1 -1
  26. ultralytics/cfg/models/v9/yolov9e.yaml +1 -1
  27. ultralytics/cfg/models/v9/yolov9m.yaml +1 -1
  28. ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
  29. ultralytics/cfg/models/v9/yolov9t.yaml +1 -1
  30. ultralytics/data/annotator.py +9 -14
  31. ultralytics/data/base.py +118 -30
  32. ultralytics/data/build.py +63 -24
  33. ultralytics/data/converter.py +5 -5
  34. ultralytics/data/dataset.py +207 -53
  35. ultralytics/data/loaders.py +1 -0
  36. ultralytics/data/split_dota.py +39 -12
  37. ultralytics/data/utils.py +13 -19
  38. ultralytics/engine/exporter.py +19 -17
  39. ultralytics/engine/model.py +67 -88
  40. ultralytics/engine/predictor.py +106 -21
  41. ultralytics/engine/trainer.py +32 -23
  42. ultralytics/engine/tuner.py +21 -18
  43. ultralytics/engine/validator.py +75 -41
  44. ultralytics/hub/__init__.py +12 -13
  45. ultralytics/hub/auth.py +9 -12
  46. ultralytics/hub/session.py +76 -21
  47. ultralytics/hub/utils.py +19 -17
  48. ultralytics/models/fastsam/model.py +20 -11
  49. ultralytics/models/fastsam/predict.py +36 -16
  50. ultralytics/models/fastsam/utils.py +5 -5
  51. ultralytics/models/fastsam/val.py +6 -6
  52. ultralytics/models/nas/model.py +22 -11
  53. ultralytics/models/nas/predict.py +9 -4
  54. ultralytics/models/nas/val.py +5 -5
  55. ultralytics/models/rtdetr/model.py +20 -11
  56. ultralytics/models/rtdetr/predict.py +18 -15
  57. ultralytics/models/rtdetr/train.py +20 -16
  58. ultralytics/models/rtdetr/val.py +42 -6
  59. ultralytics/models/sam/__init__.py +1 -1
  60. ultralytics/models/sam/amg.py +50 -4
  61. ultralytics/models/sam/model.py +8 -14
  62. ultralytics/models/sam/modules/decoders.py +18 -21
  63. ultralytics/models/sam/modules/encoders.py +25 -46
  64. ultralytics/models/sam/modules/memory_attention.py +19 -15
  65. ultralytics/models/sam/modules/sam.py +18 -25
  66. ultralytics/models/sam/modules/tiny_encoder.py +19 -29
  67. ultralytics/models/sam/modules/transformer.py +35 -57
  68. ultralytics/models/sam/modules/utils.py +15 -15
  69. ultralytics/models/sam/predict.py +0 -3
  70. ultralytics/models/utils/loss.py +87 -36
  71. ultralytics/models/utils/ops.py +26 -31
  72. ultralytics/models/yolo/classify/predict.py +24 -3
  73. ultralytics/models/yolo/classify/train.py +77 -10
  74. ultralytics/models/yolo/classify/val.py +40 -15
  75. ultralytics/models/yolo/detect/predict.py +23 -10
  76. ultralytics/models/yolo/detect/train.py +85 -15
  77. ultralytics/models/yolo/detect/val.py +145 -21
  78. ultralytics/models/yolo/model.py +1 -2
  79. ultralytics/models/yolo/obb/predict.py +12 -4
  80. ultralytics/models/yolo/obb/train.py +7 -0
  81. ultralytics/models/yolo/obb/val.py +25 -7
  82. ultralytics/models/yolo/pose/predict.py +22 -6
  83. ultralytics/models/yolo/pose/train.py +17 -1
  84. ultralytics/models/yolo/pose/val.py +46 -21
  85. ultralytics/models/yolo/segment/predict.py +22 -8
  86. ultralytics/models/yolo/segment/train.py +6 -0
  87. ultralytics/models/yolo/segment/val.py +100 -14
  88. ultralytics/models/yolo/world/train.py +38 -8
  89. ultralytics/models/yolo/world/train_world.py +39 -10
  90. ultralytics/nn/autobackend.py +28 -14
  91. ultralytics/nn/modules/__init__.py +3 -0
  92. ultralytics/nn/modules/activation.py +12 -3
  93. ultralytics/nn/modules/block.py +587 -84
  94. ultralytics/nn/modules/conv.py +418 -54
  95. ultralytics/nn/modules/head.py +3 -4
  96. ultralytics/nn/modules/transformer.py +320 -34
  97. ultralytics/nn/modules/utils.py +17 -3
  98. ultralytics/nn/tasks.py +221 -69
  99. ultralytics/solutions/ai_gym.py +2 -2
  100. ultralytics/solutions/analytics.py +4 -4
  101. ultralytics/solutions/heatmap.py +4 -4
  102. ultralytics/solutions/instance_segmentation.py +10 -4
  103. ultralytics/solutions/object_blurrer.py +2 -2
  104. ultralytics/solutions/object_counter.py +2 -2
  105. ultralytics/solutions/object_cropper.py +2 -2
  106. ultralytics/solutions/parking_management.py +9 -9
  107. ultralytics/solutions/queue_management.py +1 -1
  108. ultralytics/solutions/region_counter.py +2 -2
  109. ultralytics/solutions/security_alarm.py +7 -7
  110. ultralytics/solutions/solutions.py +7 -4
  111. ultralytics/solutions/speed_estimation.py +2 -2
  112. ultralytics/solutions/streamlit_inference.py +6 -6
  113. ultralytics/solutions/trackzone.py +9 -2
  114. ultralytics/solutions/vision_eye.py +4 -4
  115. ultralytics/trackers/basetrack.py +1 -1
  116. ultralytics/trackers/bot_sort.py +23 -22
  117. ultralytics/trackers/byte_tracker.py +4 -4
  118. ultralytics/trackers/track.py +2 -1
  119. ultralytics/trackers/utils/gmc.py +26 -27
  120. ultralytics/trackers/utils/kalman_filter.py +31 -29
  121. ultralytics/trackers/utils/matching.py +7 -7
  122. ultralytics/utils/__init__.py +32 -27
  123. ultralytics/utils/autobatch.py +5 -5
  124. ultralytics/utils/benchmarks.py +111 -18
  125. ultralytics/utils/callbacks/base.py +3 -3
  126. ultralytics/utils/callbacks/clearml.py +11 -11
  127. ultralytics/utils/callbacks/comet.py +35 -22
  128. ultralytics/utils/callbacks/dvc.py +11 -10
  129. ultralytics/utils/callbacks/hub.py +8 -8
  130. ultralytics/utils/callbacks/mlflow.py +1 -1
  131. ultralytics/utils/callbacks/neptune.py +12 -10
  132. ultralytics/utils/callbacks/raytune.py +1 -1
  133. ultralytics/utils/callbacks/tensorboard.py +6 -6
  134. ultralytics/utils/callbacks/wb.py +16 -16
  135. ultralytics/utils/checks.py +116 -35
  136. ultralytics/utils/dist.py +15 -2
  137. ultralytics/utils/downloads.py +13 -9
  138. ultralytics/utils/files.py +12 -13
  139. ultralytics/utils/instance.py +112 -45
  140. ultralytics/utils/loss.py +28 -33
  141. ultralytics/utils/metrics.py +246 -181
  142. ultralytics/utils/ops.py +61 -53
  143. ultralytics/utils/patches.py +8 -6
  144. ultralytics/utils/plotting.py +64 -45
  145. ultralytics/utils/tal.py +88 -57
  146. ultralytics/utils/torch_utils.py +181 -33
  147. ultralytics/utils/triton.py +13 -3
  148. ultralytics/utils/tuner.py +8 -16
  149. {ultralytics-8.3.89.dist-info → ultralytics-8.3.90.dist-info}/METADATA +1 -1
  150. ultralytics-8.3.90.dist-info/RECORD +250 -0
  151. ultralytics-8.3.89.dist-info/RECORD +0 -250
  152. {ultralytics-8.3.89.dist-info → ultralytics-8.3.90.dist-info}/LICENSE +0 -0
  153. {ultralytics-8.3.89.dist-info → ultralytics-8.3.90.dist-info}/WHEEL +0 -0
  154. {ultralytics-8.3.89.dist-info → ultralytics-8.3.90.dist-info}/entry_points.txt +0 -0
  155. {ultralytics-8.3.89.dist-info → ultralytics-8.3.90.dist-info}/top_level.txt +0 -0
ultralytics/utils/files.py CHANGED
@@ -18,7 +18,7 @@ class WorkingDirectory(contextlib.ContextDecorator):
18
18
  It ensures that the original working directory is restored after the context or decorated function completes.
19
19
 
20
20
  Attributes:
21
- dir (Path): The new directory to switch to.
21
+ dir (Path | str): The new directory to switch to.
22
22
  cwd (Path): The original current working directory before the switch.
23
23
 
24
24
  Methods:
@@ -55,21 +55,21 @@ class WorkingDirectory(contextlib.ContextDecorator):
55
55
  @contextmanager
56
56
  def spaces_in_path(path):
57
57
  """
58
- Context manager to handle paths with spaces in their names. If a path contains spaces, it replaces them with
59
- underscores, copies the file/directory to the new path, executes the context code block, then copies the
60
- file/directory back to its original location.
58
+ Context manager to handle paths with spaces in their names.
59
+
60
+ If a path contains spaces, it replaces them with underscores, copies the file/directory to the new path, executes
61
+ the context code block, then copies the file/directory back to its original location.
61
62
 
62
63
  Args:
63
64
  path (str | Path): The original path that may contain spaces.
64
65
 
65
66
  Yields:
66
- (Path): Temporary path with spaces replaced by underscores if spaces were present, otherwise the original path.
67
+ (Path | str): Temporary path with spaces replaced by underscores if spaces were present, otherwise the original path.
67
68
 
68
69
  Examples:
69
- Use the context manager to handle paths with spaces:
70
- >>> from ultralytics.utils.files import spaces_in_path
71
70
  >>> with spaces_in_path('/path/with spaces') as new_path:
72
71
  >>> # Your code here
72
+ >>> pass
73
73
  """
74
74
  # If path has spaces, replace them with underscores
75
75
  if " " in str(path):
@@ -106,21 +106,20 @@ def spaces_in_path(path):
106
106
 
107
107
  def increment_path(path, exist_ok=False, sep="", mkdir=False):
108
108
  """
109
- Increments a file or directory path, i.e., runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc.
109
+ Increment a file or directory path, i.e., runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc.
110
110
 
111
111
  If the path exists and `exist_ok` is not True, the path will be incremented by appending a number and `sep` to
112
112
  the end of the path. If the path is a file, the file extension will be preserved. If the path is a directory, the
113
- number will be appended directly to the end of the path. If `mkdir` is set to True, the path will be created as a
114
- directory if it does not already exist.
113
+ number will be appended directly to the end of the path.
115
114
 
116
115
  Args:
117
- path (str | pathlib.Path): Path to increment.
116
+ path (str | Path): Path to increment.
118
117
  exist_ok (bool): If True, the path will not be incremented and returned as-is.
119
118
  sep (str): Separator to use between the path and the incrementation number.
120
119
  mkdir (bool): Create a directory if it does not exist.
121
120
 
122
121
  Returns:
123
- (pathlib.Path): Incremented path.
122
+ (Path): Incremented path.
124
123
 
125
124
  Examples:
126
125
  Increment a directory path:
@@ -185,7 +184,7 @@ def get_latest_run(search_dir="."):
185
184
 
186
185
  def update_models(model_names=("yolo11n.pt",), source_dir=Path("."), update_names=False):
187
186
  """
188
- Updates and re-saves specified YOLO models in an 'updated_models' subdirectory.
187
+ Update and re-save specified YOLO models in an 'updated_models' subdirectory.
189
188
 
190
189
  Args:
191
190
  model_names (Tuple[str, ...]): Model filenames to update.
ultralytics/utils/instance.py CHANGED
@@ -14,7 +14,7 @@ def _ntuple(n):
14
14
  """From PyTorch internals."""
15
15
 
16
16
  def parse(x):
17
- """Parse bounding boxes format between XYWH and LTWH."""
17
+ """Parse input to return n-tuple by repeating singleton values n times."""
18
18
  return x if isinstance(x, abc.Iterable) else tuple(repeat(x, n))
19
19
 
20
20
  return parse
@@ -39,7 +39,7 @@ class Bboxes:
39
39
  Bounding box data should be provided in numpy arrays.
40
40
 
41
41
  Attributes:
42
- bboxes (numpy.ndarray): The bounding boxes stored in a 2D numpy array.
42
+ bboxes (np.ndarray): The bounding boxes stored in a 2D numpy array with shape (N, 4).
43
43
  format (str): The format of the bounding boxes ('xyxy', 'xywh', or 'ltwh').
44
44
 
45
45
  Note:
@@ -47,7 +47,13 @@ class Bboxes:
47
47
  """
48
48
 
49
49
  def __init__(self, bboxes, format="xyxy") -> None:
50
- """Initializes the Bboxes class with bounding box data in a specified format."""
50
+ """
51
+ Initialize the Bboxes class with bounding box data in a specified format.
52
+
53
+ Args:
54
+ bboxes (np.ndarray): Array of bounding boxes with shape (N, 4) or (4,).
55
+ format (str): Format of the bounding boxes, one of 'xyxy', 'xywh', or 'ltwh'.
56
+ """
51
57
  assert format in _formats, f"Invalid bounding box format: {format}, format must be one of {_formats}"
52
58
  bboxes = bboxes[None, :] if bboxes.ndim == 1 else bboxes
53
59
  assert bboxes.ndim == 2
@@ -57,7 +63,12 @@ class Bboxes:
57
63
  # self.normalized = normalized
58
64
 
59
65
  def convert(self, format):
60
- """Converts bounding box format from one type to another."""
66
+ """
67
+ Convert bounding box format from one type to another.
68
+
69
+ Args:
70
+ format (str): Target format for conversion, one of 'xyxy', 'xywh', or 'ltwh'.
71
+ """
61
72
  assert format in _formats, f"Invalid bounding box format: {format}, format must be one of {_formats}"
62
73
  if self.format == format:
63
74
  return
@@ -140,10 +151,9 @@ class Bboxes:
140
151
  Args:
141
152
  boxes_list (List[Bboxes]): A list of Bboxes objects to concatenate.
142
153
  axis (int, optional): The axis along which to concatenate the bounding boxes.
143
- Defaults to 0.
144
154
 
145
155
  Returns:
146
- Bboxes: A new Bboxes object containing the concatenated bounding boxes.
156
+ (Bboxes): A new Bboxes object containing the concatenated bounding boxes.
147
157
 
148
158
  Note:
149
159
  The input should be a list or tuple of Bboxes objects.
@@ -162,11 +172,11 @@ class Bboxes:
162
172
  Retrieve a specific bounding box or a set of bounding boxes using indexing.
163
173
 
164
174
  Args:
165
- index (int, slice, or np.ndarray): The index, slice, or boolean array to select
166
- the desired bounding boxes.
175
+ index (int | slice | np.ndarray): The index, slice, or boolean array to select
176
+ the desired bounding boxes.
167
177
 
168
178
  Returns:
169
- Bboxes: A new Bboxes object containing the selected bounding boxes.
179
+ (Bboxes): A new Bboxes object containing the selected bounding boxes.
170
180
 
171
181
  Raises:
172
182
  AssertionError: If the indexed bounding boxes do not form a 2-dimensional matrix.
@@ -188,28 +198,29 @@ class Instances:
188
198
 
189
199
  Attributes:
190
200
  _bboxes (Bboxes): Internal object for handling bounding box operations.
191
- keypoints (np.ndarray): keypoints(x, y, visible) with shape [N, 17, 3]. Default is None.
201
+ keypoints (np.ndarray): Keypoints with shape (N, 17, 3) in format (x, y, visible).
192
202
  normalized (bool): Flag indicating whether the bounding box coordinates are normalized.
193
- segments (np.ndarray): Segments array with shape [N, 1000, 2] after resampling.
194
-
195
- Args:
196
- bboxes (np.ndarray): An array of bounding boxes with shape [N, 4].
197
- segments (list | ndarray, optional): A list or array of object segments. Default is None.
198
- keypoints (ndarray, optional): An array of keypoints with shape [N, 17, 3]. Default is None.
199
- bbox_format (str, optional): The format of bounding boxes ('xywh' or 'xyxy'). Default is 'xywh'.
200
- normalized (bool, optional): Whether the bounding box coordinates are normalized. Default is True.
203
+ segments (np.ndarray): Segments array with shape (N, M, 2) after resampling.
204
+
205
+ Methods:
206
+ convert_bbox: Convert bounding box format.
207
+ scale: Scale coordinates by given factors.
208
+ denormalize: Convert normalized coordinates to absolute coordinates.
209
+ normalize: Convert absolute coordinates to normalized coordinates.
210
+ add_padding: Add padding to coordinates.
211
+ flipud: Flip coordinates vertically.
212
+ fliplr: Flip coordinates horizontally.
213
+ clip: Clip coordinates to stay within image boundaries.
214
+ remove_zero_area_boxes: Remove boxes with zero area.
215
+ update: Update instance variables.
216
+ concatenate: Concatenate multiple Instances objects.
201
217
 
202
218
  Examples:
203
- Create an Instances object
204
219
  >>> instances = Instances(
205
220
  ... bboxes=np.array([[10, 10, 30, 30], [20, 20, 40, 40]]),
206
221
  ... segments=[np.array([[5, 5], [10, 10]]), np.array([[15, 15], [20, 20]])],
207
222
  ... keypoints=np.array([[[5, 5, 1], [10, 10, 1]], [[15, 15, 1], [20, 20, 1]]]),
208
223
  ... )
209
-
210
- Note:
211
- The bounding box format is either 'xywh' or 'xyxy', and is determined by the `bbox_format` argument.
212
- This class does not perform input validation, and it assumes the inputs are well-formed.
213
224
  """
214
225
 
215
226
  def __init__(self, bboxes, segments=None, keypoints=None, bbox_format="xywh", normalized=True) -> None:
@@ -217,11 +228,11 @@ class Instances:
217
228
  Initialize the object with bounding boxes, segments, and keypoints.
218
229
 
219
230
  Args:
220
- bboxes (np.ndarray): Bounding boxes, shape [N, 4].
221
- segments (list | np.ndarray, optional): Segmentation masks. Defaults to None.
222
- keypoints (np.ndarray, optional): Keypoints, shape [N, 17, 3] and format (x, y, visible). Defaults to None.
223
- bbox_format (str, optional): Format of bboxes. Defaults to "xywh".
224
- normalized (bool, optional): Whether the coordinates are normalized. Defaults to True.
231
+ bboxes (np.ndarray): Bounding boxes, shape (N, 4).
232
+ segments (List | np.ndarray, optional): Segmentation masks.
233
+ keypoints (np.ndarray, optional): Keypoints, shape (N, 17, 3) in format (x, y, visible).
234
+ bbox_format (str, optional): Format of bboxes.
235
+ normalized (bool, optional): Whether the coordinates are normalized.
225
236
  """
226
237
  self._bboxes = Bboxes(bboxes=bboxes, format=bbox_format)
227
238
  self.keypoints = keypoints
@@ -229,7 +240,12 @@ class Instances:
229
240
  self.segments = segments
230
241
 
231
242
  def convert_bbox(self, format):
232
- """Convert bounding box format."""
243
+ """
244
+ Convert bounding box format.
245
+
246
+ Args:
247
+ format (str): Target format for conversion, one of 'xyxy', 'xywh', or 'ltwh'.
248
+ """
233
249
  self._bboxes.convert(format=format)
234
250
 
235
251
  @property
@@ -238,7 +254,14 @@ class Instances:
238
254
  return self._bboxes.areas()
239
255
 
240
256
  def scale(self, scale_w, scale_h, bbox_only=False):
241
- """Similar to denormalize func but without normalized sign."""
257
+ """
258
+ Scale coordinates by given factors.
259
+
260
+ Args:
261
+ scale_w (float): Scale factor for width.
262
+ scale_h (float): Scale factor for height.
263
+ bbox_only (bool, optional): Whether to scale only bounding boxes.
264
+ """
242
265
  self._bboxes.mul(scale=(scale_w, scale_h, scale_w, scale_h))
243
266
  if bbox_only:
244
267
  return
@@ -249,7 +272,13 @@ class Instances:
249
272
  self.keypoints[..., 1] *= scale_h
250
273
 
251
274
  def denormalize(self, w, h):
252
- """Denormalizes boxes, segments, and keypoints from normalized coordinates."""
275
+ """
276
+ Convert normalized coordinates to absolute coordinates.
277
+
278
+ Args:
279
+ w (int): Image width.
280
+ h (int): Image height.
281
+ """
253
282
  if not self.normalized:
254
283
  return
255
284
  self._bboxes.mul(scale=(w, h, w, h))
@@ -261,7 +290,13 @@ class Instances:
261
290
  self.normalized = False
262
291
 
263
292
  def normalize(self, w, h):
264
- """Normalize bounding boxes, segments, and keypoints to image dimensions."""
293
+ """
294
+ Convert absolute coordinates to normalized coordinates.
295
+
296
+ Args:
297
+ w (int): Image width.
298
+ h (int): Image height.
299
+ """
265
300
  if self.normalized:
266
301
  return
267
302
  self._bboxes.mul(scale=(1 / w, 1 / h, 1 / w, 1 / h))
@@ -273,7 +308,13 @@ class Instances:
273
308
  self.normalized = True
274
309
 
275
310
  def add_padding(self, padw, padh):
276
- """Handle rect and mosaic situation."""
311
+ """
312
+ Add padding to coordinates.
313
+
314
+ Args:
315
+ padw (int): Padding width.
316
+ padh (int): Padding height.
317
+ """
277
318
  assert not self.normalized, "you should add padding with absolute coordinates."
278
319
  self._bboxes.add(offset=(padw, padh, padw, padh))
279
320
  self.segments[..., 0] += padw
@@ -287,12 +328,10 @@ class Instances:
287
328
  Retrieve a specific instance or a set of instances using indexing.
288
329
 
289
330
  Args:
290
- index (int, slice, or np.ndarray): The index, slice, or boolean array to select
291
- the desired instances.
331
+ index (int | slice | np.ndarray): The index, slice, or boolean array to select the desired instances.
292
332
 
293
333
  Returns:
294
- Instances: A new Instances object containing the selected bounding boxes,
295
- segments, and keypoints if present.
334
+ (Instances): A new Instances object containing the selected boxes, segments, and keypoints if present.
296
335
 
297
336
  Note:
298
337
  When using boolean indexing, make sure to provide a boolean array with the same
@@ -311,7 +350,12 @@ class Instances:
311
350
  )
312
351
 
313
352
  def flipud(self, h):
314
- """Flips the coordinates of bounding boxes, segments, and keypoints vertically."""
353
+ """
354
+ Flip coordinates vertically.
355
+
356
+ Args:
357
+ h (int): Image height.
358
+ """
315
359
  if self._bboxes.format == "xyxy":
316
360
  y1 = self.bboxes[:, 1].copy()
317
361
  y2 = self.bboxes[:, 3].copy()
@@ -324,7 +368,12 @@ class Instances:
324
368
  self.keypoints[..., 1] = h - self.keypoints[..., 1]
325
369
 
326
370
  def fliplr(self, w):
327
- """Reverses the order of the bounding boxes and segments horizontally."""
371
+ """
372
+ Flip coordinates horizontally.
373
+
374
+ Args:
375
+ w (int): Image width.
376
+ """
328
377
  if self._bboxes.format == "xyxy":
329
378
  x1 = self.bboxes[:, 0].copy()
330
379
  x2 = self.bboxes[:, 2].copy()
@@ -337,7 +386,13 @@ class Instances:
337
386
  self.keypoints[..., 0] = w - self.keypoints[..., 0]
338
387
 
339
388
  def clip(self, w, h):
340
- """Clips bounding boxes, segments, and keypoints values to stay within image boundaries."""
389
+ """
390
+ Clip coordinates to stay within image boundaries.
391
+
392
+ Args:
393
+ w (int): Image width.
394
+ h (int): Image height.
395
+ """
341
396
  ori_format = self._bboxes.format
342
397
  self.convert_bbox(format="xyxy")
343
398
  self.bboxes[:, [0, 2]] = self.bboxes[:, [0, 2]].clip(0, w)
@@ -351,7 +406,12 @@ class Instances:
351
406
  self.keypoints[..., 1] = self.keypoints[..., 1].clip(0, h)
352
407
 
353
408
  def remove_zero_area_boxes(self):
354
- """Remove zero-area boxes, i.e. after clipping some boxes may have zero width or height."""
409
+ """
410
+ Remove zero-area boxes, i.e. after clipping some boxes may have zero width or height.
411
+
412
+ Returns:
413
+ (np.ndarray): Boolean array indicating which boxes were kept.
414
+ """
355
415
  good = self.bbox_areas > 0
356
416
  if not all(good):
357
417
  self._bboxes = self._bboxes[good]
@@ -362,7 +422,14 @@ class Instances:
362
422
  return good
363
423
 
364
424
  def update(self, bboxes, segments=None, keypoints=None):
365
- """Updates instance variables."""
425
+ """
426
+ Update instance variables.
427
+
428
+ Args:
429
+ bboxes (np.ndarray): New bounding boxes.
430
+ segments (np.ndarray, optional): New segments.
431
+ keypoints (np.ndarray, optional): New keypoints.
432
+ """
366
433
  self._bboxes = Bboxes(bboxes, format=self._bboxes.format)
367
434
  if segments is not None:
368
435
  self.segments = segments
@@ -376,14 +443,14 @@ class Instances:
376
443
  @classmethod
377
444
  def concatenate(cls, instances_list: List["Instances"], axis=0) -> "Instances":
378
445
  """
379
- Concatenates a list of Instances objects into a single Instances object.
446
+ Concatenate a list of Instances objects into a single Instances object.
380
447
 
381
448
  Args:
382
449
  instances_list (List[Instances]): A list of Instances objects to concatenate.
383
- axis (int, optional): The axis along which the arrays will be concatenated. Defaults to 0.
450
+ axis (int, optional): The axis along which the arrays will be concatenated.
384
451
 
385
452
  Returns:
386
- Instances: A new Instances object containing the concatenated bounding boxes,
453
+ (Instances): A new Instances object containing the concatenated bounding boxes,
387
454
  segments, and keypoints if present.
388
455
 
389
456
  Note:
ultralytics/utils/loss.py CHANGED
@@ -26,7 +26,7 @@ class VarifocalLoss(nn.Module):
26
26
 
27
27
  @staticmethod
28
28
  def forward(pred_score, gt_score, label, alpha=0.75, gamma=2.0):
29
- """Computes varfocal loss."""
29
+ """Compute varfocal loss between predictions and ground truth."""
30
30
  weight = alpha * pred_score.sigmoid().pow(gamma) * (1 - label) + gt_score * label
31
31
  with autocast(enabled=False):
32
32
  loss = (
@@ -41,12 +41,12 @@ class FocalLoss(nn.Module):
41
41
  """Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)."""
42
42
 
43
43
  def __init__(self):
44
- """Initializer for FocalLoss class with no parameters."""
44
+ """Initialize FocalLoss class with no parameters."""
45
45
  super().__init__()
46
46
 
47
47
  @staticmethod
48
48
  def forward(pred, label, gamma=1.5, alpha=0.25):
49
- """Calculates and updates confusion matrix for object detection/classification tasks."""
49
+ """Calculate focal loss with modulating factors for class imbalance."""
50
50
  loss = F.binary_cross_entropy_with_logits(pred, label, reduction="none")
51
51
  # p_t = torch.exp(-loss)
52
52
  # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability
@@ -63,20 +63,15 @@ class FocalLoss(nn.Module):
63
63
 
64
64
 
65
65
  class DFLoss(nn.Module):
66
- """Criterion class for computing DFL losses during training."""
66
+ """Criterion class for computing Distribution Focal Loss (DFL)."""
67
67
 
68
68
  def __init__(self, reg_max=16) -> None:
69
- """Initialize the DFL module."""
69
+ """Initialize the DFL module with regularization maximum."""
70
70
  super().__init__()
71
71
  self.reg_max = reg_max
72
72
 
73
73
  def __call__(self, pred_dist, target):
74
- """
75
- Return sum of left and right DFL losses.
76
-
77
- Distribution Focal Loss (DFL) proposed in Generalized Focal Loss
78
- https://ieeexplore.ieee.org/document/9792391
79
- """
74
+ """Return sum of left and right DFL losses from https://ieeexplore.ieee.org/document/9792391."""
80
75
  target = target.clamp_(0, self.reg_max - 1 - 0.01)
81
76
  tl = target.long() # target left
82
77
  tr = tl + 1 # target right
@@ -89,7 +84,7 @@ class DFLoss(nn.Module):
89
84
 
90
85
 
91
86
  class BboxLoss(nn.Module):
92
- """Criterion class for computing training losses during training."""
87
+ """Criterion class for computing training losses for bounding boxes."""
93
88
 
94
89
  def __init__(self, reg_max=16):
95
90
  """Initialize the BboxLoss module with regularization maximum and DFL settings."""
@@ -97,7 +92,7 @@ class BboxLoss(nn.Module):
97
92
  self.dfl_loss = DFLoss(reg_max) if reg_max > 1 else None
98
93
 
99
94
  def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask):
100
- """IoU loss."""
95
+ """Compute IoU and DFL losses for bounding boxes."""
101
96
  weight = target_scores.sum(-1)[fg_mask].unsqueeze(-1)
102
97
  iou = bbox_iou(pred_bboxes[fg_mask], target_bboxes[fg_mask], xywh=False, CIoU=True)
103
98
  loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum
@@ -114,14 +109,14 @@ class BboxLoss(nn.Module):
114
109
 
115
110
 
116
111
  class RotatedBboxLoss(BboxLoss):
117
- """Criterion class for computing training losses during training."""
112
+ """Criterion class for computing training losses for rotated bounding boxes."""
118
113
 
119
114
  def __init__(self, reg_max):
120
115
  """Initialize the BboxLoss module with regularization maximum and DFL settings."""
121
116
  super().__init__(reg_max)
122
117
 
123
118
  def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask):
124
- """IoU loss."""
119
+ """Compute IoU and DFL losses for rotated bounding boxes."""
125
120
  weight = target_scores.sum(-1)[fg_mask].unsqueeze(-1)
126
121
  iou = probiou(pred_bboxes[fg_mask], target_bboxes[fg_mask])
127
122
  loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum
@@ -138,15 +133,15 @@ class RotatedBboxLoss(BboxLoss):
138
133
 
139
134
 
140
135
  class KeypointLoss(nn.Module):
141
- """Criterion class for computing training losses."""
136
+ """Criterion class for computing keypoint losses."""
142
137
 
143
138
  def __init__(self, sigmas) -> None:
144
- """Initialize the KeypointLoss class."""
139
+ """Initialize the KeypointLoss class with keypoint sigmas."""
145
140
  super().__init__()
146
141
  self.sigmas = sigmas
147
142
 
148
143
  def forward(self, pred_kpts, gt_kpts, kpt_mask, area):
149
- """Calculates keypoint loss factor and Euclidean distance loss for predicted and actual keypoints."""
144
+ """Calculate keypoint loss factor and Euclidean distance loss for keypoints."""
150
145
  d = (pred_kpts[..., 0] - gt_kpts[..., 0]).pow(2) + (pred_kpts[..., 1] - gt_kpts[..., 1]).pow(2)
151
146
  kpt_loss_factor = kpt_mask.shape[1] / (torch.sum(kpt_mask != 0, dim=1) + 1e-9)
152
147
  # e = d / (2 * (area * self.sigmas) ** 2 + 1e-9) # from formula
@@ -155,10 +150,10 @@ class KeypointLoss(nn.Module):
155
150
 
156
151
 
157
152
  class v8DetectionLoss:
158
- """Criterion class for computing training losses."""
153
+ """Criterion class for computing training losses for YOLOv8 object detection."""
159
154
 
160
155
  def __init__(self, model, tal_topk=10): # model must be de-paralleled
161
- """Initializes v8DetectionLoss with the model, defining model-related properties and BCE loss function."""
156
+ """Initialize v8DetectionLoss with model parameters and task-aligned assignment settings."""
162
157
  device = next(model.parameters()).device # get model device
163
158
  h = model.args # hyperparameters
164
159
 
@@ -178,7 +173,7 @@ class v8DetectionLoss:
178
173
  self.proj = torch.arange(m.reg_max, dtype=torch.float, device=device)
179
174
 
180
175
  def preprocess(self, targets, batch_size, scale_tensor):
181
- """Preprocesses the target counts and matches with the input batch size to output a tensor."""
176
+ """Preprocess targets by converting to tensor format and scaling coordinates."""
182
177
  nl, ne = targets.shape
183
178
  if nl == 0:
184
179
  out = torch.zeros(batch_size, 0, ne - 1, device=self.device)
@@ -261,15 +256,15 @@ class v8DetectionLoss:
261
256
 
262
257
 
263
258
  class v8SegmentationLoss(v8DetectionLoss):
264
- """Criterion class for computing training losses."""
259
+ """Criterion class for computing training losses for YOLOv8 segmentation."""
265
260
 
266
261
  def __init__(self, model): # model must be de-paralleled
267
- """Initializes the v8SegmentationLoss class, taking a de-paralleled model as argument."""
262
+ """Initialize the v8SegmentationLoss class with model parameters and mask overlap setting."""
268
263
  super().__init__(model)
269
264
  self.overlap = model.args.overlap_mask
270
265
 
271
266
  def __call__(self, preds, batch):
272
- """Calculate and return the loss for the YOLO model."""
267
+ """Calculate and return the combined loss for detection and segmentation."""
273
268
  loss = torch.zeros(4, device=self.device) # box, cls, dfl
274
269
  feats, pred_masks, proto = preds if len(preds) == 3 else preds[1]
275
270
  batch_size, _, mask_h, mask_w = proto.shape # batch size, number of masks, mask height, mask width
@@ -444,10 +439,10 @@ class v8SegmentationLoss(v8DetectionLoss):
444
439
 
445
440
 
446
441
  class v8PoseLoss(v8DetectionLoss):
447
- """Criterion class for computing training losses."""
442
+ """Criterion class for computing training losses for YOLOv8 pose estimation."""
448
443
 
449
444
  def __init__(self, model): # model must be de-paralleled
450
- """Initializes v8PoseLoss with model, sets keypoint variables and declares a keypoint loss instance."""
445
+ """Initialize v8PoseLoss with model parameters and keypoint-specific loss functions."""
451
446
  super().__init__(model)
452
447
  self.kpt_shape = model.model[-1].kpt_shape
453
448
  self.bce_pose = nn.BCEWithLogitsLoss()
@@ -457,7 +452,7 @@ class v8PoseLoss(v8DetectionLoss):
457
452
  self.keypoint_loss = KeypointLoss(sigmas=sigmas)
458
453
 
459
454
  def __call__(self, preds, batch):
460
- """Calculate the total loss and detach it."""
455
+ """Calculate the total loss and detach it for pose estimation."""
461
456
  loss = torch.zeros(5, device=self.device) # box, cls, dfl, kpt_location, kpt_visibility
462
457
  feats, pred_kpts = preds if isinstance(preds[0], list) else preds[1]
463
458
  pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
@@ -524,7 +519,7 @@ class v8PoseLoss(v8DetectionLoss):
524
519
 
525
520
  @staticmethod
526
521
  def kpts_decode(anchor_points, pred_kpts):
527
- """Decodes predicted keypoints to image coordinates."""
522
+ """Decode predicted keypoints to image coordinates."""
528
523
  y = pred_kpts.clone()
529
524
  y[..., :2] *= 2.0
530
525
  y[..., 0] += anchor_points[:, [0]] - 0.5
@@ -599,7 +594,7 @@ class v8PoseLoss(v8DetectionLoss):
599
594
 
600
595
 
601
596
  class v8ClassificationLoss:
602
- """Criterion class for computing training losses."""
597
+ """Criterion class for computing training losses for classification."""
603
598
 
604
599
  def __call__(self, preds, batch):
605
600
  """Compute the classification loss between predictions and true labels."""
@@ -613,13 +608,13 @@ class v8OBBLoss(v8DetectionLoss):
613
608
  """Calculates losses for object detection, classification, and box distribution in rotated YOLO models."""
614
609
 
615
610
  def __init__(self, model):
616
- """Initializes v8OBBLoss with model, assigner, and rotated bbox loss; note model must be de-paralleled."""
611
+ """Initialize v8OBBLoss with model, assigner, and rotated bbox loss; model must be de-paralleled."""
617
612
  super().__init__(model)
618
613
  self.assigner = RotatedTaskAlignedAssigner(topk=10, num_classes=self.nc, alpha=0.5, beta=6.0)
619
614
  self.bbox_loss = RotatedBboxLoss(self.reg_max).to(self.device)
620
615
 
621
616
  def preprocess(self, targets, batch_size, scale_tensor):
622
- """Preprocesses the target counts and matches with the input batch size to output a tensor."""
617
+ """Preprocess targets for oriented bounding box detection."""
623
618
  if targets.shape[0] == 0:
624
619
  out = torch.zeros(batch_size, 0, 6, device=self.device)
625
620
  else:
@@ -636,7 +631,7 @@ class v8OBBLoss(v8DetectionLoss):
636
631
  return out
637
632
 
638
633
  def __call__(self, preds, batch):
639
- """Calculate and return the loss for the YOLO model."""
634
+ """Calculate and return the loss for oriented bounding box detection."""
640
635
  loss = torch.zeros(3, device=self.device) # box, cls, dfl
641
636
  feats, pred_angle = preds if isinstance(preds[0], list) else preds[1]
642
637
  batch_size = pred_angle.shape[0] # batch size, number of masks, mask height, mask width
@@ -726,7 +721,7 @@ class v8OBBLoss(v8DetectionLoss):
726
721
 
727
722
 
728
723
  class E2EDetectLoss:
729
- """Criterion class for computing training losses."""
724
+ """Criterion class for computing training losses for end-to-end detection."""
730
725
 
731
726
  def __init__(self, model):
732
727
  """Initialize E2EDetectLoss with one-to-many and one-to-one detection losses using the provided model."""