ultralytics 8.2.81__py3-none-any.whl → 8.2.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ultralytics might be problematic.

Files changed (97):
  1. tests/test_solutions.py +0 -4
  2. ultralytics/__init__.py +1 -1
  3. ultralytics/cfg/__init__.py +21 -21
  4. ultralytics/data/annotator.py +1 -1
  5. ultralytics/data/augment.py +58 -58
  6. ultralytics/data/base.py +3 -3
  7. ultralytics/data/converter.py +7 -8
  8. ultralytics/data/explorer/explorer.py +7 -23
  9. ultralytics/data/loaders.py +2 -2
  10. ultralytics/data/split_dota.py +11 -3
  11. ultralytics/data/utils.py +6 -10
  12. ultralytics/engine/exporter.py +2 -4
  13. ultralytics/engine/model.py +47 -47
  14. ultralytics/engine/predictor.py +1 -1
  15. ultralytics/engine/results.py +28 -28
  16. ultralytics/engine/trainer.py +11 -8
  17. ultralytics/engine/tuner.py +7 -8
  18. ultralytics/engine/validator.py +3 -5
  19. ultralytics/hub/__init__.py +5 -5
  20. ultralytics/hub/auth.py +6 -2
  21. ultralytics/hub/session.py +3 -5
  22. ultralytics/models/fastsam/model.py +13 -10
  23. ultralytics/models/fastsam/predict.py +2 -2
  24. ultralytics/models/fastsam/utils.py +0 -1
  25. ultralytics/models/nas/model.py +4 -4
  26. ultralytics/models/nas/predict.py +1 -2
  27. ultralytics/models/nas/val.py +1 -1
  28. ultralytics/models/rtdetr/predict.py +1 -1
  29. ultralytics/models/rtdetr/train.py +1 -1
  30. ultralytics/models/rtdetr/val.py +1 -1
  31. ultralytics/models/sam/model.py +11 -11
  32. ultralytics/models/sam/modules/decoders.py +7 -4
  33. ultralytics/models/sam/modules/sam.py +9 -1
  34. ultralytics/models/sam/modules/tiny_encoder.py +1 -1
  35. ultralytics/models/sam/modules/transformer.py +0 -2
  36. ultralytics/models/sam/modules/utils.py +1 -1
  37. ultralytics/models/sam/predict.py +10 -10
  38. ultralytics/models/utils/loss.py +29 -17
  39. ultralytics/models/utils/ops.py +1 -5
  40. ultralytics/models/yolo/classify/predict.py +1 -1
  41. ultralytics/models/yolo/classify/train.py +1 -1
  42. ultralytics/models/yolo/classify/val.py +1 -1
  43. ultralytics/models/yolo/detect/predict.py +1 -1
  44. ultralytics/models/yolo/detect/train.py +1 -1
  45. ultralytics/models/yolo/detect/val.py +1 -1
  46. ultralytics/models/yolo/model.py +6 -2
  47. ultralytics/models/yolo/obb/predict.py +1 -1
  48. ultralytics/models/yolo/obb/train.py +1 -1
  49. ultralytics/models/yolo/obb/val.py +2 -2
  50. ultralytics/models/yolo/pose/predict.py +1 -1
  51. ultralytics/models/yolo/pose/train.py +1 -1
  52. ultralytics/models/yolo/pose/val.py +1 -1
  53. ultralytics/models/yolo/segment/predict.py +1 -1
  54. ultralytics/models/yolo/segment/train.py +1 -1
  55. ultralytics/models/yolo/segment/val.py +1 -1
  56. ultralytics/models/yolo/world/train.py +1 -1
  57. ultralytics/nn/autobackend.py +2 -2
  58. ultralytics/nn/modules/__init__.py +2 -2
  59. ultralytics/nn/modules/block.py +8 -20
  60. ultralytics/nn/modules/conv.py +1 -3
  61. ultralytics/nn/modules/head.py +16 -31
  62. ultralytics/nn/modules/transformer.py +0 -1
  63. ultralytics/nn/modules/utils.py +0 -1
  64. ultralytics/nn/tasks.py +11 -9
  65. ultralytics/solutions/__init__.py +1 -0
  66. ultralytics/solutions/ai_gym.py +0 -2
  67. ultralytics/solutions/analytics.py +1 -6
  68. ultralytics/solutions/heatmap.py +0 -1
  69. ultralytics/solutions/object_counter.py +0 -2
  70. ultralytics/solutions/queue_management.py +0 -2
  71. ultralytics/trackers/basetrack.py +1 -1
  72. ultralytics/trackers/byte_tracker.py +2 -2
  73. ultralytics/trackers/utils/gmc.py +5 -5
  74. ultralytics/trackers/utils/kalman_filter.py +1 -1
  75. ultralytics/trackers/utils/matching.py +1 -5
  76. ultralytics/utils/__init__.py +137 -24
  77. ultralytics/utils/autobatch.py +7 -4
  78. ultralytics/utils/benchmarks.py +6 -14
  79. ultralytics/utils/callbacks/base.py +0 -1
  80. ultralytics/utils/callbacks/comet.py +0 -1
  81. ultralytics/utils/callbacks/tensorboard.py +0 -1
  82. ultralytics/utils/checks.py +15 -18
  83. ultralytics/utils/downloads.py +6 -7
  84. ultralytics/utils/files.py +3 -4
  85. ultralytics/utils/instance.py +17 -7
  86. ultralytics/utils/metrics.py +16 -16
  87. ultralytics/utils/ops.py +8 -8
  88. ultralytics/utils/plotting.py +25 -35
  89. ultralytics/utils/tal.py +27 -18
  90. ultralytics/utils/torch_utils.py +12 -13
  91. ultralytics/utils/tuner.py +2 -3
  92. {ultralytics-8.2.81.dist-info → ultralytics-8.2.83.dist-info}/METADATA +4 -3
  93. {ultralytics-8.2.81.dist-info → ultralytics-8.2.83.dist-info}/RECORD +97 -97
  94. {ultralytics-8.2.81.dist-info → ultralytics-8.2.83.dist-info}/WHEEL +1 -1
  95. {ultralytics-8.2.81.dist-info → ultralytics-8.2.83.dist-info}/LICENSE +0 -0
  96. {ultralytics-8.2.81.dist-info → ultralytics-8.2.83.dist-info}/entry_points.txt +0 -0
  97. {ultralytics-8.2.81.dist-info → ultralytics-8.2.83.dist-info}/top_level.txt +0 -0
@@ -41,8 +41,8 @@ class SAM(Model):
  info: Logs information about the SAM model.

  Examples:
- >>> sam = SAM('sam_b.pt')
- >>> results = sam.predict('image.jpg', points=[[500, 375]])
+ >>> sam = SAM("sam_b.pt")
+ >>> results = sam.predict("image.jpg", points=[[500, 375]])
  >>> for r in results:
  >>> print(f"Detected {len(r.masks)} masks")
  """
@@ -58,7 +58,7 @@ class SAM(Model):
  NotImplementedError: If the model file extension is not .pt or .pth.

  Examples:
- >>> sam = SAM('sam_b.pt')
+ >>> sam = SAM("sam_b.pt")
  >>> print(sam.is_sam2)
  """
  if model and Path(model).suffix not in {".pt", ".pth"}:
@@ -78,8 +78,8 @@ class SAM(Model):
  task (str | None): Task name. If provided, it specifies the particular task the model is being loaded for.

  Examples:
- >>> sam = SAM('sam_b.pt')
- >>> sam._load('path/to/custom_weights.pt')
+ >>> sam = SAM("sam_b.pt")
+ >>> sam._load("path/to/custom_weights.pt")
  """
  self.model = build_sam(weights)

@@ -100,8 +100,8 @@ class SAM(Model):
  (List): The model predictions.

  Examples:
- >>> sam = SAM('sam_b.pt')
- >>> results = sam.predict('image.jpg', points=[[500, 375]])
+ >>> sam = SAM("sam_b.pt")
+ >>> results = sam.predict("image.jpg", points=[[500, 375]])
  >>> for r in results:
  ... print(f"Detected {len(r.masks)} masks")
  """
@@ -130,8 +130,8 @@ class SAM(Model):
  (List): The model predictions, typically containing segmentation masks and other relevant information.

  Examples:
- >>> sam = SAM('sam_b.pt')
- >>> results = sam('image.jpg', points=[[500, 375]])
+ >>> sam = SAM("sam_b.pt")
+ >>> results = sam("image.jpg", points=[[500, 375]])
  >>> print(f"Detected {len(results[0].masks)} masks")
  """
  return self.predict(source, stream, bboxes, points, labels, **kwargs)
@@ -151,7 +151,7 @@ class SAM(Model):
  (Tuple): A tuple containing the model's information (string representations of the model).

  Examples:
- >>> sam = SAM('sam_b.pt')
+ >>> sam = SAM("sam_b.pt")
  >>> info = sam.info()
  >>> print(info[0]) # Print summary information
  """
@@ -167,7 +167,7 @@ class SAM(Model):
  class. For SAM2 models, it maps to SAM2Predictor, otherwise to the standard Predictor.

  Examples:
- >>> sam = SAM('sam_b.pt')
+ >>> sam = SAM("sam_b.pt")
  >>> task_map = sam.task_map
  >>> print(task_map)
  {'segment': <class 'ultralytics.models.sam.predict.Predictor'>}
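
Taken together, the `model.py` hunks above only restyle docstring examples to double quotes. For reference, a minimal end-to-end sketch of the documented SAM API (weight file and point prompt taken from the examples above; automatic weight download is an assumption about the release):

```python
from ultralytics import SAM

# Load a Segment Anything model, as in the docstring examples above
sam = SAM("sam_b.pt")

# Prompt with a single foreground point; bboxes=[[x1, y1, x2, y2]] works the same way
results = sam.predict("image.jpg", points=[[500, 375]])

for r in results:
    print(f"Detected {len(r.masks)} masks")
```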
@@ -32,8 +32,9 @@ class MaskDecoder(nn.Module):

  Examples:
  >>> decoder = MaskDecoder(transformer_dim=256, transformer=transformer_module)
- >>> masks, iou_pred = decoder(image_embeddings, image_pe, sparse_prompt_embeddings,
- ... dense_prompt_embeddings, multimask_output=True)
+ >>> masks, iou_pred = decoder(
+ ... image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, multimask_output=True
+ ... )
  >>> print(f"Predicted masks shape: {masks.shape}, IoU predictions shape: {iou_pred.shape}")
  """

@@ -213,7 +214,8 @@ class SAM2MaskDecoder(nn.Module):
  >>> dense_prompt_embeddings = torch.rand(1, 256, 64, 64)
  >>> decoder = SAM2MaskDecoder(256, transformer)
  >>> masks, iou_pred, sam_tokens_out, obj_score_logits = decoder.forward(
- ... image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, True, False)
+ ... image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, True, False
+ ... )
  """

  def __init__(
@@ -345,7 +347,8 @@ class SAM2MaskDecoder(nn.Module):
  >>> dense_prompt_embeddings = torch.rand(1, 256, 64, 64)
  >>> decoder = SAM2MaskDecoder(256, transformer)
  >>> masks, iou_pred, sam_tokens_out, obj_score_logits = decoder.forward(
- ... image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, True, False)
+ ... image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, True, False
+ ... )
  """
  masks, iou_pred, mask_tokens_out, object_score_logits = self.predict_masks(
  image_embeddings=image_embeddings,
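
For shape-level context, here is a sketch of driving `MaskDecoder` with random tensors at the embedding sizes shown in the SAM2MaskDecoder examples above. The `TwoWayTransformer` construction is an assumption based on the companion `modules/transformer.py` file, and its hyperparameters may differ between releases:

```python
import torch

from ultralytics.models.sam.modules.decoders import MaskDecoder
from ultralytics.models.sam.modules.transformer import TwoWayTransformer

# Assumed SAM-style transformer config; embedding_dim must match transformer_dim
transformer = TwoWayTransformer(depth=2, embedding_dim=256, mlp_dim=2048, num_heads=8)
decoder = MaskDecoder(transformer_dim=256, transformer=transformer)

image_embeddings = torch.rand(1, 256, 64, 64)        # backbone features
image_pe = torch.rand(1, 256, 64, 64)                # positional encoding
sparse_prompt_embeddings = torch.rand(1, 2, 256)     # e.g. one point prompt plus padding
dense_prompt_embeddings = torch.rand(1, 256, 64, 64)

masks, iou_pred = decoder(
    image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, multimask_output=True
)
print(f"Predicted masks shape: {masks.shape}, IoU predictions shape: {iou_pred.shape}")
```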
@@ -417,7 +417,15 @@ class SAM2Model(torch.nn.Module):
  >>> point_inputs = {"point_coords": torch.rand(1, 2, 2), "point_labels": torch.tensor([[1, 0]])}
  >>> mask_inputs = torch.rand(1, 1, 512, 512)
  >>> results = model._forward_sam_heads(backbone_features, point_inputs, mask_inputs)
- >>> low_res_multimasks, high_res_multimasks, ious, low_res_masks, high_res_masks, obj_ptr, object_score_logits = results
+ >>> (
+ ... low_res_multimasks,
+ ... high_res_multimasks,
+ ... ious,
+ ... low_res_masks,
+ ... high_res_masks,
+ ... obj_ptr,
+ ... object_score_logits,
+ ... ) = results
  """
  B = backbone_features.size(0)
  device = backbone_features.device
@@ -716,7 +716,7 @@ class BasicLayer(nn.Module):

  Examples:
  >>> layer = BasicLayer(dim=96, input_resolution=(56, 56), depth=2, num_heads=3, window_size=7)
- >>> x = torch.randn(1, 56*56, 96)
+ >>> x = torch.randn(1, 56 * 56, 96)
  >>> output = layer(x)
  >>> print(output.shape)
  """
@@ -232,7 +232,6 @@ class TwoWayAttentionBlock(nn.Module):

  def forward(self, queries: Tensor, keys: Tensor, query_pe: Tensor, key_pe: Tensor) -> Tuple[Tensor, Tensor]:
  """Applies two-way attention to process query and key embeddings in a transformer block."""
-
  # Self attention block
  if self.skip_first_layer_pe:
  queries = self.self_attn(q=queries, k=queries, v=queries)
@@ -353,7 +352,6 @@ class Attention(nn.Module):

  def forward(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor:
  """Applies multi-head attention to query, key, and value tensors with optional downsampling."""
-
  # Input projections
  q = self.q_proj(q)
  k = self.k_proj(k)
@@ -22,7 +22,7 @@ def select_closest_cond_frames(frame_idx, cond_frame_outputs, max_cond_frame_num

  Examples:
  >>> frame_idx = 5
- >>> cond_frame_outputs = {1: 'a', 3: 'b', 7: 'c', 9: 'd'}
+ >>> cond_frame_outputs = {1: "a", 3: "b", 7: "c", 9: "d"}
  >>> max_cond_frame_num = 2
  >>> selected, unselected = select_closest_cond_frames(frame_idx, cond_frame_outputs, max_cond_frame_num)
  >>> print(selected)
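
The docstring example above runs as-is once imported; a minimal sketch (import path per this diff's file list, and the exact selected/unselected split is an assumption from the function's name and docstring):

```python
from ultralytics.models.sam.modules.utils import select_closest_cond_frames

frame_idx = 5
cond_frame_outputs = {1: "a", 3: "b", 7: "c", 9: "d"}

# Keep the two conditioning frames nearest to frame 5; the rest go to "unselected"
selected, unselected = select_closest_cond_frames(frame_idx, cond_frame_outputs, 2)
print(selected)    # expected {3: 'b', 7: 'c'} (assumption)
print(unselected)  # expected {1: 'a', 9: 'd'} (assumption)
```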
@@ -69,8 +69,8 @@ class Predictor(BasePredictor):

  Examples:
  >>> predictor = Predictor()
- >>> predictor.setup_model(model_path='sam_model.pt')
- >>> predictor.set_image('image.jpg')
+ >>> predictor.setup_model(model_path="sam_model.pt")
+ >>> predictor.set_image("image.jpg")
  >>> masks, scores, boxes = predictor.generate()
  >>> results = predictor.postprocess((masks, scores, boxes), im, orig_img)
  """
@@ -90,8 +90,8 @@ class Predictor(BasePredictor):

  Examples:
  >>> predictor = Predictor(cfg=DEFAULT_CFG)
- >>> predictor = Predictor(overrides={'imgsz': 640})
- >>> predictor = Predictor(_callbacks={'on_predict_start': custom_callback})
+ >>> predictor = Predictor(overrides={"imgsz": 640})
+ >>> predictor = Predictor(_callbacks={"on_predict_start": custom_callback})
  """
  if overrides is None:
  overrides = {}
@@ -188,8 +188,8 @@ class Predictor(BasePredictor):

  Examples:
  >>> predictor = Predictor()
- >>> predictor.setup_model(model_path='sam_model.pt')
- >>> predictor.set_image('image.jpg')
+ >>> predictor.setup_model(model_path="sam_model.pt")
+ >>> predictor.set_image("image.jpg")
  >>> masks, scores, logits = predictor.inference(im, bboxes=[[0, 0, 100, 100]])
  """
  # Override prompts if any stored in self.prompts
@@ -475,8 +475,8 @@ class Predictor(BasePredictor):

  Examples:
  >>> predictor = Predictor()
- >>> predictor.setup_source('path/to/images')
- >>> predictor.setup_source('video.mp4')
+ >>> predictor.setup_source("path/to/images")
+ >>> predictor.setup_source("video.mp4")
  >>> predictor.setup_source(None) # Uses default source if available

  Notes:
@@ -504,8 +504,8 @@ class Predictor(BasePredictor):

  Examples:
  >>> predictor = Predictor()
- >>> predictor.set_image('path/to/image.jpg')
- >>> predictor.set_image(cv2.imread('path/to/image.jpg'))
+ >>> predictor.set_image("path/to/image.jpg")
+ >>> predictor.set_image(cv2.imread("path/to/image.jpg"))

  Notes:
  - This method should be called before performing inference on a new image.
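
Stitching the Predictor docstrings above into one flow, a minimal interactive-prompting sketch (the override values and `sam_b.pt` weight name are assumptions; `set_image` caches the image encoding so several prompts can reuse it):

```python
from ultralytics.models.sam import Predictor as SAMPredictor

# Configure once; the task/mode/model values here are illustrative assumptions
overrides = dict(conf=0.25, task="segment", mode="predict", imgsz=1024, model="sam_b.pt")
predictor = SAMPredictor(overrides=overrides)

predictor.set_image("image.jpg")                      # encode the image once
results = predictor(bboxes=[[100, 100, 400, 400]])    # box prompt
results = predictor(points=[[250, 250]], labels=[1])  # point prompt on the same encoding
predictor.reset_image()                               # clear the cache before the next image
```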
@@ -34,15 +34,19 @@ class DETRLoss(nn.Module):
  self, nc=80, loss_gain=None, aux_loss=True, use_fl=True, use_vfl=False, use_uni_match=False, uni_match_ind=0
  ):
  """
- DETR loss function.
+ Initialize DETR loss function with customizable components and gains.
+
+ Uses default loss_gain if not provided. Initializes HungarianMatcher with
+ preset cost gains. Supports auxiliary losses and various loss types.

  Args:
- nc (int): The number of classes.
- loss_gain (dict): The coefficient of loss.
- aux_loss (bool): If 'aux_loss = True', loss at each decoder layer are to be used.
- use_vfl (bool): Use VarifocalLoss or not.
- use_uni_match (bool): Whether to use a fixed layer to assign labels for auxiliary branch.
- uni_match_ind (int): The fixed indices of a layer.
+ nc (int): Number of classes.
+ loss_gain (dict): Coefficients for different loss components.
+ aux_loss (bool): Use auxiliary losses from each decoder layer.
+ use_fl (bool): Use FocalLoss.
+ use_vfl (bool): Use VarifocalLoss.
+ use_uni_match (bool): Use fixed layer for auxiliary branch label assignment.
+ uni_match_ind (int): Index of fixed layer for uni_match.
  """
  super().__init__()

@@ -82,9 +86,7 @@ class DETRLoss(nn.Module):
  return {name_class: loss_cls.squeeze() * self.loss_gain["class"]}

  def _get_loss_bbox(self, pred_bboxes, gt_bboxes, postfix=""):
- """Calculates and returns the bounding box loss and GIoU loss for the predicted and ground truth bounding
- boxes.
- """
+ """Computes bounding box and GIoU losses for predicted and ground truth bounding boxes."""
  # Boxes: [b, query, 4], gt_bbox: list[[n, 4]]
  name_bbox = f"loss_bbox{postfix}"
  name_giou = f"loss_giou{postfix}"
@@ -250,14 +252,24 @@ class DETRLoss(nn.Module):

  def forward(self, pred_bboxes, pred_scores, batch, postfix="", **kwargs):
  """
+ Calculate loss for predicted bounding boxes and scores.
+
  Args:
- pred_bboxes (torch.Tensor): [l, b, query, 4]
- pred_scores (torch.Tensor): [l, b, query, num_classes]
- batch (dict): A dict includes:
- gt_cls (torch.Tensor) with shape [num_gts, ],
- gt_bboxes (torch.Tensor): [num_gts, 4],
- gt_groups (List(int)): a list of batch size length includes the number of gts of each image.
- postfix (str): postfix of loss name.
+ pred_bboxes (torch.Tensor): Predicted bounding boxes, shape [l, b, query, 4].
+ pred_scores (torch.Tensor): Predicted class scores, shape [l, b, query, num_classes].
+ batch (dict): Batch information containing:
+ cls (torch.Tensor): Ground truth classes, shape [num_gts].
+ bboxes (torch.Tensor): Ground truth bounding boxes, shape [num_gts, 4].
+ gt_groups (List[int]): Number of ground truths for each image in the batch.
+ postfix (str): Postfix for loss names.
+ **kwargs (Any): Additional arguments, may include 'match_indices'.
+
+ Returns:
+ (dict): Computed losses, including main and auxiliary (if enabled).
+
+ Note:
+ Uses last elements of pred_bboxes and pred_scores for main loss, and the rest for auxiliary losses if
+ self.aux_loss is True.
  """
  self.device = pred_bboxes.device
  match_indices = kwargs.get("match_indices", None)
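
Using the shapes documented above, a minimal sketch that exercises this forward with random tensors (the import path follows this diff's file list; the two-image batch is an illustrative assumption):

```python
import torch

from ultralytics.models.utils.loss import DETRLoss

l, b, nq, nc = 6, 2, 100, 80           # decoder layers, batch, queries, classes
criterion = DETRLoss(nc=nc, aux_loss=True)

pred_bboxes = torch.rand(l, b, nq, 4)  # normalized xywh, one set per decoder layer
pred_scores = torch.rand(l, b, nq, nc) # class logits per decoder layer
batch = {
    "cls": torch.tensor([3, 17, 42]),  # 3 ground-truth objects across the batch
    "bboxes": torch.rand(3, 4),        # [num_gts, 4] boxes
    "gt_groups": [2, 1],               # image 0 has 2 GTs, image 1 has 1
}

losses = criterion(pred_bboxes, pred_scores, batch)  # main loss from last layer, aux from the rest
print({k: float(v) for k, v in losses.items()})
```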
@@ -32,9 +32,7 @@ class HungarianMatcher(nn.Module):
  """

  def __init__(self, cost_gain=None, use_fl=True, with_mask=False, num_sample_points=12544, alpha=0.25, gamma=2.0):
- """Initializes HungarianMatcher with cost coefficients, Focal Loss, mask prediction, sample points, and alpha
- gamma factors.
- """
+ """Initializes a HungarianMatcher module for optimal assignment of predicted and ground truth bounding boxes."""
  super().__init__()
  if cost_gain is None:
  cost_gain = {"class": 1, "bbox": 5, "giou": 2, "mask": 1, "dice": 1}
@@ -70,7 +68,6 @@ class HungarianMatcher(nn.Module):
  For each batch element, it holds:
  len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
  """
-
  bs, nq, nc = pred_scores.shape

  if sum(gt_groups) == 0:
@@ -175,7 +172,6 @@ def get_cdn_group(
  bounding boxes, attention mask and meta information for denoising. If not in training mode or 'num_dn'
  is less than or equal to 0, the function returns None for all elements in the tuple.
  """
-
  if (not training) or num_dn <= 0:
  return None, None, None, None
  gt_groups = batch["gt_groups"]
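
The matcher and denoising hunks above are doc-only. For reference, a shape-level sketch of invoking HungarianMatcher directly (import path per this diff's file list; the forward signature is an assumption from the surrounding docstring):

```python
import torch

from ultralytics.models.utils.ops import HungarianMatcher

matcher = HungarianMatcher()          # default cost gains shown in the hunk above
pred_bboxes = torch.rand(2, 100, 4)   # [bs, num_queries, 4], normalized xywh
pred_scores = torch.rand(2, 100, 80)  # [bs, num_queries, num_classes]
gt_bboxes = torch.rand(3, 4)          # all GT boxes in the batch, stacked
gt_cls = torch.tensor([3, 17, 42])
gt_groups = [2, 1]                    # per-image GT counts

indices = matcher(pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups)
for index_i, index_j in indices:
    assert len(index_i) == len(index_j)  # min(num_queries, num_target_boxes) pairs
```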
@@ -21,7 +21,7 @@ class ClassificationPredictor(BasePredictor):
  from ultralytics.utils import ASSETS
  from ultralytics.models.yolo.classify import ClassificationPredictor

- args = dict(model='yolov8n-cls.pt', source=ASSETS)
+ args = dict(model="yolov8n-cls.pt", source=ASSETS)
  predictor = ClassificationPredictor(overrides=args)
  predictor.predict_cli()
  ```
@@ -22,7 +22,7 @@ class ClassificationTrainer(BaseTrainer):
  ```python
  from ultralytics.models.yolo.classify import ClassificationTrainer

- args = dict(model='yolov8n-cls.pt', data='imagenet10', epochs=3)
+ args = dict(model="yolov8n-cls.pt", data="imagenet10", epochs=3)
  trainer = ClassificationTrainer(overrides=args)
  trainer.train()
  ```
@@ -20,7 +20,7 @@ class ClassificationValidator(BaseValidator):
  ```python
  from ultralytics.models.yolo.classify import ClassificationValidator

- args = dict(model='yolov8n-cls.pt', data='imagenet10')
+ args = dict(model="yolov8n-cls.pt", data="imagenet10")
  validator = ClassificationValidator(args=args)
  validator()
  ```
@@ -14,7 +14,7 @@ class DetectionPredictor(BasePredictor):
  from ultralytics.utils import ASSETS
  from ultralytics.models.yolo.detect import DetectionPredictor

- args = dict(model='yolov8n.pt', source=ASSETS)
+ args = dict(model="yolov8n.pt", source=ASSETS)
  predictor = DetectionPredictor(overrides=args)
  predictor.predict_cli()
  ```
@@ -24,7 +24,7 @@ class DetectionTrainer(BaseTrainer):
  ```python
  from ultralytics.models.yolo.detect import DetectionTrainer

- args = dict(model='yolov8n.pt', data='coco8.yaml', epochs=3)
+ args = dict(model="yolov8n.pt", data="coco8.yaml", epochs=3)
  trainer = DetectionTrainer(overrides=args)
  trainer.train()
  ```
@@ -22,7 +22,7 @@ class DetectionValidator(BaseValidator):
  ```python
  from ultralytics.models.yolo.detect import DetectionValidator

- args = dict(model='yolov8n.pt', data='coco8.yaml')
+ args = dict(model="yolov8n.pt", data="coco8.yaml")
  validator = DetectionValidator(args=args)
  validator()
  ```
@@ -64,10 +64,14 @@ class YOLOWorld(Model):

  def __init__(self, model="yolov8s-world.pt", verbose=False) -> None:
  """
- Initializes the YOLOv8-World model with the given pre-trained model file. Supports *.pt and *.yaml formats.
+ Initialize YOLOv8-World model with a pre-trained model file.
+
+ Loads a YOLOv8-World model for object detection. If no custom class names are provided, it assigns default
+ COCO class names.

  Args:
- model (str | Path): Path to the pre-trained model. Defaults to 'yolov8s-world.pt'.
+ model (str | Path): Path to the pre-trained model file. Supports *.pt and *.yaml formats.
+ verbose (bool): If True, prints additional information during initialization.
  """
  super().__init__(model=model, task="detect", verbose=verbose)

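A minimal open-vocabulary sketch consistent with the docstring above (loading `yolov8s-world.pt` as documented; the `set_classes` call and class list are assumptions from the wider YOLO-World API, not from this diff):

```python
from ultralytics import YOLOWorld

model = YOLOWorld("yolov8s-world.pt")     # ships with default COCO class names
model.set_classes(["person", "bicycle"])  # optionally narrow the vocabulary (assumed API)
results = model.predict("image.jpg")
results[0].show()
```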
@@ -16,7 +16,7 @@ class OBBPredictor(DetectionPredictor):
  from ultralytics.utils import ASSETS
  from ultralytics.models.yolo.obb import OBBPredictor

- args = dict(model='yolov8n-obb.pt', source=ASSETS)
+ args = dict(model="yolov8n-obb.pt", source=ASSETS)
  predictor = OBBPredictor(overrides=args)
  predictor.predict_cli()
  ```
@@ -15,7 +15,7 @@ class OBBTrainer(yolo.detect.DetectionTrainer):
  ```python
  from ultralytics.models.yolo.obb import OBBTrainer

- args = dict(model='yolov8n-obb.pt', data='dota8.yaml', epochs=3)
+ args = dict(model="yolov8n-obb.pt", data="dota8.yaml", epochs=3)
  trainer = OBBTrainer(overrides=args)
  trainer.train()
  ```
@@ -18,9 +18,9 @@ class OBBValidator(DetectionValidator):
  ```python
  from ultralytics.models.yolo.obb import OBBValidator

- args = dict(model='yolov8n-obb.pt', data='dota8.yaml')
+ args = dict(model="yolov8n-obb.pt", data="dota8.yaml")
  validator = OBBValidator(args=args)
- validator(model=args['model'])
+ validator(model=args["model"])
  ```
  """

@@ -14,7 +14,7 @@ class PosePredictor(DetectionPredictor):
  from ultralytics.utils import ASSETS
  from ultralytics.models.yolo.pose import PosePredictor

- args = dict(model='yolov8n-pose.pt', source=ASSETS)
+ args = dict(model="yolov8n-pose.pt", source=ASSETS)
  predictor = PosePredictor(overrides=args)
  predictor.predict_cli()
  ```
@@ -16,7 +16,7 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
  ```python
  from ultralytics.models.yolo.pose import PoseTrainer

- args = dict(model='yolov8n-pose.pt', data='coco8-pose.yaml', epochs=3)
+ args = dict(model="yolov8n-pose.pt", data="coco8-pose.yaml", epochs=3)
  trainer = PoseTrainer(overrides=args)
  trainer.train()
  ```
@@ -20,7 +20,7 @@ class PoseValidator(DetectionValidator):
  ```python
  from ultralytics.models.yolo.pose import PoseValidator

- args = dict(model='yolov8n-pose.pt', data='coco8-pose.yaml')
+ args = dict(model="yolov8n-pose.pt", data="coco8-pose.yaml")
  validator = PoseValidator(args=args)
  validator()
  ```
@@ -14,7 +14,7 @@ class SegmentationPredictor(DetectionPredictor):
  from ultralytics.utils import ASSETS
  from ultralytics.models.yolo.segment import SegmentationPredictor

- args = dict(model='yolov8n-seg.pt', source=ASSETS)
+ args = dict(model="yolov8n-seg.pt", source=ASSETS)
  predictor = SegmentationPredictor(overrides=args)
  predictor.predict_cli()
  ```
@@ -16,7 +16,7 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
  ```python
  from ultralytics.models.yolo.segment import SegmentationTrainer

- args = dict(model='yolov8n-seg.pt', data='coco8-seg.yaml', epochs=3)
+ args = dict(model="yolov8n-seg.pt", data="coco8-seg.yaml", epochs=3)
  trainer = SegmentationTrainer(overrides=args)
  trainer.train()
  ```
@@ -22,7 +22,7 @@ class SegmentationValidator(DetectionValidator):
  ```python
  from ultralytics.models.yolo.segment import SegmentationValidator

- args = dict(model='yolov8n-seg.pt', data='coco8-seg.yaml')
+ args = dict(model="yolov8n-seg.pt", data="coco8-seg.yaml")
  validator = SegmentationValidator(args=args)
  validator()
  ```
@@ -29,7 +29,7 @@ class WorldTrainer(yolo.detect.DetectionTrainer):
  ```python
  from ultralytics.models.yolo.world import WorldModel

- args = dict(model='yolov8s-world.pt', data='coco8.yaml', epochs=3)
+ args = dict(model="yolov8s-world.pt", data="coco8.yaml", epochs=3)
  trainer = WorldTrainer(overrides=args)
  trainer.train()
  ```
@@ -641,8 +641,8 @@ class AutoBackend(nn.Module):
  @staticmethod
  def _model_type(p="path/to/model.pt"):
  """
- This function takes a path to a model file and returns the model type. Possibles types are pt, jit, onnx, xml,
- engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, ncnn or paddle.
+ Takes a path to a model file and returns the model type. Possibles types are pt, jit, onnx, xml, engine, coreml,
+ saved_model, pb, tflite, edgetpu, tfjs, ncnn or paddle.

  Args:
  p: path to the model file. Defaults to path/to/model.pt
@@ -11,9 +11,9 @@ Example:

  x = torch.ones(1, 128, 40, 40)
  m = Conv(128, 128)
- f = f'{m._get_name()}.onnx'
+ f = f"{m._get_name()}.onnx"
  torch.onnx.export(m, x, f)
- os.system(f'onnxslim {f} {f} && open {f}') # pip install onnxslim
+ os.system(f"onnxslim {f} {f} && open {f}") # pip install onnxslim
  ```
  """

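The module-docstring example above omits its imports; a self-contained version for reference (module path per this diff's file list, `onnxslim` assumed installed):

```python
import os

import torch

from ultralytics.nn.modules import Conv

x = torch.ones(1, 128, 40, 40)  # dummy NCHW input
m = Conv(128, 128)              # the standard Conv block documented in this file
f = f"{m._get_name()}.onnx"
torch.onnx.export(m, x, f)      # export the single module to ONNX
os.system(f"onnxslim {f} {f}")  # pip install onnxslim; slims the graph in place
```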
@@ -204,9 +204,7 @@ class C2(nn.Module):
  """CSP Bottleneck with 2 convolutions."""

  def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
- """Initializes the CSP Bottleneck with 2 convolutions module with arguments ch_in, ch_out, number, shortcut,
- groups, expansion.
- """
+ """Initializes a CSP Bottleneck with 2 convolutions and optional shortcut connection."""
  super().__init__()
  self.c = int(c2 * e) # hidden channels
  self.cv1 = Conv(c1, 2 * self.c, 1, 1)
@@ -224,9 +222,7 @@ class C2f(nn.Module):
  """Faster Implementation of CSP Bottleneck with 2 convolutions."""

  def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
- """Initialize CSP bottleneck layer with two convolutions with arguments ch_in, ch_out, number, shortcut, groups,
- expansion.
- """
+ """Initializes a CSP bottleneck with 2 convolutions and n Bottleneck blocks for faster processing."""
  super().__init__()
  self.c = int(c2 * e) # hidden channels
  self.cv1 = Conv(c1, 2 * self.c, 1, 1)
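
Since C2 and C2f share the same constructor signature, a quick smoke test of the faster C2f variant (channel sizes are illustrative; `C2f` is re-exported from `ultralytics.nn.modules` per this diff's file list):

```python
import torch

from ultralytics.nn.modules import C2f

m = C2f(64, 64, n=2, shortcut=True)  # two stacked Bottlenecks with residual adds
y = m(torch.randn(1, 64, 40, 40))    # spatial size and channel count are preserved here
print(y.shape)  # torch.Size([1, 64, 40, 40])
```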
@@ -335,9 +331,7 @@ class Bottleneck(nn.Module):
  """Standard bottleneck."""

  def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
- """Initializes a bottleneck module with given input/output channels, shortcut option, group, kernels, and
- expansion.
- """
+ """Initializes a standard bottleneck module with optional shortcut connection and configurable parameters."""
  super().__init__()
  c_ = int(c2 * e) # hidden channels
  self.cv1 = Conv(c1, c_, k[0], 1)
@@ -345,7 +339,7 @@ class Bottleneck(nn.Module):
  self.add = shortcut and c1 == c2

  def forward(self, x):
- """'forward()' applies the YOLO FPN to input data."""
+ """Applies the YOLO FPN to input data."""
  return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


@@ -449,9 +443,7 @@ class C2fAttn(nn.Module):
  """C2f module with an additional attn module."""

  def __init__(self, c1, c2, n=1, ec=128, nh=1, gc=512, shortcut=False, g=1, e=0.5):
- """Initialize CSP bottleneck layer with two convolutions with arguments ch_in, ch_out, number, shortcut, groups,
- expansion.
- """
+ """Initializes C2f module with attention mechanism for enhanced feature extraction and processing."""
  super().__init__()
  self.c = int(c2 * e) # hidden channels
  self.cv1 = Conv(c1, 2 * self.c, 1, 1)
@@ -521,9 +513,7 @@ class ImagePoolingAttn(nn.Module):


  class ContrastiveHead(nn.Module):
- """Contrastive Head for YOLO-World compute the region-text scores according to the similarity between image and text
- features.
- """
+ """Implements contrastive learning head for region-text similarity in vision-language models."""

  def __init__(self):
  """Initializes ContrastiveHead with specified region-text similarity parameters."""
@@ -569,16 +559,14 @@ class RepBottleneck(Bottleneck):
  """Rep bottleneck."""

  def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
- """Initializes a RepBottleneck module with customizable in/out channels, shortcut option, groups and expansion
- ratio.
- """
+ """Initializes a RepBottleneck module with customizable in/out channels, shortcuts, groups and expansion."""
  super().__init__(c1, c2, shortcut, g, k, e)
  c_ = int(c2 * e) # hidden channels
  self.cv1 = RepConv(c1, c_, k[0], 1)


  class RepCSP(C3):
- """Rep CSP Bottleneck with 3 convolutions."""
+ """Repeatable Cross Stage Partial Network (RepCSP) module for efficient feature extraction."""

  def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
  """Initializes RepCSP layer with given channels, repetitions, shortcut, groups and expansion ratio."""
@@ -158,9 +158,7 @@ class GhostConv(nn.Module):
  """Ghost Convolution https://github.com/huawei-noah/ghostnet."""

  def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
- """Initializes the GhostConv object with input channels, output channels, kernel size, stride, groups and
- activation.
- """
+ """Initializes Ghost Convolution module with primary and cheap operations for efficient feature learning."""
  super().__init__()
  c_ = c2 // 2 # hidden channels
  self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
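
Finally, GhostConv's constructor signature above supports a quick shape check (values illustrative; `GhostConv` is re-exported from `ultralytics.nn.modules`):

```python
import torch

from ultralytics.nn.modules import GhostConv

m = GhostConv(64, 128, k=3, s=2)  # primary conv plus a cheap depthwise op, concatenated
y = m(torch.randn(1, 64, 80, 80))
print(y.shape)  # torch.Size([1, 128, 40, 40]); stride 2 halves the spatial size
```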