ultralytics 8.0.195__py3-none-any.whl → 8.0.196__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ultralytics might be problematic. Click here for more details.

Files changed (84) hide show
  1. ultralytics/__init__.py +1 -1
  2. ultralytics/cfg/__init__.py +5 -6
  3. ultralytics/data/augment.py +234 -29
  4. ultralytics/data/base.py +2 -1
  5. ultralytics/data/build.py +9 -3
  6. ultralytics/data/converter.py +5 -2
  7. ultralytics/data/dataset.py +16 -2
  8. ultralytics/data/loaders.py +111 -7
  9. ultralytics/data/utils.py +3 -3
  10. ultralytics/engine/exporter.py +1 -3
  11. ultralytics/engine/model.py +3 -9
  12. ultralytics/engine/predictor.py +10 -6
  13. ultralytics/engine/results.py +18 -8
  14. ultralytics/engine/trainer.py +19 -31
  15. ultralytics/engine/tuner.py +20 -20
  16. ultralytics/engine/validator.py +3 -4
  17. ultralytics/hub/__init__.py +2 -2
  18. ultralytics/hub/auth.py +18 -3
  19. ultralytics/hub/session.py +1 -0
  20. ultralytics/hub/utils.py +1 -3
  21. ultralytics/models/fastsam/model.py +2 -1
  22. ultralytics/models/fastsam/predict.py +2 -0
  23. ultralytics/models/fastsam/prompt.py +15 -1
  24. ultralytics/models/nas/model.py +3 -1
  25. ultralytics/models/rtdetr/model.py +4 -6
  26. ultralytics/models/rtdetr/predict.py +2 -1
  27. ultralytics/models/rtdetr/train.py +2 -1
  28. ultralytics/models/rtdetr/val.py +1 -0
  29. ultralytics/models/sam/amg.py +12 -6
  30. ultralytics/models/sam/model.py +5 -6
  31. ultralytics/models/sam/modules/decoders.py +5 -1
  32. ultralytics/models/sam/modules/encoders.py +15 -12
  33. ultralytics/models/sam/modules/tiny_encoder.py +38 -2
  34. ultralytics/models/sam/modules/transformer.py +2 -4
  35. ultralytics/models/sam/predict.py +8 -4
  36. ultralytics/models/utils/loss.py +35 -8
  37. ultralytics/models/utils/ops.py +14 -18
  38. ultralytics/models/yolo/classify/predict.py +1 -0
  39. ultralytics/models/yolo/classify/train.py +4 -2
  40. ultralytics/models/yolo/classify/val.py +1 -0
  41. ultralytics/models/yolo/detect/train.py +4 -3
  42. ultralytics/models/yolo/model.py +2 -4
  43. ultralytics/models/yolo/pose/predict.py +1 -0
  44. ultralytics/models/yolo/segment/predict.py +2 -0
  45. ultralytics/models/yolo/segment/val.py +1 -1
  46. ultralytics/nn/autobackend.py +45 -32
  47. ultralytics/nn/modules/__init__.py +13 -9
  48. ultralytics/nn/modules/block.py +11 -5
  49. ultralytics/nn/modules/conv.py +16 -7
  50. ultralytics/nn/modules/head.py +6 -3
  51. ultralytics/nn/modules/transformer.py +47 -15
  52. ultralytics/nn/modules/utils.py +6 -4
  53. ultralytics/nn/tasks.py +61 -21
  54. ultralytics/trackers/bot_sort.py +53 -6
  55. ultralytics/trackers/byte_tracker.py +71 -15
  56. ultralytics/trackers/track.py +0 -1
  57. ultralytics/trackers/utils/gmc.py +23 -0
  58. ultralytics/trackers/utils/kalman_filter.py +6 -6
  59. ultralytics/utils/__init__.py +31 -18
  60. ultralytics/utils/autobatch.py +1 -3
  61. ultralytics/utils/benchmarks.py +14 -1
  62. ultralytics/utils/callbacks/base.py +1 -3
  63. ultralytics/utils/callbacks/comet.py +11 -3
  64. ultralytics/utils/callbacks/dvc.py +9 -0
  65. ultralytics/utils/callbacks/neptune.py +5 -6
  66. ultralytics/utils/callbacks/wb.py +1 -0
  67. ultralytics/utils/checks.py +13 -9
  68. ultralytics/utils/dist.py +2 -1
  69. ultralytics/utils/downloads.py +7 -3
  70. ultralytics/utils/files.py +3 -3
  71. ultralytics/utils/instance.py +12 -3
  72. ultralytics/utils/loss.py +97 -22
  73. ultralytics/utils/metrics.py +34 -34
  74. ultralytics/utils/ops.py +10 -9
  75. ultralytics/utils/patches.py +9 -7
  76. ultralytics/utils/plotting.py +4 -3
  77. ultralytics/utils/torch_utils.py +8 -6
  78. ultralytics/utils/triton.py +2 -1
  79. {ultralytics-8.0.195.dist-info → ultralytics-8.0.196.dist-info}/METADATA +1 -1
  80. {ultralytics-8.0.195.dist-info → ultralytics-8.0.196.dist-info}/RECORD +84 -84
  81. {ultralytics-8.0.195.dist-info → ultralytics-8.0.196.dist-info}/LICENSE +0 -0
  82. {ultralytics-8.0.195.dist-info → ultralytics-8.0.196.dist-info}/WHEEL +0 -0
  83. {ultralytics-8.0.195.dist-info → ultralytics-8.0.196.dist-info}/entry_points.txt +0 -0
  84. {ultralytics-8.0.195.dist-info → ultralytics-8.0.196.dist-info}/top_level.txt +0 -0
@@ -47,6 +47,7 @@ class DETRLoss(nn.Module):
47
47
  self.device = None
48
48
 
49
49
  def _get_loss_class(self, pred_scores, targets, gt_scores, num_gts, postfix=''):
50
+ """Computes the classification loss based on predictions, target values, and ground truth scores."""
50
51
  # logits: [b, query, num_classes], gt_class: list[[n, 1]]
51
52
  name_class = f'loss_class{postfix}'
52
53
  bs, nq = pred_scores.shape[:2]
@@ -68,6 +69,9 @@ class DETRLoss(nn.Module):
68
69
  return {name_class: loss_cls.squeeze() * self.loss_gain['class']}
69
70
 
70
71
  def _get_loss_bbox(self, pred_bboxes, gt_bboxes, postfix=''):
72
+ """Calculates and returns the bounding box loss and GIoU loss for the predicted and ground truth bounding
73
+ boxes.
74
+ """
71
75
  # boxes: [b, query, 4], gt_bbox: list[[n, 4]]
72
76
  name_bbox = f'loss_bbox{postfix}'
73
77
  name_giou = f'loss_giou{postfix}'
@@ -125,7 +129,7 @@ class DETRLoss(nn.Module):
125
129
  postfix='',
126
130
  masks=None,
127
131
  gt_mask=None):
128
- """Get auxiliary losses"""
132
+ """Get auxiliary losses."""
129
133
  # NOTE: loss class, bbox, giou, mask, dice
130
134
  loss = torch.zeros(5 if masks is not None else 3, device=pred_bboxes.device)
131
135
  if match_indices is None and self.use_uni_match:
@@ -166,12 +170,14 @@ class DETRLoss(nn.Module):
166
170
 
167
171
  @staticmethod
168
172
  def _get_index(match_indices):
173
+ """Returns batch indices, source indices, and destination indices from provided match indices."""
169
174
  batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(match_indices)])
170
175
  src_idx = torch.cat([src for (src, _) in match_indices])
171
176
  dst_idx = torch.cat([dst for (_, dst) in match_indices])
172
177
  return (batch_idx, src_idx), dst_idx
173
178
 
174
179
  def _get_assigned_bboxes(self, pred_bboxes, gt_bboxes, match_indices):
180
+ """Assigns predicted bounding boxes to ground truth bounding boxes based on the match indices."""
175
181
  pred_assigned = torch.cat([
176
182
  t[I] if len(I) > 0 else torch.zeros(0, t.shape[-1], device=self.device)
177
183
  for t, (I, _) in zip(pred_bboxes, match_indices)])
@@ -190,7 +196,7 @@ class DETRLoss(nn.Module):
190
196
  gt_mask=None,
191
197
  postfix='',
192
198
  match_indices=None):
193
- """Get losses"""
199
+ """Get losses."""
194
200
  if match_indices is None:
195
201
  match_indices = self.matcher(pred_bboxes,
196
202
  pred_scores,
@@ -250,22 +256,43 @@ class DETRLoss(nn.Module):
250
256
 
251
257
 
252
258
  class RTDETRDetectionLoss(DETRLoss):
259
+ """
260
+ Real-Time DEtection TRansformer (RT-DETR) Detection Loss class that extends the DETRLoss.
261
+
262
+ This class computes the detection loss for the RT-DETR model, which includes the standard detection loss as well as
263
+ an additional denoising training loss when provided with denoising metadata.
264
+ """
253
265
 
254
266
  def forward(self, preds, batch, dn_bboxes=None, dn_scores=None, dn_meta=None):
267
+ """
268
+ Forward pass to compute the detection loss.
269
+
270
+ Args:
271
+ preds (tuple): Predicted bounding boxes and scores.
272
+ batch (dict): Batch data containing ground truth information.
273
+ dn_bboxes (torch.Tensor, optional): Denoising bounding boxes. Default is None.
274
+ dn_scores (torch.Tensor, optional): Denoising scores. Default is None.
275
+ dn_meta (dict, optional): Metadata for denoising. Default is None.
276
+
277
+ Returns:
278
+ (dict): Dictionary containing the total loss and, if applicable, the denoising loss.
279
+ """
255
280
  pred_bboxes, pred_scores = preds
256
281
  total_loss = super().forward(pred_bboxes, pred_scores, batch)
257
282
 
283
+ # Check for denoising metadata to compute denoising training loss
258
284
  if dn_meta is not None:
259
285
  dn_pos_idx, dn_num_group = dn_meta['dn_pos_idx'], dn_meta['dn_num_group']
260
286
  assert len(batch['gt_groups']) == len(dn_pos_idx)
261
287
 
262
- # Denoising match indices
288
+ # Get the match indices for denoising
263
289
  match_indices = self.get_dn_match_indices(dn_pos_idx, dn_num_group, batch['gt_groups'])
264
290
 
265
- # Compute denoising training loss
291
+ # Compute the denoising training loss
266
292
  dn_loss = super().forward(dn_bboxes, dn_scores, batch, postfix='_dn', match_indices=match_indices)
267
293
  total_loss.update(dn_loss)
268
294
  else:
295
+ # If no denoising metadata is provided, set denoising loss to zero
269
296
  total_loss.update({f'{k}_dn': torch.tensor(0., device=self.device) for k in total_loss.keys()})
270
297
 
271
298
  return total_loss
@@ -276,12 +303,12 @@ class RTDETRDetectionLoss(DETRLoss):
276
303
  Get the match indices for denoising.
277
304
 
278
305
  Args:
279
- dn_pos_idx (List[torch.Tensor]): A list includes positive indices of denoising.
280
- dn_num_group (int): The number of groups of denoising.
281
- gt_groups (List(int)): a list of batch size length includes the number of gts of each image.
306
+ dn_pos_idx (List[torch.Tensor]): List of tensors containing positive indices for denoising.
307
+ dn_num_group (int): Number of denoising groups.
308
+ gt_groups (List[int]): List of integers representing the number of ground truths for each image.
282
309
 
283
310
  Returns:
284
- dn_match_indices (List(tuple)): Matched indices.
311
+ (List[tuple]): List of tuples containing matched indices for denoising.
285
312
  """
286
313
  dn_match_indices = []
287
314
  idx_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0)
@@ -11,8 +11,8 @@ from ultralytics.utils.ops import xywh2xyxy, xyxy2xywh
11
11
 
12
12
  class HungarianMatcher(nn.Module):
13
13
  """
14
- A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in
15
- an end-to-end fashion.
14
+ A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in an
15
+ end-to-end fashion.
16
16
 
17
17
  HungarianMatcher performs optimal assignment over the predicted and ground truth bounding boxes using a cost
18
18
  function that considers classification scores, bounding box coordinates, and optionally, mask predictions.
@@ -32,6 +32,9 @@ class HungarianMatcher(nn.Module):
32
32
  """
33
33
 
34
34
  def __init__(self, cost_gain=None, use_fl=True, with_mask=False, num_sample_points=12544, alpha=0.25, gamma=2.0):
35
+ """Initializes HungarianMatcher with cost coefficients, Focal Loss, mask prediction, sample points, and alpha
36
+ and gamma factors.
37
+ """
35
38
  super().__init__()
36
39
  if cost_gain is None:
37
40
  cost_gain = {'class': 1, 'bbox': 5, 'giou': 2, 'mask': 1, 'dice': 1}
@@ -45,8 +48,8 @@ class HungarianMatcher(nn.Module):
45
48
  def forward(self, pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups, masks=None, gt_mask=None):
46
49
  """
47
50
  Forward pass for HungarianMatcher. This function computes costs based on prediction and ground truth
48
- (classification cost, L1 cost between boxes and GIoU cost between boxes) and finds the optimal matching
49
- between predictions and ground truth based on these costs.
51
+ (classification cost, L1 cost between boxes and GIoU cost between boxes) and finds the optimal matching between
52
+ predictions and ground truth based on these costs.
50
53
 
51
54
  Args:
52
55
  pred_bboxes (Tensor): Predicted bounding boxes with shape [batch_size, num_queries, 4].
@@ -153,9 +156,9 @@ def get_cdn_group(batch,
153
156
  box_noise_scale=1.0,
154
157
  training=False):
155
158
  """
156
- Get contrastive denoising training group. This function creates a contrastive denoising training group with
157
- positive and negative samples from the ground truths (gt). It applies noise to the class labels and bounding
158
- box coordinates, and returns the modified labels, bounding boxes, attention mask and meta information.
159
+ Get contrastive denoising training group. This function creates a contrastive denoising training group with positive
160
+ and negative samples from the ground truths (gt). It applies noise to the class labels and bounding box coordinates,
161
+ and returns the modified labels, bounding boxes, attention mask and meta information.
159
162
 
160
163
  Args:
161
164
  batch (dict): A dict that includes 'gt_cls' (torch.Tensor with shape [num_gts, ]), 'gt_bboxes'
@@ -191,12 +194,12 @@ def get_cdn_group(batch,
191
194
  gt_bbox = batch['bboxes'] # bs*num, 4
192
195
  b_idx = batch['batch_idx']
193
196
 
194
- # each group has positive and negative queries.
197
+ # Each group has positive and negative queries.
195
198
  dn_cls = gt_cls.repeat(2 * num_group) # (2*num_group*bs*num, )
196
199
  dn_bbox = gt_bbox.repeat(2 * num_group, 1) # 2*num_group*bs*num, 4
197
200
  dn_b_idx = b_idx.repeat(2 * num_group).view(-1) # (2*num_group*bs*num, )
198
201
 
199
- # positive and negative mask
202
+ # Positive and negative mask
200
203
  # (bs*num*num_group, ), the second total_num*num_group part as negative samples
201
204
  neg_idx = torch.arange(total_num * num_group, dtype=torch.long, device=gt_bbox.device) + num_group * total_num
202
205
 
@@ -220,10 +223,9 @@ def get_cdn_group(batch,
220
223
  known_bbox += rand_part * diff
221
224
  known_bbox.clip_(min=0.0, max=1.0)
222
225
  dn_bbox = xyxy2xywh(known_bbox)
223
- dn_bbox = inverse_sigmoid(dn_bbox)
226
+ dn_bbox = torch.logit(dn_bbox, eps=1e-6) # inverse sigmoid
224
227
 
225
- # total denoising queries
226
- num_dn = int(max_nums * 2 * num_group)
228
+ num_dn = int(max_nums * 2 * num_group) # total denoising queries
227
229
  # class_embed = torch.cat([class_embed, torch.zeros([1, class_embed.shape[-1]], device=class_embed.device)])
228
230
  dn_cls_embed = class_embed[dn_cls] # bs*num * 2 * num_group, 256
229
231
  padding_cls = torch.zeros(bs, num_dn, dn_cls_embed.shape[-1], device=gt_cls.device)
@@ -256,9 +258,3 @@ def get_cdn_group(batch,
256
258
 
257
259
  return padding_cls.to(class_embed.device), padding_bbox.to(class_embed.device), attn_mask.to(
258
260
  class_embed.device), dn_meta
259
-
260
-
261
- def inverse_sigmoid(x, eps=1e-6):
262
- """Inverse sigmoid function."""
263
- x = x.clip(min=0., max=1.)
264
- return torch.log(x / (1 - x + eps) + eps)
@@ -26,6 +26,7 @@ class ClassificationPredictor(BasePredictor):
26
26
  """
27
27
 
28
28
  def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
29
+ """Initializes ClassificationPredictor setting the task to 'classify'."""
29
30
  super().__init__(cfg, overrides, _callbacks)
30
31
  self.args.task = 'classify'
31
32
 
@@ -79,6 +79,7 @@ class ClassificationTrainer(BaseTrainer):
79
79
  return ckpt
80
80
 
81
81
  def build_dataset(self, img_path, mode='train', batch=None):
82
+ """Creates a ClassificationDataset instance given an image path, and mode (train/test etc.)."""
82
83
  return ClassificationDataset(root=img_path, args=self.args, augment=mode == 'train', prefix=mode)
83
84
 
84
85
  def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'):
@@ -113,8 +114,9 @@ class ClassificationTrainer(BaseTrainer):
113
114
 
114
115
  def label_loss_items(self, loss_items=None, prefix='train'):
115
116
  """
116
- Returns a loss dict with labelled training loss items tensor. Not needed for classification but necessary for
117
- segmentation & detection
117
+ Returns a loss dict with labelled training loss items tensor.
118
+
119
+ Not needed for classification but necessary for segmentation & detection
118
120
  """
119
121
  keys = [f'{prefix}/{x}' for x in self.loss_names]
120
122
  if loss_items is None:
@@ -78,6 +78,7 @@ class ClassificationValidator(BaseValidator):
78
78
  return self.metrics.results_dict
79
79
 
80
80
  def build_dataset(self, img_path):
81
+ """Creates and returns a ClassificationDataset instance using given image path and preprocessing parameters."""
81
82
  return ClassificationDataset(root=img_path, args=self.args, augment=False, prefix=self.args.split)
82
83
 
83
84
  def get_dataloader(self, dataset_path, batch_size):
@@ -57,7 +57,7 @@ class DetectionTrainer(BaseTrainer):
57
57
  return batch
58
58
 
59
59
  def set_model_attributes(self):
60
- """nl = de_parallel(self.model).model[-1].nl # number of detection layers (to scale hyps)."""
60
+ """Nl = de_parallel(self.model).model[-1].nl # number of detection layers (to scale hyps)."""
61
61
  # self.args.box *= 3 / nl # scale to layers
62
62
  # self.args.cls *= self.data["nc"] / 80 * 3 / nl # scale to classes and layers
63
63
  # self.args.cls *= (self.args.imgsz / 640) ** 2 * 3 / nl # scale to image size and layers
@@ -80,8 +80,9 @@ class DetectionTrainer(BaseTrainer):
80
80
 
81
81
  def label_loss_items(self, loss_items=None, prefix='train'):
82
82
  """
83
- Returns a loss dict with labelled training loss items tensor. Not needed for classification but necessary for
84
- segmentation & detection
83
+ Returns a loss dict with labelled training loss items tensor.
84
+
85
+ Not needed for classification but necessary for segmentation & detection
85
86
  """
86
87
  keys = [f'{prefix}/{x}' for x in self.loss_names]
87
88
  if loss_items is not None:
@@ -6,13 +6,11 @@ from ultralytics.nn.tasks import ClassificationModel, DetectionModel, PoseModel,
6
6
 
7
7
 
8
8
  class YOLO(Model):
9
- """
10
- YOLO (You Only Look Once) object detection model.
11
- """
9
+ """YOLO (You Only Look Once) object detection model."""
12
10
 
13
11
  @property
14
12
  def task_map(self):
15
- """Map head to model, trainer, validator, and predictor classes"""
13
+ """Map head to model, trainer, validator, and predictor classes."""
16
14
  return {
17
15
  'classify': {
18
16
  'model': ClassificationModel,
@@ -21,6 +21,7 @@ class PosePredictor(DetectionPredictor):
21
21
  """
22
22
 
23
23
  def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
24
+ """Initializes PosePredictor, sets task to 'pose' and logs a warning for using 'mps' as device."""
24
25
  super().__init__(cfg, overrides, _callbacks)
25
26
  self.args.task = 'pose'
26
27
  if isinstance(self.args.device, str) and self.args.device.lower() == 'mps':
@@ -21,10 +21,12 @@ class SegmentationPredictor(DetectionPredictor):
21
21
  """
22
22
 
23
23
  def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
24
+ """Initializes the SegmentationPredictor with the provided configuration, overrides, and callbacks."""
24
25
  super().__init__(cfg, overrides, _callbacks)
25
26
  self.args.task = 'segment'
26
27
 
27
28
  def postprocess(self, preds, img, orig_imgs):
29
+ """Applies non-max suppression and processes detections for each image in an input batch."""
28
30
  p = ops.non_max_suppression(preds[0],
29
31
  self.args.conf,
30
32
  self.args.iou,
@@ -144,7 +144,7 @@ class SegmentationValidator(DetectionValidator):
144
144
 
145
145
  def _process_batch(self, detections, labels, pred_masks=None, gt_masks=None, overlap=False, masks=False):
146
146
  """
147
- Return correct prediction matrix
147
+ Return correct prediction matrix.
148
148
 
149
149
  Args:
150
150
  detections (array[N, 6]), x1, y1, x2, y2, conf, class
@@ -20,7 +20,11 @@ from ultralytics.utils.downloads import attempt_download_asset, is_url
20
20
 
21
21
 
22
22
  def check_class_names(names):
23
- """Check class names. Map imagenet class codes to human-readable names if required. Convert lists to dicts."""
23
+ """
24
+ Check class names.
25
+
26
+ Map imagenet class codes to human-readable names if required. Convert lists to dicts.
27
+ """
24
28
  if isinstance(names, list): # names is a list
25
29
  names = dict(enumerate(names)) # convert to dict
26
30
  if isinstance(names, dict):
@@ -37,36 +41,20 @@ def check_class_names(names):
37
41
 
38
42
 
39
43
  class AutoBackend(nn.Module):
44
+ """
45
+ Handles dynamic backend selection for running inference using Ultralytics YOLO models.
40
46
 
41
- @torch.no_grad()
42
- def __init__(self,
43
- weights='yolov8n.pt',
44
- device=torch.device('cpu'),
45
- dnn=False,
46
- data=None,
47
- fp16=False,
48
- fuse=True,
49
- verbose=True):
50
- """
51
- MultiBackend class for python inference on various platforms using Ultralytics YOLO.
47
+ The AutoBackend class is designed to provide an abstraction layer for various inference engines. It supports a wide
48
+ range of formats, each with specific naming conventions as outlined below:
52
49
 
53
- Args:
54
- weights (str): The path to the weights file. Default: 'yolov8n.pt'
55
- device (torch.device): The device to run the model on.
56
- dnn (bool): Use OpenCV DNN module for inference if True, defaults to False.
57
- data (str | Path | optional): Additional data.yaml file for class names.
58
- fp16 (bool): If True, use half precision. Default: False
59
- fuse (bool): Whether to fuse the model or not. Default: True
60
- verbose (bool): Whether to run in verbose mode or not. Default: True
61
-
62
- Supported formats and their naming conventions:
63
- | Format | Suffix |
50
+ Supported Formats and Naming Conventions:
51
+ | Format | File Suffix |
64
52
  |-----------------------|------------------|
65
53
  | PyTorch | *.pt |
66
54
  | TorchScript | *.torchscript |
67
55
  | ONNX Runtime | *.onnx |
68
- | ONNX OpenCV DNN | *.onnx dnn=True |
69
- | OpenVINO | *.xml |
56
+ | ONNX OpenCV DNN | *.onnx (dnn=True)|
57
+ | OpenVINO | *openvino_model/ |
70
58
  | CoreML | *.mlpackage |
71
59
  | TensorRT | *.engine |
72
60
  | TensorFlow SavedModel | *_saved_model |
@@ -75,6 +63,31 @@ class AutoBackend(nn.Module):
75
63
  | TensorFlow Edge TPU | *_edgetpu.tflite |
76
64
  | PaddlePaddle | *_paddle_model |
77
65
  | ncnn | *_ncnn_model |
66
+
67
+ This class offers dynamic backend switching capabilities based on the input model format, making it easier to deploy
68
+ models across various platforms.
69
+ """
70
+
71
+ @torch.no_grad()
72
+ def __init__(self,
73
+ weights='yolov8n.pt',
74
+ device=torch.device('cpu'),
75
+ dnn=False,
76
+ data=None,
77
+ fp16=False,
78
+ fuse=True,
79
+ verbose=True):
80
+ """
81
+ Initialize the AutoBackend for inference.
82
+
83
+ Args:
84
+ weights (str): Path to the model weights file. Defaults to 'yolov8n.pt'.
85
+ device (torch.device): Device to run the model on. Defaults to CPU.
86
+ dnn (bool): Use OpenCV DNN module for ONNX inference. Defaults to False.
87
+ data (str | Path | optional): Path to the additional data.yaml file containing class names. Optional.
88
+ fp16 (bool): Enable half-precision inference. Supported only on specific backends. Defaults to False.
89
+ fuse (bool): Fuse Conv2D + BatchNorm layers for optimization. Defaults to True.
90
+ verbose (bool): Enable verbose logging. Defaults to True.
78
91
  """
79
92
  super().__init__()
80
93
  w = str(weights[0] if isinstance(weights, list) else weights)
@@ -440,14 +453,14 @@ class AutoBackend(nn.Module):
440
453
 
441
454
  def from_numpy(self, x):
442
455
  """
443
- Convert a numpy array to a tensor.
456
+ Convert a numpy array to a tensor.
444
457
 
445
- Args:
446
- x (np.ndarray): The array to be converted.
458
+ Args:
459
+ x (np.ndarray): The array to be converted.
447
460
 
448
- Returns:
449
- (torch.Tensor): The converted tensor
450
- """
461
+ Returns:
462
+ (torch.Tensor): The converted tensor
463
+ """
451
464
  return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x
452
465
 
453
466
  def warmup(self, imgsz=(1, 3, 640, 640)):
@@ -476,7 +489,7 @@ class AutoBackend(nn.Module):
476
489
  @staticmethod
477
490
  def _model_type(p='path/to/model.pt'):
478
491
  """
479
- This function takes a path to a model file and returns the model type
492
+ This function takes a path to a model file and returns the model type.
480
493
 
481
494
  Args:
482
495
  p: path to the model file. Defaults to path/to/model.pt
@@ -1,16 +1,20 @@
1
1
  # Ultralytics YOLO 🚀, AGPL-3.0 license
2
2
  """
3
- Ultralytics modules. Visualize with:
3
+ Ultralytics modules.
4
4
 
5
- from ultralytics.nn.modules import *
6
- import torch
7
- import os
5
+ Example:
6
+ Visualize a module with Netron.
7
+ ```python
8
+ from ultralytics.nn.modules import *
9
+ import torch
10
+ import os
8
11
 
9
- x = torch.ones(1, 128, 40, 40)
10
- m = Conv(128, 128)
11
- f = f'{m._get_name()}.onnx'
12
- torch.onnx.export(m, x, f)
13
- os.system(f'onnxsim {f} {f} && open {f}')
12
+ x = torch.ones(1, 128, 40, 40)
13
+ m = Conv(128, 128)
14
+ f = f'{m._get_name()}.onnx'
15
+ torch.onnx.export(m, x, f)
16
+ os.system(f'onnxsim {f} {f} && open {f}')
17
+ ```
14
18
  """
15
19
 
16
20
  from .block import (C1, C2, C3, C3TR, DFL, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, GhostBottleneck,
@@ -1,7 +1,5 @@
1
1
  # Ultralytics YOLO 🚀, AGPL-3.0 license
2
- """
3
- Block modules
4
- """
2
+ """Block modules."""
5
3
 
6
4
  import torch
7
5
  import torch.nn as nn
@@ -17,6 +15,7 @@ __all__ = ('DFL', 'HGBlock', 'HGStem', 'SPP', 'SPPF', 'C1', 'C2', 'C3', 'C2f', '
17
15
  class DFL(nn.Module):
18
16
  """
19
17
  Integral module of Distribution Focal Loss (DFL).
18
+
20
19
  Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
21
20
  """
22
21
 
@@ -51,11 +50,14 @@ class Proto(nn.Module):
51
50
 
52
51
 
53
52
  class HGStem(nn.Module):
54
- """StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
53
+ """
54
+ StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
55
+
55
56
  https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
56
57
  """
57
58
 
58
59
  def __init__(self, c1, cm, c2):
60
+ """Initialize the PPHGNetV2 stem block's convolution and max-pooling layers from channels c1, cm and c2."""
59
61
  super().__init__()
60
62
  self.stem1 = Conv(c1, cm, 3, 2, act=nn.ReLU())
61
63
  self.stem2a = Conv(cm, cm // 2, 2, 1, 0, act=nn.ReLU())
@@ -79,11 +81,14 @@ class HGStem(nn.Module):
79
81
 
80
82
 
81
83
  class HGBlock(nn.Module):
82
- """HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
84
+ """
85
+ HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
86
+
83
87
  https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
84
88
  """
85
89
 
86
90
  def __init__(self, c1, cm, c2, k=3, n=6, lightconv=False, shortcut=False, act=nn.ReLU()):
91
+ """Initializes HGBlock with n stacked convolutions (LightConv if lightconv is True, otherwise Conv)."""
87
92
  super().__init__()
88
93
  block = LightConv if lightconv else Conv
89
94
  self.m = nn.ModuleList(block(c1 if i == 0 else cm, cm, k=k, act=act) for i in range(n))
@@ -218,6 +223,7 @@ class RepC3(nn.Module):
218
223
  """Rep C3."""
219
224
 
220
225
  def __init__(self, c1, c2, n=3, e=1.0):
226
+ """Initialize RepC3 with input channels c1, output channels c2, number n, and hidden-channel ratio e."""
221
227
  super().__init__()
222
228
  c_ = int(c2 * e) # hidden channels
223
229
  self.cv1 = Conv(c1, c2, 1, 1)
@@ -1,7 +1,5 @@
1
1
  # Ultralytics YOLO 🚀, AGPL-3.0 license
2
- """
3
- Convolution modules
4
- """
2
+ """Convolution modules."""
5
3
 
6
4
  import math
7
5
 
@@ -69,7 +67,9 @@ class Conv2(Conv):
69
67
 
70
68
 
71
69
  class LightConv(nn.Module):
72
- """Light convolution with args(ch_in, ch_out, kernel).
70
+ """
71
+ Light convolution with args(ch_in, ch_out, kernel).
72
+
73
73
  https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
74
74
  """
75
75
 
@@ -148,12 +148,15 @@ class GhostConv(nn.Module):
148
148
 
149
149
  class RepConv(nn.Module):
150
150
  """
151
- RepConv is a basic rep-style block, including training and deploy status. This module is used in RT-DETR.
151
+ RepConv is a basic rep-style block, including training and deploy status.
152
+
153
+ This module is used in RT-DETR.
152
154
  Based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
153
155
  """
154
156
  default_act = nn.SiLU() # default activation
155
157
 
156
158
  def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
159
+ """Initializes RepConv (3x3 kernel with padding 1 required) with input/output channels and optional activation."""
157
160
  super().__init__()
158
161
  assert k == 3 and p == 1
159
162
  self.g = g
@@ -166,27 +169,30 @@ class RepConv(nn.Module):
166
169
  self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False)
167
170
 
168
171
  def forward_fuse(self, x):
169
- """Forward process"""
172
+ """Forward process."""
170
173
  return self.act(self.conv(x))
171
174
 
172
175
  def forward(self, x):
173
- """Forward process"""
176
+ """Forward process."""
174
177
  id_out = 0 if self.bn is None else self.bn(x)
175
178
  return self.act(self.conv1(x) + self.conv2(x) + id_out)
176
179
 
177
180
  def get_equivalent_kernel_bias(self):
181
+ """Returns equivalent kernel and bias by adding 3x3 kernel, 1x1 kernel and identity kernel with their biases."""
178
182
  kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
179
183
  kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
180
184
  kernelid, biasid = self._fuse_bn_tensor(self.bn)
181
185
  return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
182
186
 
183
187
  def _pad_1x1_to_3x3_tensor(self, kernel1x1):
188
+ """Pads a 1x1 tensor to a 3x3 tensor."""
184
189
  if kernel1x1 is None:
185
190
  return 0
186
191
  else:
187
192
  return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])
188
193
 
189
194
  def _fuse_bn_tensor(self, branch):
195
+ """Generates appropriate kernels and biases for convolution by fusing branches of the neural network."""
190
196
  if branch is None:
191
197
  return 0, 0
192
198
  if isinstance(branch, Conv):
@@ -214,6 +220,7 @@ class RepConv(nn.Module):
214
220
  return kernel * t, beta - running_mean * gamma / std
215
221
 
216
222
  def fuse_convs(self):
223
+ """Combines two convolution layers into a single layer and removes unused attributes from the class."""
217
224
  if hasattr(self, 'conv'):
218
225
  return
219
226
  kernel, bias = self.get_equivalent_kernel_bias()
@@ -243,12 +250,14 @@ class ChannelAttention(nn.Module):
243
250
  """Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet."""
244
251
 
245
252
  def __init__(self, channels: int) -> None:
253
+ """Initializes ChannelAttention with adaptive average pooling, a 1x1 convolution, and a sigmoid activation."""
246
254
  super().__init__()
247
255
  self.pool = nn.AdaptiveAvgPool2d(1)
248
256
  self.fc = nn.Conv2d(channels, channels, 1, 1, 0, bias=True)
249
257
  self.act = nn.Sigmoid()
250
258
 
251
259
  def forward(self, x: torch.Tensor) -> torch.Tensor:
260
+ """Scales the input by sigmoid-activated channel weights computed from its average-pooled features."""
252
261
  return x * self.act(self.fc(self.pool(x)))
253
262
 
254
263
 
@@ -1,7 +1,5 @@
1
1
  # Ultralytics YOLO 🚀, AGPL-3.0 license
2
- """
3
- Model head modules
4
- """
2
+ """Model head modules."""
5
3
 
6
4
  import math
7
5
 
@@ -229,6 +227,7 @@ class RTDETRDecoder(nn.Module):
229
227
  self._reset_parameters()
230
228
 
231
229
  def forward(self, x, batch=None):
230
+ """Runs the forward pass of the module, returning bounding box and classification scores for the input."""
232
231
  from ultralytics.models.utils.ops import get_cdn_group
233
232
 
234
233
  # input projection and embedding
@@ -265,6 +264,7 @@ class RTDETRDecoder(nn.Module):
265
264
  return y if self.export else (y, x)
266
265
 
267
266
  def _generate_anchors(self, shapes, grid_size=0.05, dtype=torch.float32, device='cpu', eps=1e-2):
267
+ """Generates anchor bounding boxes for given shapes with specific grid size and validates them."""
268
268
  anchors = []
269
269
  for i, (h, w) in enumerate(shapes):
270
270
  sy = torch.arange(end=h, dtype=dtype, device=device)
@@ -284,6 +284,7 @@ class RTDETRDecoder(nn.Module):
284
284
  return anchors, valid_mask
285
285
 
286
286
  def _get_encoder_input(self, x):
287
+ """Processes and returns encoder inputs by getting projection features from input and concatenating them."""
287
288
  # get projection features
288
289
  x = [self.input_proj[i](feat) for i, feat in enumerate(x)]
289
290
  # get encoder inputs
@@ -301,6 +302,7 @@ class RTDETRDecoder(nn.Module):
301
302
  return feats, shapes
302
303
 
303
304
  def _get_decoder_input(self, feats, shapes, dn_embed=None, dn_bbox=None):
305
+ """Generates and prepares the input required for the decoder from the provided features and shapes."""
304
306
  bs = len(feats)
305
307
  # prepare input for decoder
306
308
  anchors, valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device)
@@ -339,6 +341,7 @@ class RTDETRDecoder(nn.Module):
339
341
 
340
342
  # TODO
341
343
  def _reset_parameters(self):
344
+ """Initializes or resets the parameters of the model's various components with predefined weights and biases."""
342
345
  # class and bbox head init
343
346
  bias_cls = bias_init_with_prob(0.01) / 80 * self.nc
344
347
  # NOTE: the weight initialization in `linear_init_` would cause NaN when training with custom datasets.