ultralytics 8.0.194__py3-none-any.whl → 8.0.196__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ultralytics might be problematic.
Files changed (84)
  1. ultralytics/__init__.py +1 -1
  2. ultralytics/cfg/__init__.py +5 -6
  3. ultralytics/data/augment.py +234 -29
  4. ultralytics/data/base.py +2 -1
  5. ultralytics/data/build.py +9 -3
  6. ultralytics/data/converter.py +5 -2
  7. ultralytics/data/dataset.py +16 -2
  8. ultralytics/data/loaders.py +111 -7
  9. ultralytics/data/utils.py +3 -3
  10. ultralytics/engine/exporter.py +1 -3
  11. ultralytics/engine/model.py +16 -9
  12. ultralytics/engine/predictor.py +10 -6
  13. ultralytics/engine/results.py +18 -8
  14. ultralytics/engine/trainer.py +19 -31
  15. ultralytics/engine/tuner.py +20 -20
  16. ultralytics/engine/validator.py +3 -4
  17. ultralytics/hub/__init__.py +2 -2
  18. ultralytics/hub/auth.py +18 -3
  19. ultralytics/hub/session.py +1 -0
  20. ultralytics/hub/utils.py +1 -3
  21. ultralytics/models/fastsam/model.py +2 -1
  22. ultralytics/models/fastsam/predict.py +10 -7
  23. ultralytics/models/fastsam/prompt.py +15 -1
  24. ultralytics/models/nas/model.py +3 -1
  25. ultralytics/models/rtdetr/model.py +4 -6
  26. ultralytics/models/rtdetr/predict.py +2 -1
  27. ultralytics/models/rtdetr/train.py +2 -1
  28. ultralytics/models/rtdetr/val.py +1 -0
  29. ultralytics/models/sam/amg.py +12 -6
  30. ultralytics/models/sam/model.py +5 -6
  31. ultralytics/models/sam/modules/decoders.py +5 -1
  32. ultralytics/models/sam/modules/encoders.py +15 -12
  33. ultralytics/models/sam/modules/tiny_encoder.py +38 -2
  34. ultralytics/models/sam/modules/transformer.py +2 -4
  35. ultralytics/models/sam/predict.py +8 -4
  36. ultralytics/models/utils/loss.py +35 -8
  37. ultralytics/models/utils/ops.py +14 -18
  38. ultralytics/models/yolo/classify/predict.py +1 -0
  39. ultralytics/models/yolo/classify/train.py +4 -2
  40. ultralytics/models/yolo/classify/val.py +1 -0
  41. ultralytics/models/yolo/detect/train.py +4 -3
  42. ultralytics/models/yolo/model.py +2 -4
  43. ultralytics/models/yolo/pose/predict.py +1 -0
  44. ultralytics/models/yolo/segment/predict.py +2 -0
  45. ultralytics/models/yolo/segment/val.py +1 -1
  46. ultralytics/nn/autobackend.py +54 -43
  47. ultralytics/nn/modules/__init__.py +13 -9
  48. ultralytics/nn/modules/block.py +11 -5
  49. ultralytics/nn/modules/conv.py +16 -7
  50. ultralytics/nn/modules/head.py +6 -3
  51. ultralytics/nn/modules/transformer.py +47 -15
  52. ultralytics/nn/modules/utils.py +6 -4
  53. ultralytics/nn/tasks.py +61 -21
  54. ultralytics/trackers/bot_sort.py +53 -6
  55. ultralytics/trackers/byte_tracker.py +71 -15
  56. ultralytics/trackers/track.py +0 -1
  57. ultralytics/trackers/utils/gmc.py +23 -0
  58. ultralytics/trackers/utils/kalman_filter.py +6 -6
  59. ultralytics/utils/__init__.py +32 -19
  60. ultralytics/utils/autobatch.py +1 -3
  61. ultralytics/utils/benchmarks.py +14 -1
  62. ultralytics/utils/callbacks/base.py +1 -3
  63. ultralytics/utils/callbacks/comet.py +11 -3
  64. ultralytics/utils/callbacks/dvc.py +9 -0
  65. ultralytics/utils/callbacks/neptune.py +5 -6
  66. ultralytics/utils/callbacks/wb.py +1 -0
  67. ultralytics/utils/checks.py +13 -9
  68. ultralytics/utils/dist.py +2 -1
  69. ultralytics/utils/downloads.py +7 -3
  70. ultralytics/utils/files.py +3 -3
  71. ultralytics/utils/instance.py +12 -3
  72. ultralytics/utils/loss.py +97 -22
  73. ultralytics/utils/metrics.py +35 -34
  74. ultralytics/utils/ops.py +10 -9
  75. ultralytics/utils/patches.py +9 -7
  76. ultralytics/utils/plotting.py +4 -3
  77. ultralytics/utils/torch_utils.py +8 -6
  78. ultralytics/utils/triton.py +87 -0
  79. {ultralytics-8.0.194.dist-info → ultralytics-8.0.196.dist-info}/METADATA +1 -1
  80. {ultralytics-8.0.194.dist-info → ultralytics-8.0.196.dist-info}/RECORD +84 -83
  81. {ultralytics-8.0.194.dist-info → ultralytics-8.0.196.dist-info}/LICENSE +0 -0
  82. {ultralytics-8.0.194.dist-info → ultralytics-8.0.196.dist-info}/WHEEL +0 -0
  83. {ultralytics-8.0.194.dist-info → ultralytics-8.0.196.dist-info}/entry_points.txt +0 -0
  84. {ultralytics-8.0.194.dist-info → ultralytics-8.0.196.dist-info}/top_level.txt +0 -0
@@ -47,6 +47,7 @@ class DETRLoss(nn.Module):
         self.device = None

     def _get_loss_class(self, pred_scores, targets, gt_scores, num_gts, postfix=''):
+        """Computes the classification loss based on predictions, target values, and ground truth scores."""
         # logits: [b, query, num_classes], gt_class: list[[n, 1]]
         name_class = f'loss_class{postfix}'
         bs, nq = pred_scores.shape[:2]
@@ -68,6 +69,9 @@ class DETRLoss(nn.Module):
         return {name_class: loss_cls.squeeze() * self.loss_gain['class']}

     def _get_loss_bbox(self, pred_bboxes, gt_bboxes, postfix=''):
+        """Calculates and returns the bounding box loss and GIoU loss for the predicted and ground truth bounding
+        boxes.
+        """
         # boxes: [b, query, 4], gt_bbox: list[[n, 4]]
         name_bbox = f'loss_bbox{postfix}'
         name_giou = f'loss_giou{postfix}'
@@ -125,7 +129,7 @@ class DETRLoss(nn.Module):
                       postfix='',
                       masks=None,
                       gt_mask=None):
-        """Get auxiliary losses"""
+        """Get auxiliary losses."""
         # NOTE: loss class, bbox, giou, mask, dice
         loss = torch.zeros(5 if masks is not None else 3, device=pred_bboxes.device)
         if match_indices is None and self.use_uni_match:
@@ -166,12 +170,14 @@ class DETRLoss(nn.Module):

     @staticmethod
     def _get_index(match_indices):
+        """Returns batch indices, source indices, and destination indices from provided match indices."""
         batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(match_indices)])
         src_idx = torch.cat([src for (src, _) in match_indices])
         dst_idx = torch.cat([dst for (_, dst) in match_indices])
         return (batch_idx, src_idx), dst_idx

     def _get_assigned_bboxes(self, pred_bboxes, gt_bboxes, match_indices):
+        """Assigns predicted bounding boxes to ground truth bounding boxes based on the match indices."""
         pred_assigned = torch.cat([
             t[I] if len(I) > 0 else torch.zeros(0, t.shape[-1], device=self.device)
             for t, (I, _) in zip(pred_bboxes, match_indices)])
@@ -190,7 +196,7 @@ class DETRLoss(nn.Module):
                  gt_mask=None,
                  postfix='',
                  match_indices=None):
-        """Get losses"""
+        """Get losses."""
         if match_indices is None:
             match_indices = self.matcher(pred_bboxes,
                                          pred_scores,
@@ -250,22 +256,43 @@ class DETRLoss(nn.Module):


 class RTDETRDetectionLoss(DETRLoss):
+    """
+    Real-Time DeepTracker (RT-DETR) Detection Loss class that extends the DETRLoss.
+
+    This class computes the detection loss for the RT-DETR model, which includes the standard detection loss as well as
+    an additional denoising training loss when provided with denoising metadata.
+    """

     def forward(self, preds, batch, dn_bboxes=None, dn_scores=None, dn_meta=None):
+        """
+        Forward pass to compute the detection loss.
+
+        Args:
+            preds (tuple): Predicted bounding boxes and scores.
+            batch (dict): Batch data containing ground truth information.
+            dn_bboxes (torch.Tensor, optional): Denoising bounding boxes. Default is None.
+            dn_scores (torch.Tensor, optional): Denoising scores. Default is None.
+            dn_meta (dict, optional): Metadata for denoising. Default is None.
+
+        Returns:
+            (dict): Dictionary containing the total loss and, if applicable, the denoising loss.
+        """
         pred_bboxes, pred_scores = preds
         total_loss = super().forward(pred_bboxes, pred_scores, batch)

+        # Check for denoising metadata to compute denoising training loss
         if dn_meta is not None:
             dn_pos_idx, dn_num_group = dn_meta['dn_pos_idx'], dn_meta['dn_num_group']
             assert len(batch['gt_groups']) == len(dn_pos_idx)

-            # Denoising match indices
+            # Get the match indices for denoising
             match_indices = self.get_dn_match_indices(dn_pos_idx, dn_num_group, batch['gt_groups'])

-            # Compute denoising training loss
+            # Compute the denoising training loss
             dn_loss = super().forward(dn_bboxes, dn_scores, batch, postfix='_dn', match_indices=match_indices)
             total_loss.update(dn_loss)
         else:
+            # If no denoising metadata is provided, set denoising loss to zero
             total_loss.update({f'{k}_dn': torch.tensor(0., device=self.device) for k in total_loss.keys()})

         return total_loss
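When `dn_meta` is absent, the `else` branch pads the loss dict with zeroed `_dn` entries so the returned keys are stable either way. A minimal sketch of that padding behavior, using a toy loss dict with hypothetical values:

```python
import torch

# Toy stand-in for the dict returned by DETRLoss.forward() (values are hypothetical)
total_loss = {'loss_class': torch.tensor(0.7), 'loss_bbox': torch.tensor(0.3), 'loss_giou': torch.tensor(0.2)}

# Mirror every key with a zeroed '_dn' counterpart, as the else branch above does
total_loss.update({f'{k}_dn': torch.tensor(0.) for k in total_loss.keys()})

print(sorted(total_loss))  # ['loss_bbox', 'loss_bbox_dn', 'loss_class', 'loss_class_dn', ...]
```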
@@ -276,12 +303,12 @@ class RTDETRDetectionLoss(DETRLoss):
         Get the match indices for denoising.

         Args:
-            dn_pos_idx (List[torch.Tensor]): A list includes positive indices of denoising.
-            dn_num_group (int): The number of groups of denoising.
-            gt_groups (List(int)): a list of batch size length includes the number of gts of each image.
+            dn_pos_idx (List[torch.Tensor]): List of tensors containing positive indices for denoising.
+            dn_num_group (int): Number of denoising groups.
+            gt_groups (List[int]): List of integers representing the number of ground truths for each image.

         Returns:
-            dn_match_indices (List(tuple)): Matched indices.
+            (List[tuple]): List of tuples containing matched indices for denoising.
         """
         dn_match_indices = []
         idx_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0)
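For intuition, a toy sketch of the documented inputs and the cumulative-offset line visible above (all values are illustrative; only `idx_groups` is computed exactly as in the source):

```python
import torch

gt_groups = [2, 1]       # image 0 has 2 ground truths, image 1 has 1
dn_num_group = 3         # number of denoising groups
# One tensor of positive denoising indices per image (shapes are illustrative)
dn_pos_idx = [torch.arange(2 * dn_num_group), torch.arange(1 * dn_num_group)]

# Offset of each image's ground truths in the flattened GT tensor, as computed above
idx_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0)
print(idx_groups)        # tensor([0, 2]) -> image 1's ground truths start at flat index 2
```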
@@ -11,8 +11,8 @@ from ultralytics.utils.ops import xywh2xyxy, xyxy2xywh

 class HungarianMatcher(nn.Module):
     """
-    A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in
-    an end-to-end fashion.
+    A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in an
+    end-to-end fashion.

     HungarianMatcher performs optimal assignment over the predicted and ground truth bounding boxes using a cost
     function that considers classification scores, bounding box coordinates, and optionally, mask predictions.
@@ -32,6 +32,9 @@ class HungarianMatcher(nn.Module):
     """

     def __init__(self, cost_gain=None, use_fl=True, with_mask=False, num_sample_points=12544, alpha=0.25, gamma=2.0):
+        """Initializes HungarianMatcher with cost coefficients, Focal Loss, mask prediction, sample points, and alpha
+        gamma factors.
+        """
         super().__init__()
         if cost_gain is None:
             cost_gain = {'class': 1, 'bbox': 5, 'giou': 2, 'mask': 1, 'dice': 1}
@@ -45,8 +48,8 @@ class HungarianMatcher(nn.Module):
     def forward(self, pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups, masks=None, gt_mask=None):
         """
         Forward pass for HungarianMatcher. This function computes costs based on prediction and ground truth
-        (classification cost, L1 cost between boxes and GIoU cost between boxes) and finds the optimal matching
-        between predictions and ground truth based on these costs.
+        (classification cost, L1 cost between boxes and GIoU cost between boxes) and finds the optimal matching between
+        predictions and ground truth based on these costs.

         Args:
             pred_bboxes (Tensor): Predicted bounding boxes with shape [batch_size, num_queries, 4].
@@ -153,9 +156,9 @@ def get_cdn_group(batch,
                  box_noise_scale=1.0,
                  training=False):
     """
-    Get contrastive denoising training group. This function creates a contrastive denoising training group with
-    positive and negative samples from the ground truths (gt). It applies noise to the class labels and bounding
-    box coordinates, and returns the modified labels, bounding boxes, attention mask and meta information.
+    Get contrastive denoising training group. This function creates a contrastive denoising training group with positive
+    and negative samples from the ground truths (gt). It applies noise to the class labels and bounding box coordinates,
+    and returns the modified labels, bounding boxes, attention mask and meta information.

     Args:
         batch (dict): A dict that includes 'gt_cls' (torch.Tensor with shape [num_gts, ]), 'gt_bboxes'
@@ -191,12 +194,12 @@ def get_cdn_group(batch,
     gt_bbox = batch['bboxes']  # bs*num, 4
     b_idx = batch['batch_idx']

-    # each group has positive and negative queries.
+    # Each group has positive and negative queries.
     dn_cls = gt_cls.repeat(2 * num_group)  # (2*num_group*bs*num, )
     dn_bbox = gt_bbox.repeat(2 * num_group, 1)  # 2*num_group*bs*num, 4
     dn_b_idx = b_idx.repeat(2 * num_group).view(-1)  # (2*num_group*bs*num, )

-    # positive and negative mask
+    # Positive and negative mask
     # (bs*num*num_group, ), the second total_num*num_group part as negative samples
     neg_idx = torch.arange(total_num * num_group, dtype=torch.long, device=gt_bbox.device) + num_group * total_num

@@ -220,10 +223,9 @@ def get_cdn_group(batch,
         known_bbox += rand_part * diff
         known_bbox.clip_(min=0.0, max=1.0)
         dn_bbox = xyxy2xywh(known_bbox)
-        dn_bbox = inverse_sigmoid(dn_bbox)
+        dn_bbox = torch.logit(dn_bbox, eps=1e-6)  # inverse sigmoid

-    # total denoising queries
-    num_dn = int(max_nums * 2 * num_group)
+    num_dn = int(max_nums * 2 * num_group)  # total denoising queries
     # class_embed = torch.cat([class_embed, torch.zeros([1, class_embed.shape[-1]], device=class_embed.device)])
     dn_cls_embed = class_embed[dn_cls]  # bs*num * 2 * num_group, 256
     padding_cls = torch.zeros(bs, num_dn, dn_cls_embed.shape[-1], device=gt_cls.device)
@@ -256,9 +258,3 @@ def get_cdn_group(batch,

     return padding_cls.to(class_embed.device), padding_bbox.to(class_embed.device), attn_mask.to(
         class_embed.device), dn_meta
-
-
-def inverse_sigmoid(x, eps=1e-6):
-    """Inverse sigmoid function."""
-    x = x.clip(min=0., max=1.)
-    return torch.log(x / (1 - x + eps) + eps)
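The removed module-level `inverse_sigmoid` helper is superseded by `torch.logit`, which implements the same transform natively. A quick comparison against the old definition (reproduced verbatim from the removed lines):

```python
import torch

def inverse_sigmoid(x, eps=1e-6):
    """Inverse sigmoid function (the helper removed above)."""
    x = x.clip(min=0., max=1.)
    return torch.log(x / (1 - x + eps) + eps)

x = torch.rand(4, 4) * 0.9 + 0.05  # values safely inside (0, 1)
# The two only diverge near the 0/1 boundaries, where their eps handling differs
print((inverse_sigmoid(x) - torch.logit(x, eps=1e-6)).abs().max())  # tiny
```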
@@ -26,6 +26,7 @@ class ClassificationPredictor(BasePredictor):
     """

     def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+        """Initializes ClassificationPredictor setting the task to 'classify'."""
         super().__init__(cfg, overrides, _callbacks)
         self.args.task = 'classify'

@@ -79,6 +79,7 @@ class ClassificationTrainer(BaseTrainer):
         return ckpt

     def build_dataset(self, img_path, mode='train', batch=None):
+        """Creates a ClassificationDataset instance given an image path, and mode (train/test etc.)."""
         return ClassificationDataset(root=img_path, args=self.args, augment=mode == 'train', prefix=mode)

     def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'):
@@ -113,8 +114,9 @@ class ClassificationTrainer(BaseTrainer):

     def label_loss_items(self, loss_items=None, prefix='train'):
         """
-        Returns a loss dict with labelled training loss items tensor. Not needed for classification but necessary for
-        segmentation & detection
+        Returns a loss dict with labelled training loss items tensor.
+
+        Not needed for classification but necessary for segmentation & detection
         """
         keys = [f'{prefix}/{x}' for x in self.loss_names]
         if loss_items is None:
@@ -78,6 +78,7 @@ class ClassificationValidator(BaseValidator):
         return self.metrics.results_dict

     def build_dataset(self, img_path):
+        """Creates and returns a ClassificationDataset instance using given image path and preprocessing parameters."""
         return ClassificationDataset(root=img_path, args=self.args, augment=False, prefix=self.args.split)

     def get_dataloader(self, dataset_path, batch_size):
@@ -57,7 +57,7 @@ class DetectionTrainer(BaseTrainer):
         return batch

     def set_model_attributes(self):
-        """nl = de_parallel(self.model).model[-1].nl  # number of detection layers (to scale hyps)."""
+        """Nl = de_parallel(self.model).model[-1].nl  # number of detection layers (to scale hyps)."""
         # self.args.box *= 3 / nl  # scale to layers
         # self.args.cls *= self.data["nc"] / 80 * 3 / nl  # scale to classes and layers
         # self.args.cls *= (self.args.imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers
@@ -80,8 +80,9 @@ class DetectionTrainer(BaseTrainer):

     def label_loss_items(self, loss_items=None, prefix='train'):
         """
-        Returns a loss dict with labelled training loss items tensor. Not needed for classification but necessary for
-        segmentation & detection
+        Returns a loss dict with labelled training loss items tensor.
+
+        Not needed for classification but necessary for segmentation & detection
         """
         keys = [f'{prefix}/{x}' for x in self.loss_names]
         if loss_items is not None:
@@ -6,13 +6,11 @@ from ultralytics.nn.tasks import ClassificationModel, DetectionModel, PoseModel,


 class YOLO(Model):
-    """
-    YOLO (You Only Look Once) object detection model.
-    """
+    """YOLO (You Only Look Once) object detection model."""

     @property
     def task_map(self):
-        """Map head to model, trainer, validator, and predictor classes"""
+        """Map head to model, trainer, validator, and predictor classes."""
         return {
             'classify': {
                 'model': ClassificationModel,
@@ -21,6 +21,7 @@ class PosePredictor(DetectionPredictor):
     """

     def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+        """Initializes PosePredictor, sets task to 'pose' and logs a warning for using 'mps' as device."""
         super().__init__(cfg, overrides, _callbacks)
         self.args.task = 'pose'
         if isinstance(self.args.device, str) and self.args.device.lower() == 'mps':
@@ -21,10 +21,12 @@ class SegmentationPredictor(DetectionPredictor):
     """

     def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+        """Initializes the SegmentationPredictor with the provided configuration, overrides, and callbacks."""
         super().__init__(cfg, overrides, _callbacks)
         self.args.task = 'segment'

     def postprocess(self, preds, img, orig_imgs):
+        """Applies non-max suppression and processes detections for each image in an input batch."""
         p = ops.non_max_suppression(preds[0],
                                     self.args.conf,
                                     self.args.iou,
@@ -144,7 +144,7 @@ class SegmentationValidator(DetectionValidator):

     def _process_batch(self, detections, labels, pred_masks=None, gt_masks=None, overlap=False, masks=False):
         """
-        Return correct prediction matrix
+        Return correct prediction matrix.

         Args:
             detections (array[N, 6]), x1, y1, x2, y2, conf, class
@@ -7,7 +7,6 @@ import platform
 import zipfile
 from collections import OrderedDict, namedtuple
 from pathlib import Path
-from urllib.parse import urlparse

 import cv2
 import numpy as np
@@ -21,7 +20,11 @@ from ultralytics.utils.downloads import attempt_download_asset, is_url


 def check_class_names(names):
-    """Check class names. Map imagenet class codes to human-readable names if required. Convert lists to dicts."""
+    """
+    Check class names.
+
+    Map imagenet class codes to human-readable names if required. Convert lists to dicts.
+    """
     if isinstance(names, list):  # names is a list
         names = dict(enumerate(names))  # convert to dict
     if isinstance(names, dict):
@@ -32,42 +35,26 @@ def check_class_names(names):
             raise KeyError(f'{n}-class dataset requires class indices 0-{n - 1}, but you have invalid class indices '
                            f'{min(names.keys())}-{max(names.keys())} defined in your dataset YAML.')
         if isinstance(names[0], str) and names[0].startswith('n0'):  # imagenet class codes, i.e. 'n01440764'
-            map = yaml_load(ROOT / 'cfg/datasets/ImageNet.yaml')['map']  # human-readable names
-            names = {k: map[v] for k, v in names.items()}
+            names_map = yaml_load(ROOT / 'cfg/datasets/ImageNet.yaml')['map']  # human-readable names
+            names = {k: names_map[v] for k, v in names.items()}
     return names


 class AutoBackend(nn.Module):
+    """
+    Handles dynamic backend selection for running inference using Ultralytics YOLO models.

-    @torch.no_grad()
-    def __init__(self,
-                 weights='yolov8n.pt',
-                 device=torch.device('cpu'),
-                 dnn=False,
-                 data=None,
-                 fp16=False,
-                 fuse=True,
-                 verbose=True):
-        """
-        MultiBackend class for python inference on various platforms using Ultralytics YOLO.
+    The AutoBackend class is designed to provide an abstraction layer for various inference engines. It supports a wide
+    range of formats, each with specific naming conventions as outlined below:

-        Args:
-            weights (str): The path to the weights file. Default: 'yolov8n.pt'
-            device (torch.device): The device to run the model on.
-            dnn (bool): Use OpenCV DNN module for inference if True, defaults to False.
-            data (str | Path | optional): Additional data.yaml file for class names.
-            fp16 (bool): If True, use half precision. Default: False
-            fuse (bool): Whether to fuse the model or not. Default: True
-            verbose (bool): Whether to run in verbose mode or not. Default: True
-
-        Supported formats and their naming conventions:
-            | Format                | Suffix           |
+        Supported Formats and Naming Conventions:
+            | Format                | File Suffix      |
             |-----------------------|------------------|
             | PyTorch               | *.pt             |
             | TorchScript           | *.torchscript    |
             | ONNX Runtime          | *.onnx           |
-            | ONNX OpenCV DNN       | *.onnx dnn=True  |
-            | OpenVINO              | *.xml            |
+            | ONNX OpenCV DNN       | *.onnx (dnn=True)|
+            | OpenVINO              | *openvino_model/ |
             | CoreML                | *.mlpackage      |
             | TensorRT              | *.engine         |
             | TensorFlow SavedModel | *_saved_model    |
@@ -76,6 +63,31 @@ class AutoBackend(nn.Module):
             | TensorFlow Edge TPU   | *_edgetpu.tflite |
             | PaddlePaddle          | *_paddle_model   |
             | ncnn                  | *_ncnn_model     |
+
+    This class offers dynamic backend switching capabilities based on the input model format, making it easier to deploy
+    models across various platforms.
+    """
+
+    @torch.no_grad()
+    def __init__(self,
+                 weights='yolov8n.pt',
+                 device=torch.device('cpu'),
+                 dnn=False,
+                 data=None,
+                 fp16=False,
+                 fuse=True,
+                 verbose=True):
+        """
+        Initialize the AutoBackend for inference.
+
+        Args:
+            weights (str): Path to the model weights file. Defaults to 'yolov8n.pt'.
+            device (torch.device): Device to run the model on. Defaults to CPU.
+            dnn (bool): Use OpenCV DNN module for ONNX inference. Defaults to False.
+            data (str | Path | optional): Path to the additional data.yaml file containing class names. Optional.
+            fp16 (bool): Enable half-precision inference. Supported only on specific backends. Defaults to False.
+            fuse (bool): Fuse Conv2D + BatchNorm layers for optimization. Defaults to True.
+            verbose (bool): Enable verbose logging. Defaults to True.
         """
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
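As the relocated docstring now describes, one constructor covers every backend, with the format inferred from the weights suffix. A minimal usage sketch (CPU, default weights; any suffix from the table above selects the matching engine):

```python
import torch

from ultralytics.nn.autobackend import AutoBackend

model = AutoBackend(weights='yolov8n.pt', device=torch.device('cpu'), fp16=False, fuse=True)
model.warmup(imgsz=(1, 3, 640, 640))  # single dummy forward pass (method defined later in this file)

im = torch.zeros(1, 3, 640, 640)      # BCHW float tensor
y = model(im)                         # dispatch goes to the backend chosen in __init__
```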
@@ -274,13 +286,9 @@ class AutoBackend(nn.Module):
             net.load_model(str(w.with_suffix('.bin')))
             metadata = w.parent / 'metadata.yaml'
         elif triton:  # NVIDIA Triton Inference Server
-            """TODO
             check_requirements('tritonclient[all]')
-            from utils.triton import TritonRemoteModel
-            model = TritonRemoteModel(url=w)
-            nhwc = model.runtime.startswith("tensorflow")
-            """
-            raise NotImplementedError('Triton Inference Server is not currently supported.')
+            from ultralytics.utils.triton import TritonRemoteModel
+            model = TritonRemoteModel(w)
         else:
             from ultralytics.engine.exporter import export_formats
             raise TypeError(f"model='{w}' is not a supported model format. "
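This replaces the old `NotImplementedError` stub: Triton becomes a working backend via the new `ultralytics/utils/triton.py` module (+87 lines in this release). A hedged sketch of what the branch enables; the URL and model name are placeholders and assume a running Triton server with `tritonclient` installed:

```python
from ultralytics.utils.triton import TritonRemoteModel

# Hypothetical endpoint: a Triton server on localhost serving a model named 'yolov8'
model = TritonRemoteModel('http://localhost:8000/yolov8')

# AutoBackend constructs the same object from the weights string (see the branch above),
# so passing a Triton URL as the model path routes inference through this client.
```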
@@ -395,6 +403,7 @@ class AutoBackend(nn.Module):
             ex.extract(output_name, mat_out)
             y.append(np.array(mat_out)[None])
         elif self.triton:  # NVIDIA Triton Inference Server
+            im = im.cpu().numpy()  # torch to numpy
             y = self.model(im)
         else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
             im = im.cpu().numpy()
@@ -444,14 +453,14 @@ class AutoBackend(nn.Module):

     def from_numpy(self, x):
         """
-         Convert a numpy array to a tensor.
+        Convert a numpy array to a tensor.

-         Args:
-             x (np.ndarray): The array to be converted.
+        Args:
+            x (np.ndarray): The array to be converted.

-         Returns:
-             (torch.Tensor): The converted tensor
-         """
+        Returns:
+            (torch.Tensor): The converted tensor
+        """
         return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x

     def warmup(self, imgsz=(1, 3, 640, 640)):
@@ -480,7 +489,7 @@ class AutoBackend(nn.Module):
     @staticmethod
     def _model_type(p='path/to/model.pt'):
         """
-        This function takes a path to a model file and returns the model type
+        This function takes a path to a model file and returns the model type.

         Args:
             p: path to the model file. Defaults to path/to/model.pt
@@ -498,6 +507,8 @@ class AutoBackend(nn.Module):
         if any(types):
             triton = False
         else:
-            url = urlparse(p)  # if url may be Triton inference server
-            triton = all([any(s in url.scheme for s in ['http', 'grpc']), url.netloc])
+            from urllib.parse import urlsplit
+            url = urlsplit(p)
+            triton = url.netloc and url.path and url.scheme in {'http', 'grfc'}
+
         return types + [triton]
1
1
  # Ultralytics YOLO 🚀, AGPL-3.0 license
2
2
  """
3
- Ultralytics modules. Visualize with:
3
+ Ultralytics modules.
4
4
 
5
- from ultralytics.nn.modules import *
6
- import torch
7
- import os
5
+ Example:
6
+ Visualize a module with Netron.
7
+ ```python
8
+ from ultralytics.nn.modules import *
9
+ import torch
10
+ import os
8
11
 
9
- x = torch.ones(1, 128, 40, 40)
10
- m = Conv(128, 128)
11
- f = f'{m._get_name()}.onnx'
12
- torch.onnx.export(m, x, f)
13
- os.system(f'onnxsim {f} {f} && open {f}')
12
+ x = torch.ones(1, 128, 40, 40)
13
+ m = Conv(128, 128)
14
+ f = f'{m._get_name()}.onnx'
15
+ torch.onnx.export(m, x, f)
16
+ os.system(f'onnxsim {f} {f} && open {f}')
17
+ ```
14
18
  """
15
19
 
16
20
  from .block import (C1, C2, C3, C3TR, DFL, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, GhostBottleneck,
@@ -1,7 +1,5 @@
1
1
  # Ultralytics YOLO 🚀, AGPL-3.0 license
2
- """
3
- Block modules
4
- """
2
+ """Block modules."""
5
3
 
6
4
  import torch
7
5
  import torch.nn as nn
@@ -17,6 +15,7 @@ __all__ = ('DFL', 'HGBlock', 'HGStem', 'SPP', 'SPPF', 'C1', 'C2', 'C3', 'C2f', '
17
15
  class DFL(nn.Module):
18
16
  """
19
17
  Integral module of Distribution Focal Loss (DFL).
18
+
20
19
  Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
21
20
  """
22
21
 
@@ -51,11 +50,14 @@ class Proto(nn.Module):
51
50
 
52
51
 
53
52
  class HGStem(nn.Module):
54
- """StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
53
+ """
54
+ StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
55
+
55
56
  https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
56
57
  """
57
58
 
58
59
  def __init__(self, c1, cm, c2):
60
+ """Initialize the SPP layer with input/output channels and specified kernel sizes for max pooling."""
59
61
  super().__init__()
60
62
  self.stem1 = Conv(c1, cm, 3, 2, act=nn.ReLU())
61
63
  self.stem2a = Conv(cm, cm // 2, 2, 1, 0, act=nn.ReLU())
@@ -79,11 +81,14 @@ class HGStem(nn.Module):
79
81
 
80
82
 
81
83
  class HGBlock(nn.Module):
82
- """HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
84
+ """
85
+ HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
86
+
83
87
  https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
84
88
  """
85
89
 
86
90
  def __init__(self, c1, cm, c2, k=3, n=6, lightconv=False, shortcut=False, act=nn.ReLU()):
91
+ """Initializes a CSP Bottleneck with 1 convolution using specified input and output channels."""
87
92
  super().__init__()
88
93
  block = LightConv if lightconv else Conv
89
94
  self.m = nn.ModuleList(block(c1 if i == 0 else cm, cm, k=k, act=act) for i in range(n))
@@ -218,6 +223,7 @@ class RepC3(nn.Module):
218
223
  """Rep C3."""
219
224
 
220
225
  def __init__(self, c1, c2, n=3, e=1.0):
226
+ """Initialize CSP Bottleneck with a single convolution using input channels, output channels, and number."""
221
227
  super().__init__()
222
228
  c_ = int(c2 * e) # hidden channels
223
229
  self.cv1 = Conv(c1, c2, 1, 1)
@@ -1,7 +1,5 @@
1
1
  # Ultralytics YOLO 🚀, AGPL-3.0 license
2
- """
3
- Convolution modules
4
- """
2
+ """Convolution modules."""
5
3
 
6
4
  import math
7
5
 
@@ -69,7 +67,9 @@ class Conv2(Conv):
69
67
 
70
68
 
71
69
  class LightConv(nn.Module):
72
- """Light convolution with args(ch_in, ch_out, kernel).
70
+ """
71
+ Light convolution with args(ch_in, ch_out, kernel).
72
+
73
73
  https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
74
74
  """
75
75
 
@@ -148,12 +148,15 @@ class GhostConv(nn.Module):
148
148
 
149
149
  class RepConv(nn.Module):
150
150
  """
151
- RepConv is a basic rep-style block, including training and deploy status. This module is used in RT-DETR.
151
+ RepConv is a basic rep-style block, including training and deploy status.
152
+
153
+ This module is used in RT-DETR.
152
154
  Based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
153
155
  """
154
156
  default_act = nn.SiLU() # default activation
155
157
 
156
158
  def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
159
+ """Initializes Light Convolution layer with inputs, outputs & optional activation function."""
157
160
  super().__init__()
158
161
  assert k == 3 and p == 1
159
162
  self.g = g
@@ -166,27 +169,30 @@ class RepConv(nn.Module):
166
169
  self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False)
167
170
 
168
171
  def forward_fuse(self, x):
169
- """Forward process"""
172
+ """Forward process."""
170
173
  return self.act(self.conv(x))
171
174
 
172
175
  def forward(self, x):
173
- """Forward process"""
176
+ """Forward process."""
174
177
  id_out = 0 if self.bn is None else self.bn(x)
175
178
  return self.act(self.conv1(x) + self.conv2(x) + id_out)
176
179
 
177
180
  def get_equivalent_kernel_bias(self):
181
+ """Returns equivalent kernel and bias by adding 3x3 kernel, 1x1 kernel and identity kernel with their biases."""
178
182
  kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
179
183
  kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
180
184
  kernelid, biasid = self._fuse_bn_tensor(self.bn)
181
185
  return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
182
186
 
183
187
  def _pad_1x1_to_3x3_tensor(self, kernel1x1):
188
+ """Pads a 1x1 tensor to a 3x3 tensor."""
184
189
  if kernel1x1 is None:
185
190
  return 0
186
191
  else:
187
192
  return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])
188
193
 
189
194
  def _fuse_bn_tensor(self, branch):
195
+ """Generates appropriate kernels and biases for convolution by fusing branches of the neural network."""
190
196
  if branch is None:
191
197
  return 0, 0
192
198
  if isinstance(branch, Conv):
@@ -214,6 +220,7 @@ class RepConv(nn.Module):
214
220
  return kernel * t, beta - running_mean * gamma / std
215
221
 
216
222
  def fuse_convs(self):
223
+ """Combines two convolution layers into a single layer and removes unused attributes from the class."""
217
224
  if hasattr(self, 'conv'):
218
225
  return
219
226
  kernel, bias = self.get_equivalent_kernel_bias()
@@ -243,12 +250,14 @@ class ChannelAttention(nn.Module):
243
250
  """Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet."""
244
251
 
245
252
  def __init__(self, channels: int) -> None:
253
+ """Initializes the class and sets the basic configurations and instance variables required."""
246
254
  super().__init__()
247
255
  self.pool = nn.AdaptiveAvgPool2d(1)
248
256
  self.fc = nn.Conv2d(channels, channels, 1, 1, 0, bias=True)
249
257
  self.act = nn.Sigmoid()
250
258
 
251
259
  def forward(self, x: torch.Tensor) -> torch.Tensor:
260
+ """Applies forward pass using activation on convolutions of the input, optionally using batch normalization."""
252
261
  return x * self.act(self.fc(self.pool(x)))
253
262
 
254
263
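Taken together, the newly documented RepConv methods spell out the re-parameterization workflow: train with the multi-branch `forward`, then call `fuse_convs()` to collapse the branches into one convolution for deployment. A small equivalence sketch under default settings (eval mode, `bn=False`):

```python
import torch

from ultralytics.nn.modules.conv import RepConv

m = RepConv(64, 64, k=3, s=1).eval()    # multi-branch rep-style block
x = torch.randn(1, 64, 32, 32)

y_multi = m.forward(x)                  # conv1 (3x3) + conv2 (1x1), identity branch disabled
m.fuse_convs()                          # folds branches via get_equivalent_kernel_bias()
y_fused = m.forward_fuse(x)             # single fused convolution

print((y_multi - y_fused).abs().max())  # expected ~0: the fused conv reproduces the sum
```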