ultralytics 8.0.195__py3-none-any.whl → 8.0.196__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ultralytics might be problematic. Click here for more details.
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +5 -6
- ultralytics/data/augment.py +234 -29
- ultralytics/data/base.py +2 -1
- ultralytics/data/build.py +9 -3
- ultralytics/data/converter.py +5 -2
- ultralytics/data/dataset.py +16 -2
- ultralytics/data/loaders.py +111 -7
- ultralytics/data/utils.py +3 -3
- ultralytics/engine/exporter.py +1 -3
- ultralytics/engine/model.py +3 -9
- ultralytics/engine/predictor.py +10 -6
- ultralytics/engine/results.py +18 -8
- ultralytics/engine/trainer.py +19 -31
- ultralytics/engine/tuner.py +20 -20
- ultralytics/engine/validator.py +3 -4
- ultralytics/hub/__init__.py +2 -2
- ultralytics/hub/auth.py +18 -3
- ultralytics/hub/session.py +1 -0
- ultralytics/hub/utils.py +1 -3
- ultralytics/models/fastsam/model.py +2 -1
- ultralytics/models/fastsam/predict.py +2 -0
- ultralytics/models/fastsam/prompt.py +15 -1
- ultralytics/models/nas/model.py +3 -1
- ultralytics/models/rtdetr/model.py +4 -6
- ultralytics/models/rtdetr/predict.py +2 -1
- ultralytics/models/rtdetr/train.py +2 -1
- ultralytics/models/rtdetr/val.py +1 -0
- ultralytics/models/sam/amg.py +12 -6
- ultralytics/models/sam/model.py +5 -6
- ultralytics/models/sam/modules/decoders.py +5 -1
- ultralytics/models/sam/modules/encoders.py +15 -12
- ultralytics/models/sam/modules/tiny_encoder.py +38 -2
- ultralytics/models/sam/modules/transformer.py +2 -4
- ultralytics/models/sam/predict.py +8 -4
- ultralytics/models/utils/loss.py +35 -8
- ultralytics/models/utils/ops.py +14 -18
- ultralytics/models/yolo/classify/predict.py +1 -0
- ultralytics/models/yolo/classify/train.py +4 -2
- ultralytics/models/yolo/classify/val.py +1 -0
- ultralytics/models/yolo/detect/train.py +4 -3
- ultralytics/models/yolo/model.py +2 -4
- ultralytics/models/yolo/pose/predict.py +1 -0
- ultralytics/models/yolo/segment/predict.py +2 -0
- ultralytics/models/yolo/segment/val.py +1 -1
- ultralytics/nn/autobackend.py +45 -32
- ultralytics/nn/modules/__init__.py +13 -9
- ultralytics/nn/modules/block.py +11 -5
- ultralytics/nn/modules/conv.py +16 -7
- ultralytics/nn/modules/head.py +6 -3
- ultralytics/nn/modules/transformer.py +47 -15
- ultralytics/nn/modules/utils.py +6 -4
- ultralytics/nn/tasks.py +61 -21
- ultralytics/trackers/bot_sort.py +53 -6
- ultralytics/trackers/byte_tracker.py +71 -15
- ultralytics/trackers/track.py +0 -1
- ultralytics/trackers/utils/gmc.py +23 -0
- ultralytics/trackers/utils/kalman_filter.py +6 -6
- ultralytics/utils/__init__.py +31 -18
- ultralytics/utils/autobatch.py +1 -3
- ultralytics/utils/benchmarks.py +14 -1
- ultralytics/utils/callbacks/base.py +1 -3
- ultralytics/utils/callbacks/comet.py +11 -3
- ultralytics/utils/callbacks/dvc.py +9 -0
- ultralytics/utils/callbacks/neptune.py +5 -6
- ultralytics/utils/callbacks/wb.py +1 -0
- ultralytics/utils/checks.py +13 -9
- ultralytics/utils/dist.py +2 -1
- ultralytics/utils/downloads.py +7 -3
- ultralytics/utils/files.py +3 -3
- ultralytics/utils/instance.py +12 -3
- ultralytics/utils/loss.py +97 -22
- ultralytics/utils/metrics.py +34 -34
- ultralytics/utils/ops.py +10 -9
- ultralytics/utils/patches.py +9 -7
- ultralytics/utils/plotting.py +4 -3
- ultralytics/utils/torch_utils.py +8 -6
- ultralytics/utils/triton.py +2 -1
- {ultralytics-8.0.195.dist-info → ultralytics-8.0.196.dist-info}/METADATA +1 -1
- {ultralytics-8.0.195.dist-info → ultralytics-8.0.196.dist-info}/RECORD +84 -84
- {ultralytics-8.0.195.dist-info → ultralytics-8.0.196.dist-info}/LICENSE +0 -0
- {ultralytics-8.0.195.dist-info → ultralytics-8.0.196.dist-info}/WHEEL +0 -0
- {ultralytics-8.0.195.dist-info → ultralytics-8.0.196.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.0.195.dist-info → ultralytics-8.0.196.dist-info}/top_level.txt +0 -0
ultralytics/models/utils/loss.py
CHANGED
|
@@ -47,6 +47,7 @@ class DETRLoss(nn.Module):
|
|
|
47
47
|
self.device = None
|
|
48
48
|
|
|
49
49
|
def _get_loss_class(self, pred_scores, targets, gt_scores, num_gts, postfix=''):
|
|
50
|
+
"""Computes the classification loss based on predictions, target values, and ground truth scores."""
|
|
50
51
|
# logits: [b, query, num_classes], gt_class: list[[n, 1]]
|
|
51
52
|
name_class = f'loss_class{postfix}'
|
|
52
53
|
bs, nq = pred_scores.shape[:2]
|
|
@@ -68,6 +69,9 @@ class DETRLoss(nn.Module):
|
|
|
68
69
|
return {name_class: loss_cls.squeeze() * self.loss_gain['class']}
|
|
69
70
|
|
|
70
71
|
def _get_loss_bbox(self, pred_bboxes, gt_bboxes, postfix=''):
|
|
72
|
+
"""Calculates and returns the bounding box loss and GIoU loss for the predicted and ground truth bounding
|
|
73
|
+
boxes.
|
|
74
|
+
"""
|
|
71
75
|
# boxes: [b, query, 4], gt_bbox: list[[n, 4]]
|
|
72
76
|
name_bbox = f'loss_bbox{postfix}'
|
|
73
77
|
name_giou = f'loss_giou{postfix}'
|
|
@@ -125,7 +129,7 @@ class DETRLoss(nn.Module):
|
|
|
125
129
|
postfix='',
|
|
126
130
|
masks=None,
|
|
127
131
|
gt_mask=None):
|
|
128
|
-
"""Get auxiliary losses"""
|
|
132
|
+
"""Get auxiliary losses."""
|
|
129
133
|
# NOTE: loss class, bbox, giou, mask, dice
|
|
130
134
|
loss = torch.zeros(5 if masks is not None else 3, device=pred_bboxes.device)
|
|
131
135
|
if match_indices is None and self.use_uni_match:
|
|
@@ -166,12 +170,14 @@ class DETRLoss(nn.Module):
|
|
|
166
170
|
|
|
167
171
|
@staticmethod
|
|
168
172
|
def _get_index(match_indices):
|
|
173
|
+
"""Returns batch indices, source indices, and destination indices from provided match indices."""
|
|
169
174
|
batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(match_indices)])
|
|
170
175
|
src_idx = torch.cat([src for (src, _) in match_indices])
|
|
171
176
|
dst_idx = torch.cat([dst for (_, dst) in match_indices])
|
|
172
177
|
return (batch_idx, src_idx), dst_idx
|
|
173
178
|
|
|
174
179
|
def _get_assigned_bboxes(self, pred_bboxes, gt_bboxes, match_indices):
|
|
180
|
+
"""Assigns predicted bounding boxes to ground truth bounding boxes based on the match indices."""
|
|
175
181
|
pred_assigned = torch.cat([
|
|
176
182
|
t[I] if len(I) > 0 else torch.zeros(0, t.shape[-1], device=self.device)
|
|
177
183
|
for t, (I, _) in zip(pred_bboxes, match_indices)])
|
|
@@ -190,7 +196,7 @@ class DETRLoss(nn.Module):
|
|
|
190
196
|
gt_mask=None,
|
|
191
197
|
postfix='',
|
|
192
198
|
match_indices=None):
|
|
193
|
-
"""Get losses"""
|
|
199
|
+
"""Get losses."""
|
|
194
200
|
if match_indices is None:
|
|
195
201
|
match_indices = self.matcher(pred_bboxes,
|
|
196
202
|
pred_scores,
|
|
@@ -250,22 +256,43 @@ class DETRLoss(nn.Module):
|
|
|
250
256
|
|
|
251
257
|
|
|
252
258
|
class RTDETRDetectionLoss(DETRLoss):
|
|
259
|
+
"""
|
|
260
|
+
Real-Time DEtection TRansformer (RT-DETR) Detection Loss class that extends the DETRLoss.
|
|
261
|
+
|
|
262
|
+
This class computes the detection loss for the RT-DETR model, which includes the standard detection loss as well as
|
|
263
|
+
an additional denoising training loss when provided with denoising metadata.
|
|
264
|
+
"""
|
|
253
265
|
|
|
254
266
|
def forward(self, preds, batch, dn_bboxes=None, dn_scores=None, dn_meta=None):
|
|
267
|
+
"""
|
|
268
|
+
Forward pass to compute the detection loss.
|
|
269
|
+
|
|
270
|
+
Args:
|
|
271
|
+
preds (tuple): Predicted bounding boxes and scores.
|
|
272
|
+
batch (dict): Batch data containing ground truth information.
|
|
273
|
+
dn_bboxes (torch.Tensor, optional): Denoising bounding boxes. Default is None.
|
|
274
|
+
dn_scores (torch.Tensor, optional): Denoising scores. Default is None.
|
|
275
|
+
dn_meta (dict, optional): Metadata for denoising. Default is None.
|
|
276
|
+
|
|
277
|
+
Returns:
|
|
278
|
+
(dict): Dictionary containing the total loss and, if applicable, the denoising loss.
|
|
279
|
+
"""
|
|
255
280
|
pred_bboxes, pred_scores = preds
|
|
256
281
|
total_loss = super().forward(pred_bboxes, pred_scores, batch)
|
|
257
282
|
|
|
283
|
+
# Check for denoising metadata to compute denoising training loss
|
|
258
284
|
if dn_meta is not None:
|
|
259
285
|
dn_pos_idx, dn_num_group = dn_meta['dn_pos_idx'], dn_meta['dn_num_group']
|
|
260
286
|
assert len(batch['gt_groups']) == len(dn_pos_idx)
|
|
261
287
|
|
|
262
|
-
#
|
|
288
|
+
# Get the match indices for denoising
|
|
263
289
|
match_indices = self.get_dn_match_indices(dn_pos_idx, dn_num_group, batch['gt_groups'])
|
|
264
290
|
|
|
265
|
-
# Compute denoising training loss
|
|
291
|
+
# Compute the denoising training loss
|
|
266
292
|
dn_loss = super().forward(dn_bboxes, dn_scores, batch, postfix='_dn', match_indices=match_indices)
|
|
267
293
|
total_loss.update(dn_loss)
|
|
268
294
|
else:
|
|
295
|
+
# If no denoising metadata is provided, set denoising loss to zero
|
|
269
296
|
total_loss.update({f'{k}_dn': torch.tensor(0., device=self.device) for k in total_loss.keys()})
|
|
270
297
|
|
|
271
298
|
return total_loss
|
|
@@ -276,12 +303,12 @@ class RTDETRDetectionLoss(DETRLoss):
|
|
|
276
303
|
Get the match indices for denoising.
|
|
277
304
|
|
|
278
305
|
Args:
|
|
279
|
-
dn_pos_idx (List[torch.Tensor]):
|
|
280
|
-
dn_num_group (int):
|
|
281
|
-
gt_groups (List
|
|
306
|
+
dn_pos_idx (List[torch.Tensor]): List of tensors containing positive indices for denoising.
|
|
307
|
+
dn_num_group (int): Number of denoising groups.
|
|
308
|
+
gt_groups (List[int]): List of integers representing the number of ground truths for each image.
|
|
282
309
|
|
|
283
310
|
Returns:
|
|
284
|
-
|
|
311
|
+
(List[tuple]): List of tuples containing matched indices for denoising.
|
|
285
312
|
"""
|
|
286
313
|
dn_match_indices = []
|
|
287
314
|
idx_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0)
|
ultralytics/models/utils/ops.py
CHANGED
|
@@ -11,8 +11,8 @@ from ultralytics.utils.ops import xywh2xyxy, xyxy2xywh
|
|
|
11
11
|
|
|
12
12
|
class HungarianMatcher(nn.Module):
|
|
13
13
|
"""
|
|
14
|
-
A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in
|
|
15
|
-
|
|
14
|
+
A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in an
|
|
15
|
+
end-to-end fashion.
|
|
16
16
|
|
|
17
17
|
HungarianMatcher performs optimal assignment over the predicted and ground truth bounding boxes using a cost
|
|
18
18
|
function that considers classification scores, bounding box coordinates, and optionally, mask predictions.
|
|
@@ -32,6 +32,9 @@ class HungarianMatcher(nn.Module):
|
|
|
32
32
|
"""
|
|
33
33
|
|
|
34
34
|
def __init__(self, cost_gain=None, use_fl=True, with_mask=False, num_sample_points=12544, alpha=0.25, gamma=2.0):
|
|
35
|
+
"""Initializes HungarianMatcher with cost coefficients, Focal Loss, mask prediction, sample points, and alpha
|
|
36
|
+
and gamma factors.
|
|
37
|
+
"""
|
|
35
38
|
super().__init__()
|
|
36
39
|
if cost_gain is None:
|
|
37
40
|
cost_gain = {'class': 1, 'bbox': 5, 'giou': 2, 'mask': 1, 'dice': 1}
|
|
@@ -45,8 +48,8 @@ class HungarianMatcher(nn.Module):
|
|
|
45
48
|
def forward(self, pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups, masks=None, gt_mask=None):
|
|
46
49
|
"""
|
|
47
50
|
Forward pass for HungarianMatcher. This function computes costs based on prediction and ground truth
|
|
48
|
-
(classification cost, L1 cost between boxes and GIoU cost between boxes) and finds the optimal matching
|
|
49
|
-
|
|
51
|
+
(classification cost, L1 cost between boxes and GIoU cost between boxes) and finds the optimal matching between
|
|
52
|
+
predictions and ground truth based on these costs.
|
|
50
53
|
|
|
51
54
|
Args:
|
|
52
55
|
pred_bboxes (Tensor): Predicted bounding boxes with shape [batch_size, num_queries, 4].
|
|
@@ -153,9 +156,9 @@ def get_cdn_group(batch,
|
|
|
153
156
|
box_noise_scale=1.0,
|
|
154
157
|
training=False):
|
|
155
158
|
"""
|
|
156
|
-
Get contrastive denoising training group. This function creates a contrastive denoising training group with
|
|
157
|
-
|
|
158
|
-
|
|
159
|
+
Get contrastive denoising training group. This function creates a contrastive denoising training group with positive
|
|
160
|
+
and negative samples from the ground truths (gt). It applies noise to the class labels and bounding box coordinates,
|
|
161
|
+
and returns the modified labels, bounding boxes, attention mask and meta information.
|
|
159
162
|
|
|
160
163
|
Args:
|
|
161
164
|
batch (dict): A dict that includes 'gt_cls' (torch.Tensor with shape [num_gts, ]), 'gt_bboxes'
|
|
@@ -191,12 +194,12 @@ def get_cdn_group(batch,
|
|
|
191
194
|
gt_bbox = batch['bboxes'] # bs*num, 4
|
|
192
195
|
b_idx = batch['batch_idx']
|
|
193
196
|
|
|
194
|
-
#
|
|
197
|
+
# Each group has positive and negative queries.
|
|
195
198
|
dn_cls = gt_cls.repeat(2 * num_group) # (2*num_group*bs*num, )
|
|
196
199
|
dn_bbox = gt_bbox.repeat(2 * num_group, 1) # 2*num_group*bs*num, 4
|
|
197
200
|
dn_b_idx = b_idx.repeat(2 * num_group).view(-1) # (2*num_group*bs*num, )
|
|
198
201
|
|
|
199
|
-
#
|
|
202
|
+
# Positive and negative mask
|
|
200
203
|
# (bs*num*num_group, ), the second total_num*num_group part as negative samples
|
|
201
204
|
neg_idx = torch.arange(total_num * num_group, dtype=torch.long, device=gt_bbox.device) + num_group * total_num
|
|
202
205
|
|
|
@@ -220,10 +223,9 @@ def get_cdn_group(batch,
|
|
|
220
223
|
known_bbox += rand_part * diff
|
|
221
224
|
known_bbox.clip_(min=0.0, max=1.0)
|
|
222
225
|
dn_bbox = xyxy2xywh(known_bbox)
|
|
223
|
-
dn_bbox =
|
|
226
|
+
dn_bbox = torch.logit(dn_bbox, eps=1e-6) # inverse sigmoid
|
|
224
227
|
|
|
225
|
-
# total denoising queries
|
|
226
|
-
num_dn = int(max_nums * 2 * num_group)
|
|
228
|
+
num_dn = int(max_nums * 2 * num_group) # total denoising queries
|
|
227
229
|
# class_embed = torch.cat([class_embed, torch.zeros([1, class_embed.shape[-1]], device=class_embed.device)])
|
|
228
230
|
dn_cls_embed = class_embed[dn_cls] # bs*num * 2 * num_group, 256
|
|
229
231
|
padding_cls = torch.zeros(bs, num_dn, dn_cls_embed.shape[-1], device=gt_cls.device)
|
|
@@ -256,9 +258,3 @@ def get_cdn_group(batch,
|
|
|
256
258
|
|
|
257
259
|
return padding_cls.to(class_embed.device), padding_bbox.to(class_embed.device), attn_mask.to(
|
|
258
260
|
class_embed.device), dn_meta
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
def inverse_sigmoid(x, eps=1e-6):
|
|
262
|
-
"""Inverse sigmoid function."""
|
|
263
|
-
x = x.clip(min=0., max=1.)
|
|
264
|
-
return torch.log(x / (1 - x + eps) + eps)
|
|
@@ -26,6 +26,7 @@ class ClassificationPredictor(BasePredictor):
|
|
|
26
26
|
"""
|
|
27
27
|
|
|
28
28
|
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
|
|
29
|
+
"""Initializes ClassificationPredictor setting the task to 'classify'."""
|
|
29
30
|
super().__init__(cfg, overrides, _callbacks)
|
|
30
31
|
self.args.task = 'classify'
|
|
31
32
|
|
|
@@ -79,6 +79,7 @@ class ClassificationTrainer(BaseTrainer):
|
|
|
79
79
|
return ckpt
|
|
80
80
|
|
|
81
81
|
def build_dataset(self, img_path, mode='train', batch=None):
|
|
82
|
+
"""Creates a ClassificationDataset instance given an image path, and mode (train/test etc.)."""
|
|
82
83
|
return ClassificationDataset(root=img_path, args=self.args, augment=mode == 'train', prefix=mode)
|
|
83
84
|
|
|
84
85
|
def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'):
|
|
@@ -113,8 +114,9 @@ class ClassificationTrainer(BaseTrainer):
|
|
|
113
114
|
|
|
114
115
|
def label_loss_items(self, loss_items=None, prefix='train'):
|
|
115
116
|
"""
|
|
116
|
-
Returns a loss dict with labelled training loss items tensor.
|
|
117
|
-
|
|
117
|
+
Returns a loss dict with labelled training loss items tensor.
|
|
118
|
+
|
|
119
|
+
Not needed for classification but necessary for segmentation & detection
|
|
118
120
|
"""
|
|
119
121
|
keys = [f'{prefix}/{x}' for x in self.loss_names]
|
|
120
122
|
if loss_items is None:
|
|
@@ -78,6 +78,7 @@ class ClassificationValidator(BaseValidator):
|
|
|
78
78
|
return self.metrics.results_dict
|
|
79
79
|
|
|
80
80
|
def build_dataset(self, img_path):
|
|
81
|
+
"""Creates and returns a ClassificationDataset instance using given image path and preprocessing parameters."""
|
|
81
82
|
return ClassificationDataset(root=img_path, args=self.args, augment=False, prefix=self.args.split)
|
|
82
83
|
|
|
83
84
|
def get_dataloader(self, dataset_path, batch_size):
|
|
@@ -57,7 +57,7 @@ class DetectionTrainer(BaseTrainer):
|
|
|
57
57
|
return batch
|
|
58
58
|
|
|
59
59
|
def set_model_attributes(self):
|
|
60
|
-
"""
|
|
60
|
+
"""Set model attributes. Note: nl = de_parallel(self.model).model[-1].nl is the number of detection layers (to scale hyps)."""
|
|
61
61
|
# self.args.box *= 3 / nl # scale to layers
|
|
62
62
|
# self.args.cls *= self.data["nc"] / 80 * 3 / nl # scale to classes and layers
|
|
63
63
|
# self.args.cls *= (self.args.imgsz / 640) ** 2 * 3 / nl # scale to image size and layers
|
|
@@ -80,8 +80,9 @@ class DetectionTrainer(BaseTrainer):
|
|
|
80
80
|
|
|
81
81
|
def label_loss_items(self, loss_items=None, prefix='train'):
|
|
82
82
|
"""
|
|
83
|
-
Returns a loss dict with labelled training loss items tensor.
|
|
84
|
-
|
|
83
|
+
Returns a loss dict with labelled training loss items tensor.
|
|
84
|
+
|
|
85
|
+
Not needed for classification but necessary for segmentation & detection
|
|
85
86
|
"""
|
|
86
87
|
keys = [f'{prefix}/{x}' for x in self.loss_names]
|
|
87
88
|
if loss_items is not None:
|
ultralytics/models/yolo/model.py
CHANGED
|
@@ -6,13 +6,11 @@ from ultralytics.nn.tasks import ClassificationModel, DetectionModel, PoseModel,
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class YOLO(Model):
|
|
9
|
-
"""
|
|
10
|
-
YOLO (You Only Look Once) object detection model.
|
|
11
|
-
"""
|
|
9
|
+
"""YOLO (You Only Look Once) object detection model."""
|
|
12
10
|
|
|
13
11
|
@property
|
|
14
12
|
def task_map(self):
|
|
15
|
-
"""Map head to model, trainer, validator, and predictor classes"""
|
|
13
|
+
"""Map head to model, trainer, validator, and predictor classes."""
|
|
16
14
|
return {
|
|
17
15
|
'classify': {
|
|
18
16
|
'model': ClassificationModel,
|
|
@@ -21,6 +21,7 @@ class PosePredictor(DetectionPredictor):
|
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
23
|
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
|
|
24
|
+
"""Initializes PosePredictor, sets task to 'pose' and logs a warning for using 'mps' as device."""
|
|
24
25
|
super().__init__(cfg, overrides, _callbacks)
|
|
25
26
|
self.args.task = 'pose'
|
|
26
27
|
if isinstance(self.args.device, str) and self.args.device.lower() == 'mps':
|
|
@@ -21,10 +21,12 @@ class SegmentationPredictor(DetectionPredictor):
|
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
23
|
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
|
|
24
|
+
"""Initializes the SegmentationPredictor with the provided configuration, overrides, and callbacks."""
|
|
24
25
|
super().__init__(cfg, overrides, _callbacks)
|
|
25
26
|
self.args.task = 'segment'
|
|
26
27
|
|
|
27
28
|
def postprocess(self, preds, img, orig_imgs):
|
|
29
|
+
"""Applies non-max suppression and processes detections for each image in an input batch."""
|
|
28
30
|
p = ops.non_max_suppression(preds[0],
|
|
29
31
|
self.args.conf,
|
|
30
32
|
self.args.iou,
|
|
@@ -144,7 +144,7 @@ class SegmentationValidator(DetectionValidator):
|
|
|
144
144
|
|
|
145
145
|
def _process_batch(self, detections, labels, pred_masks=None, gt_masks=None, overlap=False, masks=False):
|
|
146
146
|
"""
|
|
147
|
-
Return correct prediction matrix
|
|
147
|
+
Return correct prediction matrix.
|
|
148
148
|
|
|
149
149
|
Args:
|
|
150
150
|
detections (array[N, 6]), x1, y1, x2, y2, conf, class
|
ultralytics/nn/autobackend.py
CHANGED
|
@@ -20,7 +20,11 @@ from ultralytics.utils.downloads import attempt_download_asset, is_url
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
def check_class_names(names):
|
|
23
|
-
"""
|
|
23
|
+
"""
|
|
24
|
+
Check class names.
|
|
25
|
+
|
|
26
|
+
Map imagenet class codes to human-readable names if required. Convert lists to dicts.
|
|
27
|
+
"""
|
|
24
28
|
if isinstance(names, list): # names is a list
|
|
25
29
|
names = dict(enumerate(names)) # convert to dict
|
|
26
30
|
if isinstance(names, dict):
|
|
@@ -37,36 +41,20 @@ def check_class_names(names):
|
|
|
37
41
|
|
|
38
42
|
|
|
39
43
|
class AutoBackend(nn.Module):
|
|
44
|
+
"""
|
|
45
|
+
Handles dynamic backend selection for running inference using Ultralytics YOLO models.
|
|
40
46
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
weights='yolov8n.pt',
|
|
44
|
-
device=torch.device('cpu'),
|
|
45
|
-
dnn=False,
|
|
46
|
-
data=None,
|
|
47
|
-
fp16=False,
|
|
48
|
-
fuse=True,
|
|
49
|
-
verbose=True):
|
|
50
|
-
"""
|
|
51
|
-
MultiBackend class for python inference on various platforms using Ultralytics YOLO.
|
|
47
|
+
The AutoBackend class is designed to provide an abstraction layer for various inference engines. It supports a wide
|
|
48
|
+
range of formats, each with specific naming conventions as outlined below:
|
|
52
49
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
device (torch.device): The device to run the model on.
|
|
56
|
-
dnn (bool): Use OpenCV DNN module for inference if True, defaults to False.
|
|
57
|
-
data (str | Path | optional): Additional data.yaml file for class names.
|
|
58
|
-
fp16 (bool): If True, use half precision. Default: False
|
|
59
|
-
fuse (bool): Whether to fuse the model or not. Default: True
|
|
60
|
-
verbose (bool): Whether to run in verbose mode or not. Default: True
|
|
61
|
-
|
|
62
|
-
Supported formats and their naming conventions:
|
|
63
|
-
| Format | Suffix |
|
|
50
|
+
Supported Formats and Naming Conventions:
|
|
51
|
+
| Format | File Suffix |
|
|
64
52
|
|-----------------------|------------------|
|
|
65
53
|
| PyTorch | *.pt |
|
|
66
54
|
| TorchScript | *.torchscript |
|
|
67
55
|
| ONNX Runtime | *.onnx |
|
|
68
|
-
| ONNX OpenCV DNN | *.onnx dnn=True
|
|
69
|
-
| OpenVINO |
|
|
56
|
+
| ONNX OpenCV DNN | *.onnx (dnn=True)|
|
|
57
|
+
| OpenVINO | *openvino_model/ |
|
|
70
58
|
| CoreML | *.mlpackage |
|
|
71
59
|
| TensorRT | *.engine |
|
|
72
60
|
| TensorFlow SavedModel | *_saved_model |
|
|
@@ -75,6 +63,31 @@ class AutoBackend(nn.Module):
|
|
|
75
63
|
| TensorFlow Edge TPU | *_edgetpu.tflite |
|
|
76
64
|
| PaddlePaddle | *_paddle_model |
|
|
77
65
|
| ncnn | *_ncnn_model |
|
|
66
|
+
|
|
67
|
+
This class offers dynamic backend switching capabilities based on the input model format, making it easier to deploy
|
|
68
|
+
models across various platforms.
|
|
69
|
+
"""
|
|
70
|
+
|
|
71
|
+
@torch.no_grad()
|
|
72
|
+
def __init__(self,
|
|
73
|
+
weights='yolov8n.pt',
|
|
74
|
+
device=torch.device('cpu'),
|
|
75
|
+
dnn=False,
|
|
76
|
+
data=None,
|
|
77
|
+
fp16=False,
|
|
78
|
+
fuse=True,
|
|
79
|
+
verbose=True):
|
|
80
|
+
"""
|
|
81
|
+
Initialize the AutoBackend for inference.
|
|
82
|
+
|
|
83
|
+
Args:
|
|
84
|
+
weights (str): Path to the model weights file. Defaults to 'yolov8n.pt'.
|
|
85
|
+
device (torch.device): Device to run the model on. Defaults to CPU.
|
|
86
|
+
dnn (bool): Use OpenCV DNN module for ONNX inference. Defaults to False.
|
|
87
|
+
data (str | Path | optional): Path to the additional data.yaml file containing class names. Optional.
|
|
88
|
+
fp16 (bool): Enable half-precision inference. Supported only on specific backends. Defaults to False.
|
|
89
|
+
fuse (bool): Fuse Conv2D + BatchNorm layers for optimization. Defaults to True.
|
|
90
|
+
verbose (bool): Enable verbose logging. Defaults to True.
|
|
78
91
|
"""
|
|
79
92
|
super().__init__()
|
|
80
93
|
w = str(weights[0] if isinstance(weights, list) else weights)
|
|
@@ -440,14 +453,14 @@ class AutoBackend(nn.Module):
|
|
|
440
453
|
|
|
441
454
|
def from_numpy(self, x):
|
|
442
455
|
"""
|
|
443
|
-
|
|
456
|
+
Convert a numpy array to a tensor.
|
|
444
457
|
|
|
445
|
-
|
|
446
|
-
|
|
458
|
+
Args:
|
|
459
|
+
x (np.ndarray): The array to be converted.
|
|
447
460
|
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
461
|
+
Returns:
|
|
462
|
+
(torch.Tensor): The converted tensor, moved to self.device; inputs that are not np.ndarray are returned unchanged.
|
|
463
|
+
"""
|
|
451
464
|
return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x
|
|
452
465
|
|
|
453
466
|
def warmup(self, imgsz=(1, 3, 640, 640)):
|
|
@@ -476,7 +489,7 @@ class AutoBackend(nn.Module):
|
|
|
476
489
|
@staticmethod
|
|
477
490
|
def _model_type(p='path/to/model.pt'):
|
|
478
491
|
"""
|
|
479
|
-
This function takes a path to a model file and returns the model type
|
|
492
|
+
This function takes a path to a model file and returns the model type.
|
|
480
493
|
|
|
481
494
|
Args:
|
|
482
495
|
p: path to the model file. Defaults to path/to/model.pt
|
|
@@ -1,16 +1,20 @@
|
|
|
1
1
|
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
|
2
2
|
"""
|
|
3
|
-
Ultralytics modules.
|
|
3
|
+
Ultralytics modules.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
5
|
+
Example:
|
|
6
|
+
Visualize a module with Netron.
|
|
7
|
+
```python
|
|
8
|
+
from ultralytics.nn.modules import *
|
|
9
|
+
import torch
|
|
10
|
+
import os
|
|
8
11
|
|
|
9
|
-
x = torch.ones(1, 128, 40, 40)
|
|
10
|
-
m = Conv(128, 128)
|
|
11
|
-
f = f'{m._get_name()}.onnx'
|
|
12
|
-
torch.onnx.export(m, x, f)
|
|
13
|
-
os.system(f'onnxsim {f} {f} && open {f}')
|
|
12
|
+
x = torch.ones(1, 128, 40, 40)
|
|
13
|
+
m = Conv(128, 128)
|
|
14
|
+
f = f'{m._get_name()}.onnx'
|
|
15
|
+
torch.onnx.export(m, x, f)
|
|
16
|
+
os.system(f'onnxsim {f} {f} && open {f}')
|
|
17
|
+
```
|
|
14
18
|
"""
|
|
15
19
|
|
|
16
20
|
from .block import (C1, C2, C3, C3TR, DFL, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, GhostBottleneck,
|
ultralytics/nn/modules/block.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
|
2
|
-
"""
|
|
3
|
-
Block modules
|
|
4
|
-
"""
|
|
2
|
+
"""Block modules."""
|
|
5
3
|
|
|
6
4
|
import torch
|
|
7
5
|
import torch.nn as nn
|
|
@@ -17,6 +15,7 @@ __all__ = ('DFL', 'HGBlock', 'HGStem', 'SPP', 'SPPF', 'C1', 'C2', 'C3', 'C2f', '
|
|
|
17
15
|
class DFL(nn.Module):
|
|
18
16
|
"""
|
|
19
17
|
Integral module of Distribution Focal Loss (DFL).
|
|
18
|
+
|
|
20
19
|
Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
|
|
21
20
|
"""
|
|
22
21
|
|
|
@@ -51,11 +50,14 @@ class Proto(nn.Module):
|
|
|
51
50
|
|
|
52
51
|
|
|
53
52
|
class HGStem(nn.Module):
|
|
54
|
-
"""
|
|
53
|
+
"""
|
|
54
|
+
StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
|
|
55
|
+
|
|
55
56
|
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
|
|
56
57
|
"""
|
|
57
58
|
|
|
58
59
|
def __init__(self, c1, cm, c2):
|
|
60
|
+
"""Initialize the PPHGNetV2 stem block (HGStem) from the c1, cm and c2 channel arguments."""
|
|
59
61
|
super().__init__()
|
|
60
62
|
self.stem1 = Conv(c1, cm, 3, 2, act=nn.ReLU())
|
|
61
63
|
self.stem2a = Conv(cm, cm // 2, 2, 1, 0, act=nn.ReLU())
|
|
@@ -79,11 +81,14 @@ class HGStem(nn.Module):
|
|
|
79
81
|
|
|
80
82
|
|
|
81
83
|
class HGBlock(nn.Module):
|
|
82
|
-
"""
|
|
84
|
+
"""
|
|
85
|
+
HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
|
|
86
|
+
|
|
83
87
|
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
|
|
84
88
|
"""
|
|
85
89
|
|
|
86
90
|
def __init__(self, c1, cm, c2, k=3, n=6, lightconv=False, shortcut=False, act=nn.ReLU()):
|
|
91
|
+
"""Initializes the HGBlock with n stacked Conv or LightConv layers (selected by lightconv) of kernel size k."""
|
|
87
92
|
super().__init__()
|
|
88
93
|
block = LightConv if lightconv else Conv
|
|
89
94
|
self.m = nn.ModuleList(block(c1 if i == 0 else cm, cm, k=k, act=act) for i in range(n))
|
|
@@ -218,6 +223,7 @@ class RepC3(nn.Module):
|
|
|
218
223
|
"""Rep C3."""
|
|
219
224
|
|
|
220
225
|
def __init__(self, c1, c2, n=3, e=1.0):
|
|
226
|
+
"""Initialize RepC3 from input channels c1, output channels c2, count n, and hidden-channel ratio e."""
|
|
221
227
|
super().__init__()
|
|
222
228
|
c_ = int(c2 * e) # hidden channels
|
|
223
229
|
self.cv1 = Conv(c1, c2, 1, 1)
|
ultralytics/nn/modules/conv.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
|
2
|
-
"""
|
|
3
|
-
Convolution modules
|
|
4
|
-
"""
|
|
2
|
+
"""Convolution modules."""
|
|
5
3
|
|
|
6
4
|
import math
|
|
7
5
|
|
|
@@ -69,7 +67,9 @@ class Conv2(Conv):
|
|
|
69
67
|
|
|
70
68
|
|
|
71
69
|
class LightConv(nn.Module):
|
|
72
|
-
"""
|
|
70
|
+
"""
|
|
71
|
+
Light convolution with args(ch_in, ch_out, kernel).
|
|
72
|
+
|
|
73
73
|
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
|
|
74
74
|
"""
|
|
75
75
|
|
|
@@ -148,12 +148,15 @@ class GhostConv(nn.Module):
|
|
|
148
148
|
|
|
149
149
|
class RepConv(nn.Module):
|
|
150
150
|
"""
|
|
151
|
-
RepConv is a basic rep-style block, including training and deploy status.
|
|
151
|
+
RepConv is a basic rep-style block, including training and deploy status.
|
|
152
|
+
|
|
153
|
+
This module is used in RT-DETR.
|
|
152
154
|
Based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
|
|
153
155
|
"""
|
|
154
156
|
default_act = nn.SiLU() # default activation
|
|
155
157
|
|
|
156
158
|
def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
|
|
159
|
+
"""Initializes the RepConv layer with inputs, outputs & optional activation function; requires k == 3 and p == 1."""
|
|
157
160
|
super().__init__()
|
|
158
161
|
assert k == 3 and p == 1
|
|
159
162
|
self.g = g
|
|
@@ -166,27 +169,30 @@ class RepConv(nn.Module):
|
|
|
166
169
|
self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False)
|
|
167
170
|
|
|
168
171
|
def forward_fuse(self, x):
|
|
169
|
-
"""Forward process"""
|
|
172
|
+
"""Forward process."""
|
|
170
173
|
return self.act(self.conv(x))
|
|
171
174
|
|
|
172
175
|
def forward(self, x):
|
|
173
|
-
"""Forward process"""
|
|
176
|
+
"""Forward process."""
|
|
174
177
|
id_out = 0 if self.bn is None else self.bn(x)
|
|
175
178
|
return self.act(self.conv1(x) + self.conv2(x) + id_out)
|
|
176
179
|
|
|
177
180
|
def get_equivalent_kernel_bias(self):
|
|
181
|
+
"""Returns equivalent kernel and bias by adding 3x3 kernel, 1x1 kernel and identity kernel with their biases."""
|
|
178
182
|
kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
|
|
179
183
|
kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
|
|
180
184
|
kernelid, biasid = self._fuse_bn_tensor(self.bn)
|
|
181
185
|
return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
|
|
182
186
|
|
|
183
187
|
def _pad_1x1_to_3x3_tensor(self, kernel1x1):
|
|
188
|
+
"""Pads a 1x1 tensor to a 3x3 tensor."""
|
|
184
189
|
if kernel1x1 is None:
|
|
185
190
|
return 0
|
|
186
191
|
else:
|
|
187
192
|
return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])
|
|
188
193
|
|
|
189
194
|
def _fuse_bn_tensor(self, branch):
|
|
195
|
+
"""Generates appropriate kernels and biases for convolution by fusing branches of the neural network."""
|
|
190
196
|
if branch is None:
|
|
191
197
|
return 0, 0
|
|
192
198
|
if isinstance(branch, Conv):
|
|
@@ -214,6 +220,7 @@ class RepConv(nn.Module):
|
|
|
214
220
|
return kernel * t, beta - running_mean * gamma / std
|
|
215
221
|
|
|
216
222
|
def fuse_convs(self):
|
|
223
|
+
"""Combines two convolution layers into a single layer and removes unused attributes from the class."""
|
|
217
224
|
if hasattr(self, 'conv'):
|
|
218
225
|
return
|
|
219
226
|
kernel, bias = self.get_equivalent_kernel_bias()
|
|
@@ -243,12 +250,14 @@ class ChannelAttention(nn.Module):
|
|
|
243
250
|
"""Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet."""
|
|
244
251
|
|
|
245
252
|
def __init__(self, channels: int) -> None:
|
|
253
|
+
"""Initializes ChannelAttention with adaptive average pooling, a 1x1 convolution over `channels`, and a sigmoid."""
|
|
246
254
|
super().__init__()
|
|
247
255
|
self.pool = nn.AdaptiveAvgPool2d(1)
|
|
248
256
|
self.fc = nn.Conv2d(channels, channels, 1, 1, 0, bias=True)
|
|
249
257
|
self.act = nn.Sigmoid()
|
|
250
258
|
|
|
251
259
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
260
|
+
"""Applies channel attention: scales the input by the sigmoid of a 1x1 convolution over its average-pooled features."""
|
|
252
261
|
return x * self.act(self.fc(self.pool(x)))
|
|
253
262
|
|
|
254
263
|
|
ultralytics/nn/modules/head.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
|
2
|
-
"""
|
|
3
|
-
Model head modules
|
|
4
|
-
"""
|
|
2
|
+
"""Model head modules."""
|
|
5
3
|
|
|
6
4
|
import math
|
|
7
5
|
|
|
@@ -229,6 +227,7 @@ class RTDETRDecoder(nn.Module):
|
|
|
229
227
|
self._reset_parameters()
|
|
230
228
|
|
|
231
229
|
def forward(self, x, batch=None):
|
|
230
|
+
"""Runs the forward pass of the module, returning bounding box and classification scores for the input."""
|
|
232
231
|
from ultralytics.models.utils.ops import get_cdn_group
|
|
233
232
|
|
|
234
233
|
# input projection and embedding
|
|
@@ -265,6 +264,7 @@ class RTDETRDecoder(nn.Module):
|
|
|
265
264
|
return y if self.export else (y, x)
|
|
266
265
|
|
|
267
266
|
def _generate_anchors(self, shapes, grid_size=0.05, dtype=torch.float32, device='cpu', eps=1e-2):
|
|
267
|
+
"""Generates anchor bounding boxes for given shapes with specific grid size and validates them."""
|
|
268
268
|
anchors = []
|
|
269
269
|
for i, (h, w) in enumerate(shapes):
|
|
270
270
|
sy = torch.arange(end=h, dtype=dtype, device=device)
|
|
@@ -284,6 +284,7 @@ class RTDETRDecoder(nn.Module):
|
|
|
284
284
|
return anchors, valid_mask
|
|
285
285
|
|
|
286
286
|
def _get_encoder_input(self, x):
|
|
287
|
+
"""Processes and returns encoder inputs by getting projection features from input and concatenating them."""
|
|
287
288
|
# get projection features
|
|
288
289
|
x = [self.input_proj[i](feat) for i, feat in enumerate(x)]
|
|
289
290
|
# get encoder inputs
|
|
@@ -301,6 +302,7 @@ class RTDETRDecoder(nn.Module):
|
|
|
301
302
|
return feats, shapes
|
|
302
303
|
|
|
303
304
|
def _get_decoder_input(self, feats, shapes, dn_embed=None, dn_bbox=None):
|
|
305
|
+
"""Generates and prepares the input required for the decoder from the provided features and shapes."""
|
|
304
306
|
bs = len(feats)
|
|
305
307
|
# prepare input for decoder
|
|
306
308
|
anchors, valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device)
|
|
@@ -339,6 +341,7 @@ class RTDETRDecoder(nn.Module):
|
|
|
339
341
|
|
|
340
342
|
# TODO
|
|
341
343
|
def _reset_parameters(self):
|
|
344
|
+
"""Initializes or resets the parameters of the model's various components with predefined weights and biases."""
|
|
342
345
|
# class and bbox head init
|
|
343
346
|
bias_cls = bias_init_with_prob(0.01) / 80 * self.nc
|
|
344
347
|
# NOTE: the weight initialization in `linear_init_` would cause NaN when training with custom datasets.
|