ultralytics 8.3.197__py3-none-any.whl → 8.3.198__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/test_engine.py +9 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +0 -1
- ultralytics/cfg/default.yaml +96 -94
- ultralytics/cfg/trackers/botsort.yaml +16 -17
- ultralytics/cfg/trackers/bytetrack.yaml +9 -11
- ultralytics/data/augment.py +1 -1
- ultralytics/data/dataset.py +1 -1
- ultralytics/engine/exporter.py +35 -35
- ultralytics/engine/predictor.py +1 -2
- ultralytics/engine/results.py +1 -1
- ultralytics/engine/trainer.py +5 -5
- ultralytics/engine/tuner.py +54 -32
- ultralytics/models/sam/modules/decoders.py +3 -3
- ultralytics/models/sam/modules/sam.py +5 -5
- ultralytics/models/sam/predict.py +11 -11
- ultralytics/models/yolo/classify/train.py +2 -7
- ultralytics/models/yolo/classify/val.py +2 -2
- ultralytics/models/yolo/detect/predict.py +1 -1
- ultralytics/models/yolo/detect/train.py +1 -6
- ultralytics/models/yolo/detect/val.py +4 -4
- ultralytics/models/yolo/obb/val.py +3 -3
- ultralytics/models/yolo/pose/predict.py +1 -1
- ultralytics/models/yolo/pose/train.py +0 -6
- ultralytics/models/yolo/pose/val.py +2 -2
- ultralytics/models/yolo/segment/predict.py +2 -2
- ultralytics/models/yolo/segment/train.py +0 -5
- ultralytics/models/yolo/segment/val.py +9 -7
- ultralytics/models/yolo/yoloe/val.py +1 -1
- ultralytics/nn/modules/block.py +1 -1
- ultralytics/nn/tasks.py +2 -2
- ultralytics/utils/checks.py +1 -1
- ultralytics/utils/metrics.py +6 -6
- ultralytics/utils/nms.py +5 -13
- ultralytics/utils/plotting.py +22 -36
- ultralytics/utils/torch_utils.py +9 -5
- {ultralytics-8.3.197.dist-info → ultralytics-8.3.198.dist-info}/METADATA +1 -1
- {ultralytics-8.3.197.dist-info → ultralytics-8.3.198.dist-info}/RECORD +42 -42
- {ultralytics-8.3.197.dist-info → ultralytics-8.3.198.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.197.dist-info → ultralytics-8.3.198.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.197.dist-info → ultralytics-8.3.198.dist-info}/licenses/LICENSE +0 -0
- {ultralytics-8.3.197.dist-info → ultralytics-8.3.198.dist-info}/top_level.txt +0 -0
ultralytics/engine/tuner.py
CHANGED
@@ -16,6 +16,7 @@ Examples:
|
|
16
16
|
|
17
17
|
from __future__ import annotations
|
18
18
|
|
19
|
+
import gc
|
19
20
|
import random
|
20
21
|
import shutil
|
21
22
|
import subprocess
|
@@ -23,6 +24,7 @@ import time
|
|
23
24
|
from datetime import datetime
|
24
25
|
|
25
26
|
import numpy as np
|
27
|
+
import torch
|
26
28
|
|
27
29
|
from ultralytics.cfg import get_cfg, get_save_dir
|
28
30
|
from ultralytics.utils import DEFAULT_CFG, LOGGER, YAML, callbacks, colorstr, remove_colorstr
|
@@ -97,7 +99,7 @@ class Tuner:
|
|
97
99
|
"warmup_epochs": (0.0, 5.0), # warmup epochs (fractions ok)
|
98
100
|
"warmup_momentum": (0.0, 0.95), # warmup initial momentum
|
99
101
|
"box": (1.0, 20.0), # box loss gain
|
100
|
-
"cls": (0.
|
102
|
+
"cls": (0.1, 4.0), # cls loss gain (scale with pixels)
|
101
103
|
"dfl": (0.4, 6.0), # dfl loss gain
|
102
104
|
"hsv_h": (0.0, 0.1), # image HSV-Hue augmentation (fraction)
|
103
105
|
"hsv_s": (0.0, 0.9), # image HSV-Saturation augmentation (fraction)
|
@@ -114,6 +116,7 @@ class Tuner:
|
|
114
116
|
"mixup": (0.0, 1.0), # image mixup (probability)
|
115
117
|
"cutmix": (0.0, 1.0), # image cutmix (probability)
|
116
118
|
"copy_paste": (0.0, 1.0), # segment copy-paste (probability)
|
119
|
+
"close_mosaic": (0.0, 10.0), # close dataloader mosaic (epochs)
|
117
120
|
}
|
118
121
|
mongodb_uri = args.pop("mongodb_uri", None)
|
119
122
|
mongodb_db = args.pop("mongodb_db", "ultralytics")
|
@@ -266,19 +269,31 @@ class Tuner:
|
|
266
269
|
except Exception as e:
|
267
270
|
LOGGER.warning(f"{self.prefix}MongoDB to CSV sync failed: {e}")
|
268
271
|
|
272
|
+
def _crossover(self, x: np.ndarray, alpha: float = 0.2, k: int = 9) -> np.ndarray:
|
273
|
+
"""BLX-α crossover from up to top-k parents (x[:,0]=fitness, rest=genes)."""
|
274
|
+
k = min(k, len(x))
|
275
|
+
# fitness weights (shifted to >0); fallback to uniform if degenerate
|
276
|
+
weights = x[:, 0] - x[:, 0].min() + 1e-6
|
277
|
+
if not np.isfinite(weights).all() or weights.sum() == 0:
|
278
|
+
weights = np.ones_like(weights)
|
279
|
+
idxs = random.choices(range(len(x)), weights=weights, k=k)
|
280
|
+
parents_mat = np.stack([x[i][1:] for i in idxs], 0) # (k, ng) strip fitness
|
281
|
+
lo, hi = parents_mat.min(0), parents_mat.max(0)
|
282
|
+
span = hi - lo
|
283
|
+
return np.random.uniform(lo - alpha * span, hi + alpha * span)
|
284
|
+
|
269
285
|
def _mutate(
|
270
286
|
self,
|
271
|
-
|
272
|
-
|
273
|
-
mutation: float = 0.8,
|
287
|
+
n: int = 9,
|
288
|
+
mutation: float = 0.5,
|
274
289
|
sigma: float = 0.2,
|
275
290
|
) -> dict[str, float]:
|
276
291
|
"""
|
277
292
|
Mutate hyperparameters based on bounds and scaling factors specified in `self.space`.
|
278
293
|
|
279
294
|
Args:
|
280
|
-
parent (str): Parent selection method
|
281
|
-
n (int): Number of parents to consider.
|
295
|
+
parent (str): Parent selection method (kept for API compatibility, unused in BLX mode).
|
296
|
+
n (int): Number of top parents to consider.
|
282
297
|
mutation (float): Probability of a parameter mutation in any given iteration.
|
283
298
|
sigma (float): Standard deviation for Gaussian random number generator.
|
284
299
|
|
@@ -293,41 +308,40 @@ class Tuner:
|
|
293
308
|
if results:
|
294
309
|
# MongoDB already sorted by fitness DESC, so results[0] is best
|
295
310
|
x = np.array([[r["fitness"]] + [r["hyperparameters"][k] for k in self.space.keys()] for r in results])
|
296
|
-
n = min(n, len(x))
|
297
311
|
|
298
312
|
# Fall back to CSV if MongoDB unavailable or empty
|
299
313
|
if x is None and self.tune_csv.exists():
|
300
314
|
csv_data = np.loadtxt(self.tune_csv, ndmin=2, delimiter=",", skiprows=1)
|
301
315
|
if len(csv_data) > 0:
|
302
316
|
fitness = csv_data[:, 0] # first column
|
303
|
-
|
304
|
-
x = csv_data[
|
317
|
+
order = np.argsort(-fitness)
|
318
|
+
x = csv_data[order][:n] # top-n sorted by fitness DESC
|
305
319
|
|
306
320
|
# Mutate if we have data, otherwise use defaults
|
307
321
|
if x is not None:
|
308
|
-
|
309
|
-
if parent == "single" or len(x) <= 1:
|
310
|
-
x = x[random.choices(range(n), weights=w)[0]] # weighted selection
|
311
|
-
elif parent == "weighted":
|
312
|
-
x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination
|
313
|
-
|
314
|
-
# Mutate
|
315
|
-
r = np.random
|
316
|
-
r.seed(int(time.time()))
|
317
|
-
g = np.array([v[2] if len(v) == 3 else 1.0 for v in self.space.values()]) # gains 0-1
|
322
|
+
np.random.seed(int(time.time()))
|
318
323
|
ng = len(self.space)
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
324
|
+
|
325
|
+
# Crossover
|
326
|
+
genes = self._crossover(x)
|
327
|
+
|
328
|
+
# Mutation
|
329
|
+
gains = np.array([v[2] if len(v) == 3 else 1.0 for v in self.space.values()]) # gains 0-1
|
330
|
+
factors = np.ones(ng)
|
331
|
+
while np.all(factors == 1): # mutate until a change occurs (prevent duplicates)
|
332
|
+
mask = np.random.random(ng) < mutation
|
333
|
+
step = np.random.randn(ng) * (sigma * gains)
|
334
|
+
factors = np.where(mask, np.exp(step), 1.0).clip(0.25, 4.0)
|
335
|
+
hyp = {k: float(genes[i] * factors[i]) for i, k in enumerate(self.space.keys())}
|
323
336
|
else:
|
324
337
|
hyp = {k: getattr(self.args, k) for k in self.space.keys()}
|
325
338
|
|
326
339
|
# Constrain to limits
|
327
340
|
for k, bounds in self.space.items():
|
328
|
-
hyp[k] = max(hyp[k], bounds[0])
|
329
|
-
|
330
|
-
|
341
|
+
hyp[k] = round(min(max(hyp[k], bounds[0]), bounds[1]), 5)
|
342
|
+
|
343
|
+
# Update types
|
344
|
+
hyp["close_mosaic"] = int(round(hyp["close_mosaic"]))
|
331
345
|
|
332
346
|
return hyp
|
333
347
|
|
@@ -361,8 +375,12 @@ class Tuner:
|
|
361
375
|
start = x.shape[0]
|
362
376
|
LOGGER.info(f"{self.prefix}Resuming tuning run {self.tune_dir} from iteration {start + 1}...")
|
363
377
|
for i in range(start, iterations):
|
378
|
+
# Linearly decay sigma from 0.2 → 0.1 over first 300 iterations
|
379
|
+
frac = min(i / 300.0, 1.0)
|
380
|
+
sigma_i = 0.2 - 0.1 * frac
|
381
|
+
|
364
382
|
# Mutate hyperparameters
|
365
|
-
mutated_hyp = self._mutate()
|
383
|
+
mutated_hyp = self._mutate(sigma=sigma_i)
|
366
384
|
LOGGER.info(f"{self.prefix}Starting iteration {i + 1}/{iterations} with hyperparameters: {mutated_hyp}")
|
367
385
|
|
368
386
|
metrics = {}
|
@@ -378,6 +396,11 @@ class Tuner:
|
|
378
396
|
metrics = torch_load(ckpt_file)["train_metrics"]
|
379
397
|
assert return_code == 0, "training failed"
|
380
398
|
|
399
|
+
# Cleanup
|
400
|
+
time.sleep(1)
|
401
|
+
gc.collect()
|
402
|
+
torch.cuda.empty_cache()
|
403
|
+
|
381
404
|
except Exception as e:
|
382
405
|
LOGGER.error(f"training failure for hyperparameter tuning iteration {i + 1}\n{e}")
|
383
406
|
|
@@ -403,14 +426,14 @@ class Tuner:
|
|
403
426
|
x = np.loadtxt(self.tune_csv, ndmin=2, delimiter=",", skiprows=1)
|
404
427
|
fitness = x[:, 0] # first column
|
405
428
|
best_idx = fitness.argmax()
|
406
|
-
best_is_current = best_idx == i
|
429
|
+
best_is_current = best_idx == (i - start)
|
407
430
|
if best_is_current:
|
408
|
-
best_save_dir = save_dir
|
431
|
+
best_save_dir = str(save_dir)
|
409
432
|
best_metrics = {k: round(v, 5) for k, v in metrics.items()}
|
410
433
|
for ckpt in weights_dir.glob("*.pt"):
|
411
434
|
shutil.copy2(ckpt, self.tune_dir / "weights")
|
412
435
|
elif cleanup:
|
413
|
-
shutil.rmtree(
|
436
|
+
shutil.rmtree(best_save_dir, ignore_errors=True) # remove iteration dirs to reduce storage space
|
414
437
|
|
415
438
|
# Plot tune results
|
416
439
|
plot_tune_results(str(self.tune_csv))
|
@@ -421,8 +444,7 @@ class Tuner:
|
|
421
444
|
f"{self.prefix}Results saved to {colorstr('bold', self.tune_dir)}\n"
|
422
445
|
f"{self.prefix}Best fitness={fitness[best_idx]} observed at iteration {best_idx + 1}\n"
|
423
446
|
f"{self.prefix}Best fitness metrics are {best_metrics}\n"
|
424
|
-
f"{self.prefix}Best fitness model is {best_save_dir}
|
425
|
-
f"{self.prefix}Best fitness hyperparameters are printed below.\n"
|
447
|
+
f"{self.prefix}Best fitness model is {best_save_dir}"
|
426
448
|
)
|
427
449
|
LOGGER.info("\n" + header)
|
428
450
|
data = {k: float(x[best_idx, i + 1]) for i, k in enumerate(self.space.keys())}
|
@@ -402,7 +402,7 @@ class SAM2MaskDecoder(nn.Module):
|
|
402
402
|
s = 1
|
403
403
|
else:
|
404
404
|
output_tokens = torch.cat([self.iou_token.weight, self.mask_tokens.weight], dim=0)
|
405
|
-
output_tokens = output_tokens.unsqueeze(0).expand(sparse_prompt_embeddings.
|
405
|
+
output_tokens = output_tokens.unsqueeze(0).expand(sparse_prompt_embeddings.shape[0], -1, -1)
|
406
406
|
tokens = torch.cat((output_tokens, sparse_prompt_embeddings), dim=1)
|
407
407
|
|
408
408
|
# Expand per-image data in batch direction to be per-mask
|
@@ -412,7 +412,7 @@ class SAM2MaskDecoder(nn.Module):
|
|
412
412
|
assert image_embeddings.shape[0] == tokens.shape[0]
|
413
413
|
src = image_embeddings
|
414
414
|
src = src + dense_prompt_embeddings
|
415
|
-
assert image_pe.
|
415
|
+
assert image_pe.shape[0] == 1, "image_pe should have size 1 in batch dim (from `get_dense_pe()`)"
|
416
416
|
pos_src = torch.repeat_interleave(image_pe, tokens.shape[0], dim=0)
|
417
417
|
b, c, h, w = src.shape
|
418
418
|
|
@@ -487,7 +487,7 @@ class SAM2MaskDecoder(nn.Module):
|
|
487
487
|
multimask_logits = all_mask_logits[:, 1:, :, :]
|
488
488
|
multimask_iou_scores = all_iou_scores[:, 1:]
|
489
489
|
best_scores_inds = torch.argmax(multimask_iou_scores, dim=-1)
|
490
|
-
batch_inds = torch.arange(multimask_iou_scores.
|
490
|
+
batch_inds = torch.arange(multimask_iou_scores.shape[0], device=all_iou_scores.device)
|
491
491
|
best_multimask_logits = multimask_logits[batch_inds, best_scores_inds]
|
492
492
|
best_multimask_logits = best_multimask_logits.unsqueeze(1)
|
493
493
|
best_multimask_iou_scores = multimask_iou_scores[batch_inds, best_scores_inds]
|
@@ -472,7 +472,7 @@ class SAM2Model(torch.nn.Module):
|
|
472
472
|
... object_score_logits,
|
473
473
|
... ) = results
|
474
474
|
"""
|
475
|
-
B = backbone_features.
|
475
|
+
B = backbone_features.shape[0]
|
476
476
|
device = backbone_features.device
|
477
477
|
assert backbone_features.size(1) == self.sam_prompt_embed_dim
|
478
478
|
assert backbone_features.size(2) == self.sam_image_embedding_size
|
@@ -482,7 +482,7 @@ class SAM2Model(torch.nn.Module):
|
|
482
482
|
if point_inputs is not None:
|
483
483
|
sam_point_coords = point_inputs["point_coords"]
|
484
484
|
sam_point_labels = point_inputs["point_labels"]
|
485
|
-
assert sam_point_coords.
|
485
|
+
assert sam_point_coords.shape[0] == B and sam_point_labels.shape[0] == B
|
486
486
|
else:
|
487
487
|
# If no points are provided, pad with an empty point (with label -1)
|
488
488
|
sam_point_coords = torch.zeros(B, 1, 2, device=device, dtype=backbone_features.dtype)
|
@@ -585,10 +585,10 @@ class SAM2Model(torch.nn.Module):
|
|
585
585
|
antialias=True, # use antialias for downsampling
|
586
586
|
)
|
587
587
|
# a dummy IoU prediction of all 1's under mask input
|
588
|
-
ious = mask_inputs.new_ones(mask_inputs.
|
588
|
+
ious = mask_inputs.new_ones(mask_inputs.shape[0], 1).float()
|
589
589
|
if not self.use_obj_ptrs_in_encoder or backbone_features is None or high_res_features is None:
|
590
590
|
# all zeros as a dummy object pointer (of shape [B, C])
|
591
|
-
obj_ptr = torch.zeros(mask_inputs.
|
591
|
+
obj_ptr = torch.zeros(mask_inputs.shape[0], self.hidden_dim, device=mask_inputs.device)
|
592
592
|
else:
|
593
593
|
# produce an object pointer using the SAM decoder from the mask input
|
594
594
|
_, _, _, _, _, obj_ptr, _ = self._forward_sam_heads(
|
@@ -1006,7 +1006,7 @@ class SAM2Model(torch.nn.Module):
|
|
1006
1006
|
@staticmethod
|
1007
1007
|
def _apply_non_overlapping_constraints(pred_masks):
|
1008
1008
|
"""Apply non-overlapping constraints to masks, keeping the highest scoring object per location."""
|
1009
|
-
batch_size = pred_masks.
|
1009
|
+
batch_size = pred_masks.shape[0]
|
1010
1010
|
if batch_size == 1:
|
1011
1011
|
return pred_masks
|
1012
1012
|
|
@@ -423,7 +423,7 @@ class Predictor(BasePredictor):
|
|
423
423
|
pred_masks.append(crop_masks)
|
424
424
|
pred_bboxes.append(crop_bboxes)
|
425
425
|
pred_scores.append(crop_scores)
|
426
|
-
region_areas.append(area.expand(
|
426
|
+
region_areas.append(area.expand(crop_masks.shape[0]))
|
427
427
|
|
428
428
|
pred_masks = torch.cat(pred_masks)
|
429
429
|
pred_bboxes = torch.cat(pred_bboxes)
|
@@ -504,14 +504,14 @@ class Predictor(BasePredictor):
|
|
504
504
|
# (N, 1, H, W), (N, 1)
|
505
505
|
pred_masks, pred_scores = preds[:2]
|
506
506
|
pred_bboxes = preds[2] if self.segment_all else None
|
507
|
-
names = dict(enumerate(str(i) for i in range(
|
507
|
+
names = dict(enumerate(str(i) for i in range(pred_masks.shape[0])))
|
508
508
|
|
509
509
|
if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list
|
510
510
|
orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
|
511
511
|
|
512
512
|
results = []
|
513
513
|
for masks, orig_img, img_path in zip([pred_masks], orig_imgs, self.batch[0]):
|
514
|
-
if
|
514
|
+
if masks.shape[0] == 0:
|
515
515
|
masks, pred_bboxes = None, torch.zeros((0, 6), device=pred_masks.device)
|
516
516
|
else:
|
517
517
|
masks = ops.scale_masks(masks[None].float(), orig_img.shape[:2], padding=False)[0]
|
@@ -521,7 +521,7 @@ class Predictor(BasePredictor):
|
|
521
521
|
else:
|
522
522
|
pred_bboxes = batched_mask_to_box(masks)
|
523
523
|
# NOTE: SAM models do not return cls info. This `cls` here is just a placeholder for consistency.
|
524
|
-
cls = torch.arange(
|
524
|
+
cls = torch.arange(pred_masks.shape[0], dtype=torch.int32, device=pred_masks.device)
|
525
525
|
idx = pred_scores > self.args.conf
|
526
526
|
pred_bboxes = torch.cat([pred_bboxes, pred_scores[:, None], cls[:, None]], dim=-1)[idx]
|
527
527
|
masks = masks[idx]
|
@@ -633,7 +633,7 @@ class Predictor(BasePredictor):
|
|
633
633
|
"""
|
634
634
|
import torchvision # scope for faster 'import ultralytics'
|
635
635
|
|
636
|
-
if
|
636
|
+
if masks.shape[0] == 0:
|
637
637
|
return masks
|
638
638
|
|
639
639
|
# Filter small disconnected regions and holes
|
@@ -693,14 +693,14 @@ class Predictor(BasePredictor):
|
|
693
693
|
dst_shape = dst_shape or (self.args.imgsz, self.args.imgsz)
|
694
694
|
prompts = self._prepare_prompts(dst_shape, src_shape, bboxes, points, labels, masks)
|
695
695
|
pred_masks, pred_scores = self._inference_features(features, *prompts, multimask_output)
|
696
|
-
if
|
696
|
+
if pred_masks.shape[0] == 0:
|
697
697
|
pred_masks, pred_bboxes = None, torch.zeros((0, 6), device=pred_masks.device)
|
698
698
|
else:
|
699
699
|
pred_masks = ops.scale_masks(pred_masks[None].float(), src_shape, padding=False)[0]
|
700
700
|
pred_masks = pred_masks > self.model.mask_threshold # to bool
|
701
701
|
pred_bboxes = batched_mask_to_box(pred_masks)
|
702
702
|
# NOTE: SAM models do not return cls info. This `cls` here is just a placeholder for consistency.
|
703
|
-
cls = torch.arange(
|
703
|
+
cls = torch.arange(pred_masks.shape[0], dtype=torch.int32, device=pred_masks.device)
|
704
704
|
pred_bboxes = torch.cat([pred_bboxes, pred_scores[:, None], cls[:, None]], dim=-1)
|
705
705
|
return pred_masks, pred_bboxes
|
706
706
|
|
@@ -770,7 +770,7 @@ class SAM2Predictor(Predictor):
|
|
770
770
|
bboxes, points, labels, masks = super()._prepare_prompts(dst_shape, src_shape, bboxes, points, labels, masks)
|
771
771
|
if bboxes is not None:
|
772
772
|
bboxes = bboxes.view(-1, 2, 2)
|
773
|
-
bbox_labels = torch.tensor([[2, 3]], dtype=torch.int32, device=bboxes.device).expand(
|
773
|
+
bbox_labels = torch.tensor([[2, 3]], dtype=torch.int32, device=bboxes.device).expand(bboxes.shape[0], -1)
|
774
774
|
# NOTE: merge "boxes" and "points" into a single "points" input
|
775
775
|
# (where boxes are added at the beginning) to model.sam_prompt_encoder
|
776
776
|
if points is not None:
|
@@ -1025,7 +1025,7 @@ class SAM2VideoPredictor(SAM2Predictor):
|
|
1025
1025
|
pred_masks = current_out["pred_masks"].flatten(0, 1)
|
1026
1026
|
pred_masks = pred_masks[(pred_masks > self.model.mask_threshold).sum((1, 2)) > 0] # filter blank masks
|
1027
1027
|
|
1028
|
-
return pred_masks, torch.ones(
|
1028
|
+
return pred_masks, torch.ones(pred_masks.shape[0], dtype=pred_masks.dtype, device=pred_masks.device)
|
1029
1029
|
|
1030
1030
|
def postprocess(self, preds, img, orig_imgs):
|
1031
1031
|
"""
|
@@ -1465,7 +1465,7 @@ class SAM2VideoPredictor(SAM2Predictor):
|
|
1465
1465
|
else:
|
1466
1466
|
maskmem_pos_enc = model_constants["maskmem_pos_enc"]
|
1467
1467
|
# expand the cached maskmem_pos_enc to the actual batch size
|
1468
|
-
batch_size = out_maskmem_pos_enc[0].
|
1468
|
+
batch_size = out_maskmem_pos_enc[0].shape[0]
|
1469
1469
|
if batch_size > 1:
|
1470
1470
|
out_maskmem_pos_enc = [x.expand(batch_size, -1, -1, -1) for x in maskmem_pos_enc]
|
1471
1471
|
return out_maskmem_pos_enc
|
@@ -2028,7 +2028,7 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
|
|
2028
2028
|
point_inputs={"point_coords": point, "point_labels": label} if obj_idx is not None else None,
|
2029
2029
|
mask_inputs=mask,
|
2030
2030
|
multimask_output=False,
|
2031
|
-
high_res_features=[feat[: pix_feat_with_mem.
|
2031
|
+
high_res_features=[feat[: pix_feat_with_mem.shape[0]] for feat in self.high_res_features],
|
2032
2032
|
)
|
2033
2033
|
return {
|
2034
2034
|
"pred_masks": low_res_masks,
|
@@ -12,7 +12,7 @@ from ultralytics.engine.trainer import BaseTrainer
|
|
12
12
|
from ultralytics.models import yolo
|
13
13
|
from ultralytics.nn.tasks import ClassificationModel
|
14
14
|
from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
|
15
|
-
from ultralytics.utils.plotting import plot_images
|
15
|
+
from ultralytics.utils.plotting import plot_images
|
16
16
|
from ultralytics.utils.torch_utils import is_parallel, strip_optimizer, torch_distributed_zero_first
|
17
17
|
|
18
18
|
|
@@ -39,7 +39,6 @@ class ClassificationTrainer(BaseTrainer):
|
|
39
39
|
progress_string: Return a formatted string showing training progress.
|
40
40
|
get_validator: Return an instance of ClassificationValidator.
|
41
41
|
label_loss_items: Return a loss dict with labelled training loss items.
|
42
|
-
plot_metrics: Plot metrics from a CSV file.
|
43
42
|
final_eval: Evaluate trained model and save validation results.
|
44
43
|
plot_training_samples: Plot training samples with their annotations.
|
45
44
|
|
@@ -195,10 +194,6 @@ class ClassificationTrainer(BaseTrainer):
|
|
195
194
|
loss_items = [round(float(loss_items), 5)]
|
196
195
|
return dict(zip(keys, loss_items))
|
197
196
|
|
198
|
-
def plot_metrics(self):
|
199
|
-
"""Plot metrics from a CSV file."""
|
200
|
-
plot_results(file=self.csv, classify=True, on_plot=self.on_plot) # save results.png
|
201
|
-
|
202
197
|
def final_eval(self):
|
203
198
|
"""Evaluate trained model and save validation results."""
|
204
199
|
for f in self.last, self.best:
|
@@ -220,7 +215,7 @@ class ClassificationTrainer(BaseTrainer):
|
|
220
215
|
batch (dict[str, torch.Tensor]): Batch containing images and class labels.
|
221
216
|
ni (int): Number of iterations.
|
222
217
|
"""
|
223
|
-
batch["batch_idx"] = torch.arange(
|
218
|
+
batch["batch_idx"] = torch.arange(batch["img"].shape[0]) # add batch index for plotting
|
224
219
|
plot_images(
|
225
220
|
labels=batch,
|
226
221
|
fname=self.save_dir / f"train_batch{ni}.jpg",
|
@@ -178,7 +178,7 @@ class ClassificationValidator(BaseValidator):
|
|
178
178
|
>>> batch = {"img": torch.rand(16, 3, 224, 224), "cls": torch.randint(0, 10, (16,))}
|
179
179
|
>>> validator.plot_val_samples(batch, 0)
|
180
180
|
"""
|
181
|
-
batch["batch_idx"] = torch.arange(
|
181
|
+
batch["batch_idx"] = torch.arange(batch["img"].shape[0]) # add batch index for plotting
|
182
182
|
plot_images(
|
183
183
|
labels=batch,
|
184
184
|
fname=self.save_dir / f"val_batch{ni}_labels.jpg",
|
@@ -203,7 +203,7 @@ class ClassificationValidator(BaseValidator):
|
|
203
203
|
"""
|
204
204
|
batched_preds = dict(
|
205
205
|
img=batch["img"],
|
206
|
-
batch_idx=torch.arange(
|
206
|
+
batch_idx=torch.arange(batch["img"].shape[0]),
|
207
207
|
cls=torch.argmax(preds, dim=1),
|
208
208
|
)
|
209
209
|
plot_images(
|
@@ -89,7 +89,7 @@ class DetectionPredictor(BasePredictor):
|
|
89
89
|
obj_feats = torch.cat(
|
90
90
|
[x.permute(0, 2, 3, 1).reshape(x.shape[0], -1, s, x.shape[1] // s).mean(dim=-1) for x in feat_maps], dim=1
|
91
91
|
) # mean reduce all vectors to same length
|
92
|
-
return [feats[idx] if
|
92
|
+
return [feats[idx] if idx.shape[0] else [] for feats, idx in zip(obj_feats, idxs)] # for each img in batch
|
93
93
|
|
94
94
|
def construct_results(self, preds, img, orig_imgs):
|
95
95
|
"""
|
@@ -17,7 +17,7 @@ from ultralytics.models import yolo
|
|
17
17
|
from ultralytics.nn.tasks import DetectionModel
|
18
18
|
from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
|
19
19
|
from ultralytics.utils.patches import override_configs
|
20
|
-
from ultralytics.utils.plotting import plot_images, plot_labels
|
20
|
+
from ultralytics.utils.plotting import plot_images, plot_labels
|
21
21
|
from ultralytics.utils.torch_utils import torch_distributed_zero_first, unwrap_model
|
22
22
|
|
23
23
|
|
@@ -43,7 +43,6 @@ class DetectionTrainer(BaseTrainer):
|
|
43
43
|
label_loss_items: Return a loss dictionary with labeled training loss items.
|
44
44
|
progress_string: Return a formatted string of training progress.
|
45
45
|
plot_training_samples: Plot training samples with their annotations.
|
46
|
-
plot_metrics: Plot metrics from a CSV file.
|
47
46
|
plot_training_labels: Create a labeled training plot of the YOLO model.
|
48
47
|
auto_batch: Calculate optimal batch size based on model memory requirements.
|
49
48
|
|
@@ -217,10 +216,6 @@ class DetectionTrainer(BaseTrainer):
|
|
217
216
|
on_plot=self.on_plot,
|
218
217
|
)
|
219
218
|
|
220
|
-
def plot_metrics(self):
|
221
|
-
"""Plot metrics from a CSV file."""
|
222
|
-
plot_results(file=self.csv, on_plot=self.on_plot) # save results.png
|
223
|
-
|
224
219
|
def plot_training_labels(self):
|
225
220
|
"""Create a labeled training plot of the YOLO model."""
|
226
221
|
boxes = np.concatenate([lb["bboxes"] for lb in self.train_loader.dataset.labels], 0)
|
@@ -146,7 +146,7 @@ class DetectionValidator(BaseValidator):
|
|
146
146
|
ori_shape = batch["ori_shape"][si]
|
147
147
|
imgsz = batch["img"].shape[2:]
|
148
148
|
ratio_pad = batch["ratio_pad"][si]
|
149
|
-
if
|
149
|
+
if cls.shape[0]:
|
150
150
|
bbox = ops.xywh2xyxy(bbox) * torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]] # target boxes
|
151
151
|
return {
|
152
152
|
"cls": cls,
|
@@ -185,7 +185,7 @@ class DetectionValidator(BaseValidator):
|
|
185
185
|
predn = self._prepare_pred(pred)
|
186
186
|
|
187
187
|
cls = pbatch["cls"].cpu().numpy()
|
188
|
-
no_pred =
|
188
|
+
no_pred = predn["cls"].shape[0] == 0
|
189
189
|
self.metrics.update_stats(
|
190
190
|
{
|
191
191
|
**self._process_batch(predn, pbatch),
|
@@ -268,8 +268,8 @@ class DetectionValidator(BaseValidator):
|
|
268
268
|
Returns:
|
269
269
|
(dict[str, np.ndarray]): Dictionary containing 'tp' key with correct prediction matrix of shape (N, 10) for 10 IoU levels.
|
270
270
|
"""
|
271
|
-
if
|
272
|
-
return {"tp": np.zeros((
|
271
|
+
if batch["cls"].shape[0] == 0 or preds["cls"].shape[0] == 0:
|
272
|
+
return {"tp": np.zeros((preds["cls"].shape[0], self.niou), dtype=bool)}
|
273
273
|
iou = box_iou(batch["bboxes"], preds["bboxes"])
|
274
274
|
return {"tp": self.match_predictions(preds["cls"], batch["cls"], iou).cpu().numpy()}
|
275
275
|
|
@@ -93,8 +93,8 @@ class OBBValidator(DetectionValidator):
|
|
93
93
|
>>> gt_cls = torch.randint(0, 5, (50,)) # 50 ground truth class labels
|
94
94
|
>>> correct_matrix = validator._process_batch(detections, gt_bboxes, gt_cls)
|
95
95
|
"""
|
96
|
-
if
|
97
|
-
return {"tp": np.zeros((
|
96
|
+
if batch["cls"].shape[0] == 0 or preds["cls"].shape[0] == 0:
|
97
|
+
return {"tp": np.zeros((preds["cls"].shape[0], self.niou), dtype=bool)}
|
98
98
|
iou = batch_probiou(batch["bboxes"], preds["bboxes"])
|
99
99
|
return {"tp": self.match_predictions(preds["cls"], batch["cls"], iou).cpu().numpy()}
|
100
100
|
|
@@ -134,7 +134,7 @@ class OBBValidator(DetectionValidator):
|
|
134
134
|
ori_shape = batch["ori_shape"][si]
|
135
135
|
imgsz = batch["img"].shape[2:]
|
136
136
|
ratio_pad = batch["ratio_pad"][si]
|
137
|
-
if
|
137
|
+
if cls.shape[0]:
|
138
138
|
bbox[..., :4].mul_(torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]]) # target boxes
|
139
139
|
return {
|
140
140
|
"cls": cls,
|
@@ -73,7 +73,7 @@ class PosePredictor(DetectionPredictor):
|
|
73
73
|
"""
|
74
74
|
result = super().construct_result(pred, img, orig_img, img_path)
|
75
75
|
# Extract keypoints from prediction and reshape according to model's keypoint shape
|
76
|
-
pred_kpts = pred[:, 6:].view(
|
76
|
+
pred_kpts = pred[:, 6:].view(pred.shape[0], *self.model.kpt_shape)
|
77
77
|
# Scale keypoints coordinates to match the original image dimensions
|
78
78
|
pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, orig_img.shape)
|
79
79
|
result.update(keypoints=pred_kpts)
|
@@ -9,7 +9,6 @@ from typing import Any
|
|
9
9
|
from ultralytics.models import yolo
|
10
10
|
from ultralytics.nn.tasks import PoseModel
|
11
11
|
from ultralytics.utils import DEFAULT_CFG, LOGGER
|
12
|
-
from ultralytics.utils.plotting import plot_results
|
13
12
|
|
14
13
|
|
15
14
|
class PoseTrainer(yolo.detect.DetectionTrainer):
|
@@ -30,7 +29,6 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
|
|
30
29
|
set_model_attributes: Set keypoints shape attribute on the model.
|
31
30
|
get_validator: Create a validator instance for model evaluation.
|
32
31
|
plot_training_samples: Visualize training samples with keypoints.
|
33
|
-
plot_metrics: Generate and save training/validation metric plots.
|
34
32
|
get_dataset: Retrieve the dataset and ensure it contains required kpt_shape key.
|
35
33
|
|
36
34
|
Examples:
|
@@ -101,10 +99,6 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
|
|
101
99
|
self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
|
102
100
|
)
|
103
101
|
|
104
|
-
def plot_metrics(self):
|
105
|
-
"""Plot training/validation metrics."""
|
106
|
-
plot_results(file=self.csv, pose=True, on_plot=self.on_plot) # save results.png
|
107
|
-
|
108
102
|
def get_dataset(self) -> dict[str, Any]:
|
109
103
|
"""
|
110
104
|
Retrieve the dataset and ensure it contains the required `kpt_shape` key.
|
@@ -192,8 +192,8 @@ class PoseValidator(DetectionValidator):
|
|
192
192
|
"""
|
193
193
|
tp = super()._process_batch(preds, batch)
|
194
194
|
gt_cls = batch["cls"]
|
195
|
-
if
|
196
|
-
tp_p = np.zeros((
|
195
|
+
if gt_cls.shape[0] == 0 or preds["cls"].shape[0] == 0:
|
196
|
+
tp_p = np.zeros((preds["cls"].shape[0], self.niou), dtype=bool)
|
197
197
|
else:
|
198
198
|
# `0.53` is from https://github.com/jin-s13/xtcocoapi/blob/master/xtcocotools/cocoeval.py#L384
|
199
199
|
area = ops.xyxy2xywh(batch["bboxes"])[:, 2:].prod(1) * 0.53
|
@@ -90,7 +90,7 @@ class SegmentationPredictor(DetectionPredictor):
|
|
90
90
|
Construct a single result object from the prediction.
|
91
91
|
|
92
92
|
Args:
|
93
|
-
pred (
|
93
|
+
pred (torch.Tensor): The predicted bounding boxes, scores, and masks.
|
94
94
|
img (torch.Tensor): The image after preprocessing.
|
95
95
|
orig_img (np.ndarray): The original image before preprocessing.
|
96
96
|
img_path (str): The path to the original image.
|
@@ -99,7 +99,7 @@ class SegmentationPredictor(DetectionPredictor):
|
|
99
99
|
Returns:
|
100
100
|
(Results): Result object containing the original image, image path, class names, bounding boxes, and masks.
|
101
101
|
"""
|
102
|
-
if
|
102
|
+
if pred.shape[0] == 0: # save empty boxes
|
103
103
|
masks = None
|
104
104
|
elif self.args.retina_masks:
|
105
105
|
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
|
@@ -8,7 +8,6 @@ from pathlib import Path
|
|
8
8
|
from ultralytics.models import yolo
|
9
9
|
from ultralytics.nn.tasks import SegmentationModel
|
10
10
|
from ultralytics.utils import DEFAULT_CFG, RANK
|
11
|
-
from ultralytics.utils.plotting import plot_results
|
12
11
|
|
13
12
|
|
14
13
|
class SegmentationTrainer(yolo.detect.DetectionTrainer):
|
@@ -71,7 +70,3 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
|
|
71
70
|
return yolo.segment.SegmentationValidator(
|
72
71
|
self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
|
73
72
|
)
|
74
|
-
|
75
|
-
def plot_metrics(self):
|
76
|
-
"""Plot training/validation metrics."""
|
77
|
-
plot_results(file=self.csv, segment=True, on_plot=self.on_plot) # save results.png
|
@@ -112,7 +112,7 @@ class SegmentationValidator(DetectionValidator):
|
|
112
112
|
coefficient = pred.pop("extra")
|
113
113
|
pred["masks"] = (
|
114
114
|
self.process(proto[i], coefficient, pred["bboxes"], shape=imgsz)
|
115
|
-
if
|
115
|
+
if coefficient.shape[0]
|
116
116
|
else torch.zeros(
|
117
117
|
(0, *(imgsz if self.process is ops.process_mask_native else proto.shape[2:])),
|
118
118
|
dtype=torch.uint8,
|
@@ -133,16 +133,18 @@ class SegmentationValidator(DetectionValidator):
|
|
133
133
|
(dict[str, Any]): Prepared batch with processed annotations.
|
134
134
|
"""
|
135
135
|
prepared_batch = super()._prepare_batch(si, batch)
|
136
|
-
nl =
|
136
|
+
nl = prepared_batch["cls"].shape[0]
|
137
137
|
if self.args.overlap_mask:
|
138
138
|
masks = batch["masks"][si]
|
139
139
|
index = torch.arange(1, nl + 1, device=masks.device).view(nl, 1, 1)
|
140
140
|
masks = (masks == index).float()
|
141
141
|
else:
|
142
142
|
masks = batch["masks"][batch["batch_idx"] == si]
|
143
|
-
if nl
|
144
|
-
|
145
|
-
masks
|
143
|
+
if nl:
|
144
|
+
mask_size = [s if self.process is ops.process_mask_native else s // 4 for s in prepared_batch["imgsz"]]
|
145
|
+
if masks.shape[1:] != mask_size:
|
146
|
+
masks = F.interpolate(masks[None], mask_size, mode="bilinear", align_corners=False)[0]
|
147
|
+
masks = masks.gt_(0.5)
|
146
148
|
prepared_batch["masks"] = masks
|
147
149
|
return prepared_batch
|
148
150
|
|
@@ -168,8 +170,8 @@ class SegmentationValidator(DetectionValidator):
|
|
168
170
|
"""
|
169
171
|
tp = super()._process_batch(preds, batch)
|
170
172
|
gt_cls = batch["cls"]
|
171
|
-
if
|
172
|
-
tp_m = np.zeros((
|
173
|
+
if gt_cls.shape[0] == 0 or preds["cls"].shape[0] == 0:
|
174
|
+
tp_m = np.zeros((preds["cls"].shape[0], self.niou), dtype=bool)
|
173
175
|
else:
|
174
176
|
iou = mask_iou(batch["masks"].flatten(1), preds["masks"].flatten(1))
|
175
177
|
tp_m = self.match_predictions(preds["cls"], gt_cls, iou).cpu().numpy()
|
@@ -89,7 +89,7 @@ class YOLOEDetectValidator(DetectionValidator):
|
|
89
89
|
for i in range(preds.shape[0]):
|
90
90
|
cls = batch["cls"][batch_idx == i].squeeze(-1).to(torch.int).unique(sorted=True)
|
91
91
|
pad_cls = torch.ones(preds.shape[1], device=self.device) * -1
|
92
|
-
pad_cls[:
|
92
|
+
pad_cls[: cls.shape[0]] = cls
|
93
93
|
for c in cls:
|
94
94
|
visual_pe[c] += preds[i][pad_cls == c].sum(0) / cls_visual_num[c]
|
95
95
|
|
ultralytics/nn/modules/block.py
CHANGED
@@ -1921,7 +1921,7 @@ class A2C2f(nn.Module):
|
|
1921
1921
|
y.extend(m(y[-1]) for m in self.m)
|
1922
1922
|
y = self.cv2(torch.cat(y, 1))
|
1923
1923
|
if self.gamma is not None:
|
1924
|
-
return x + self.gamma.view(-1,
|
1924
|
+
return x + self.gamma.view(-1, self.gamma.shape[0], 1, 1) * y
|
1925
1925
|
return y
|
1926
1926
|
|
1927
1927
|
|