ultralytics 8.3.196__py3-none-any.whl → 8.3.198__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/test_engine.py +9 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +0 -1
- ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
- ultralytics/cfg/default.yaml +96 -94
- ultralytics/cfg/trackers/botsort.yaml +16 -17
- ultralytics/cfg/trackers/bytetrack.yaml +9 -11
- ultralytics/data/augment.py +1 -1
- ultralytics/data/dataset.py +1 -1
- ultralytics/engine/exporter.py +36 -35
- ultralytics/engine/model.py +1 -2
- ultralytics/engine/predictor.py +1 -2
- ultralytics/engine/results.py +1 -1
- ultralytics/engine/trainer.py +8 -10
- ultralytics/engine/tuner.py +54 -32
- ultralytics/models/sam/modules/decoders.py +3 -3
- ultralytics/models/sam/modules/sam.py +5 -5
- ultralytics/models/sam/predict.py +11 -11
- ultralytics/models/yolo/classify/train.py +2 -7
- ultralytics/models/yolo/classify/val.py +2 -2
- ultralytics/models/yolo/detect/predict.py +1 -1
- ultralytics/models/yolo/detect/train.py +1 -11
- ultralytics/models/yolo/detect/val.py +4 -4
- ultralytics/models/yolo/obb/val.py +3 -3
- ultralytics/models/yolo/pose/predict.py +1 -1
- ultralytics/models/yolo/pose/train.py +0 -7
- ultralytics/models/yolo/pose/val.py +2 -2
- ultralytics/models/yolo/segment/predict.py +2 -2
- ultralytics/models/yolo/segment/train.py +0 -6
- ultralytics/models/yolo/segment/val.py +13 -11
- ultralytics/models/yolo/yoloe/val.py +1 -1
- ultralytics/nn/modules/block.py +1 -1
- ultralytics/nn/modules/head.py +1 -2
- ultralytics/nn/tasks.py +2 -2
- ultralytics/utils/checks.py +1 -1
- ultralytics/utils/loss.py +1 -2
- ultralytics/utils/metrics.py +6 -6
- ultralytics/utils/nms.py +8 -14
- ultralytics/utils/plotting.py +22 -36
- ultralytics/utils/torch_utils.py +9 -27
- {ultralytics-8.3.196.dist-info → ultralytics-8.3.198.dist-info}/METADATA +1 -1
- {ultralytics-8.3.196.dist-info → ultralytics-8.3.198.dist-info}/RECORD +46 -45
- {ultralytics-8.3.196.dist-info → ultralytics-8.3.198.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.196.dist-info → ultralytics-8.3.198.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.196.dist-info → ultralytics-8.3.198.dist-info}/licenses/LICENSE +0 -0
- {ultralytics-8.3.196.dist-info → ultralytics-8.3.198.dist-info}/top_level.txt +0 -0
ultralytics/engine/predictor.py
CHANGED
@@ -409,8 +409,7 @@ class BasePredictor:
|
|
409
409
|
if hasattr(self.model, "imgsz") and not getattr(self.model, "dynamic", False):
|
410
410
|
self.args.imgsz = self.model.imgsz # reuse imgsz from export metadata
|
411
411
|
self.model.eval()
|
412
|
-
|
413
|
-
self.model = attempt_compile(self.model, device=self.device)
|
412
|
+
self.model = attempt_compile(self.model, device=self.device, mode=self.args.compile)
|
414
413
|
|
415
414
|
def write_results(self, i: int, p: Path, im: torch.Tensor, s: list[str]) -> str:
|
416
415
|
"""
|
ultralytics/engine/results.py
CHANGED
@@ -900,7 +900,7 @@ class Boxes(BaseTensor):
|
|
900
900
|
Args:
|
901
901
|
boxes (torch.Tensor | np.ndarray): A tensor or numpy array with detection boxes of shape
|
902
902
|
(num_boxes, 6) or (num_boxes, 7). Columns should contain
|
903
|
-
[x1, y1, x2, y2,
|
903
|
+
[x1, y1, x2, y2, (optional) track_id, confidence, class].
|
904
904
|
orig_shape (tuple[int, int]): The original image shape as (height, width). Used for normalization.
|
905
905
|
|
906
906
|
Attributes:
|
ultralytics/engine/trainer.py
CHANGED
@@ -42,6 +42,7 @@ from ultralytics.utils.autobatch import check_train_batch_size
|
|
42
42
|
from ultralytics.utils.checks import check_amp, check_file, check_imgsz, check_model_file_from_stem, print_args
|
43
43
|
from ultralytics.utils.dist import ddp_cleanup, generate_ddp_command
|
44
44
|
from ultralytics.utils.files import get_latest_run
|
45
|
+
from ultralytics.utils.plotting import plot_results
|
45
46
|
from ultralytics.utils.torch_utils import (
|
46
47
|
TORCH_2_4,
|
47
48
|
EarlyStopping,
|
@@ -119,6 +120,7 @@ class BaseTrainer:
|
|
119
120
|
overrides (dict, optional): Configuration overrides.
|
120
121
|
_callbacks (list, optional): List of callback functions.
|
121
122
|
"""
|
123
|
+
self.hub_session = overrides.pop("session", None) # HUB
|
122
124
|
self.args = get_cfg(cfg, overrides)
|
123
125
|
self.check_resume(overrides)
|
124
126
|
self.device = select_device(self.args.device, self.args.batch)
|
@@ -170,9 +172,6 @@ class BaseTrainer:
|
|
170
172
|
self.csv = self.save_dir / "results.csv"
|
171
173
|
self.plot_idx = [0, 1, 2]
|
172
174
|
|
173
|
-
# HUB
|
174
|
-
self.hub_session = None
|
175
|
-
|
176
175
|
# Callbacks
|
177
176
|
self.callbacks = _callbacks or callbacks.get_default_callbacks()
|
178
177
|
if RANK in {-1, 0}:
|
@@ -263,8 +262,7 @@ class BaseTrainer:
|
|
263
262
|
self.model.criterion = self.model.init_criterion()
|
264
263
|
|
265
264
|
# Compile model
|
266
|
-
|
267
|
-
self.model = attempt_compile(self.model, device=self.device)
|
265
|
+
self.model = attempt_compile(self.model, device=self.device, mode=self.args.compile)
|
268
266
|
|
269
267
|
# Freeze layers
|
270
268
|
freeze_list = (
|
@@ -414,8 +412,9 @@ class BaseTrainer:
|
|
414
412
|
# Forward
|
415
413
|
with autocast(self.amp):
|
416
414
|
batch = self.preprocess_batch(batch)
|
417
|
-
|
418
|
-
|
415
|
+
# decouple inference and loss calculations for torch.compile convenience
|
416
|
+
preds = self.model(batch["img"])
|
417
|
+
loss, self.loss_items = unwrap_model(self.model).loss(batch, preds)
|
419
418
|
self.loss = loss.sum()
|
420
419
|
if RANK != -1:
|
421
420
|
self.loss *= world_size
|
@@ -456,7 +455,6 @@ class BaseTrainer:
|
|
456
455
|
)
|
457
456
|
self.run_callbacks("on_batch_end")
|
458
457
|
if self.args.plots and ni in self.plot_idx:
|
459
|
-
batch = {**batch, **metadata}
|
460
458
|
self.plot_training_samples(batch, ni)
|
461
459
|
|
462
460
|
self.run_callbacks("on_train_batch_end")
|
@@ -745,8 +743,8 @@ class BaseTrainer:
|
|
745
743
|
f.write(s + ("%.6g," * n % tuple([self.epoch + 1, t] + vals)).rstrip(",") + "\n")
|
746
744
|
|
747
745
|
def plot_metrics(self):
|
748
|
-
"""Plot
|
749
|
-
|
746
|
+
"""Plot metrics from a CSV file."""
|
747
|
+
plot_results(file=self.csv, on_plot=self.on_plot) # save results.png
|
750
748
|
|
751
749
|
def on_plot(self, name, data=None):
|
752
750
|
"""Register plots (e.g. to be consumed in callbacks)."""
|
ultralytics/engine/tuner.py
CHANGED
@@ -16,6 +16,7 @@ Examples:
|
|
16
16
|
|
17
17
|
from __future__ import annotations
|
18
18
|
|
19
|
+
import gc
|
19
20
|
import random
|
20
21
|
import shutil
|
21
22
|
import subprocess
|
@@ -23,6 +24,7 @@ import time
|
|
23
24
|
from datetime import datetime
|
24
25
|
|
25
26
|
import numpy as np
|
27
|
+
import torch
|
26
28
|
|
27
29
|
from ultralytics.cfg import get_cfg, get_save_dir
|
28
30
|
from ultralytics.utils import DEFAULT_CFG, LOGGER, YAML, callbacks, colorstr, remove_colorstr
|
@@ -97,7 +99,7 @@ class Tuner:
|
|
97
99
|
"warmup_epochs": (0.0, 5.0), # warmup epochs (fractions ok)
|
98
100
|
"warmup_momentum": (0.0, 0.95), # warmup initial momentum
|
99
101
|
"box": (1.0, 20.0), # box loss gain
|
100
|
-
"cls": (0.
|
102
|
+
"cls": (0.1, 4.0), # cls loss gain (scale with pixels)
|
101
103
|
"dfl": (0.4, 6.0), # dfl loss gain
|
102
104
|
"hsv_h": (0.0, 0.1), # image HSV-Hue augmentation (fraction)
|
103
105
|
"hsv_s": (0.0, 0.9), # image HSV-Saturation augmentation (fraction)
|
@@ -114,6 +116,7 @@ class Tuner:
|
|
114
116
|
"mixup": (0.0, 1.0), # image mixup (probability)
|
115
117
|
"cutmix": (0.0, 1.0), # image cutmix (probability)
|
116
118
|
"copy_paste": (0.0, 1.0), # segment copy-paste (probability)
|
119
|
+
"close_mosaic": (0.0, 10.0), # close dataloader mosaic (epochs)
|
117
120
|
}
|
118
121
|
mongodb_uri = args.pop("mongodb_uri", None)
|
119
122
|
mongodb_db = args.pop("mongodb_db", "ultralytics")
|
@@ -266,19 +269,31 @@ class Tuner:
|
|
266
269
|
except Exception as e:
|
267
270
|
LOGGER.warning(f"{self.prefix}MongoDB to CSV sync failed: {e}")
|
268
271
|
|
272
|
+
def _crossover(self, x: np.ndarray, alpha: float = 0.2, k: int = 9) -> np.ndarray:
|
273
|
+
"""BLX-α crossover from up to top-k parents (x[:,0]=fitness, rest=genes)."""
|
274
|
+
k = min(k, len(x))
|
275
|
+
# fitness weights (shifted to >0); fallback to uniform if degenerate
|
276
|
+
weights = x[:, 0] - x[:, 0].min() + 1e-6
|
277
|
+
if not np.isfinite(weights).all() or weights.sum() == 0:
|
278
|
+
weights = np.ones_like(weights)
|
279
|
+
idxs = random.choices(range(len(x)), weights=weights, k=k)
|
280
|
+
parents_mat = np.stack([x[i][1:] for i in idxs], 0) # (k, ng) strip fitness
|
281
|
+
lo, hi = parents_mat.min(0), parents_mat.max(0)
|
282
|
+
span = hi - lo
|
283
|
+
return np.random.uniform(lo - alpha * span, hi + alpha * span)
|
284
|
+
|
269
285
|
def _mutate(
|
270
286
|
self,
|
271
|
-
|
272
|
-
|
273
|
-
mutation: float = 0.8,
|
287
|
+
n: int = 9,
|
288
|
+
mutation: float = 0.5,
|
274
289
|
sigma: float = 0.2,
|
275
290
|
) -> dict[str, float]:
|
276
291
|
"""
|
277
292
|
Mutate hyperparameters based on bounds and scaling factors specified in `self.space`.
|
278
293
|
|
279
294
|
Args:
|
280
|
-
parent (str): Parent selection method
|
281
|
-
n (int): Number of parents to consider.
|
295
|
+
parent (str): Parent selection method (kept for API compatibility, unused in BLX mode).
|
296
|
+
n (int): Number of top parents to consider.
|
282
297
|
mutation (float): Probability of a parameter mutation in any given iteration.
|
283
298
|
sigma (float): Standard deviation for Gaussian random number generator.
|
284
299
|
|
@@ -293,41 +308,40 @@ class Tuner:
|
|
293
308
|
if results:
|
294
309
|
# MongoDB already sorted by fitness DESC, so results[0] is best
|
295
310
|
x = np.array([[r["fitness"]] + [r["hyperparameters"][k] for k in self.space.keys()] for r in results])
|
296
|
-
n = min(n, len(x))
|
297
311
|
|
298
312
|
# Fall back to CSV if MongoDB unavailable or empty
|
299
313
|
if x is None and self.tune_csv.exists():
|
300
314
|
csv_data = np.loadtxt(self.tune_csv, ndmin=2, delimiter=",", skiprows=1)
|
301
315
|
if len(csv_data) > 0:
|
302
316
|
fitness = csv_data[:, 0] # first column
|
303
|
-
|
304
|
-
x = csv_data[
|
317
|
+
order = np.argsort(-fitness)
|
318
|
+
x = csv_data[order][:n] # top-n sorted by fitness DESC
|
305
319
|
|
306
320
|
# Mutate if we have data, otherwise use defaults
|
307
321
|
if x is not None:
|
308
|
-
|
309
|
-
if parent == "single" or len(x) <= 1:
|
310
|
-
x = x[random.choices(range(n), weights=w)[0]] # weighted selection
|
311
|
-
elif parent == "weighted":
|
312
|
-
x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination
|
313
|
-
|
314
|
-
# Mutate
|
315
|
-
r = np.random
|
316
|
-
r.seed(int(time.time()))
|
317
|
-
g = np.array([v[2] if len(v) == 3 else 1.0 for v in self.space.values()]) # gains 0-1
|
322
|
+
np.random.seed(int(time.time()))
|
318
323
|
ng = len(self.space)
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
324
|
+
|
325
|
+
# Crossover
|
326
|
+
genes = self._crossover(x)
|
327
|
+
|
328
|
+
# Mutation
|
329
|
+
gains = np.array([v[2] if len(v) == 3 else 1.0 for v in self.space.values()]) # gains 0-1
|
330
|
+
factors = np.ones(ng)
|
331
|
+
while np.all(factors == 1): # mutate until a change occurs (prevent duplicates)
|
332
|
+
mask = np.random.random(ng) < mutation
|
333
|
+
step = np.random.randn(ng) * (sigma * gains)
|
334
|
+
factors = np.where(mask, np.exp(step), 1.0).clip(0.25, 4.0)
|
335
|
+
hyp = {k: float(genes[i] * factors[i]) for i, k in enumerate(self.space.keys())}
|
323
336
|
else:
|
324
337
|
hyp = {k: getattr(self.args, k) for k in self.space.keys()}
|
325
338
|
|
326
339
|
# Constrain to limits
|
327
340
|
for k, bounds in self.space.items():
|
328
|
-
hyp[k] = max(hyp[k], bounds[0])
|
329
|
-
|
330
|
-
|
341
|
+
hyp[k] = round(min(max(hyp[k], bounds[0]), bounds[1]), 5)
|
342
|
+
|
343
|
+
# Update types
|
344
|
+
hyp["close_mosaic"] = int(round(hyp["close_mosaic"]))
|
331
345
|
|
332
346
|
return hyp
|
333
347
|
|
@@ -361,8 +375,12 @@ class Tuner:
|
|
361
375
|
start = x.shape[0]
|
362
376
|
LOGGER.info(f"{self.prefix}Resuming tuning run {self.tune_dir} from iteration {start + 1}...")
|
363
377
|
for i in range(start, iterations):
|
378
|
+
# Linearly decay sigma from 0.2 → 0.1 over first 300 iterations
|
379
|
+
frac = min(i / 300.0, 1.0)
|
380
|
+
sigma_i = 0.2 - 0.1 * frac
|
381
|
+
|
364
382
|
# Mutate hyperparameters
|
365
|
-
mutated_hyp = self._mutate()
|
383
|
+
mutated_hyp = self._mutate(sigma=sigma_i)
|
366
384
|
LOGGER.info(f"{self.prefix}Starting iteration {i + 1}/{iterations} with hyperparameters: {mutated_hyp}")
|
367
385
|
|
368
386
|
metrics = {}
|
@@ -378,6 +396,11 @@ class Tuner:
|
|
378
396
|
metrics = torch_load(ckpt_file)["train_metrics"]
|
379
397
|
assert return_code == 0, "training failed"
|
380
398
|
|
399
|
+
# Cleanup
|
400
|
+
time.sleep(1)
|
401
|
+
gc.collect()
|
402
|
+
torch.cuda.empty_cache()
|
403
|
+
|
381
404
|
except Exception as e:
|
382
405
|
LOGGER.error(f"training failure for hyperparameter tuning iteration {i + 1}\n{e}")
|
383
406
|
|
@@ -403,14 +426,14 @@ class Tuner:
|
|
403
426
|
x = np.loadtxt(self.tune_csv, ndmin=2, delimiter=",", skiprows=1)
|
404
427
|
fitness = x[:, 0] # first column
|
405
428
|
best_idx = fitness.argmax()
|
406
|
-
best_is_current = best_idx == i
|
429
|
+
best_is_current = best_idx == (i - start)
|
407
430
|
if best_is_current:
|
408
|
-
best_save_dir = save_dir
|
431
|
+
best_save_dir = str(save_dir)
|
409
432
|
best_metrics = {k: round(v, 5) for k, v in metrics.items()}
|
410
433
|
for ckpt in weights_dir.glob("*.pt"):
|
411
434
|
shutil.copy2(ckpt, self.tune_dir / "weights")
|
412
435
|
elif cleanup:
|
413
|
-
shutil.rmtree(
|
436
|
+
shutil.rmtree(best_save_dir, ignore_errors=True) # remove iteration dirs to reduce storage space
|
414
437
|
|
415
438
|
# Plot tune results
|
416
439
|
plot_tune_results(str(self.tune_csv))
|
@@ -421,8 +444,7 @@ class Tuner:
|
|
421
444
|
f"{self.prefix}Results saved to {colorstr('bold', self.tune_dir)}\n"
|
422
445
|
f"{self.prefix}Best fitness={fitness[best_idx]} observed at iteration {best_idx + 1}\n"
|
423
446
|
f"{self.prefix}Best fitness metrics are {best_metrics}\n"
|
424
|
-
f"{self.prefix}Best fitness model is {best_save_dir}
|
425
|
-
f"{self.prefix}Best fitness hyperparameters are printed below.\n"
|
447
|
+
f"{self.prefix}Best fitness model is {best_save_dir}"
|
426
448
|
)
|
427
449
|
LOGGER.info("\n" + header)
|
428
450
|
data = {k: float(x[best_idx, i + 1]) for i, k in enumerate(self.space.keys())}
|
@@ -402,7 +402,7 @@ class SAM2MaskDecoder(nn.Module):
|
|
402
402
|
s = 1
|
403
403
|
else:
|
404
404
|
output_tokens = torch.cat([self.iou_token.weight, self.mask_tokens.weight], dim=0)
|
405
|
-
output_tokens = output_tokens.unsqueeze(0).expand(sparse_prompt_embeddings.
|
405
|
+
output_tokens = output_tokens.unsqueeze(0).expand(sparse_prompt_embeddings.shape[0], -1, -1)
|
406
406
|
tokens = torch.cat((output_tokens, sparse_prompt_embeddings), dim=1)
|
407
407
|
|
408
408
|
# Expand per-image data in batch direction to be per-mask
|
@@ -412,7 +412,7 @@ class SAM2MaskDecoder(nn.Module):
|
|
412
412
|
assert image_embeddings.shape[0] == tokens.shape[0]
|
413
413
|
src = image_embeddings
|
414
414
|
src = src + dense_prompt_embeddings
|
415
|
-
assert image_pe.
|
415
|
+
assert image_pe.shape[0] == 1, "image_pe should have size 1 in batch dim (from `get_dense_pe()`)"
|
416
416
|
pos_src = torch.repeat_interleave(image_pe, tokens.shape[0], dim=0)
|
417
417
|
b, c, h, w = src.shape
|
418
418
|
|
@@ -487,7 +487,7 @@ class SAM2MaskDecoder(nn.Module):
|
|
487
487
|
multimask_logits = all_mask_logits[:, 1:, :, :]
|
488
488
|
multimask_iou_scores = all_iou_scores[:, 1:]
|
489
489
|
best_scores_inds = torch.argmax(multimask_iou_scores, dim=-1)
|
490
|
-
batch_inds = torch.arange(multimask_iou_scores.
|
490
|
+
batch_inds = torch.arange(multimask_iou_scores.shape[0], device=all_iou_scores.device)
|
491
491
|
best_multimask_logits = multimask_logits[batch_inds, best_scores_inds]
|
492
492
|
best_multimask_logits = best_multimask_logits.unsqueeze(1)
|
493
493
|
best_multimask_iou_scores = multimask_iou_scores[batch_inds, best_scores_inds]
|
@@ -472,7 +472,7 @@ class SAM2Model(torch.nn.Module):
|
|
472
472
|
... object_score_logits,
|
473
473
|
... ) = results
|
474
474
|
"""
|
475
|
-
B = backbone_features.
|
475
|
+
B = backbone_features.shape[0]
|
476
476
|
device = backbone_features.device
|
477
477
|
assert backbone_features.size(1) == self.sam_prompt_embed_dim
|
478
478
|
assert backbone_features.size(2) == self.sam_image_embedding_size
|
@@ -482,7 +482,7 @@ class SAM2Model(torch.nn.Module):
|
|
482
482
|
if point_inputs is not None:
|
483
483
|
sam_point_coords = point_inputs["point_coords"]
|
484
484
|
sam_point_labels = point_inputs["point_labels"]
|
485
|
-
assert sam_point_coords.
|
485
|
+
assert sam_point_coords.shape[0] == B and sam_point_labels.shape[0] == B
|
486
486
|
else:
|
487
487
|
# If no points are provide, pad with an empty point (with label -1)
|
488
488
|
sam_point_coords = torch.zeros(B, 1, 2, device=device, dtype=backbone_features.dtype)
|
@@ -585,10 +585,10 @@ class SAM2Model(torch.nn.Module):
|
|
585
585
|
antialias=True, # use antialias for downsampling
|
586
586
|
)
|
587
587
|
# a dummy IoU prediction of all 1's under mask input
|
588
|
-
ious = mask_inputs.new_ones(mask_inputs.
|
588
|
+
ious = mask_inputs.new_ones(mask_inputs.shape[0], 1).float()
|
589
589
|
if not self.use_obj_ptrs_in_encoder or backbone_features is None or high_res_features is None:
|
590
590
|
# all zeros as a dummy object pointer (of shape [B, C])
|
591
|
-
obj_ptr = torch.zeros(mask_inputs.
|
591
|
+
obj_ptr = torch.zeros(mask_inputs.shape[0], self.hidden_dim, device=mask_inputs.device)
|
592
592
|
else:
|
593
593
|
# produce an object pointer using the SAM decoder from the mask input
|
594
594
|
_, _, _, _, _, obj_ptr, _ = self._forward_sam_heads(
|
@@ -1006,7 +1006,7 @@ class SAM2Model(torch.nn.Module):
|
|
1006
1006
|
@staticmethod
|
1007
1007
|
def _apply_non_overlapping_constraints(pred_masks):
|
1008
1008
|
"""Apply non-overlapping constraints to masks, keeping the highest scoring object per location."""
|
1009
|
-
batch_size = pred_masks.
|
1009
|
+
batch_size = pred_masks.shape[0]
|
1010
1010
|
if batch_size == 1:
|
1011
1011
|
return pred_masks
|
1012
1012
|
|
@@ -423,7 +423,7 @@ class Predictor(BasePredictor):
|
|
423
423
|
pred_masks.append(crop_masks)
|
424
424
|
pred_bboxes.append(crop_bboxes)
|
425
425
|
pred_scores.append(crop_scores)
|
426
|
-
region_areas.append(area.expand(
|
426
|
+
region_areas.append(area.expand(crop_masks.shape[0]))
|
427
427
|
|
428
428
|
pred_masks = torch.cat(pred_masks)
|
429
429
|
pred_bboxes = torch.cat(pred_bboxes)
|
@@ -504,14 +504,14 @@ class Predictor(BasePredictor):
|
|
504
504
|
# (N, 1, H, W), (N, 1)
|
505
505
|
pred_masks, pred_scores = preds[:2]
|
506
506
|
pred_bboxes = preds[2] if self.segment_all else None
|
507
|
-
names = dict(enumerate(str(i) for i in range(
|
507
|
+
names = dict(enumerate(str(i) for i in range(pred_masks.shape[0])))
|
508
508
|
|
509
509
|
if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list
|
510
510
|
orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
|
511
511
|
|
512
512
|
results = []
|
513
513
|
for masks, orig_img, img_path in zip([pred_masks], orig_imgs, self.batch[0]):
|
514
|
-
if
|
514
|
+
if masks.shape[0] == 0:
|
515
515
|
masks, pred_bboxes = None, torch.zeros((0, 6), device=pred_masks.device)
|
516
516
|
else:
|
517
517
|
masks = ops.scale_masks(masks[None].float(), orig_img.shape[:2], padding=False)[0]
|
@@ -521,7 +521,7 @@ class Predictor(BasePredictor):
|
|
521
521
|
else:
|
522
522
|
pred_bboxes = batched_mask_to_box(masks)
|
523
523
|
# NOTE: SAM models do not return cls info. This `cls` here is just a placeholder for consistency.
|
524
|
-
cls = torch.arange(
|
524
|
+
cls = torch.arange(pred_masks.shape[0], dtype=torch.int32, device=pred_masks.device)
|
525
525
|
idx = pred_scores > self.args.conf
|
526
526
|
pred_bboxes = torch.cat([pred_bboxes, pred_scores[:, None], cls[:, None]], dim=-1)[idx]
|
527
527
|
masks = masks[idx]
|
@@ -633,7 +633,7 @@ class Predictor(BasePredictor):
|
|
633
633
|
"""
|
634
634
|
import torchvision # scope for faster 'import ultralytics'
|
635
635
|
|
636
|
-
if
|
636
|
+
if masks.shape[0] == 0:
|
637
637
|
return masks
|
638
638
|
|
639
639
|
# Filter small disconnected regions and holes
|
@@ -693,14 +693,14 @@ class Predictor(BasePredictor):
|
|
693
693
|
dst_shape = dst_shape or (self.args.imgsz, self.args.imgsz)
|
694
694
|
prompts = self._prepare_prompts(dst_shape, src_shape, bboxes, points, labels, masks)
|
695
695
|
pred_masks, pred_scores = self._inference_features(features, *prompts, multimask_output)
|
696
|
-
if
|
696
|
+
if pred_masks.shape[0] == 0:
|
697
697
|
pred_masks, pred_bboxes = None, torch.zeros((0, 6), device=pred_masks.device)
|
698
698
|
else:
|
699
699
|
pred_masks = ops.scale_masks(pred_masks[None].float(), src_shape, padding=False)[0]
|
700
700
|
pred_masks = pred_masks > self.model.mask_threshold # to bool
|
701
701
|
pred_bboxes = batched_mask_to_box(pred_masks)
|
702
702
|
# NOTE: SAM models do not return cls info. This `cls` here is just a placeholder for consistency.
|
703
|
-
cls = torch.arange(
|
703
|
+
cls = torch.arange(pred_masks.shape[0], dtype=torch.int32, device=pred_masks.device)
|
704
704
|
pred_bboxes = torch.cat([pred_bboxes, pred_scores[:, None], cls[:, None]], dim=-1)
|
705
705
|
return pred_masks, pred_bboxes
|
706
706
|
|
@@ -770,7 +770,7 @@ class SAM2Predictor(Predictor):
|
|
770
770
|
bboxes, points, labels, masks = super()._prepare_prompts(dst_shape, src_shape, bboxes, points, labels, masks)
|
771
771
|
if bboxes is not None:
|
772
772
|
bboxes = bboxes.view(-1, 2, 2)
|
773
|
-
bbox_labels = torch.tensor([[2, 3]], dtype=torch.int32, device=bboxes.device).expand(
|
773
|
+
bbox_labels = torch.tensor([[2, 3]], dtype=torch.int32, device=bboxes.device).expand(bboxes.shape[0], -1)
|
774
774
|
# NOTE: merge "boxes" and "points" into a single "points" input
|
775
775
|
# (where boxes are added at the beginning) to model.sam_prompt_encoder
|
776
776
|
if points is not None:
|
@@ -1025,7 +1025,7 @@ class SAM2VideoPredictor(SAM2Predictor):
|
|
1025
1025
|
pred_masks = current_out["pred_masks"].flatten(0, 1)
|
1026
1026
|
pred_masks = pred_masks[(pred_masks > self.model.mask_threshold).sum((1, 2)) > 0] # filter blank masks
|
1027
1027
|
|
1028
|
-
return pred_masks, torch.ones(
|
1028
|
+
return pred_masks, torch.ones(pred_masks.shape[0], dtype=pred_masks.dtype, device=pred_masks.device)
|
1029
1029
|
|
1030
1030
|
def postprocess(self, preds, img, orig_imgs):
|
1031
1031
|
"""
|
@@ -1465,7 +1465,7 @@ class SAM2VideoPredictor(SAM2Predictor):
|
|
1465
1465
|
else:
|
1466
1466
|
maskmem_pos_enc = model_constants["maskmem_pos_enc"]
|
1467
1467
|
# expand the cached maskmem_pos_enc to the actual batch size
|
1468
|
-
batch_size = out_maskmem_pos_enc[0].
|
1468
|
+
batch_size = out_maskmem_pos_enc[0].shape[0]
|
1469
1469
|
if batch_size > 1:
|
1470
1470
|
out_maskmem_pos_enc = [x.expand(batch_size, -1, -1, -1) for x in maskmem_pos_enc]
|
1471
1471
|
return out_maskmem_pos_enc
|
@@ -2028,7 +2028,7 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
|
|
2028
2028
|
point_inputs={"point_coords": point, "point_labels": label} if obj_idx is not None else None,
|
2029
2029
|
mask_inputs=mask,
|
2030
2030
|
multimask_output=False,
|
2031
|
-
high_res_features=[feat[: pix_feat_with_mem.
|
2031
|
+
high_res_features=[feat[: pix_feat_with_mem.shape[0]] for feat in self.high_res_features],
|
2032
2032
|
)
|
2033
2033
|
return {
|
2034
2034
|
"pred_masks": low_res_masks,
|
@@ -12,7 +12,7 @@ from ultralytics.engine.trainer import BaseTrainer
|
|
12
12
|
from ultralytics.models import yolo
|
13
13
|
from ultralytics.nn.tasks import ClassificationModel
|
14
14
|
from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
|
15
|
-
from ultralytics.utils.plotting import plot_images
|
15
|
+
from ultralytics.utils.plotting import plot_images
|
16
16
|
from ultralytics.utils.torch_utils import is_parallel, strip_optimizer, torch_distributed_zero_first
|
17
17
|
|
18
18
|
|
@@ -39,7 +39,6 @@ class ClassificationTrainer(BaseTrainer):
|
|
39
39
|
progress_string: Return a formatted string showing training progress.
|
40
40
|
get_validator: Return an instance of ClassificationValidator.
|
41
41
|
label_loss_items: Return a loss dict with labelled training loss items.
|
42
|
-
plot_metrics: Plot metrics from a CSV file.
|
43
42
|
final_eval: Evaluate trained model and save validation results.
|
44
43
|
plot_training_samples: Plot training samples with their annotations.
|
45
44
|
|
@@ -195,10 +194,6 @@ class ClassificationTrainer(BaseTrainer):
|
|
195
194
|
loss_items = [round(float(loss_items), 5)]
|
196
195
|
return dict(zip(keys, loss_items))
|
197
196
|
|
198
|
-
def plot_metrics(self):
|
199
|
-
"""Plot metrics from a CSV file."""
|
200
|
-
plot_results(file=self.csv, classify=True, on_plot=self.on_plot) # save results.png
|
201
|
-
|
202
197
|
def final_eval(self):
|
203
198
|
"""Evaluate trained model and save validation results."""
|
204
199
|
for f in self.last, self.best:
|
@@ -220,7 +215,7 @@ class ClassificationTrainer(BaseTrainer):
|
|
220
215
|
batch (dict[str, torch.Tensor]): Batch containing images and class labels.
|
221
216
|
ni (int): Number of iterations.
|
222
217
|
"""
|
223
|
-
batch["batch_idx"] = torch.arange(
|
218
|
+
batch["batch_idx"] = torch.arange(batch["img"].shape[0]) # add batch index for plotting
|
224
219
|
plot_images(
|
225
220
|
labels=batch,
|
226
221
|
fname=self.save_dir / f"train_batch{ni}.jpg",
|
@@ -178,7 +178,7 @@ class ClassificationValidator(BaseValidator):
|
|
178
178
|
>>> batch = {"img": torch.rand(16, 3, 224, 224), "cls": torch.randint(0, 10, (16,))}
|
179
179
|
>>> validator.plot_val_samples(batch, 0)
|
180
180
|
"""
|
181
|
-
batch["batch_idx"] = torch.arange(
|
181
|
+
batch["batch_idx"] = torch.arange(batch["img"].shape[0]) # add batch index for plotting
|
182
182
|
plot_images(
|
183
183
|
labels=batch,
|
184
184
|
fname=self.save_dir / f"val_batch{ni}_labels.jpg",
|
@@ -203,7 +203,7 @@ class ClassificationValidator(BaseValidator):
|
|
203
203
|
"""
|
204
204
|
batched_preds = dict(
|
205
205
|
img=batch["img"],
|
206
|
-
batch_idx=torch.arange(
|
206
|
+
batch_idx=torch.arange(batch["img"].shape[0]),
|
207
207
|
cls=torch.argmax(preds, dim=1),
|
208
208
|
)
|
209
209
|
plot_images(
|
@@ -89,7 +89,7 @@ class DetectionPredictor(BasePredictor):
|
|
89
89
|
obj_feats = torch.cat(
|
90
90
|
[x.permute(0, 2, 3, 1).reshape(x.shape[0], -1, s, x.shape[1] // s).mean(dim=-1) for x in feat_maps], dim=1
|
91
91
|
) # mean reduce all vectors to same length
|
92
|
-
return [feats[idx] if
|
92
|
+
return [feats[idx] if idx.shape[0] else [] for feats, idx in zip(obj_feats, idxs)] # for each img in batch
|
93
93
|
|
94
94
|
def construct_results(self, preds, img, orig_imgs):
|
95
95
|
"""
|
@@ -17,7 +17,7 @@ from ultralytics.models import yolo
|
|
17
17
|
from ultralytics.nn.tasks import DetectionModel
|
18
18
|
from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
|
19
19
|
from ultralytics.utils.patches import override_configs
|
20
|
-
from ultralytics.utils.plotting import plot_images, plot_labels
|
20
|
+
from ultralytics.utils.plotting import plot_images, plot_labels
|
21
21
|
from ultralytics.utils.torch_utils import torch_distributed_zero_first, unwrap_model
|
22
22
|
|
23
23
|
|
@@ -43,7 +43,6 @@ class DetectionTrainer(BaseTrainer):
|
|
43
43
|
label_loss_items: Return a loss dictionary with labeled training loss items.
|
44
44
|
progress_string: Return a formatted string of training progress.
|
45
45
|
plot_training_samples: Plot training samples with their annotations.
|
46
|
-
plot_metrics: Plot metrics from a CSV file.
|
47
46
|
plot_training_labels: Create a labeled training plot of the YOLO model.
|
48
47
|
auto_batch: Calculate optimal batch size based on model memory requirements.
|
49
48
|
|
@@ -64,7 +63,6 @@ class DetectionTrainer(BaseTrainer):
|
|
64
63
|
_callbacks (list, optional): List of callback functions to be executed during training.
|
65
64
|
"""
|
66
65
|
super().__init__(cfg, overrides, _callbacks)
|
67
|
-
self.dynamic_tensors = ["batch_idx", "cls", "bboxes"]
|
68
66
|
|
69
67
|
def build_dataset(self, img_path: str, mode: str = "train", batch: int | None = None):
|
70
68
|
"""
|
@@ -138,10 +136,6 @@ class DetectionTrainer(BaseTrainer):
|
|
138
136
|
] # new shape (stretched to gs-multiple)
|
139
137
|
imgs = nn.functional.interpolate(imgs, size=ns, mode="bilinear", align_corners=False)
|
140
138
|
batch["img"] = imgs
|
141
|
-
|
142
|
-
if self.args.compile:
|
143
|
-
for k in self.dynamic_tensors:
|
144
|
-
torch._dynamo.maybe_mark_dynamic(batch[k], 0)
|
145
139
|
return batch
|
146
140
|
|
147
141
|
def set_model_attributes(self):
|
@@ -222,10 +216,6 @@ class DetectionTrainer(BaseTrainer):
|
|
222
216
|
on_plot=self.on_plot,
|
223
217
|
)
|
224
218
|
|
225
|
-
def plot_metrics(self):
|
226
|
-
"""Plot metrics from a CSV file."""
|
227
|
-
plot_results(file=self.csv, on_plot=self.on_plot) # save results.png
|
228
|
-
|
229
219
|
def plot_training_labels(self):
|
230
220
|
"""Create a labeled training plot of the YOLO model."""
|
231
221
|
boxes = np.concatenate([lb["bboxes"] for lb in self.train_loader.dataset.labels], 0)
|
@@ -146,7 +146,7 @@ class DetectionValidator(BaseValidator):
|
|
146
146
|
ori_shape = batch["ori_shape"][si]
|
147
147
|
imgsz = batch["img"].shape[2:]
|
148
148
|
ratio_pad = batch["ratio_pad"][si]
|
149
|
-
if
|
149
|
+
if cls.shape[0]:
|
150
150
|
bbox = ops.xywh2xyxy(bbox) * torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]] # target boxes
|
151
151
|
return {
|
152
152
|
"cls": cls,
|
@@ -185,7 +185,7 @@ class DetectionValidator(BaseValidator):
|
|
185
185
|
predn = self._prepare_pred(pred)
|
186
186
|
|
187
187
|
cls = pbatch["cls"].cpu().numpy()
|
188
|
-
no_pred =
|
188
|
+
no_pred = predn["cls"].shape[0] == 0
|
189
189
|
self.metrics.update_stats(
|
190
190
|
{
|
191
191
|
**self._process_batch(predn, pbatch),
|
@@ -268,8 +268,8 @@ class DetectionValidator(BaseValidator):
|
|
268
268
|
Returns:
|
269
269
|
(dict[str, np.ndarray]): Dictionary containing 'tp' key with correct prediction matrix of shape (N, 10) for 10 IoU levels.
|
270
270
|
"""
|
271
|
-
if
|
272
|
-
return {"tp": np.zeros((
|
271
|
+
if batch["cls"].shape[0] == 0 or preds["cls"].shape[0] == 0:
|
272
|
+
return {"tp": np.zeros((preds["cls"].shape[0], self.niou), dtype=bool)}
|
273
273
|
iou = box_iou(batch["bboxes"], preds["bboxes"])
|
274
274
|
return {"tp": self.match_predictions(preds["cls"], batch["cls"], iou).cpu().numpy()}
|
275
275
|
|
@@ -93,8 +93,8 @@ class OBBValidator(DetectionValidator):
|
|
93
93
|
>>> gt_cls = torch.randint(0, 5, (50,)) # 50 ground truth class labels
|
94
94
|
>>> correct_matrix = validator._process_batch(detections, gt_bboxes, gt_cls)
|
95
95
|
"""
|
96
|
-
if
|
97
|
-
return {"tp": np.zeros((
|
96
|
+
if batch["cls"].shape[0] == 0 or preds["cls"].shape[0] == 0:
|
97
|
+
return {"tp": np.zeros((preds["cls"].shape[0], self.niou), dtype=bool)}
|
98
98
|
iou = batch_probiou(batch["bboxes"], preds["bboxes"])
|
99
99
|
return {"tp": self.match_predictions(preds["cls"], batch["cls"], iou).cpu().numpy()}
|
100
100
|
|
@@ -134,7 +134,7 @@ class OBBValidator(DetectionValidator):
|
|
134
134
|
ori_shape = batch["ori_shape"][si]
|
135
135
|
imgsz = batch["img"].shape[2:]
|
136
136
|
ratio_pad = batch["ratio_pad"][si]
|
137
|
-
if
|
137
|
+
if cls.shape[0]:
|
138
138
|
bbox[..., :4].mul_(torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]]) # target boxes
|
139
139
|
return {
|
140
140
|
"cls": cls,
|
@@ -73,7 +73,7 @@ class PosePredictor(DetectionPredictor):
|
|
73
73
|
"""
|
74
74
|
result = super().construct_result(pred, img, orig_img, img_path)
|
75
75
|
# Extract keypoints from prediction and reshape according to model's keypoint shape
|
76
|
-
pred_kpts = pred[:, 6:].view(
|
76
|
+
pred_kpts = pred[:, 6:].view(pred.shape[0], *self.model.kpt_shape)
|
77
77
|
# Scale keypoints coordinates to match the original image dimensions
|
78
78
|
pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, orig_img.shape)
|
79
79
|
result.update(keypoints=pred_kpts)
|
@@ -9,7 +9,6 @@ from typing import Any
|
|
9
9
|
from ultralytics.models import yolo
|
10
10
|
from ultralytics.nn.tasks import PoseModel
|
11
11
|
from ultralytics.utils import DEFAULT_CFG, LOGGER
|
12
|
-
from ultralytics.utils.plotting import plot_results
|
13
12
|
|
14
13
|
|
15
14
|
class PoseTrainer(yolo.detect.DetectionTrainer):
|
@@ -30,7 +29,6 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
|
|
30
29
|
set_model_attributes: Set keypoints shape attribute on the model.
|
31
30
|
get_validator: Create a validator instance for model evaluation.
|
32
31
|
plot_training_samples: Visualize training samples with keypoints.
|
33
|
-
plot_metrics: Generate and save training/validation metric plots.
|
34
32
|
get_dataset: Retrieve the dataset and ensure it contains required kpt_shape key.
|
35
33
|
|
36
34
|
Examples:
|
@@ -57,7 +55,6 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
|
|
57
55
|
overrides = {}
|
58
56
|
overrides["task"] = "pose"
|
59
57
|
super().__init__(cfg, overrides, _callbacks)
|
60
|
-
self.dynamic_tensors = ["batch_idx", "cls", "bboxes", "keypoints"]
|
61
58
|
|
62
59
|
if isinstance(self.args.device, str) and self.args.device.lower() == "mps":
|
63
60
|
LOGGER.warning(
|
@@ -102,10 +99,6 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
|
|
102
99
|
self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
|
103
100
|
)
|
104
101
|
|
105
|
-
def plot_metrics(self):
|
106
|
-
"""Plot training/validation metrics."""
|
107
|
-
plot_results(file=self.csv, pose=True, on_plot=self.on_plot) # save results.png
|
108
|
-
|
109
102
|
def get_dataset(self) -> dict[str, Any]:
|
110
103
|
"""
|
111
104
|
Retrieve the dataset and ensure it contains the required `kpt_shape` key.
|