ultralytics-opencv-headless 8.3.253__py3-none-any.whl → 8.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/__init__.py +2 -2
- tests/conftest.py +1 -1
- tests/test_cuda.py +8 -2
- tests/test_engine.py +8 -8
- tests/test_exports.py +11 -4
- tests/test_integrations.py +9 -9
- tests/test_python.py +14 -14
- tests/test_solutions.py +3 -3
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +7 -9
- ultralytics/cfg/default.yaml +3 -1
- ultralytics/cfg/models/26/yolo26-cls.yaml +33 -0
- ultralytics/cfg/models/26/yolo26-obb.yaml +52 -0
- ultralytics/cfg/models/26/yolo26-p2.yaml +60 -0
- ultralytics/cfg/models/26/yolo26-p6.yaml +62 -0
- ultralytics/cfg/models/26/yolo26-pose.yaml +53 -0
- ultralytics/cfg/models/26/yolo26-seg.yaml +52 -0
- ultralytics/cfg/models/26/yolo26.yaml +52 -0
- ultralytics/cfg/models/26/yoloe-26-seg.yaml +53 -0
- ultralytics/cfg/models/26/yoloe-26.yaml +53 -0
- ultralytics/data/augment.py +7 -0
- ultralytics/data/converter.py +49 -30
- ultralytics/data/dataset.py +1 -1
- ultralytics/engine/exporter.py +9 -4
- ultralytics/engine/model.py +1 -1
- ultralytics/engine/results.py +19 -10
- ultralytics/engine/trainer.py +48 -25
- ultralytics/engine/tuner.py +15 -7
- ultralytics/models/fastsam/predict.py +1 -1
- ultralytics/models/yolo/detect/train.py +3 -2
- ultralytics/models/yolo/detect/val.py +6 -0
- ultralytics/models/yolo/model.py +1 -1
- ultralytics/models/yolo/obb/predict.py +1 -1
- ultralytics/models/yolo/obb/train.py +1 -1
- ultralytics/models/yolo/pose/train.py +1 -1
- ultralytics/models/yolo/segment/predict.py +1 -1
- ultralytics/models/yolo/segment/train.py +1 -1
- ultralytics/models/yolo/segment/val.py +3 -1
- ultralytics/models/yolo/yoloe/train.py +6 -1
- ultralytics/models/yolo/yoloe/train_seg.py +6 -1
- ultralytics/nn/autobackend.py +3 -3
- ultralytics/nn/modules/__init__.py +8 -0
- ultralytics/nn/modules/block.py +128 -8
- ultralytics/nn/modules/head.py +789 -204
- ultralytics/nn/tasks.py +74 -29
- ultralytics/nn/text_model.py +5 -2
- ultralytics/optim/__init__.py +5 -0
- ultralytics/optim/muon.py +338 -0
- ultralytics/utils/callbacks/platform.py +9 -7
- ultralytics/utils/downloads.py +3 -1
- ultralytics/utils/export/engine.py +19 -10
- ultralytics/utils/export/imx.py +22 -11
- ultralytics/utils/export/tensorflow.py +21 -21
- ultralytics/utils/loss.py +587 -203
- ultralytics/utils/metrics.py +1 -0
- ultralytics/utils/ops.py +11 -2
- ultralytics/utils/tal.py +98 -19
- {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.2.dist-info}/METADATA +31 -39
- {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.2.dist-info}/RECORD +63 -52
- {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.2.dist-info}/WHEEL +0 -0
- {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.2.dist-info}/entry_points.txt +0 -0
- {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.2.dist-info}/licenses/LICENSE +0 -0
- {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.2.dist-info}/top_level.txt +0 -0
ultralytics/engine/trainer.py
CHANGED
|
@@ -27,6 +27,7 @@ from ultralytics import __version__
|
|
|
27
27
|
from ultralytics.cfg import get_cfg, get_save_dir
|
|
28
28
|
from ultralytics.data.utils import check_cls_dataset, check_det_dataset
|
|
29
29
|
from ultralytics.nn.tasks import load_checkpoint
|
|
30
|
+
from ultralytics.optim import MuSGD
|
|
30
31
|
from ultralytics.utils import (
|
|
31
32
|
DEFAULT_CFG,
|
|
32
33
|
GIT,
|
|
@@ -464,6 +465,9 @@ class BaseTrainer:
|
|
|
464
465
|
|
|
465
466
|
self.run_callbacks("on_train_batch_end")
|
|
466
467
|
|
|
468
|
+
if hasattr(unwrap_model(self.model).criterion, "update"):
|
|
469
|
+
unwrap_model(self.model).criterion.update()
|
|
470
|
+
|
|
467
471
|
self.lr = {f"lr/pg{ir}": x["lr"] for ir, x in enumerate(self.optimizer.param_groups)} # for loggers
|
|
468
472
|
|
|
469
473
|
self.run_callbacks("on_train_epoch_end")
|
|
@@ -628,21 +632,19 @@ class BaseTrainer:
|
|
|
628
632
|
(dict): A dictionary containing the training/validation/test dataset and category names.
|
|
629
633
|
"""
|
|
630
634
|
try:
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
str(self.args.data).startswith("ul://") and "/datasets/" in str(self.args.data)
|
|
635
|
-
):
|
|
636
|
-
# Convert NDJSON to YOLO format (including ul:// platform dataset URIs)
|
|
635
|
+
# Convert ul:// platform URIs and NDJSON files to local dataset format first
|
|
636
|
+
data_str = str(self.args.data)
|
|
637
|
+
if data_str.endswith(".ndjson") or (data_str.startswith("ul://") and "/datasets/" in data_str):
|
|
637
638
|
import asyncio
|
|
638
639
|
|
|
639
640
|
from ultralytics.data.converter import convert_ndjson_to_yolo
|
|
640
641
|
from ultralytics.utils.checks import check_file
|
|
641
642
|
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
643
|
+
self.args.data = str(asyncio.run(convert_ndjson_to_yolo(check_file(self.args.data))))
|
|
644
|
+
|
|
645
|
+
# Task-specific dataset checking
|
|
646
|
+
if self.args.task == "classify":
|
|
647
|
+
data = check_cls_dataset(self.args.data)
|
|
646
648
|
elif str(self.args.data).rsplit(".", 1)[-1] in {"yaml", "yml"} or self.args.task in {
|
|
647
649
|
"detect",
|
|
648
650
|
"segment",
|
|
@@ -930,7 +932,7 @@ class BaseTrainer:
|
|
|
930
932
|
Returns:
|
|
931
933
|
(torch.optim.Optimizer): The constructed optimizer.
|
|
932
934
|
"""
|
|
933
|
-
g = [
|
|
935
|
+
g = [{}, {}, {}, {}] # optimizer parameter groups
|
|
934
936
|
bn = tuple(v for k, v in nn.__dict__.items() if "Norm" in k) # normalization layers, i.e. BatchNorm2d()
|
|
935
937
|
if name == "auto":
|
|
936
938
|
LOGGER.info(
|
|
@@ -940,38 +942,59 @@ class BaseTrainer:
|
|
|
940
942
|
)
|
|
941
943
|
nc = self.data.get("nc", 10) # number of classes
|
|
942
944
|
lr_fit = round(0.002 * 5 / (4 + nc), 6) # lr0 fit equation to 6 decimal places
|
|
943
|
-
name, lr, momentum = ("SGD", 0.01, 0.9) if iterations > 10000 else ("
|
|
945
|
+
name, lr, momentum = ("SGD", 0.01, 0.9) if iterations > 10000 else ("MuSGD", lr_fit, 0.9)
|
|
944
946
|
self.args.warmup_bias_lr = 0.0 # no higher than 0.01 for Adam
|
|
945
947
|
|
|
946
|
-
|
|
948
|
+
use_muon = name == "MuSGD"
|
|
949
|
+
for module_name, module in unwrap_model(model).named_modules():
|
|
947
950
|
for param_name, param in module.named_parameters(recurse=False):
|
|
948
951
|
fullname = f"{module_name}.{param_name}" if module_name else param_name
|
|
949
|
-
if
|
|
950
|
-
g[
|
|
952
|
+
if param.ndim >= 2 and use_muon:
|
|
953
|
+
g[3][fullname] = param # muon params
|
|
954
|
+
elif "bias" in fullname: # bias (no decay)
|
|
955
|
+
g[2][fullname] = param
|
|
951
956
|
elif isinstance(module, bn) or "logit_scale" in fullname: # weight (no decay)
|
|
952
957
|
# ContrastiveHead and BNContrastiveHead included here with 'logit_scale'
|
|
953
|
-
g[1]
|
|
958
|
+
g[1][fullname] = param
|
|
954
959
|
else: # weight (with decay)
|
|
955
|
-
g[0]
|
|
960
|
+
g[0][fullname] = param
|
|
961
|
+
if not use_muon:
|
|
962
|
+
g = [x.values() for x in g[:3]] # convert to list of params
|
|
956
963
|
|
|
957
|
-
optimizers = {"Adam", "Adamax", "AdamW", "NAdam", "RAdam", "RMSProp", "SGD", "auto"}
|
|
964
|
+
optimizers = {"Adam", "Adamax", "AdamW", "NAdam", "RAdam", "RMSProp", "SGD", "MuSGD", "auto"}
|
|
958
965
|
name = {x.lower(): x for x in optimizers}.get(name.lower())
|
|
959
966
|
if name in {"Adam", "Adamax", "AdamW", "NAdam", "RAdam"}:
|
|
960
|
-
|
|
967
|
+
optim_args = dict(lr=lr, betas=(momentum, 0.999), weight_decay=0.0)
|
|
961
968
|
elif name == "RMSProp":
|
|
962
|
-
|
|
963
|
-
elif name == "SGD":
|
|
964
|
-
|
|
969
|
+
optim_args = dict(lr=lr, momentum=momentum)
|
|
970
|
+
elif name == "SGD" or name == "MuSGD":
|
|
971
|
+
optim_args = dict(lr=lr, momentum=momentum, nesterov=True)
|
|
965
972
|
else:
|
|
966
973
|
raise NotImplementedError(
|
|
967
974
|
f"Optimizer '{name}' not found in list of available optimizers {optimizers}. "
|
|
968
975
|
"Request support for addition optimizers at https://github.com/ultralytics/ultralytics."
|
|
969
976
|
)
|
|
970
977
|
|
|
971
|
-
|
|
972
|
-
|
|
978
|
+
g[2] = {"params": g[2], **optim_args}
|
|
979
|
+
g[0] = {"params": g[0], **optim_args, "weight_decay": decay}
|
|
980
|
+
g[1] = {"params": g[1], **optim_args, "weight_decay": 0.0}
|
|
981
|
+
if name == "MuSGD":
|
|
982
|
+
g[3] = {"params": g[3], **optim_args, "weight_decay": decay, "use_muon": True}
|
|
983
|
+
import re
|
|
984
|
+
|
|
985
|
+
# higher lr for certain parameters in MuSGD
|
|
986
|
+
pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg|flow_model")
|
|
987
|
+
g_ = [] # new param groups
|
|
988
|
+
for x in g:
|
|
989
|
+
p = x.pop("params")
|
|
990
|
+
p1 = [v for k, v in p.items() if pattern.search(k)]
|
|
991
|
+
p2 = [v for k, v in p.items() if not pattern.search(k)]
|
|
992
|
+
g_.extend([{"params": p1, **x, "lr": lr * 3}, {"params": p2, **x}])
|
|
993
|
+
g = g_
|
|
994
|
+
optimizer = getattr(optim, name, MuSGD)(params=g)
|
|
995
|
+
|
|
973
996
|
LOGGER.info(
|
|
974
997
|
f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}, momentum={momentum}) with parameter groups "
|
|
975
|
-
f"{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias(decay=0.0)"
|
|
998
|
+
f"{len(g[1]['params'])} weight(decay=0.0), {len(g[0]['params']) if len(g[0]) else len(g[3]['params'])} weight(decay={decay}), {len(g[2]['params'])} bias(decay=0.0)"
|
|
976
999
|
)
|
|
977
1000
|
return optimizer
|
ultralytics/engine/tuner.py
CHANGED
|
@@ -90,15 +90,15 @@ class Tuner:
|
|
|
90
90
|
"""
|
|
91
91
|
self.space = args.pop("space", None) or { # key: (min, max, gain(optional))
|
|
92
92
|
# 'optimizer': tune.choice(['SGD', 'Adam', 'AdamW', 'NAdam', 'RAdam', 'RMSProp']),
|
|
93
|
-
"lr0": (1e-5, 1e-
|
|
94
|
-
"lrf": (0.
|
|
93
|
+
"lr0": (1e-5, 1e-2), # initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
|
|
94
|
+
"lrf": (0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf)
|
|
95
95
|
"momentum": (0.7, 0.98, 0.3), # SGD momentum/Adam beta1
|
|
96
96
|
"weight_decay": (0.0, 0.001), # optimizer weight decay 5e-4
|
|
97
97
|
"warmup_epochs": (0.0, 5.0), # warmup epochs (fractions ok)
|
|
98
98
|
"warmup_momentum": (0.0, 0.95), # warmup initial momentum
|
|
99
99
|
"box": (1.0, 20.0), # box loss gain
|
|
100
100
|
"cls": (0.1, 4.0), # cls loss gain (scale with pixels)
|
|
101
|
-
"dfl": (0.4,
|
|
101
|
+
"dfl": (0.4, 12.0), # dfl loss gain
|
|
102
102
|
"hsv_h": (0.0, 0.1), # image HSV-Hue augmentation (fraction)
|
|
103
103
|
"hsv_s": (0.0, 0.9), # image HSV-Saturation augmentation (fraction)
|
|
104
104
|
"hsv_v": (0.0, 0.9), # image HSV-Value augmentation (fraction)
|
|
@@ -254,7 +254,7 @@ class Tuner:
|
|
|
254
254
|
f.write(headers)
|
|
255
255
|
for result in all_results:
|
|
256
256
|
fitness = result["fitness"]
|
|
257
|
-
hyp_values = [result["hyperparameters"]
|
|
257
|
+
hyp_values = [result["hyperparameters"].get(k, self.args.get(k)) for k in self.space.keys()]
|
|
258
258
|
log_row = [round(fitness, 5), *hyp_values]
|
|
259
259
|
f.write(",".join(map(str, log_row)) + "\n")
|
|
260
260
|
|
|
@@ -273,6 +273,8 @@ class Tuner:
|
|
|
273
273
|
parents_mat = np.stack([x[i][1:] for i in idxs], 0) # (k, ng) strip fitness
|
|
274
274
|
lo, hi = parents_mat.min(0), parents_mat.max(0)
|
|
275
275
|
span = hi - lo
|
|
276
|
+
# given a small value when span is zero to avoid no mutation
|
|
277
|
+
span = np.where(span == 0, np.random.uniform(0.01, 0.1, span.shape), span)
|
|
276
278
|
return np.random.uniform(lo - alpha * span, hi + alpha * span)
|
|
277
279
|
|
|
278
280
|
def _mutate(
|
|
@@ -297,7 +299,12 @@ class Tuner:
|
|
|
297
299
|
if self.mongodb:
|
|
298
300
|
if results := self._get_mongodb_results(n):
|
|
299
301
|
# MongoDB already sorted by fitness DESC, so results[0] is best
|
|
300
|
-
x = np.array(
|
|
302
|
+
x = np.array(
|
|
303
|
+
[
|
|
304
|
+
[r["fitness"]] + [r["hyperparameters"].get(k, self.args.get(k)) for k in self.space.keys()]
|
|
305
|
+
for r in results
|
|
306
|
+
]
|
|
307
|
+
)
|
|
301
308
|
elif self.collection.name in self.collection.database.list_collection_names(): # Tuner started elsewhere
|
|
302
309
|
x = np.array([[0.0] + [getattr(self.args, k) for k in self.space.keys()]])
|
|
303
310
|
|
|
@@ -335,10 +342,12 @@ class Tuner:
|
|
|
335
342
|
# Update types
|
|
336
343
|
if "close_mosaic" in hyp:
|
|
337
344
|
hyp["close_mosaic"] = round(hyp["close_mosaic"])
|
|
345
|
+
if "epochs" in hyp:
|
|
346
|
+
hyp["epochs"] = round(hyp["epochs"])
|
|
338
347
|
|
|
339
348
|
return hyp
|
|
340
349
|
|
|
341
|
-
def __call__(self,
|
|
350
|
+
def __call__(self, iterations: int = 10, cleanup: bool = True):
|
|
342
351
|
"""Execute the hyperparameter evolution process when the Tuner instance is called.
|
|
343
352
|
|
|
344
353
|
This method iterates through the specified number of iterations, performing the following steps:
|
|
@@ -349,7 +358,6 @@ class Tuner:
|
|
|
349
358
|
5. Track the best performing configuration across all iterations
|
|
350
359
|
|
|
351
360
|
Args:
|
|
352
|
-
model (Model | None, optional): A pre-initialized YOLO model to be used for training.
|
|
353
361
|
iterations (int): The number of generations to run the evolution for.
|
|
354
362
|
cleanup (bool): Whether to delete iteration weights to reduce storage space during tuning.
|
|
355
363
|
"""
|
|
@@ -63,7 +63,7 @@ class FastSAMPredictor(SegmentationPredictor):
|
|
|
63
63
|
results = super().postprocess(preds, img, orig_imgs)
|
|
64
64
|
for result in results:
|
|
65
65
|
full_box = torch.tensor(
|
|
66
|
-
[0, 0, result.orig_shape[1], result.orig_shape[0]], device=
|
|
66
|
+
[0, 0, result.orig_shape[1], result.orig_shape[0]], device=result.boxes.data.device, dtype=torch.float32
|
|
67
67
|
)
|
|
68
68
|
boxes = adjust_bboxes_to_image_border(result.boxes.xyxy, result.orig_shape)
|
|
69
69
|
idx = torch.nonzero(box_iou(full_box[None], boxes) > 0.9).flatten()
|
|
@@ -117,10 +117,11 @@ class DetectionTrainer(BaseTrainer):
|
|
|
117
117
|
if isinstance(v, torch.Tensor):
|
|
118
118
|
batch[k] = v.to(self.device, non_blocking=self.device.type == "cuda")
|
|
119
119
|
batch["img"] = batch["img"].float() / 255
|
|
120
|
-
|
|
120
|
+
multi_scale = self.args.multi_scale
|
|
121
|
+
if random.random() < multi_scale:
|
|
121
122
|
imgs = batch["img"]
|
|
122
123
|
sz = (
|
|
123
|
-
random.randrange(int(self.args.imgsz * 0.5), int(self.args.imgsz * 1
|
|
124
|
+
random.randrange(int(self.args.imgsz * 0.5), int(self.args.imgsz * 1 + self.stride))
|
|
124
125
|
// self.stride
|
|
125
126
|
* self.stride
|
|
126
127
|
) # size
|
|
@@ -494,6 +494,12 @@ class DetectionValidator(BaseValidator):
|
|
|
494
494
|
# update mAP50-95 and mAP50
|
|
495
495
|
stats[f"metrics/mAP50({suffix[i][0]})"] = val.stats_as_dict["AP_50"]
|
|
496
496
|
stats[f"metrics/mAP50-95({suffix[i][0]})"] = val.stats_as_dict["AP_all"]
|
|
497
|
+
# record mAP for small, medium, large objects as well
|
|
498
|
+
stats["metrics/mAP_small(B)"] = val.stats_as_dict["AP_small"]
|
|
499
|
+
stats["metrics/mAP_medium(B)"] = val.stats_as_dict["AP_medium"]
|
|
500
|
+
stats["metrics/mAP_large(B)"] = val.stats_as_dict["AP_large"]
|
|
501
|
+
# update fitness
|
|
502
|
+
stats["fitness"] = 0.9 * val.stats_as_dict["AP_all"] + 0.1 * val.stats_as_dict["AP_50"]
|
|
497
503
|
|
|
498
504
|
if self.is_lvis:
|
|
499
505
|
stats[f"metrics/APr({suffix[i][0]})"] = val.stats_as_dict["APr"]
|
ultralytics/models/yolo/model.py
CHANGED
|
@@ -399,7 +399,7 @@ class YOLOE(Model):
|
|
|
399
399
|
"batch": 1,
|
|
400
400
|
"device": kwargs.get("device", None),
|
|
401
401
|
"half": kwargs.get("half", False),
|
|
402
|
-
"imgsz": kwargs.get("imgsz", self.overrides
|
|
402
|
+
"imgsz": kwargs.get("imgsz", self.overrides.get("imgsz", 640)),
|
|
403
403
|
},
|
|
404
404
|
_callbacks=self.callbacks,
|
|
405
405
|
)
|
|
@@ -50,7 +50,7 @@ class OBBPredictor(DetectionPredictor):
|
|
|
50
50
|
(Results): The result object containing the original image, image path, class names, and oriented bounding
|
|
51
51
|
boxes.
|
|
52
52
|
"""
|
|
53
|
-
rboxes =
|
|
53
|
+
rboxes = torch.cat([pred[:, :4], pred[:, -1:]], dim=-1)
|
|
54
54
|
rboxes[:, :4] = ops.scale_boxes(img.shape[2:], rboxes[:, :4], orig_img.shape, xywh=True)
|
|
55
55
|
obb = torch.cat([rboxes, pred[:, 4:6]], dim=-1)
|
|
56
56
|
return Results(orig_img, path=img_path, names=self.model.names, obb=obb)
|
|
@@ -73,7 +73,7 @@ class OBBTrainer(yolo.detect.DetectionTrainer):
|
|
|
73
73
|
|
|
74
74
|
def get_validator(self):
|
|
75
75
|
"""Return an instance of OBBValidator for validation of YOLO model."""
|
|
76
|
-
self.loss_names = "box_loss", "cls_loss", "dfl_loss"
|
|
76
|
+
self.loss_names = "box_loss", "cls_loss", "dfl_loss", "angle_loss"
|
|
77
77
|
return yolo.obb.OBBValidator(
|
|
78
78
|
self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
|
|
79
79
|
)
|
|
@@ -90,7 +90,7 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
|
|
|
90
90
|
|
|
91
91
|
def get_validator(self):
|
|
92
92
|
"""Return an instance of the PoseValidator class for validation."""
|
|
93
|
-
self.loss_names = "box_loss", "pose_loss", "kobj_loss", "cls_loss", "dfl_loss"
|
|
93
|
+
self.loss_names = "box_loss", "pose_loss", "kobj_loss", "cls_loss", "dfl_loss", "rle_loss"
|
|
94
94
|
return yolo.pose.PoseValidator(
|
|
95
95
|
self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
|
|
96
96
|
)
|
|
@@ -60,7 +60,7 @@ class SegmentationPredictor(DetectionPredictor):
|
|
|
60
60
|
>>> results = predictor.postprocess(preds, img, orig_img)
|
|
61
61
|
"""
|
|
62
62
|
# Extract protos - tuple if PyTorch model or array if exported
|
|
63
|
-
protos = preds[
|
|
63
|
+
protos = preds[0][-1] if isinstance(preds[0], tuple) else preds[-1]
|
|
64
64
|
return super().postprocess(preds[0], img, orig_imgs, protos=protos)
|
|
65
65
|
|
|
66
66
|
def construct_results(self, preds, img, orig_imgs, protos):
|
|
@@ -63,7 +63,7 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
|
|
|
63
63
|
|
|
64
64
|
def get_validator(self):
|
|
65
65
|
"""Return an instance of SegmentationValidator for validation of YOLO model."""
|
|
66
|
-
self.loss_names = "box_loss", "seg_loss", "cls_loss", "dfl_loss"
|
|
66
|
+
self.loss_names = "box_loss", "seg_loss", "cls_loss", "dfl_loss", "sem_loss"
|
|
67
67
|
return yolo.segment.SegmentationValidator(
|
|
68
68
|
self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
|
|
69
69
|
)
|
|
@@ -99,7 +99,9 @@ class SegmentationValidator(DetectionValidator):
|
|
|
99
99
|
Returns:
|
|
100
100
|
list[dict[str, torch.Tensor]]: Processed detection predictions with masks.
|
|
101
101
|
"""
|
|
102
|
-
proto =
|
|
102
|
+
proto = (
|
|
103
|
+
preds[0][-1] if isinstance(preds[0], tuple) else preds[-1]
|
|
104
|
+
) # second output is len 3 if pt, but only 1 if exported
|
|
103
105
|
preds = super().postprocess(preds[0])
|
|
104
106
|
imgsz = [4 * x for x in proto.shape[2:]] # get image size from proto
|
|
105
107
|
for i, pred in enumerate(preds):
|
|
@@ -147,7 +147,12 @@ class YOLOEPETrainer(DetectionTrainer):
|
|
|
147
147
|
model.model[-1].cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
|
|
148
148
|
model.model[-1].cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
|
|
149
149
|
model.model[-1].cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
|
|
150
|
-
|
|
150
|
+
|
|
151
|
+
if getattr(model.model[-1], "one2one_cv3", None) is not None:
|
|
152
|
+
model.model[-1].one2one_cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
|
|
153
|
+
model.model[-1].one2one_cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
|
|
154
|
+
model.model[-1].one2one_cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
|
|
155
|
+
|
|
151
156
|
model.train()
|
|
152
157
|
|
|
153
158
|
return model
|
|
@@ -104,7 +104,12 @@ class YOLOEPESegTrainer(SegmentationTrainer):
|
|
|
104
104
|
model.model[-1].cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
|
|
105
105
|
model.model[-1].cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
|
|
106
106
|
model.model[-1].cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
|
|
107
|
-
|
|
107
|
+
|
|
108
|
+
if getattr(model.model[-1], "one2one_cv3", None) is not None:
|
|
109
|
+
model.model[-1].one2one_cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
|
|
110
|
+
model.model[-1].one2one_cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
|
|
111
|
+
model.model[-1].one2one_cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
|
|
112
|
+
|
|
108
113
|
model.train()
|
|
109
114
|
|
|
110
115
|
return model
|
ultralytics/nn/autobackend.py
CHANGED
|
@@ -221,6 +221,7 @@ class AutoBackend(nn.Module):
|
|
|
221
221
|
for p in model.parameters():
|
|
222
222
|
p.requires_grad = False
|
|
223
223
|
self.model = model # explicitly assign for to(), cpu(), cuda(), half()
|
|
224
|
+
end2end = getattr(model, "end2end", False)
|
|
224
225
|
|
|
225
226
|
# TorchScript
|
|
226
227
|
elif jit:
|
|
@@ -545,8 +546,7 @@ class AutoBackend(nn.Module):
|
|
|
545
546
|
# NCNN
|
|
546
547
|
elif ncnn:
|
|
547
548
|
LOGGER.info(f"Loading {w} for NCNN inference...")
|
|
548
|
-
|
|
549
|
-
check_requirements("git+https://github.com/Tencent/ncnn.git" if ARM64 else "ncnn", cmds="--no-deps")
|
|
549
|
+
check_requirements("ncnn", cmds="--no-deps")
|
|
550
550
|
import ncnn as pyncnn
|
|
551
551
|
|
|
552
552
|
net = pyncnn.Net()
|
|
@@ -657,7 +657,7 @@ class AutoBackend(nn.Module):
|
|
|
657
657
|
names = metadata["names"]
|
|
658
658
|
kpt_shape = metadata.get("kpt_shape")
|
|
659
659
|
kpt_names = metadata.get("kpt_names")
|
|
660
|
-
end2end = metadata.get("args", {}).get("nms", False)
|
|
660
|
+
end2end = metadata.get("end2end", False) or metadata.get("args", {}).get("nms", False)
|
|
661
661
|
dynamic = metadata.get("args", {}).get("dynamic", dynamic)
|
|
662
662
|
ch = metadata.get("channels", 3)
|
|
663
663
|
elif not (pt or triton or nn_module):
|
|
@@ -78,15 +78,19 @@ from .conv import (
|
|
|
78
78
|
)
|
|
79
79
|
from .head import (
|
|
80
80
|
OBB,
|
|
81
|
+
OBB26,
|
|
81
82
|
Classify,
|
|
82
83
|
Detect,
|
|
83
84
|
LRPCHead,
|
|
84
85
|
Pose,
|
|
86
|
+
Pose26,
|
|
85
87
|
RTDETRDecoder,
|
|
86
88
|
Segment,
|
|
89
|
+
Segment26,
|
|
87
90
|
WorldDetect,
|
|
88
91
|
YOLOEDetect,
|
|
89
92
|
YOLOESegment,
|
|
93
|
+
YOLOESegment26,
|
|
90
94
|
v10Detect,
|
|
91
95
|
)
|
|
92
96
|
from .transformer import (
|
|
@@ -115,6 +119,7 @@ __all__ = (
|
|
|
115
119
|
"ELAN1",
|
|
116
120
|
"MLP",
|
|
117
121
|
"OBB",
|
|
122
|
+
"OBB26",
|
|
118
123
|
"PSA",
|
|
119
124
|
"SPP",
|
|
120
125
|
"SPPELAN",
|
|
@@ -161,6 +166,7 @@ __all__ = (
|
|
|
161
166
|
"MSDeformAttn",
|
|
162
167
|
"MaxSigmoidAttnBlock",
|
|
163
168
|
"Pose",
|
|
169
|
+
"Pose26",
|
|
164
170
|
"Proto",
|
|
165
171
|
"RTDETRDecoder",
|
|
166
172
|
"RepC3",
|
|
@@ -170,6 +176,7 @@ __all__ = (
|
|
|
170
176
|
"ResNetLayer",
|
|
171
177
|
"SCDown",
|
|
172
178
|
"Segment",
|
|
179
|
+
"Segment26",
|
|
173
180
|
"SpatialAttention",
|
|
174
181
|
"TorchVision",
|
|
175
182
|
"TransformerBlock",
|
|
@@ -178,5 +185,6 @@ __all__ = (
|
|
|
178
185
|
"WorldDetect",
|
|
179
186
|
"YOLOEDetect",
|
|
180
187
|
"YOLOESegment",
|
|
188
|
+
"YOLOESegment26",
|
|
181
189
|
"v10Detect",
|
|
182
190
|
)
|
ultralytics/nn/modules/block.py
CHANGED
|
@@ -208,28 +208,33 @@ class SPP(nn.Module):
|
|
|
208
208
|
class SPPF(nn.Module):
|
|
209
209
|
"""Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""
|
|
210
210
|
|
|
211
|
-
def __init__(self, c1: int, c2: int, k: int = 5):
|
|
211
|
+
def __init__(self, c1: int, c2: int, k: int = 5, n: int = 3, shortcut: bool = False):
|
|
212
212
|
"""Initialize the SPPF layer with given input/output channels and kernel size.
|
|
213
213
|
|
|
214
214
|
Args:
|
|
215
215
|
c1 (int): Input channels.
|
|
216
216
|
c2 (int): Output channels.
|
|
217
217
|
k (int): Kernel size.
|
|
218
|
+
n (int): Number of pooling iterations.
|
|
219
|
+
shortcut (bool): Whether to use shortcut connection.
|
|
218
220
|
|
|
219
221
|
Notes:
|
|
220
222
|
This module is equivalent to SPP(k=(5, 9, 13)).
|
|
221
223
|
"""
|
|
222
224
|
super().__init__()
|
|
223
225
|
c_ = c1 // 2 # hidden channels
|
|
224
|
-
self.cv1 = Conv(c1, c_, 1, 1)
|
|
225
|
-
self.cv2 = Conv(c_ *
|
|
226
|
+
self.cv1 = Conv(c1, c_, 1, 1, act=False)
|
|
227
|
+
self.cv2 = Conv(c_ * (n + 1), c2, 1, 1)
|
|
226
228
|
self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
|
|
229
|
+
self.n = n
|
|
230
|
+
self.add = shortcut and c1 == c2
|
|
227
231
|
|
|
228
232
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
229
233
|
"""Apply sequential pooling operations to input and return concatenated feature maps."""
|
|
230
234
|
y = [self.cv1(x)]
|
|
231
|
-
y.extend(self.m(y[-1]) for _ in range(3))
|
|
232
|
-
|
|
235
|
+
y.extend(self.m(y[-1]) for _ in range(getattr(self, "n", 3)))
|
|
236
|
+
y = self.cv2(torch.cat(y, 1))
|
|
237
|
+
return y + x if getattr(self, "add", False) else y
|
|
233
238
|
|
|
234
239
|
|
|
235
240
|
class C1(nn.Module):
|
|
@@ -1065,7 +1070,15 @@ class C3k2(C2f):
|
|
|
1065
1070
|
"""Faster Implementation of CSP Bottleneck with 2 convolutions."""
|
|
1066
1071
|
|
|
1067
1072
|
def __init__(
|
|
1068
|
-
self,
|
|
1073
|
+
self,
|
|
1074
|
+
c1: int,
|
|
1075
|
+
c2: int,
|
|
1076
|
+
n: int = 1,
|
|
1077
|
+
c3k: bool = False,
|
|
1078
|
+
e: float = 0.5,
|
|
1079
|
+
attn: bool = False,
|
|
1080
|
+
g: int = 1,
|
|
1081
|
+
shortcut: bool = True,
|
|
1069
1082
|
):
|
|
1070
1083
|
"""Initialize C3k2 module.
|
|
1071
1084
|
|
|
@@ -1075,12 +1088,21 @@ class C3k2(C2f):
|
|
|
1075
1088
|
n (int): Number of blocks.
|
|
1076
1089
|
c3k (bool): Whether to use C3k blocks.
|
|
1077
1090
|
e (float): Expansion ratio.
|
|
1091
|
+
attn (bool): Whether to use attention blocks.
|
|
1078
1092
|
g (int): Groups for convolutions.
|
|
1079
1093
|
shortcut (bool): Whether to use shortcut connections.
|
|
1080
1094
|
"""
|
|
1081
1095
|
super().__init__(c1, c2, n, shortcut, g, e)
|
|
1082
1096
|
self.m = nn.ModuleList(
|
|
1083
|
-
|
|
1097
|
+
nn.Sequential(
|
|
1098
|
+
Bottleneck(self.c, self.c, shortcut, g),
|
|
1099
|
+
PSABlock(self.c, attn_ratio=0.5, num_heads=max(self.c // 64, 1)),
|
|
1100
|
+
)
|
|
1101
|
+
if attn
|
|
1102
|
+
else C3k(self.c, self.c, 2, shortcut, g)
|
|
1103
|
+
if c3k
|
|
1104
|
+
else Bottleneck(self.c, self.c, shortcut, g)
|
|
1105
|
+
for _ in range(n)
|
|
1084
1106
|
)
|
|
1085
1107
|
|
|
1086
1108
|
|
|
@@ -1148,6 +1170,8 @@ class RepVGGDW(torch.nn.Module):
|
|
|
1148
1170
|
|
|
1149
1171
|
This method fuses the convolutional layers and updates the weights and biases accordingly.
|
|
1150
1172
|
"""
|
|
1173
|
+
if not hasattr(self, "conv1"):
|
|
1174
|
+
return # already fused
|
|
1151
1175
|
conv = fuse_conv_and_bn(self.conv.conv, self.conv.bn)
|
|
1152
1176
|
conv1 = fuse_conv_and_bn(self.conv1.conv, self.conv1.bn)
|
|
1153
1177
|
|
|
@@ -1391,7 +1415,7 @@ class PSA(nn.Module):
|
|
|
1391
1415
|
self.cv1 = Conv(c1, 2 * self.c, 1, 1)
|
|
1392
1416
|
self.cv2 = Conv(2 * self.c, c1, 1)
|
|
1393
1417
|
|
|
1394
|
-
self.attn = Attention(self.c, attn_ratio=0.5, num_heads=self.c // 64)
|
|
1418
|
+
self.attn = Attention(self.c, attn_ratio=0.5, num_heads=max(self.c // 64, 1))
|
|
1395
1419
|
self.ffn = nn.Sequential(Conv(self.c, self.c * 2, 1), Conv(self.c * 2, self.c, 1, act=False))
|
|
1396
1420
|
|
|
1397
1421
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
@@ -1945,3 +1969,99 @@ class SAVPE(nn.Module):
|
|
|
1945
1969
|
aggregated = score.transpose(-2, -3) @ x.reshape(B, self.c, C // self.c, -1).transpose(-1, -2)
|
|
1946
1970
|
|
|
1947
1971
|
return F.normalize(aggregated.transpose(-2, -3).reshape(B, Q, -1), dim=-1, p=2)
|
|
1972
|
+
|
|
1973
|
+
|
|
1974
|
+
class Proto26(Proto):
|
|
1975
|
+
"""Ultralytics YOLO26 models mask Proto module for segmentation models."""
|
|
1976
|
+
|
|
1977
|
+
def __init__(self, ch: tuple = (), c_: int = 256, c2: int = 32, nc: int = 80):
|
|
1978
|
+
"""Initialize the Ultralytics YOLO models mask Proto module with specified number of protos and masks.
|
|
1979
|
+
|
|
1980
|
+
Args:
|
|
1981
|
+
ch (tuple): Tuple of channel sizes from backbone feature maps.
|
|
1982
|
+
c_ (int): Intermediate channels.
|
|
1983
|
+
c2 (int): Output channels (number of protos).
|
|
1984
|
+
nc (int): Number of classes for semantic segmentation.
|
|
1985
|
+
"""
|
|
1986
|
+
super().__init__(c_, c_, c2)
|
|
1987
|
+
self.feat_refine = nn.ModuleList(Conv(x, ch[0], k=1) for x in ch[1:])
|
|
1988
|
+
self.feat_fuse = Conv(ch[0], c_, k=3)
|
|
1989
|
+
self.semseg = nn.Sequential(Conv(ch[0], c_, k=3), Conv(c_, c_, k=3), nn.Conv2d(c_, nc, 1))
|
|
1990
|
+
|
|
1991
|
+
def forward(self, x: torch.Tensor, return_semseg: bool = True) -> torch.Tensor:
|
|
1992
|
+
"""Perform a forward pass through layers using an upsampled input image."""
|
|
1993
|
+
feat = x[0]
|
|
1994
|
+
for i, f in enumerate(self.feat_refine):
|
|
1995
|
+
up_feat = f(x[i + 1])
|
|
1996
|
+
up_feat = F.interpolate(up_feat, size=feat.shape[2:], mode="nearest")
|
|
1997
|
+
feat = feat + up_feat
|
|
1998
|
+
p = super().forward(self.feat_fuse(feat))
|
|
1999
|
+
if self.training and return_semseg:
|
|
2000
|
+
semseg = self.semseg(feat)
|
|
2001
|
+
return (p, semseg)
|
|
2002
|
+
return p
|
|
2003
|
+
|
|
2004
|
+
def fuse(self):
|
|
2005
|
+
"""Fuse the model for inference by removing the semantic segmentation head."""
|
|
2006
|
+
self.semseg = None
|
|
2007
|
+
|
|
2008
|
+
|
|
2009
|
+
class RealNVP(nn.Module):
|
|
2010
|
+
"""RealNVP: a flow-based generative model.
|
|
2011
|
+
|
|
2012
|
+
References:
|
|
2013
|
+
https://arxiv.org/abs/1605.08803
|
|
2014
|
+
https://github.com/open-mmlab/mmpose/blob/main/mmpose/models/utils/realnvp.py
|
|
2015
|
+
"""
|
|
2016
|
+
|
|
2017
|
+
@staticmethod
|
|
2018
|
+
def nets():
|
|
2019
|
+
"""Get the scale model in a single invertable mapping."""
|
|
2020
|
+
return nn.Sequential(nn.Linear(2, 64), nn.SiLU(), nn.Linear(64, 64), nn.SiLU(), nn.Linear(64, 2), nn.Tanh())
|
|
2021
|
+
|
|
2022
|
+
@staticmethod
|
|
2023
|
+
def nett():
|
|
2024
|
+
"""Get the translation model in a single invertable mapping."""
|
|
2025
|
+
return nn.Sequential(nn.Linear(2, 64), nn.SiLU(), nn.Linear(64, 64), nn.SiLU(), nn.Linear(64, 2))
|
|
2026
|
+
|
|
2027
|
+
@property
|
|
2028
|
+
def prior(self):
|
|
2029
|
+
"""The prior distribution."""
|
|
2030
|
+
return torch.distributions.MultivariateNormal(self.loc, self.cov)
|
|
2031
|
+
|
|
2032
|
+
def __init__(self):
|
|
2033
|
+
super().__init__()
|
|
2034
|
+
|
|
2035
|
+
self.register_buffer("loc", torch.zeros(2))
|
|
2036
|
+
self.register_buffer("cov", torch.eye(2))
|
|
2037
|
+
self.register_buffer("mask", torch.tensor([[0, 1], [1, 0]] * 3, dtype=torch.float32))
|
|
2038
|
+
|
|
2039
|
+
self.s = torch.nn.ModuleList([self.nets() for _ in range(len(self.mask))])
|
|
2040
|
+
self.t = torch.nn.ModuleList([self.nett() for _ in range(len(self.mask))])
|
|
2041
|
+
self.init_weights()
|
|
2042
|
+
|
|
2043
|
+
def init_weights(self):
|
|
2044
|
+
"""Initialization model weights."""
|
|
2045
|
+
for m in self.modules():
|
|
2046
|
+
if isinstance(m, nn.Linear):
|
|
2047
|
+
nn.init.xavier_uniform_(m.weight, gain=0.01)
|
|
2048
|
+
|
|
2049
|
+
def backward_p(self, x):
|
|
2050
|
+
"""Apply mapping form the data space to the latent space and calculate the log determinant of the Jacobian
|
|
2051
|
+
matrix.
|
|
2052
|
+
"""
|
|
2053
|
+
log_det_jacob, z = x.new_zeros(x.shape[0]), x
|
|
2054
|
+
for i in reversed(range(len(self.t))):
|
|
2055
|
+
z_ = self.mask[i] * z
|
|
2056
|
+
s = self.s[i](z_) * (1 - self.mask[i])
|
|
2057
|
+
t = self.t[i](z_) * (1 - self.mask[i])
|
|
2058
|
+
z = (1 - self.mask[i]) * (z - t) * torch.exp(-s) + z_
|
|
2059
|
+
log_det_jacob -= s.sum(dim=1)
|
|
2060
|
+
return z, log_det_jacob
|
|
2061
|
+
|
|
2062
|
+
def log_prob(self, x):
|
|
2063
|
+
"""Calculate the log probability of given sample in data space."""
|
|
2064
|
+
if x.dtype == torch.float32 and self.s[0][0].weight.dtype != torch.float32:
|
|
2065
|
+
self.float()
|
|
2066
|
+
z, log_det = self.backward_p(x)
|
|
2067
|
+
return self.prior.log_prob(z) + log_det
|