ultralytics-opencv-headless 8.3.253__py3-none-any.whl → 8.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. tests/__init__.py +2 -2
  2. tests/conftest.py +1 -1
  3. tests/test_cuda.py +8 -2
  4. tests/test_engine.py +8 -8
  5. tests/test_exports.py +11 -4
  6. tests/test_integrations.py +9 -9
  7. tests/test_python.py +14 -14
  8. tests/test_solutions.py +3 -3
  9. ultralytics/__init__.py +1 -1
  10. ultralytics/cfg/__init__.py +7 -9
  11. ultralytics/cfg/default.yaml +3 -1
  12. ultralytics/cfg/models/26/yolo26-cls.yaml +33 -0
  13. ultralytics/cfg/models/26/yolo26-obb.yaml +52 -0
  14. ultralytics/cfg/models/26/yolo26-p2.yaml +60 -0
  15. ultralytics/cfg/models/26/yolo26-p6.yaml +62 -0
  16. ultralytics/cfg/models/26/yolo26-pose.yaml +53 -0
  17. ultralytics/cfg/models/26/yolo26-seg.yaml +52 -0
  18. ultralytics/cfg/models/26/yolo26.yaml +52 -0
  19. ultralytics/cfg/models/26/yoloe-26-seg.yaml +53 -0
  20. ultralytics/cfg/models/26/yoloe-26.yaml +53 -0
  21. ultralytics/data/augment.py +7 -0
  22. ultralytics/data/converter.py +49 -30
  23. ultralytics/data/dataset.py +1 -1
  24. ultralytics/engine/exporter.py +9 -4
  25. ultralytics/engine/model.py +1 -1
  26. ultralytics/engine/results.py +19 -10
  27. ultralytics/engine/trainer.py +48 -25
  28. ultralytics/engine/tuner.py +15 -7
  29. ultralytics/models/fastsam/predict.py +1 -1
  30. ultralytics/models/yolo/detect/train.py +3 -2
  31. ultralytics/models/yolo/detect/val.py +6 -0
  32. ultralytics/models/yolo/model.py +1 -1
  33. ultralytics/models/yolo/obb/predict.py +1 -1
  34. ultralytics/models/yolo/obb/train.py +1 -1
  35. ultralytics/models/yolo/pose/train.py +1 -1
  36. ultralytics/models/yolo/segment/predict.py +1 -1
  37. ultralytics/models/yolo/segment/train.py +1 -1
  38. ultralytics/models/yolo/segment/val.py +3 -1
  39. ultralytics/models/yolo/yoloe/train.py +6 -1
  40. ultralytics/models/yolo/yoloe/train_seg.py +6 -1
  41. ultralytics/nn/autobackend.py +3 -3
  42. ultralytics/nn/modules/__init__.py +8 -0
  43. ultralytics/nn/modules/block.py +128 -8
  44. ultralytics/nn/modules/head.py +789 -204
  45. ultralytics/nn/tasks.py +74 -29
  46. ultralytics/nn/text_model.py +5 -2
  47. ultralytics/optim/__init__.py +5 -0
  48. ultralytics/optim/muon.py +338 -0
  49. ultralytics/utils/callbacks/platform.py +9 -7
  50. ultralytics/utils/downloads.py +3 -1
  51. ultralytics/utils/export/engine.py +19 -10
  52. ultralytics/utils/export/imx.py +22 -11
  53. ultralytics/utils/export/tensorflow.py +21 -21
  54. ultralytics/utils/loss.py +587 -203
  55. ultralytics/utils/metrics.py +1 -0
  56. ultralytics/utils/ops.py +11 -2
  57. ultralytics/utils/tal.py +98 -19
  58. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.2.dist-info}/METADATA +31 -39
  59. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.2.dist-info}/RECORD +63 -52
  60. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.2.dist-info}/WHEEL +0 -0
  61. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.2.dist-info}/entry_points.txt +0 -0
  62. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.2.dist-info}/licenses/LICENSE +0 -0
  63. {ultralytics_opencv_headless-8.3.253.dist-info → ultralytics_opencv_headless-8.4.2.dist-info}/top_level.txt +0 -0
@@ -27,6 +27,7 @@ from ultralytics import __version__
27
27
  from ultralytics.cfg import get_cfg, get_save_dir
28
28
  from ultralytics.data.utils import check_cls_dataset, check_det_dataset
29
29
  from ultralytics.nn.tasks import load_checkpoint
30
+ from ultralytics.optim import MuSGD
30
31
  from ultralytics.utils import (
31
32
  DEFAULT_CFG,
32
33
  GIT,
@@ -464,6 +465,9 @@ class BaseTrainer:
464
465
 
465
466
  self.run_callbacks("on_train_batch_end")
466
467
 
468
+ if hasattr(unwrap_model(self.model).criterion, "update"):
469
+ unwrap_model(self.model).criterion.update()
470
+
467
471
  self.lr = {f"lr/pg{ir}": x["lr"] for ir, x in enumerate(self.optimizer.param_groups)} # for loggers
468
472
 
469
473
  self.run_callbacks("on_train_epoch_end")
@@ -628,21 +632,19 @@ class BaseTrainer:
628
632
  (dict): A dictionary containing the training/validation/test dataset and category names.
629
633
  """
630
634
  try:
631
- if self.args.task == "classify":
632
- data = check_cls_dataset(self.args.data)
633
- elif str(self.args.data).rsplit(".", 1)[-1] == "ndjson" or (
634
- str(self.args.data).startswith("ul://") and "/datasets/" in str(self.args.data)
635
- ):
636
- # Convert NDJSON to YOLO format (including ul:// platform dataset URIs)
635
+ # Convert ul:// platform URIs and NDJSON files to local dataset format first
636
+ data_str = str(self.args.data)
637
+ if data_str.endswith(".ndjson") or (data_str.startswith("ul://") and "/datasets/" in data_str):
637
638
  import asyncio
638
639
 
639
640
  from ultralytics.data.converter import convert_ndjson_to_yolo
640
641
  from ultralytics.utils.checks import check_file
641
642
 
642
- ndjson_file = check_file(self.args.data) # Resolve ul:// or URL to local .ndjson file
643
- yaml_path = asyncio.run(convert_ndjson_to_yolo(ndjson_file))
644
- self.args.data = str(yaml_path)
645
- data = check_det_dataset(self.args.data)
643
+ self.args.data = str(asyncio.run(convert_ndjson_to_yolo(check_file(self.args.data))))
644
+
645
+ # Task-specific dataset checking
646
+ if self.args.task == "classify":
647
+ data = check_cls_dataset(self.args.data)
646
648
  elif str(self.args.data).rsplit(".", 1)[-1] in {"yaml", "yml"} or self.args.task in {
647
649
  "detect",
648
650
  "segment",
@@ -930,7 +932,7 @@ class BaseTrainer:
930
932
  Returns:
931
933
  (torch.optim.Optimizer): The constructed optimizer.
932
934
  """
933
- g = [], [], [] # optimizer parameter groups
935
+ g = [{}, {}, {}, {}] # optimizer parameter groups
934
936
  bn = tuple(v for k, v in nn.__dict__.items() if "Norm" in k) # normalization layers, i.e. BatchNorm2d()
935
937
  if name == "auto":
936
938
  LOGGER.info(
@@ -940,38 +942,59 @@ class BaseTrainer:
940
942
  )
941
943
  nc = self.data.get("nc", 10) # number of classes
942
944
  lr_fit = round(0.002 * 5 / (4 + nc), 6) # lr0 fit equation to 6 decimal places
943
- name, lr, momentum = ("SGD", 0.01, 0.9) if iterations > 10000 else ("AdamW", lr_fit, 0.9)
945
+ name, lr, momentum = ("SGD", 0.01, 0.9) if iterations > 10000 else ("MuSGD", lr_fit, 0.9)
944
946
  self.args.warmup_bias_lr = 0.0 # no higher than 0.01 for Adam
945
947
 
946
- for module_name, module in model.named_modules():
948
+ use_muon = name == "MuSGD"
949
+ for module_name, module in unwrap_model(model).named_modules():
947
950
  for param_name, param in module.named_parameters(recurse=False):
948
951
  fullname = f"{module_name}.{param_name}" if module_name else param_name
949
- if "bias" in fullname: # bias (no decay)
950
- g[2].append(param)
952
+ if param.ndim >= 2 and use_muon:
953
+ g[3][fullname] = param # muon params
954
+ elif "bias" in fullname: # bias (no decay)
955
+ g[2][fullname] = param
951
956
  elif isinstance(module, bn) or "logit_scale" in fullname: # weight (no decay)
952
957
  # ContrastiveHead and BNContrastiveHead included here with 'logit_scale'
953
- g[1].append(param)
958
+ g[1][fullname] = param
954
959
  else: # weight (with decay)
955
- g[0].append(param)
960
+ g[0][fullname] = param
961
+ if not use_muon:
962
+ g = [x.values() for x in g[:3]] # convert to list of params
956
963
 
957
- optimizers = {"Adam", "Adamax", "AdamW", "NAdam", "RAdam", "RMSProp", "SGD", "auto"}
964
+ optimizers = {"Adam", "Adamax", "AdamW", "NAdam", "RAdam", "RMSProp", "SGD", "MuSGD", "auto"}
958
965
  name = {x.lower(): x for x in optimizers}.get(name.lower())
959
966
  if name in {"Adam", "Adamax", "AdamW", "NAdam", "RAdam"}:
960
- optimizer = getattr(optim, name, optim.Adam)(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0)
967
+ optim_args = dict(lr=lr, betas=(momentum, 0.999), weight_decay=0.0)
961
968
  elif name == "RMSProp":
962
- optimizer = optim.RMSprop(g[2], lr=lr, momentum=momentum)
963
- elif name == "SGD":
964
- optimizer = optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True)
969
+ optim_args = dict(lr=lr, momentum=momentum)
970
+ elif name == "SGD" or name == "MuSGD":
971
+ optim_args = dict(lr=lr, momentum=momentum, nesterov=True)
965
972
  else:
966
973
  raise NotImplementedError(
967
974
  f"Optimizer '{name}' not found in list of available optimizers {optimizers}. "
968
975
  "Request support for addition optimizers at https://github.com/ultralytics/ultralytics."
969
976
  )
970
977
 
971
- optimizer.add_param_group({"params": g[0], "weight_decay": decay}) # add g0 with weight_decay
972
- optimizer.add_param_group({"params": g[1], "weight_decay": 0.0}) # add g1 (BatchNorm2d weights)
978
+ g[2] = {"params": g[2], **optim_args}
979
+ g[0] = {"params": g[0], **optim_args, "weight_decay": decay}
980
+ g[1] = {"params": g[1], **optim_args, "weight_decay": 0.0}
981
+ if name == "MuSGD":
982
+ g[3] = {"params": g[3], **optim_args, "weight_decay": decay, "use_muon": True}
983
+ import re
984
+
985
+ # higher lr for certain parameters in MuSGD
986
+ pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg|flow_model")
987
+ g_ = [] # new param groups
988
+ for x in g:
989
+ p = x.pop("params")
990
+ p1 = [v for k, v in p.items() if pattern.search(k)]
991
+ p2 = [v for k, v in p.items() if not pattern.search(k)]
992
+ g_.extend([{"params": p1, **x, "lr": lr * 3}, {"params": p2, **x}])
993
+ g = g_
994
+ optimizer = getattr(optim, name, MuSGD)(params=g)
995
+
973
996
  LOGGER.info(
974
997
  f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}, momentum={momentum}) with parameter groups "
975
- f"{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias(decay=0.0)"
998
+ f"{len(g[1]['params'])} weight(decay=0.0), {len(g[0]['params']) if len(g[0]) else len(g[3]['params'])} weight(decay={decay}), {len(g[2]['params'])} bias(decay=0.0)"
976
999
  )
977
1000
  return optimizer
@@ -90,15 +90,15 @@ class Tuner:
90
90
  """
91
91
  self.space = args.pop("space", None) or { # key: (min, max, gain(optional))
92
92
  # 'optimizer': tune.choice(['SGD', 'Adam', 'AdamW', 'NAdam', 'RAdam', 'RMSProp']),
93
- "lr0": (1e-5, 1e-1), # initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
94
- "lrf": (0.0001, 0.1), # final OneCycleLR learning rate (lr0 * lrf)
93
+ "lr0": (1e-5, 1e-2), # initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
94
+ "lrf": (0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf)
95
95
  "momentum": (0.7, 0.98, 0.3), # SGD momentum/Adam beta1
96
96
  "weight_decay": (0.0, 0.001), # optimizer weight decay 5e-4
97
97
  "warmup_epochs": (0.0, 5.0), # warmup epochs (fractions ok)
98
98
  "warmup_momentum": (0.0, 0.95), # warmup initial momentum
99
99
  "box": (1.0, 20.0), # box loss gain
100
100
  "cls": (0.1, 4.0), # cls loss gain (scale with pixels)
101
- "dfl": (0.4, 6.0), # dfl loss gain
101
+ "dfl": (0.4, 12.0), # dfl loss gain
102
102
  "hsv_h": (0.0, 0.1), # image HSV-Hue augmentation (fraction)
103
103
  "hsv_s": (0.0, 0.9), # image HSV-Saturation augmentation (fraction)
104
104
  "hsv_v": (0.0, 0.9), # image HSV-Value augmentation (fraction)
@@ -254,7 +254,7 @@ class Tuner:
254
254
  f.write(headers)
255
255
  for result in all_results:
256
256
  fitness = result["fitness"]
257
- hyp_values = [result["hyperparameters"][k] for k in self.space.keys()]
257
+ hyp_values = [result["hyperparameters"].get(k, self.args.get(k)) for k in self.space.keys()]
258
258
  log_row = [round(fitness, 5), *hyp_values]
259
259
  f.write(",".join(map(str, log_row)) + "\n")
260
260
 
@@ -273,6 +273,8 @@ class Tuner:
273
273
  parents_mat = np.stack([x[i][1:] for i in idxs], 0) # (k, ng) strip fitness
274
274
  lo, hi = parents_mat.min(0), parents_mat.max(0)
275
275
  span = hi - lo
276
+ # use a small random span where span is zero so mutation still occurs
277
+ span = np.where(span == 0, np.random.uniform(0.01, 0.1, span.shape), span)
276
278
  return np.random.uniform(lo - alpha * span, hi + alpha * span)
277
279
 
278
280
  def _mutate(
@@ -297,7 +299,12 @@ class Tuner:
297
299
  if self.mongodb:
298
300
  if results := self._get_mongodb_results(n):
299
301
  # MongoDB already sorted by fitness DESC, so results[0] is best
300
- x = np.array([[r["fitness"]] + [r["hyperparameters"][k] for k in self.space.keys()] for r in results])
302
+ x = np.array(
303
+ [
304
+ [r["fitness"]] + [r["hyperparameters"].get(k, self.args.get(k)) for k in self.space.keys()]
305
+ for r in results
306
+ ]
307
+ )
301
308
  elif self.collection.name in self.collection.database.list_collection_names(): # Tuner started elsewhere
302
309
  x = np.array([[0.0] + [getattr(self.args, k) for k in self.space.keys()]])
303
310
 
@@ -335,10 +342,12 @@ class Tuner:
335
342
  # Update types
336
343
  if "close_mosaic" in hyp:
337
344
  hyp["close_mosaic"] = round(hyp["close_mosaic"])
345
+ if "epochs" in hyp:
346
+ hyp["epochs"] = round(hyp["epochs"])
338
347
 
339
348
  return hyp
340
349
 
341
- def __call__(self, model=None, iterations: int = 10, cleanup: bool = True):
350
+ def __call__(self, iterations: int = 10, cleanup: bool = True):
342
351
  """Execute the hyperparameter evolution process when the Tuner instance is called.
343
352
 
344
353
  This method iterates through the specified number of iterations, performing the following steps:
@@ -349,7 +358,6 @@ class Tuner:
349
358
  5. Track the best performing configuration across all iterations
350
359
 
351
360
  Args:
352
- model (Model | None, optional): A pre-initialized YOLO model to be used for training.
353
361
  iterations (int): The number of generations to run the evolution for.
354
362
  cleanup (bool): Whether to delete iteration weights to reduce storage space during tuning.
355
363
  """
@@ -63,7 +63,7 @@ class FastSAMPredictor(SegmentationPredictor):
63
63
  results = super().postprocess(preds, img, orig_imgs)
64
64
  for result in results:
65
65
  full_box = torch.tensor(
66
- [0, 0, result.orig_shape[1], result.orig_shape[0]], device=preds[0].device, dtype=torch.float32
66
+ [0, 0, result.orig_shape[1], result.orig_shape[0]], device=result.boxes.data.device, dtype=torch.float32
67
67
  )
68
68
  boxes = adjust_bboxes_to_image_border(result.boxes.xyxy, result.orig_shape)
69
69
  idx = torch.nonzero(box_iou(full_box[None], boxes) > 0.9).flatten()
@@ -117,10 +117,11 @@ class DetectionTrainer(BaseTrainer):
117
117
  if isinstance(v, torch.Tensor):
118
118
  batch[k] = v.to(self.device, non_blocking=self.device.type == "cuda")
119
119
  batch["img"] = batch["img"].float() / 255
120
- if self.args.multi_scale:
120
+ multi_scale = self.args.multi_scale
121
+ if random.random() < multi_scale:
121
122
  imgs = batch["img"]
122
123
  sz = (
123
- random.randrange(int(self.args.imgsz * 0.5), int(self.args.imgsz * 1.5 + self.stride))
124
+ random.randrange(int(self.args.imgsz * 0.5), int(self.args.imgsz * 1 + self.stride))
124
125
  // self.stride
125
126
  * self.stride
126
127
  ) # size
@@ -494,6 +494,12 @@ class DetectionValidator(BaseValidator):
494
494
  # update mAP50-95 and mAP50
495
495
  stats[f"metrics/mAP50({suffix[i][0]})"] = val.stats_as_dict["AP_50"]
496
496
  stats[f"metrics/mAP50-95({suffix[i][0]})"] = val.stats_as_dict["AP_all"]
497
+ # record mAP for small, medium, large objects as well
498
+ stats["metrics/mAP_small(B)"] = val.stats_as_dict["AP_small"]
499
+ stats["metrics/mAP_medium(B)"] = val.stats_as_dict["AP_medium"]
500
+ stats["metrics/mAP_large(B)"] = val.stats_as_dict["AP_large"]
501
+ # update fitness
502
+ stats["fitness"] = 0.9 * val.stats_as_dict["AP_all"] + 0.1 * val.stats_as_dict["AP_50"]
497
503
 
498
504
  if self.is_lvis:
499
505
  stats[f"metrics/APr({suffix[i][0]})"] = val.stats_as_dict["APr"]
@@ -399,7 +399,7 @@ class YOLOE(Model):
399
399
  "batch": 1,
400
400
  "device": kwargs.get("device", None),
401
401
  "half": kwargs.get("half", False),
402
- "imgsz": kwargs.get("imgsz", self.overrides["imgsz"]),
402
+ "imgsz": kwargs.get("imgsz", self.overrides.get("imgsz", 640)),
403
403
  },
404
404
  _callbacks=self.callbacks,
405
405
  )
@@ -50,7 +50,7 @@ class OBBPredictor(DetectionPredictor):
50
50
  (Results): The result object containing the original image, image path, class names, and oriented bounding
51
51
  boxes.
52
52
  """
53
- rboxes = ops.regularize_rboxes(torch.cat([pred[:, :4], pred[:, -1:]], dim=-1))
53
+ rboxes = torch.cat([pred[:, :4], pred[:, -1:]], dim=-1)
54
54
  rboxes[:, :4] = ops.scale_boxes(img.shape[2:], rboxes[:, :4], orig_img.shape, xywh=True)
55
55
  obb = torch.cat([rboxes, pred[:, 4:6]], dim=-1)
56
56
  return Results(orig_img, path=img_path, names=self.model.names, obb=obb)
@@ -73,7 +73,7 @@ class OBBTrainer(yolo.detect.DetectionTrainer):
73
73
 
74
74
  def get_validator(self):
75
75
  """Return an instance of OBBValidator for validation of YOLO model."""
76
- self.loss_names = "box_loss", "cls_loss", "dfl_loss"
76
+ self.loss_names = "box_loss", "cls_loss", "dfl_loss", "angle_loss"
77
77
  return yolo.obb.OBBValidator(
78
78
  self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
79
79
  )
@@ -90,7 +90,7 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
90
90
 
91
91
  def get_validator(self):
92
92
  """Return an instance of the PoseValidator class for validation."""
93
- self.loss_names = "box_loss", "pose_loss", "kobj_loss", "cls_loss", "dfl_loss"
93
+ self.loss_names = "box_loss", "pose_loss", "kobj_loss", "cls_loss", "dfl_loss", "rle_loss"
94
94
  return yolo.pose.PoseValidator(
95
95
  self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
96
96
  )
@@ -60,7 +60,7 @@ class SegmentationPredictor(DetectionPredictor):
60
60
  >>> results = predictor.postprocess(preds, img, orig_img)
61
61
  """
62
62
  # Extract protos - tuple if PyTorch model or array if exported
63
- protos = preds[1][-1] if isinstance(preds[1], tuple) else preds[1]
63
+ protos = preds[0][-1] if isinstance(preds[0], tuple) else preds[-1]
64
64
  return super().postprocess(preds[0], img, orig_imgs, protos=protos)
65
65
 
66
66
  def construct_results(self, preds, img, orig_imgs, protos):
@@ -63,7 +63,7 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
63
63
 
64
64
  def get_validator(self):
65
65
  """Return an instance of SegmentationValidator for validation of YOLO model."""
66
- self.loss_names = "box_loss", "seg_loss", "cls_loss", "dfl_loss"
66
+ self.loss_names = "box_loss", "seg_loss", "cls_loss", "dfl_loss", "sem_loss"
67
67
  return yolo.segment.SegmentationValidator(
68
68
  self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
69
69
  )
@@ -99,7 +99,9 @@ class SegmentationValidator(DetectionValidator):
99
99
  Returns:
100
100
  list[dict[str, torch.Tensor]]: Processed detection predictions with masks.
101
101
  """
102
- proto = preds[1][-1] if len(preds[1]) == 3 else preds[1] # second output is len 3 if pt, but only 1 if exported
102
+ proto = (
103
+ preds[0][-1] if isinstance(preds[0], tuple) else preds[-1]
104
+ ) # protos are the last item of preds[0] when it is a tuple, otherwise the last item of preds
103
105
  preds = super().postprocess(preds[0])
104
106
  imgsz = [4 * x for x in proto.shape[2:]] # get image size from proto
105
107
  for i, pred in enumerate(preds):
@@ -147,7 +147,12 @@ class YOLOEPETrainer(DetectionTrainer):
147
147
  model.model[-1].cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
148
148
  model.model[-1].cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
149
149
  model.model[-1].cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
150
- del model.pe
150
+
151
+ if getattr(model.model[-1], "one2one_cv3", None) is not None:
152
+ model.model[-1].one2one_cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
153
+ model.model[-1].one2one_cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
154
+ model.model[-1].one2one_cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
155
+
151
156
  model.train()
152
157
 
153
158
  return model
@@ -104,7 +104,12 @@ class YOLOEPESegTrainer(SegmentationTrainer):
104
104
  model.model[-1].cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
105
105
  model.model[-1].cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
106
106
  model.model[-1].cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
107
- del model.pe
107
+
108
+ if getattr(model.model[-1], "one2one_cv3", None) is not None:
109
+ model.model[-1].one2one_cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
110
+ model.model[-1].one2one_cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
111
+ model.model[-1].one2one_cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
112
+
108
113
  model.train()
109
114
 
110
115
  return model
@@ -221,6 +221,7 @@ class AutoBackend(nn.Module):
221
221
  for p in model.parameters():
222
222
  p.requires_grad = False
223
223
  self.model = model # explicitly assign for to(), cpu(), cuda(), half()
224
+ end2end = getattr(model, "end2end", False)
224
225
 
225
226
  # TorchScript
226
227
  elif jit:
@@ -545,8 +546,7 @@ class AutoBackend(nn.Module):
545
546
  # NCNN
546
547
  elif ncnn:
547
548
  LOGGER.info(f"Loading {w} for NCNN inference...")
548
- # use git source for ARM64 due to broken PyPI packages https://github.com/Tencent/ncnn/issues/6509
549
- check_requirements("git+https://github.com/Tencent/ncnn.git" if ARM64 else "ncnn", cmds="--no-deps")
549
+ check_requirements("ncnn", cmds="--no-deps")
550
550
  import ncnn as pyncnn
551
551
 
552
552
  net = pyncnn.Net()
@@ -657,7 +657,7 @@ class AutoBackend(nn.Module):
657
657
  names = metadata["names"]
658
658
  kpt_shape = metadata.get("kpt_shape")
659
659
  kpt_names = metadata.get("kpt_names")
660
- end2end = metadata.get("args", {}).get("nms", False)
660
+ end2end = metadata.get("end2end", False) or metadata.get("args", {}).get("nms", False)
661
661
  dynamic = metadata.get("args", {}).get("dynamic", dynamic)
662
662
  ch = metadata.get("channels", 3)
663
663
  elif not (pt or triton or nn_module):
@@ -78,15 +78,19 @@ from .conv import (
78
78
  )
79
79
  from .head import (
80
80
  OBB,
81
+ OBB26,
81
82
  Classify,
82
83
  Detect,
83
84
  LRPCHead,
84
85
  Pose,
86
+ Pose26,
85
87
  RTDETRDecoder,
86
88
  Segment,
89
+ Segment26,
87
90
  WorldDetect,
88
91
  YOLOEDetect,
89
92
  YOLOESegment,
93
+ YOLOESegment26,
90
94
  v10Detect,
91
95
  )
92
96
  from .transformer import (
@@ -115,6 +119,7 @@ __all__ = (
115
119
  "ELAN1",
116
120
  "MLP",
117
121
  "OBB",
122
+ "OBB26",
118
123
  "PSA",
119
124
  "SPP",
120
125
  "SPPELAN",
@@ -161,6 +166,7 @@ __all__ = (
161
166
  "MSDeformAttn",
162
167
  "MaxSigmoidAttnBlock",
163
168
  "Pose",
169
+ "Pose26",
164
170
  "Proto",
165
171
  "RTDETRDecoder",
166
172
  "RepC3",
@@ -170,6 +176,7 @@ __all__ = (
170
176
  "ResNetLayer",
171
177
  "SCDown",
172
178
  "Segment",
179
+ "Segment26",
173
180
  "SpatialAttention",
174
181
  "TorchVision",
175
182
  "TransformerBlock",
@@ -178,5 +185,6 @@ __all__ = (
178
185
  "WorldDetect",
179
186
  "YOLOEDetect",
180
187
  "YOLOESegment",
188
+ "YOLOESegment26",
181
189
  "v10Detect",
182
190
  )
@@ -208,28 +208,33 @@ class SPP(nn.Module):
208
208
  class SPPF(nn.Module):
209
209
  """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""
210
210
 
211
- def __init__(self, c1: int, c2: int, k: int = 5):
211
+ def __init__(self, c1: int, c2: int, k: int = 5, n: int = 3, shortcut: bool = False):
212
212
  """Initialize the SPPF layer with given input/output channels and kernel size.
213
213
 
214
214
  Args:
215
215
  c1 (int): Input channels.
216
216
  c2 (int): Output channels.
217
217
  k (int): Kernel size.
218
+ n (int): Number of pooling iterations.
219
+ shortcut (bool): Whether to use shortcut connection.
218
220
 
219
221
  Notes:
220
222
  This module is equivalent to SPP(k=(5, 9, 13)).
221
223
  """
222
224
  super().__init__()
223
225
  c_ = c1 // 2 # hidden channels
224
- self.cv1 = Conv(c1, c_, 1, 1)
225
- self.cv2 = Conv(c_ * 4, c2, 1, 1)
226
+ self.cv1 = Conv(c1, c_, 1, 1, act=False)
227
+ self.cv2 = Conv(c_ * (n + 1), c2, 1, 1)
226
228
  self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
229
+ self.n = n
230
+ self.add = shortcut and c1 == c2
227
231
 
228
232
  def forward(self, x: torch.Tensor) -> torch.Tensor:
229
233
  """Apply sequential pooling operations to input and return concatenated feature maps."""
230
234
  y = [self.cv1(x)]
231
- y.extend(self.m(y[-1]) for _ in range(3))
232
- return self.cv2(torch.cat(y, 1))
235
+ y.extend(self.m(y[-1]) for _ in range(getattr(self, "n", 3)))
236
+ y = self.cv2(torch.cat(y, 1))
237
+ return y + x if getattr(self, "add", False) else y
233
238
 
234
239
 
235
240
  class C1(nn.Module):
@@ -1065,7 +1070,15 @@ class C3k2(C2f):
1065
1070
  """Faster Implementation of CSP Bottleneck with 2 convolutions."""
1066
1071
 
1067
1072
  def __init__(
1068
- self, c1: int, c2: int, n: int = 1, c3k: bool = False, e: float = 0.5, g: int = 1, shortcut: bool = True
1073
+ self,
1074
+ c1: int,
1075
+ c2: int,
1076
+ n: int = 1,
1077
+ c3k: bool = False,
1078
+ e: float = 0.5,
1079
+ attn: bool = False,
1080
+ g: int = 1,
1081
+ shortcut: bool = True,
1069
1082
  ):
1070
1083
  """Initialize C3k2 module.
1071
1084
 
@@ -1075,12 +1088,21 @@ class C3k2(C2f):
1075
1088
  n (int): Number of blocks.
1076
1089
  c3k (bool): Whether to use C3k blocks.
1077
1090
  e (float): Expansion ratio.
1091
+ attn (bool): Whether to use attention blocks.
1078
1092
  g (int): Groups for convolutions.
1079
1093
  shortcut (bool): Whether to use shortcut connections.
1080
1094
  """
1081
1095
  super().__init__(c1, c2, n, shortcut, g, e)
1082
1096
  self.m = nn.ModuleList(
1083
- C3k(self.c, self.c, 2, shortcut, g) if c3k else Bottleneck(self.c, self.c, shortcut, g) for _ in range(n)
1097
+ nn.Sequential(
1098
+ Bottleneck(self.c, self.c, shortcut, g),
1099
+ PSABlock(self.c, attn_ratio=0.5, num_heads=max(self.c // 64, 1)),
1100
+ )
1101
+ if attn
1102
+ else C3k(self.c, self.c, 2, shortcut, g)
1103
+ if c3k
1104
+ else Bottleneck(self.c, self.c, shortcut, g)
1105
+ for _ in range(n)
1084
1106
  )
1085
1107
 
1086
1108
 
@@ -1148,6 +1170,8 @@ class RepVGGDW(torch.nn.Module):
1148
1170
 
1149
1171
  This method fuses the convolutional layers and updates the weights and biases accordingly.
1150
1172
  """
1173
+ if not hasattr(self, "conv1"):
1174
+ return # already fused
1151
1175
  conv = fuse_conv_and_bn(self.conv.conv, self.conv.bn)
1152
1176
  conv1 = fuse_conv_and_bn(self.conv1.conv, self.conv1.bn)
1153
1177
 
@@ -1391,7 +1415,7 @@ class PSA(nn.Module):
1391
1415
  self.cv1 = Conv(c1, 2 * self.c, 1, 1)
1392
1416
  self.cv2 = Conv(2 * self.c, c1, 1)
1393
1417
 
1394
- self.attn = Attention(self.c, attn_ratio=0.5, num_heads=self.c // 64)
1418
+ self.attn = Attention(self.c, attn_ratio=0.5, num_heads=max(self.c // 64, 1))
1395
1419
  self.ffn = nn.Sequential(Conv(self.c, self.c * 2, 1), Conv(self.c * 2, self.c, 1, act=False))
1396
1420
 
1397
1421
  def forward(self, x: torch.Tensor) -> torch.Tensor:
@@ -1945,3 +1969,99 @@ class SAVPE(nn.Module):
1945
1969
  aggregated = score.transpose(-2, -3) @ x.reshape(B, self.c, C // self.c, -1).transpose(-1, -2)
1946
1970
 
1947
1971
  return F.normalize(aggregated.transpose(-2, -3).reshape(B, Q, -1), dim=-1, p=2)
1972
+
1973
+
1974
+ class Proto26(Proto):
1975
+ """Ultralytics YOLO26 models mask Proto module for segmentation models."""
1976
+
1977
+ def __init__(self, ch: tuple = (), c_: int = 256, c2: int = 32, nc: int = 80):
1978
+ """Initialize the Ultralytics YOLO models mask Proto module with specified number of protos and masks.
1979
+
1980
+ Args:
1981
+ ch (tuple): Tuple of channel sizes from backbone feature maps.
1982
+ c_ (int): Intermediate channels.
1983
+ c2 (int): Output channels (number of protos).
1984
+ nc (int): Number of classes for semantic segmentation.
1985
+ """
1986
+ super().__init__(c_, c_, c2)
1987
+ self.feat_refine = nn.ModuleList(Conv(x, ch[0], k=1) for x in ch[1:])
1988
+ self.feat_fuse = Conv(ch[0], c_, k=3)
1989
+ self.semseg = nn.Sequential(Conv(ch[0], c_, k=3), Conv(c_, c_, k=3), nn.Conv2d(c_, nc, 1))
1990
+
1991
+ def forward(self, x: torch.Tensor, return_semseg: bool = True) -> torch.Tensor:
1992
+ """Fuse the input feature maps into protos; also return semantic segmentation output when training with return_semseg."""
1993
+ feat = x[0]
1994
+ for i, f in enumerate(self.feat_refine):
1995
+ up_feat = f(x[i + 1])
1996
+ up_feat = F.interpolate(up_feat, size=feat.shape[2:], mode="nearest")
1997
+ feat = feat + up_feat
1998
+ p = super().forward(self.feat_fuse(feat))
1999
+ if self.training and return_semseg:
2000
+ semseg = self.semseg(feat)
2001
+ return (p, semseg)
2002
+ return p
2003
+
2004
+ def fuse(self):
2005
+ """Fuse the model for inference by removing the semantic segmentation head."""
2006
+ self.semseg = None
2007
+
2008
+
2009
+ class RealNVP(nn.Module):
2010
+ """RealNVP: a flow-based generative model.
2011
+
2012
+ References:
2013
+ https://arxiv.org/abs/1605.08803
2014
+ https://github.com/open-mmlab/mmpose/blob/main/mmpose/models/utils/realnvp.py
2015
+ """
2016
+
2017
+ @staticmethod
2018
+ def nets():
2019
+ """Get the scale model in a single invertible mapping."""
2020
+ return nn.Sequential(nn.Linear(2, 64), nn.SiLU(), nn.Linear(64, 64), nn.SiLU(), nn.Linear(64, 2), nn.Tanh())
2021
+
2022
+ @staticmethod
2023
+ def nett():
2024
+ """Get the translation model in a single invertible mapping."""
2025
+ return nn.Sequential(nn.Linear(2, 64), nn.SiLU(), nn.Linear(64, 64), nn.SiLU(), nn.Linear(64, 2))
2026
+
2027
+ @property
2028
+ def prior(self):
2029
+ """The prior distribution."""
2030
+ return torch.distributions.MultivariateNormal(self.loc, self.cov)
2031
+
2032
+ def __init__(self):
2033
+ super().__init__()
2034
+
2035
+ self.register_buffer("loc", torch.zeros(2))
2036
+ self.register_buffer("cov", torch.eye(2))
2037
+ self.register_buffer("mask", torch.tensor([[0, 1], [1, 0]] * 3, dtype=torch.float32))
2038
+
2039
+ self.s = torch.nn.ModuleList([self.nets() for _ in range(len(self.mask))])
2040
+ self.t = torch.nn.ModuleList([self.nett() for _ in range(len(self.mask))])
2041
+ self.init_weights()
2042
+
2043
+ def init_weights(self):
2044
+ """Initialize the weights of all linear layers."""
2045
+ for m in self.modules():
2046
+ if isinstance(m, nn.Linear):
2047
+ nn.init.xavier_uniform_(m.weight, gain=0.01)
2048
+
2049
+ def backward_p(self, x):
2050
+ """Apply mapping from the data space to the latent space and calculate the log determinant of the Jacobian
2051
+ matrix.
2052
+ """
2053
+ log_det_jacob, z = x.new_zeros(x.shape[0]), x
2054
+ for i in reversed(range(len(self.t))):
2055
+ z_ = self.mask[i] * z
2056
+ s = self.s[i](z_) * (1 - self.mask[i])
2057
+ t = self.t[i](z_) * (1 - self.mask[i])
2058
+ z = (1 - self.mask[i]) * (z - t) * torch.exp(-s) + z_
2059
+ log_det_jacob -= s.sum(dim=1)
2060
+ return z, log_det_jacob
2061
+
2062
+ def log_prob(self, x):
2063
+ """Calculate the log probability of given sample in data space."""
2064
+ if x.dtype == torch.float32 and self.s[0][0].weight.dtype != torch.float32:
2065
+ self.float()
2066
+ z, log_det = self.backward_p(x)
2067
+ return self.prior.log_prob(z) + log_det