ultralytics-opencv-headless 8.4.4__py3-none-any.whl → 8.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Files changed (41)
  1. tests/test_cli.py +10 -3
  2. tests/test_exports.py +64 -43
  3. tests/test_python.py +40 -11
  4. ultralytics/__init__.py +1 -1
  5. ultralytics/cfg/__init__.py +5 -4
  6. ultralytics/cfg/datasets/coco12-formats.yaml +101 -0
  7. ultralytics/cfg/default.yaml +2 -1
  8. ultralytics/data/augment.py +8 -0
  9. ultralytics/data/converter.py +32 -9
  10. ultralytics/data/utils.py +2 -2
  11. ultralytics/engine/exporter.py +10 -6
  12. ultralytics/engine/predictor.py +5 -0
  13. ultralytics/engine/results.py +8 -3
  14. ultralytics/engine/trainer.py +6 -4
  15. ultralytics/engine/tuner.py +2 -2
  16. ultralytics/engine/validator.py +5 -0
  17. ultralytics/models/sam/predict.py +2 -2
  18. ultralytics/models/yolo/classify/train.py +14 -1
  19. ultralytics/models/yolo/detect/train.py +8 -4
  20. ultralytics/models/yolo/pose/train.py +2 -1
  21. ultralytics/models/yolo/world/train_world.py +21 -1
  22. ultralytics/models/yolo/yoloe/train.py +1 -2
  23. ultralytics/nn/autobackend.py +1 -1
  24. ultralytics/nn/modules/head.py +13 -2
  25. ultralytics/nn/tasks.py +18 -0
  26. ultralytics/solutions/security_alarm.py +1 -1
  27. ultralytics/trackers/byte_tracker.py +7 -7
  28. ultralytics/utils/benchmarks.py +3 -9
  29. ultralytics/utils/callbacks/platform.py +2 -1
  30. ultralytics/utils/callbacks/wb.py +6 -1
  31. ultralytics/utils/dist.py +1 -0
  32. ultralytics/utils/loss.py +18 -9
  33. ultralytics/utils/patches.py +42 -0
  34. ultralytics/utils/tal.py +15 -5
  35. ultralytics/utils/torch_utils.py +1 -1
  36. {ultralytics_opencv_headless-8.4.4.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/METADATA +4 -5
  37. {ultralytics_opencv_headless-8.4.4.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/RECORD +41 -40
  38. {ultralytics_opencv_headless-8.4.4.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/WHEEL +1 -1
  39. {ultralytics_opencv_headless-8.4.4.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/entry_points.txt +0 -0
  40. {ultralytics_opencv_headless-8.4.4.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/licenses/LICENSE +0 -0
  41. {ultralytics_opencv_headless-8.4.4.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/top_level.txt +0 -0
ultralytics/engine/results.py CHANGED
@@ -803,12 +803,17 @@ class Results(SimpleClass, DataExportMixin):
                     "y": (self.masks.xy[i][:, 1] / h).round(decimals).tolist(),
                 }
             if self.keypoints is not None:
-                x, y, visible = self.keypoints[i].data[0].cpu().unbind(dim=1)  # torch Tensor
+                kpt = self.keypoints[i]
+                if kpt.has_visible:
+                    x, y, visible = kpt.data[0].cpu().unbind(dim=1)
+                else:
+                    x, y = kpt.data[0].cpu().unbind(dim=1)
                 result["keypoints"] = {
-                    "x": (x / w).numpy().round(decimals).tolist(),  # decimals named argument required
+                    "x": (x / w).numpy().round(decimals).tolist(),
                     "y": (y / h).numpy().round(decimals).tolist(),
-                    "visible": visible.numpy().round(decimals).tolist(),
                 }
+                if kpt.has_visible:
+                    result["keypoints"]["visible"] = visible.numpy().round(decimals).tolist()
             results.append(result)

         return results
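
With this change, "visible" is emitted only when the keypoints actually carry visibility scores, so consumers of summary() should treat the key as optional. A minimal consumption sketch (the model path and image are placeholders, assuming a pose model loaded through the public YOLO API):

    from ultralytics import YOLO

    model = YOLO("yolo11n-pose.pt")  # placeholder pose checkpoint
    r = model.predict("bus.jpg")[0]
    for obj in r.summary(normalize=True, decimals=3):
        kpts = obj.get("keypoints")
        if kpts:
            visible = kpts.get("visible")  # may be absent after this change
            print(len(kpts["x"]), "keypoints,", "with visibility" if visible else "no visibility scores")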
ultralytics/engine/trainer.py CHANGED
@@ -948,7 +948,7 @@ class BaseTrainer:
             )
             nc = self.data.get("nc", 10)  # number of classes
             lr_fit = round(0.002 * 5 / (4 + nc), 6)  # lr0 fit equation to 6 decimal places
-            name, lr, momentum = ("MuSGD", 0.01 if iterations > 10000 else lr_fit, 0.9)
+            name, lr, momentum = ("MuSGD", 0.01, 0.9) if iterations > 10000 else ("AdamW", lr_fit, 0.9)
             self.args.warmup_bias_lr = 0.0  # no higher than 0.01 for Adam

         use_muon = name == "MuSGD"
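
Note the behavioral change: the old line always picked MuSGD and only varied the learning rate, while the new line falls back to AdamW with the fitted lr for short runs. A standalone restatement of the rule (the helper function is illustrative, not part of the package):

    def pick_optimizer(iterations: int, nc: int = 10) -> tuple[str, float, float]:
        """(name, lr0, momentum) per the auto-optimizer rule above."""
        lr_fit = round(0.002 * 5 / (4 + nc), 6)  # nc=10 -> round(0.01 / 14, 6) = 0.000714
        return ("MuSGD", 0.01, 0.9) if iterations > 10000 else ("AdamW", lr_fit, 0.9)

    print(pick_optimizer(20000))  # ('MuSGD', 0.01, 0.9)
    print(pick_optimizer(500))  # ('AdamW', 0.000714, 0.9)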
@@ -981,16 +981,18 @@ class BaseTrainer:
                 "Request support for addition optimizers at https://github.com/ultralytics/ultralytics."
             )

+        num_params = [len(g[0]), len(g[1]), len(g[2])]  # number of param groups
         g[2] = {"params": g[2], **optim_args, "param_group": "bias"}
         g[0] = {"params": g[0], **optim_args, "weight_decay": decay, "param_group": "weight"}
         g[1] = {"params": g[1], **optim_args, "weight_decay": 0.0, "param_group": "bn"}
-        muon, sgd = (0.1, 1.0) if iterations > 10000 else (0.5, 0.5)  # scale factor for MuSGD
+        muon, sgd = (0.2, 1.0)
         if use_muon:
+            num_params[0] = len(g[3])  # update number of params
             g[3] = {"params": g[3], **optim_args, "weight_decay": decay, "use_muon": True, "param_group": "muon"}
             import re

             # higher lr for certain parameters in MuSGD when funetuning
-            pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg|flow_model")
+            pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg")
             g_ = []  # new param groups
             for x in g:
                 p = x.pop("params")
@@ -1002,6 +1004,6 @@ class BaseTrainer:

         LOGGER.info(
             f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}, momentum={momentum}) with parameter groups "
-            f"{len(g[1]['params'])} weight(decay=0.0), {len(g[0]['params']) if len(g[0]) else len(g[3]['params'])} weight(decay={decay}), {len(g[2]['params'])} bias(decay=0.0)"
+            f"{num_params[1]} weight(decay=0.0), {num_params[0]} weight(decay={decay}), {num_params[2]} bias(decay=0.0)"
         )
         return optimizer
ultralytics/engine/tuner.py CHANGED
@@ -26,7 +26,7 @@ from datetime import datetime
 import numpy as np
 import torch

-from ultralytics.cfg import get_cfg, get_save_dir
+from ultralytics.cfg import CFG_INT_KEYS, get_cfg, get_save_dir
 from ultralytics.utils import DEFAULT_CFG, LOGGER, YAML, callbacks, colorstr, remove_colorstr
 from ultralytics.utils.checks import check_requirements
 from ultralytics.utils.patches import torch_load
@@ -448,7 +448,7 @@ class Tuner:
                 f"{self.prefix}Best fitness model is {best_save_dir}"
             )
             LOGGER.info("\n" + header)
-            data = {k: float(x[best_idx, i + 1]) for i, k in enumerate(self.space.keys())}
+            data = {k: int(v) if k in CFG_INT_KEYS else float(v) for k, v in zip(self.space.keys(), x[best_idx, 1:])}
             YAML.save(
                 self.tune_dir / "best_hyperparameters.yaml",
                 data=data,
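
Casting with CFG_INT_KEYS keeps integer hyperparameters from being written to the best-hyperparameters YAML as floats. A toy restatement with a hypothetical key subset:

    CFG_INT_KEYS = {"close_mosaic", "max_det"}  # illustrative subset, not the real constant
    space_keys = ["lr0", "close_mosaic"]
    best_row = [0.95, 0.00125, 7.8]  # column 0 is fitness, columns 1.. are hyperparameters

    data = {k: int(v) if k in CFG_INT_KEYS else float(v) for k, v in zip(space_keys, best_row[1:])}
    print(data)  # {'lr0': 0.00125, 'close_mosaic': 7}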
ultralytics/engine/validator.py CHANGED
@@ -156,6 +156,11 @@ class BaseValidator:
         if str(self.args.model).endswith(".yaml") and model is None:
             LOGGER.warning("validating an untrained model YAML will result in 0 mAP.")
         callbacks.add_integration_callbacks(self)
+        if hasattr(model, "end2end"):
+            if self.args.end2end is not None:
+                model.end2end = self.args.end2end
+            if model.end2end:
+                model.set_head_attr(max_det=self.args.max_det, agnostic_nms=self.args.agnostic_nms)
         model = AutoBackend(
             model=model or self.args.model,
             device=select_device(self.args.device) if RANK == -1 else torch.device("cuda", RANK),
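
This wires the end2end, max_det and agnostic_nms validation arguments through to the detection head before AutoBackend wraps the model. A hedged usage sketch (assumes an NMS-free checkpoint; the model name is a placeholder):

    from ultralytics import YOLO

    model = YOLO("yolo26n.pt")  # placeholder end-to-end (NMS-free) checkpoint
    metrics = model.val(data="coco8.yaml", end2end=True, max_det=100, agnostic_nms=True)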
ultralytics/models/sam/predict.py CHANGED
@@ -2619,6 +2619,7 @@ class SAM3VideoSemanticPredictor(SAM3SemanticPredictor):
         if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
             orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

+        names = []
         if len(curr_obj_ids) == 0:
             pred_masks, pred_boxes = None, torch.zeros((0, 7), device=self.device)
         else:
@@ -2656,9 +2657,8 @@ class SAM3VideoSemanticPredictor(SAM3SemanticPredictor):
                     background_value=0,
                 ).squeeze(1)
             ) > 0
+            names = self.model.names or dict(enumerate(str(i) for i in range(pred_boxes[:, 6].int().max())))

-        # names = getattr(self.model, "names", [str(i) for i in range(pred_scores.shape[0])])
-        names = dict(enumerate(str(i) for i in range(pred_boxes.shape[0])))
         results = []
         for masks, boxes, orig_img, img_path in zip([pred_masks], [pred_boxes], orig_imgs, self.batch[0]):
             results.append(Results(orig_img, path=img_path, names=names, masks=masks, boxes=boxes))
ultralytics/models/yolo/classify/train.py CHANGED
@@ -11,7 +11,7 @@ from ultralytics.data import ClassificationDataset, build_dataloader
 from ultralytics.engine.trainer import BaseTrainer
 from ultralytics.models import yolo
 from ultralytics.nn.tasks import ClassificationModel
-from ultralytics.utils import DEFAULT_CFG, RANK
+from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
 from ultralytics.utils.plotting import plot_images
 from ultralytics.utils.torch_utils import is_parallel, torch_distributed_zero_first
 
@@ -138,6 +138,19 @@ class ClassificationTrainer(BaseTrainer):
         with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
             dataset = self.build_dataset(dataset_path, mode)

+        # Filter out samples with class indices >= nc (prevents CUDA assertion errors)
+        nc = self.data.get("nc", 0)
+        dataset_nc = len(dataset.base.classes)
+        if nc and dataset_nc > nc:
+            extra_classes = dataset.base.classes[nc:]
+            original_count = len(dataset.samples)
+            dataset.samples = [s for s in dataset.samples if s[1] < nc]
+            skipped = original_count - len(dataset.samples)
+            LOGGER.warning(
+                f"{mode} split has {dataset_nc} classes but model expects {nc}. "
+                f"Skipping {skipped} samples from extra classes: {extra_classes}"
+            )
+
         loader = build_dataloader(dataset, batch_size, self.args.workers, rank=rank, drop_last=self.args.compile)
         # Attach inference transforms
         if mode != "train":
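
An out-of-range class index reaches the loss as an invalid target and trips a device-side CUDA assertion, so the guard drops such samples up front. A toy restatement of the filter:

    # (path, class_index) pairs as produced by an ImageFolder-style dataset (illustrative)
    samples = [("a.jpg", 0), ("b.jpg", 1), ("c.jpg", 2), ("d.jpg", 2)]
    nc = 2  # model expects classes 0..1

    kept = [s for s in samples if s[1] < nc]
    print(f"skipped {len(samples) - len(kept)} samples")  # skipped 2 samples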
ultralytics/models/yolo/detect/train.py CHANGED
@@ -92,7 +92,7 @@ class DetectionTrainer(BaseTrainer):
         with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
             dataset = self.build_dataset(dataset_path, mode, batch_size)
         shuffle = mode == "train"
-        if getattr(dataset, "rect", False) and shuffle:
+        if getattr(dataset, "rect", False) and shuffle and not np.all(dataset.batch_shapes == dataset.batch_shapes[0]):
             LOGGER.warning("'rect=True' is incompatible with DataLoader shuffle, setting shuffle=False")
             shuffle = False
         return build_dataloader(
@@ -117,11 +117,13 @@ class DetectionTrainer(BaseTrainer):
             if isinstance(v, torch.Tensor):
                 batch[k] = v.to(self.device, non_blocking=self.device.type == "cuda")
         batch["img"] = batch["img"].float() / 255
-        multi_scale = self.args.multi_scale
-        if random.random() < multi_scale:
+        if self.args.multi_scale > 0.0:
             imgs = batch["img"]
             sz = (
-                random.randrange(int(self.args.imgsz * 0.5), int(self.args.imgsz * 1 + self.stride))
+                random.randrange(
+                    int(self.args.imgsz * (1.0 - self.args.multi_scale)),
+                    int(self.args.imgsz * (1.0 + self.args.multi_scale) + self.stride),
+                )
                 // self.stride
                 * self.stride
             )  # size
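
multi_scale now defines a symmetric scale range around imgsz instead of acting as a probability. A worked sketch of the size computation (imgsz=640, multi_scale=0.5, stride=32 are example values):

    import random

    imgsz, multi_scale, stride = 640, 0.5, 32
    lo = int(imgsz * (1.0 - multi_scale))  # 320
    hi = int(imgsz * (1.0 + multi_scale) + stride)  # 992
    sz = random.randrange(lo, hi) // stride * stride  # stride-aligned size in [320, 960]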
@@ -143,6 +145,8 @@
         self.model.nc = self.data["nc"]  # attach number of classes to model
         self.model.names = self.data["names"]  # attach class names to model
         self.model.args = self.args  # attach hyperparameters to model
+        if getattr(self.model, "end2end"):
+            self.model.set_head_attr(max_det=self.args.max_det)
         # TODO: self.model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc

     def get_model(self, cfg: str | None = None, weights: str | None = None, verbose: bool = True):
ultralytics/models/yolo/pose/train.py CHANGED
@@ -9,6 +9,7 @@ from typing import Any
 from ultralytics.models import yolo
 from ultralytics.nn.tasks import PoseModel
 from ultralytics.utils import DEFAULT_CFG
+from ultralytics.utils.torch_utils import unwrap_model


 class PoseTrainer(yolo.detect.DetectionTrainer):
@@ -91,7 +92,7 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
     def get_validator(self):
         """Return an instance of the PoseValidator class for validation."""
         self.loss_names = "box_loss", "pose_loss", "kobj_loss", "cls_loss", "dfl_loss"
-        if getattr(self.model.model[-1], "flow_model", None) is not None:
+        if getattr(unwrap_model(self.model).model[-1], "flow_model", None) is not None:
             self.loss_names += ("rle_loss",)
         return yolo.pose.PoseValidator(
             self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
ultralytics/models/yolo/world/train_world.py CHANGED
@@ -1,11 +1,14 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

+from __future__ import annotations
+
 from pathlib import Path

 from ultralytics.data import YOLOConcatDataset, build_grounding, build_yolo_dataset
 from ultralytics.data.utils import check_det_dataset
 from ultralytics.models.yolo.world import WorldTrainer
 from ultralytics.utils import DATASETS_DIR, DEFAULT_CFG, LOGGER
+from ultralytics.utils.checks import check_file
 from ultralytics.utils.torch_utils import unwrap_model


@@ -100,6 +103,23 @@ class WorldTrainerFromScratch(WorldTrainer):
         self.set_text_embeddings(datasets, batch)  # cache text embeddings to accelerate training
         return YOLOConcatDataset(datasets) if len(datasets) > 1 else datasets[0]

+    @staticmethod
+    def check_data_config(data: dict | str | Path) -> dict:
+        """Check and load the data configuration from a YAML file or dictionary.
+
+        Args:
+            data (dict | str | Path): Data configuration as a dictionary or path to a YAML file.
+
+        Returns:
+            (dict): Data configuration dictionary loaded from YAML file or passed directly.
+        """
+        # If string, load from YAML file
+        if not isinstance(data, dict):
+            from ultralytics.utils import YAML
+
+            return YAML.load(check_file(data))
+        return data
+
     def get_dataset(self):
         """Get train and validation paths from data dictionary.

@@ -114,7 +134,7 @@
         AssertionError: If train or validation datasets are not found, or if validation has multiple datasets.
         """
         final_data = {}
-        data_yaml = self.args.data
+        self.args.data = data_yaml = self.check_data_config(self.args.data)
         assert data_yaml.get("train", False), "train dataset not found"  # object365.yaml
         assert data_yaml.get("val", False), "validation dataset not found"  # lvis.yaml
         data = {k: [check_det_dataset(d) for d in v.get("yolo_data", [])] for k, v in data_yaml.items()}
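
The new static method lets get_dataset accept either an in-memory dict or a YAML path. A usage sketch (the YAML filename is a placeholder):

    from ultralytics.models.yolo.world.train_world import WorldTrainerFromScratch

    cfg = WorldTrainerFromScratch.check_data_config("my_world_data.yaml")  # placeholder path
    cfg = WorldTrainerFromScratch.check_data_config(cfg)  # dicts pass through unchanged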
ultralytics/models/yolo/yoloe/train.py CHANGED
@@ -196,7 +196,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
         Returns:
             (dict): Dictionary mapping text samples to their embeddings.
         """
-        model = "mobileclip:blt"
+        model = unwrap_model(self.model).text_model
         cache_path = cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"
         if cache_path.exists():
             LOGGER.info(f"Reading existed cache from '{cache_path}'")
@@ -204,7 +204,6 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
             if sorted(txt_map.keys()) == sorted(texts):
                 return txt_map
         LOGGER.info(f"Caching text embeddings to '{cache_path}'")
-        assert self.model is not None
         txt_feats = unwrap_model(self.model).get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
         txt_map = dict(zip(texts, txt_feats.squeeze(0)))
         torch.save(txt_map, cache_path)
ultralytics/nn/autobackend.py CHANGED
@@ -648,7 +648,7 @@ class AutoBackend(nn.Module):
         for k, v in metadata.items():
             if k in {"stride", "batch", "channels"}:
                 metadata[k] = int(v)
-            elif k in {"imgsz", "names", "kpt_shape", "kpt_names", "args"} and isinstance(v, str):
+            elif k in {"imgsz", "names", "kpt_shape", "kpt_names", "args", "end2end"} and isinstance(v, str):
                 metadata[k] = ast.literal_eval(v)
         stride = metadata["stride"]
         task = metadata["task"]
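
Exported formats serialize metadata values as strings, so the new "end2end" key must be parsed back into a Python literal like the others. A small stdlib sketch with an illustrative metadata dict:

    import ast

    metadata = {"stride": "32", "imgsz": "[640, 640]", "end2end": "True"}  # as read from an export
    metadata["stride"] = int(metadata["stride"])
    for k in ("imgsz", "end2end"):
        metadata[k] = ast.literal_eval(metadata[k])
    print(metadata)  # {'stride': 32, 'imgsz': [640, 640], 'end2end': True}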
ultralytics/nn/modules/head.py CHANGED
@@ -69,6 +69,7 @@ class Detect(nn.Module):
     export = False  # export mode
     format = None  # export format
     max_det = 300  # max_det
+    agnostic_nms = False
     shape = None
     anchors = torch.empty(0)  # init
     strides = torch.empty(0)  # init
@@ -125,7 +126,12 @@
     @property
     def end2end(self):
         """Checks if the model has one2one for v5/v8/v9/11 backward compatibility."""
-        return hasattr(self, "one2one")
+        return getattr(self, "_end2end", True) and hasattr(self, "one2one")
+
+    @end2end.setter
+    def end2end(self, value):
+        """Override the end-to-end detection mode."""
+        self._end2end = value

     def forward_head(
         self, x: list[torch.Tensor], box_head: torch.nn.Module = None, cls_head: torch.nn.Module = None
@@ -230,6 +236,11 @@
         # Use max_det directly during export for TensorRT compatibility (requires k to be constant),
         # otherwise use min(max_det, anchors) for safety with small inputs during Python inference
         k = max_det if self.export else min(max_det, anchors)
+        if self.agnostic_nms:
+            scores, labels = scores.max(dim=-1, keepdim=True)
+            scores, indices = scores.topk(k, dim=1)
+            labels = labels.gather(1, indices)
+            return scores, labels, indices
         ori_index = scores.max(dim=-1)[0].topk(k)[1].unsqueeze(-1)
         scores = scores.gather(dim=1, index=ori_index.repeat(1, 1, nc))
         scores, index = scores.flatten(1).topk(k)
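
The agnostic branch keeps the top-k anchors ranked by their best class score regardless of class, which is what class-agnostic selection means in an NMS-free head. A self-contained torch sketch with random scores:

    import torch

    scores = torch.rand(2, 100, 80)  # (batch, anchors, classes)
    k = 10
    best, labels = scores.max(dim=-1, keepdim=True)  # best class score and id per anchor
    topk, idx = best.topk(k, dim=1)  # keep the k highest-scoring anchors overall
    labels = labels.gather(1, idx)  # class ids of the kept anchors
    print(topk.shape, labels.shape)  # torch.Size([2, 10, 1]) torch.Size([2, 10, 1])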
@@ -1098,7 +1109,7 @@ class YOLOEDetect(Detect):
         boxes, scores, index = [], [], []
         bs = x[0].shape[0]
         cv2 = self.cv2 if not self.end2end else self.one2one_cv2
-        cv3 = self.cv3 if not self.end2end else self.one2one_cv2
+        cv3 = self.cv3 if not self.end2end else self.one2one_cv3
         for i in range(self.nl):
             cls_feat = cv3[i](x[i])
             loc_feat = cv2[i](x[i])
ultralytics/nn/tasks.py CHANGED
@@ -425,6 +425,24 @@ class DetectionModel(BaseModel):
         """Return whether the model uses end-to-end NMS-free detection."""
         return getattr(self.model[-1], "end2end", False)

+    @end2end.setter
+    def end2end(self, value):
+        """Override the end-to-end detection mode."""
+        self.set_head_attr(end2end=value)
+
+    def set_head_attr(self, **kwargs):
+        """Set attributes of the model head (last layer).
+
+        Args:
+            **kwargs: Arbitrary keyword arguments representing attributes to set.
+        """
+        head = self.model[-1]
+        for k, v in kwargs.items():
+            if not hasattr(head, k):
+                LOGGER.warning(f"Head has no attribute '{k}'.")
+                continue
+            setattr(head, k, v)
+
     def _predict_augment(self, x):
         """Perform augmentations on input image x and return augmented inference and train outputs.
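
Together with the Detect.end2end setter above, this gives one entry point for pushing head attributes from trainer/validator code. A hedged usage sketch (the checkpoint name is a placeholder):

    from ultralytics import YOLO

    det_model = YOLO("yolo26n.pt").model  # placeholder checkpoint; det_model is a DetectionModel
    det_model.end2end = False  # routed through set_head_attr(end2end=False)
    det_model.set_head_attr(max_det=100, agnostic_nms=True)
    det_model.set_head_attr(bogus=1)  # only warns: "Head has no attribute 'bogus'."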
 
ultralytics/solutions/security_alarm.py CHANGED
@@ -62,7 +62,7 @@ class SecurityAlarm(BaseSolution):
         """
         import smtplib

-        self.server = smtplib.SMTP("smtp.gmail.com: 587")
+        self.server = smtplib.SMTP("smtp.gmail.com", 587)
         self.server.starttls()
         self.server.login(from_email, password)
         self.to_email = to_email
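
smtplib does accept a single "host:port" string, but the embedded space in "smtp.gmail.com: 587" relies on lenient port parsing; passing host and port as separate arguments is the documented, unambiguous form. The corrected stdlib pattern (credentials are placeholders):

    import smtplib

    server = smtplib.SMTP("smtp.gmail.com", 587)  # host and port as separate parameters
    server.starttls()  # upgrade the connection to TLS before authenticating
    server.login("sender@example.com", "app-password")  # placeholder credentials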
ultralytics/trackers/byte_tracker.py CHANGED
@@ -270,9 +270,9 @@ class BYTETracker:
             args (Namespace): Command-line arguments containing tracking parameters.
             frame_rate (int): Frame rate of the video sequence.
         """
-        self.tracked_stracks = []  # type: list[STrack]
-        self.lost_stracks = []  # type: list[STrack]
-        self.removed_stracks = []  # type: list[STrack]
+        self.tracked_stracks: list[STrack] = []
+        self.lost_stracks: list[STrack] = []
+        self.removed_stracks: list[STrack] = []

         self.frame_id = 0
         self.args = args
@@ -304,7 +304,7 @@ class BYTETracker:
         detections = self.init_track(results, feats_keep)
         # Add newly detected tracklets to tracked_stracks
         unconfirmed = []
-        tracked_stracks = []  # type: list[STrack]
+        tracked_stracks: list[STrack] = []
         for track in self.tracked_stracks:
             if not track.is_activated:
                 unconfirmed.append(track)
@@ -423,9 +423,9 @@ class BYTETracker:

     def reset(self):
         """Reset the tracker by clearing all tracked, lost, and removed tracks and reinitializing the Kalman filter."""
-        self.tracked_stracks = []  # type: list[STrack]
-        self.lost_stracks = []  # type: list[STrack]
-        self.removed_stracks = []  # type: list[STrack]
+        self.tracked_stracks: list[STrack] = []
+        self.lost_stracks: list[STrack] = []
+        self.removed_stracks: list[STrack] = []
         self.frame_id = 0
         self.kalman_filter = self.get_kalmanfilter()
         self.reset_id()
ultralytics/utils/benchmarks.py CHANGED
@@ -36,6 +36,7 @@ import platform
 import re
 import shutil
 import time
+from copy import deepcopy
 from pathlib import Path

 import numpy as np
@@ -101,7 +102,6 @@ def benchmark(
     device = select_device(device, verbose=False)
     if isinstance(model, (str, Path)):
         model = YOLO(model)
-    is_end2end = getattr(model.model.model[-1], "end2end", False)
     data = data or TASK2DATA[model.task]  # task to dataset, i.e. coco8.yaml for task=detect
     key = TASK2METRIC[model.task]  # task to metric, i.e. metrics/mAP50-95(B) for task=detect

@@ -135,14 +135,12 @@ def benchmark(
            if format == "paddle":
                assert not isinstance(model, YOLOWorld), "YOLOWorldv2 Paddle exports not supported yet"
                assert model.task != "obb", "Paddle OBB bug https://github.com/PaddlePaddle/Paddle/issues/72024"
-                assert not is_end2end, "End-to-end models not supported by PaddlePaddle yet"
                assert (LINUX and not IS_JETSON) or MACOS, "Windows and Jetson Paddle exports not supported yet"
            if format == "mnn":
                assert not isinstance(model, YOLOWorld), "YOLOWorldv2 MNN exports not supported yet"
            if format == "ncnn":
                assert not isinstance(model, YOLOWorld), "YOLOWorldv2 NCNN exports not supported yet"
            if format == "imx":
-                assert not is_end2end
                assert not isinstance(model, YOLOWorld), "YOLOWorldv2 IMX exports not supported"
                assert model.task in {"detect", "classify", "pose"}, (
                    "IMX export is only supported for detection, classification and pose estimation tasks"
@@ -150,25 +148,21 @@ def benchmark(
                assert "C2f" in model.__str__(), "IMX only supported for YOLOv8n and YOLO11n"
            if format == "rknn":
                assert not isinstance(model, YOLOWorld), "YOLOWorldv2 RKNN exports not supported yet"
-                assert not is_end2end, "End-to-end models not supported by RKNN yet"
                assert LINUX, "RKNN only supported on Linux"
                assert not is_rockchip(), "RKNN Inference only supported on Rockchip devices"
            if format == "executorch":
                assert not isinstance(model, YOLOWorld), "YOLOWorldv2 ExecuTorch exports not supported yet"
-                assert not is_end2end, "End-to-end models not supported by ExecuTorch yet"
            if "cpu" in device.type:
                assert cpu, "inference not supported on CPU"
            if "cuda" in device.type:
                assert gpu, "inference not supported on GPU"
-            if format == "ncnn":
-                assert not is_end2end, "End-to-end torch.topk operation is not supported for NCNN prediction yet"

            # Export
            if format == "-":
                filename = model.pt_path or model.ckpt_path or model.model_name
-                exported_model = model  # PyTorch format
+                exported_model = deepcopy(model)  # PyTorch format
            else:
-                filename = model.export(
+                filename = deepcopy(model).export(
                    imgsz=imgsz, format=format, half=half, int8=int8, data=data, device=device, verbose=False, **kwargs
                )
                exported_model = YOLO(filename, task=model.task)
ultralytics/utils/callbacks/platform.py CHANGED
@@ -89,7 +89,8 @@ def resolve_platform_uri(uri, hard=True):
         raise ValueError(f"Invalid platform URI: {uri}. Use ul://user/datasets/name or ul://user/project/model")

     try:
-        r = requests.head(url, headers=headers, allow_redirects=False, timeout=30)
+        timeout = 3600 if "/datasets/" in url else 90  # NDJSON generation can be slow for large datasets
+        r = requests.head(url, headers=headers, allow_redirects=False, timeout=timeout)

         # Handle redirect responses (301, 302, 303, 307, 308)
         if 300 <= r.status_code < 400 and "location" in r.headers:
ultralytics/utils/callbacks/wb.py CHANGED
@@ -128,10 +128,15 @@ def _log_plots(plots, step):
 def on_pretrain_routine_start(trainer):
     """Initialize and start wandb project if module is present."""
     if not wb.run:
+        from datetime import datetime
+
+        name = str(trainer.args.name).replace("/", "-").replace(" ", "_")
         wb.init(
             project=str(trainer.args.project).replace("/", "-") if trainer.args.project else "Ultralytics",
-            name=str(trainer.args.name).replace("/", "-"),
+            name=name,
             config=vars(trainer.args),
+            id=f"{name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}",  # add unique id
+            dir=str(trainer.save_dir),
         )

 
ultralytics/utils/dist.py CHANGED
@@ -49,6 +49,7 @@ def generate_ddp_file(trainer):

     content = f"""
# Ultralytics Multi-GPU training temp file (should be automatically deleted after use)
+from pathlib import Path, PosixPath  # For model arguments stored as Path instead of str
overrides = {vars(trainer.args)}

if __name__ == "__main__":
ultralytics/utils/loss.py CHANGED
@@ -512,9 +512,19 @@ class v8SegmentationLoss(v8DetectionLoss):
             )
             if pred_semseg is not None:
                 sem_masks = batch["sem_masks"].to(self.device)  # NxHxW
-                mask_zero = sem_masks == 0  # NxHxW
                 sem_masks = F.one_hot(sem_masks.long(), num_classes=self.nc).permute(0, 3, 1, 2).float()  # NxCxHxW
-                sem_masks[mask_zero.unsqueeze(1).expand_as(sem_masks)] = 0
+
+                if self.overlap:
+                    mask_zero = masks == 0  # NxHxW
+                    sem_masks[mask_zero.unsqueeze(1).expand_as(sem_masks)] = 0
+                else:
+                    batch_idx = batch["batch_idx"].view(-1)  # [total_instances]
+                    for i in range(batch_size):
+                        instance_mask_i = masks[batch_idx == i]  # [num_instances_i, H, W]
+                        if len(instance_mask_i) == 0:
+                            continue
+                        sem_masks[i, :, instance_mask_i.sum(dim=0) == 0] = 0
+
                 loss[4] = self.bcedice_loss(pred_semseg, sem_masks)
                 loss[4] *= self.hyp.box  # seg gain
 
@@ -798,7 +808,7 @@ class PoseLoss26(v8PoseLoss):
         loss[0], loss[3], loss[4] = det_loss[0], det_loss[1], det_loss[2]

         batch_size = pred_kpts.shape[0]
-        imgsz = torch.tensor(batch["resized_shape"][0], device=self.device, dtype=pred_kpts.dtype)  # image size (h,w)
+        imgsz = torch.tensor(preds["feats"][0].shape[2:], device=self.device, dtype=pred_kpts.dtype) * self.stride[0]

         pred_kpts = pred_kpts.view(batch_size, -1, *self.kpt_shape)  # (b, h*w, 17, 3)
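Deriving the target image size from the feature map shape times the stride keeps the loss consistent with what the head actually predicted, even when the batch was resized after loading (e.g. multi-scale training). A worked sketch with example shapes:

    import torch

    feats0 = torch.zeros(8, 64, 80, 80)  # (b, c, h, w) of the stride-8 feature level
    imgsz = torch.tensor(feats0.shape[2:], dtype=torch.float32) * 8.0
    print(imgsz)  # tensor([640., 640.]) -> the (h, w) the predictions are scaled against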
 
@@ -992,7 +1002,7 @@ class v8OBBLoss(v8DetectionLoss):
         batch_size = pred_angle.shape[0]  # batch size, number of masks, mask height, mask width

         dtype = pred_scores.dtype
-        imgsz = torch.tensor(batch["resized_shape"][0], device=self.device, dtype=dtype)  # image size (h,w)
+        imgsz = torch.tensor(preds["feats"][0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]

         # targets
         try:
@@ -1095,7 +1105,7 @@ class v8OBBLoss(v8DetectionLoss):
         pred_theta = pred_bboxes[..., 4]
         target_theta = target_bboxes[..., 4]

-        log_ar = torch.log(w_gt / h_gt)
+        log_ar = torch.log((w_gt + 1e-9) / (h_gt + 1e-9))
         scale_weight = torch.exp(-(log_ar**2) / (lambda_val**2))

         delta_theta = pred_theta - target_theta
@@ -1164,9 +1174,9 @@ class E2ELoss:
 class TVPDetectLoss:
     """Criterion class for computing training losses for text-visual prompt detection."""

-    def __init__(self, model, tal_topk=10):
+    def __init__(self, model, tal_topk=10, tal_topk2: int | None = None):
         """Initialize TVPDetectLoss with task-prompt and visual-prompt criteria using the provided model."""
-        self.vp_criterion = v8DetectionLoss(model, tal_topk)
+        self.vp_criterion = v8DetectionLoss(model, tal_topk, tal_topk2)
         # NOTE: store following info as it's changeable in __call__
         self.hyp = self.vp_criterion.hyp
         self.ori_nc = self.vp_criterion.nc
@@ -1196,8 +1206,7 @@ class TVPDetectLoss:

     def _get_vp_features(self, preds: dict[str, torch.Tensor]) -> list[torch.Tensor]:
         """Extract visual-prompt features from the model output."""
-        # NOTE: remove empty placeholder
-        scores = preds["scores"][:, self.ori_nc :, :]
+        scores = preds["scores"]
         vnc = scores.shape[1]

         self.vp_criterion.nc = vnc
ultralytics/utils/patches.py CHANGED
@@ -40,9 +40,51 @@ def imread(filename: str, flags: int = cv2.IMREAD_COLOR) -> np.ndarray | None:
             return None
     else:
         im = cv2.imdecode(file_bytes, flags)
+        # Fallback for formats OpenCV imdecode may not support (AVIF, HEIC)
+        if im is None and filename.lower().endswith((".avif", ".heic")):
+            im = _imread_pil(filename, flags)
     return im[..., None] if im is not None and im.ndim == 2 else im  # Always ensure 3 dimensions


+_pil_plugins_registered = False
+
+
+def _imread_pil(filename: str, flags: int = cv2.IMREAD_COLOR) -> np.ndarray | None:
+    """Read an image using PIL as fallback for formats not supported by OpenCV.
+
+    Args:
+        filename (str): Path to the file to read.
+        flags (int, optional): OpenCV imread flags (used to determine grayscale conversion).
+
+    Returns:
+        (np.ndarray | None): The read image array in BGR format, or None if reading fails.
+    """
+    global _pil_plugins_registered
+    try:
+        from PIL import Image
+
+        # Register HEIF/AVIF plugins once
+        if not _pil_plugins_registered:
+            try:
+                import pillow_heif
+
+                pillow_heif.register_heif_opener()
+            except ImportError:
+                pass
+            try:
+                import pillow_avif  # noqa: F401
+            except ImportError:
+                pass
+            _pil_plugins_registered = True
+
+        with Image.open(filename) as img:
+            if flags == cv2.IMREAD_GRAYSCALE:
+                return np.asarray(img.convert("L"))
+            return cv2.cvtColor(np.asarray(img.convert("RGB")), cv2.COLOR_RGB2BGR)
+    except Exception:
+        return None
+
+
 def imwrite(filename: str, img: np.ndarray, params: list[int] | None = None) -> bool:
     """Write an image to a file with multilanguage filename support.
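
With the fallback in place, AVIF/HEIC files load through the patched imread whenever a PIL plugin (pillow-heif or pillow-avif-plugin) is installed. A hedged usage sketch (the file path is a placeholder):

    import cv2

    from ultralytics.utils.patches import imread

    im = imread("photo.heic")  # placeholder path; needs pillow-heif installed for HEIC
    if im is not None:
        print(im.shape)  # (H, W, 3) BGR array, same convention as cv2.imread
    gray = imread("photo.heic", flags=cv2.IMREAD_GRAYSCALE)  # (H, W, 1) after dim expansion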