ultralytics 8.3.153__py3-none-any.whl → 8.3.155__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. tests/test_python.py +1 -0
  2. ultralytics/__init__.py +1 -1
  3. ultralytics/cfg/__init__.py +2 -0
  4. ultralytics/engine/predictor.py +1 -1
  5. ultralytics/engine/validator.py +0 -6
  6. ultralytics/models/fastsam/val.py +0 -2
  7. ultralytics/models/rtdetr/val.py +28 -16
  8. ultralytics/models/yolo/classify/val.py +26 -23
  9. ultralytics/models/yolo/detect/train.py +4 -7
  10. ultralytics/models/yolo/detect/val.py +88 -90
  11. ultralytics/models/yolo/obb/val.py +52 -44
  12. ultralytics/models/yolo/pose/train.py +1 -35
  13. ultralytics/models/yolo/pose/val.py +77 -176
  14. ultralytics/models/yolo/segment/train.py +1 -41
  15. ultralytics/models/yolo/segment/val.py +64 -176
  16. ultralytics/models/yolo/yoloe/val.py +2 -1
  17. ultralytics/nn/autobackend.py +2 -2
  18. ultralytics/nn/tasks.py +0 -1
  19. ultralytics/solutions/ai_gym.py +5 -5
  20. ultralytics/solutions/analytics.py +2 -2
  21. ultralytics/solutions/config.py +2 -2
  22. ultralytics/solutions/distance_calculation.py +1 -1
  23. ultralytics/solutions/heatmap.py +5 -3
  24. ultralytics/solutions/instance_segmentation.py +4 -2
  25. ultralytics/solutions/object_blurrer.py +4 -2
  26. ultralytics/solutions/object_counter.py +5 -5
  27. ultralytics/solutions/object_cropper.py +3 -2
  28. ultralytics/solutions/parking_management.py +9 -9
  29. ultralytics/solutions/queue_management.py +4 -2
  30. ultralytics/solutions/region_counter.py +13 -5
  31. ultralytics/solutions/security_alarm.py +6 -4
  32. ultralytics/solutions/similarity_search.py +6 -6
  33. ultralytics/solutions/solutions.py +9 -7
  34. ultralytics/solutions/speed_estimation.py +3 -2
  35. ultralytics/solutions/streamlit_inference.py +6 -6
  36. ultralytics/solutions/templates/similarity-search.html +31 -0
  37. ultralytics/solutions/trackzone.py +4 -2
  38. ultralytics/solutions/vision_eye.py +4 -2
  39. ultralytics/utils/callbacks/comet.py +1 -1
  40. ultralytics/utils/metrics.py +146 -317
  41. ultralytics/utils/ops.py +4 -4
  42. ultralytics/utils/plotting.py +31 -56
  43. {ultralytics-8.3.153.dist-info → ultralytics-8.3.155.dist-info}/METADATA +1 -1
  44. {ultralytics-8.3.153.dist-info → ultralytics-8.3.155.dist-info}/RECORD +48 -48
  45. {ultralytics-8.3.153.dist-info → ultralytics-8.3.155.dist-info}/WHEEL +0 -0
  46. {ultralytics-8.3.153.dist-info → ultralytics-8.3.155.dist-info}/entry_points.txt +0 -0
  47. {ultralytics-8.3.153.dist-info → ultralytics-8.3.155.dist-info}/licenses/LICENSE +0 -0
  48. {ultralytics-8.3.153.dist-info → ultralytics-8.3.155.dist-info}/top_level.txt +0 -0
ultralytics/models/yolo/detect/val.py

@@ -12,7 +12,7 @@ from ultralytics.engine.validator import BaseValidator
 from ultralytics.utils import LOGGER, ops
 from ultralytics.utils.checks import check_requirements
 from ultralytics.utils.metrics import ConfusionMatrix, DetMetrics, box_iou
-from ultralytics.utils.plotting import output_to_target, plot_images
+from ultralytics.utils.plotting import plot_images
 
 
 class DetectionValidator(BaseValidator):
@@ -23,8 +23,6 @@ class DetectionValidator(BaseValidator):
     prediction processing, and visualization of results.
 
     Attributes:
-        nt_per_class (np.ndarray): Number of targets per class.
-        nt_per_image (np.ndarray): Number of targets per image.
        is_coco (bool): Whether the dataset is COCO.
        is_lvis (bool): Whether the dataset is LVIS.
        class_map (List[int]): Mapping from model class indices to dataset class indices.
@@ -53,15 +51,13 @@ class DetectionValidator(BaseValidator):
            _callbacks (List[Any], optional): List of callback functions.
        """
        super().__init__(dataloader, save_dir, args, _callbacks)
-       self.nt_per_class = None
-       self.nt_per_image = None
        self.is_coco = False
        self.is_lvis = False
        self.class_map = None
        self.args.task = "detect"
-       self.metrics = DetMetrics(save_dir=self.save_dir)
        self.iouv = torch.linspace(0.5, 0.95, 10)  # IoU vector for mAP@0.5:0.95
        self.niou = self.iouv.numel()
+       self.metrics = DetMetrics()

    def preprocess(self, batch: Dict[str, Any]) -> Dict[str, Any]:
        """
@@ -99,18 +95,16 @@ class DetectionValidator(BaseValidator):
        self.names = model.names
        self.nc = len(model.names)
        self.end2end = getattr(model, "end2end", False)
-       self.metrics.names = self.names
-       self.metrics.plot = self.args.plots
-       self.confusion_matrix = ConfusionMatrix(nc=self.nc, conf=self.args.conf, names=self.names.values())
        self.seen = 0
        self.jdict = []
-       self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[], target_img=[])
+       self.metrics.names = self.names
+       self.confusion_matrix = ConfusionMatrix(names=list(model.names.values()))

    def get_desc(self) -> str:
        """Return a formatted string summarizing class metrics of YOLO model."""
        return ("%22s" + "%11s" * 6) % ("Class", "Images", "Instances", "Box(P", "R", "mAP50", "mAP50-95)")

-   def postprocess(self, preds: torch.Tensor) -> List[torch.Tensor]:
+   def postprocess(self, preds: torch.Tensor) -> List[Dict[str, torch.Tensor]]:
        """
        Apply Non-maximum suppression to prediction outputs.

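ConfusionMatrix likewise loses its nc and conf constructor arguments: nc is implied by names, and the confidence threshold is now passed per call (process_batch(predn, pbatch, conf=self.args.conf) in the update_metrics hunk below). A sketch of the migration, assuming a toy names mapping:

```python
from ultralytics.utils.metrics import ConfusionMatrix

names = {0: "person", 1: "bicycle", 2: "car"}  # hypothetical model.names

# 8.3.153: cm = ConfusionMatrix(nc=len(names), conf=0.25, names=names.values())
cm = ConfusionMatrix(names=list(names.values()))  # 8.3.155: nc derived from names

# The threshold now travels with each batch:
# cm.process_batch(predn, pbatch, conf=0.25)
```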
@@ -118,9 +112,10 @@ class DetectionValidator(BaseValidator):
            preds (torch.Tensor): Raw predictions from the model.

        Returns:
-           (List[torch.Tensor]): Processed predictions after NMS.
+           (List[Dict[str, torch.Tensor]]): Processed predictions after NMS, where each dict contains
+               'bboxes', 'conf', 'cls', and 'extra' tensors.
        """
-       return ops.non_max_suppression(
+       outputs = ops.non_max_suppression(
            preds,
            self.args.conf,
            self.args.iou,
@@ -131,6 +126,7 @@ class DetectionValidator(BaseValidator):
            end2end=self.end2end,
            rotated=self.args.task == "obb",
        )
+       return [{"bboxes": x[:, :4], "conf": x[:, 4], "cls": x[:, 5], "extra": x[:, 6:]} for x in outputs]

    def _prepare_batch(self, si: int, batch: Dict[str, Any]) -> Dict[str, Any]:
        """
@@ -152,68 +148,60 @@ class DetectionValidator(BaseValidator):
        if len(cls):
            bbox = ops.xywh2xyxy(bbox) * torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]]  # target boxes
            ops.scale_boxes(imgsz, bbox, ori_shape, ratio_pad=ratio_pad)  # native-space labels
-       return {"cls": cls, "bbox": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}
+       return {"cls": cls, "bboxes": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}

-   def _prepare_pred(self, pred: torch.Tensor, pbatch: Dict[str, Any]) -> torch.Tensor:
+   def _prepare_pred(self, pred: Dict[str, torch.Tensor], pbatch: Dict[str, Any]) -> Dict[str, torch.Tensor]:
        """
        Prepare predictions for evaluation against ground truth.

        Args:
-           pred (torch.Tensor): Model predictions.
+           pred (Dict[str, torch.Tensor]): Post-processed predictions from the model.
            pbatch (Dict[str, Any]): Prepared batch information.

        Returns:
-           (torch.Tensor): Prepared predictions in native space.
-       """
-       predn = pred.clone()
-       ops.scale_boxes(
-           pbatch["imgsz"], predn[:, :4], pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"]
+           (Dict[str, torch.Tensor]): Prepared predictions in native space.
+       """
+       cls = pred["cls"]
+       if self.args.single_cls:
+           cls *= 0
+       # predn = pred.clone()
+       bboxes = ops.scale_boxes(
+           pbatch["imgsz"], pred["bboxes"].clone(), pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"]
        )  # native-space pred
-       return predn
+       return {"bboxes": bboxes, "conf": pred["conf"], "cls": cls}

-   def update_metrics(self, preds: List[torch.Tensor], batch: Dict[str, Any]) -> None:
+   def update_metrics(self, preds: List[Dict[str, torch.Tensor]], batch: Dict[str, Any]) -> None:
        """
        Update metrics with new predictions and ground truth.

        Args:
-           preds (List[torch.Tensor]): List of predictions from the model.
+           preds (List[Dict[str, torch.Tensor]]): List of predictions from the model.
            batch (Dict[str, Any]): Batch data containing ground truth.
        """
        for si, pred in enumerate(preds):
            self.seen += 1
-           npr = len(pred)
-           stat = dict(
-               conf=torch.zeros(0, device=self.device),
-               pred_cls=torch.zeros(0, device=self.device),
-               tp=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device),
-           )
            pbatch = self._prepare_batch(si, batch)
-           cls, bbox = pbatch.pop("cls"), pbatch.pop("bbox")
-           nl = len(cls)
-           stat["target_cls"] = cls
-           stat["target_img"] = cls.unique()
-           if npr == 0:
-               if nl:
-                   for k in self.stats.keys():
-                       self.stats[k].append(stat[k])
-                   if self.args.plots:
-                       self.confusion_matrix.process_batch(detections=None, gt_bboxes=bbox, gt_cls=cls)
-               continue
-
-           # Predictions
-           if self.args.single_cls:
-               pred[:, 5] = 0
            predn = self._prepare_pred(pred, pbatch)
-           stat["conf"] = predn[:, 4]
-           stat["pred_cls"] = predn[:, 5]

+           cls = pbatch["cls"].cpu().numpy()
+           no_pred = len(predn["cls"]) == 0
+           if no_pred and len(cls) == 0:
+               continue
+           self.metrics.update_stats(
+               {
+                   **self._process_batch(predn, pbatch),
+                   "target_cls": cls,
+                   "target_img": np.unique(cls),
+                   "conf": np.zeros(0) if no_pred else predn["conf"].cpu().numpy(),
+                   "pred_cls": np.zeros(0) if no_pred else predn["cls"].cpu().numpy(),
+               }
+           )
            # Evaluate
-           if nl:
-               stat["tp"] = self._process_batch(predn, bbox, cls)
            if self.args.plots:
-               self.confusion_matrix.process_batch(predn, bbox, cls)
-           for k in self.stats.keys():
-               self.stats[k].append(stat[k])
+               self.confusion_matrix.process_batch(predn, pbatch, conf=self.args.conf)
+
+           if no_pred:
+               continue

            # Save
            if self.args.save_json:
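The inversion of responsibility is visible here: the validator no longer appends to self.stats lists of tensors, it hands one already-detached numpy dict per image to the metrics object. A sketch of such a per-image dict under the new contract (keys as used above, values illustrative):

```python
import numpy as np

per_image_stats = {
    "tp": np.zeros((3, 10), dtype=bool),    # 3 detections x 10 IoU thresholds
    "conf": np.array([0.91, 0.80, 0.33]),   # detection confidences
    "pred_cls": np.array([0.0, 2.0, 2.0]),  # predicted classes
    "target_cls": np.array([0.0, 2.0]),     # ground-truth classes in this image
    "target_img": np.array([0.0, 2.0]),     # unique classes present, for per-image counts
}
# self.metrics.update_stats(per_image_stats)  # accumulated until get_stats() calls process()
```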
@@ -241,44 +229,45 @@ class DetectionValidator(BaseValidator):
        Returns:
            (Dict[str, Any]): Dictionary containing metrics results.
        """
-       stats = {k: torch.cat(v, 0).cpu().numpy() for k, v in self.stats.items()}  # to numpy
-       self.nt_per_class = np.bincount(stats["target_cls"].astype(int), minlength=self.nc)
-       self.nt_per_image = np.bincount(stats["target_img"].astype(int), minlength=self.nc)
-       stats.pop("target_img", None)
-       if len(stats):
-           self.metrics.process(**stats, on_plot=self.on_plot)
+       self.metrics.process(save_dir=self.save_dir, plot=self.args.plots, on_plot=self.on_plot)
+       self.metrics.clear_stats()
        return self.metrics.results_dict

    def print_results(self) -> None:
        """Print training/validation set metrics per class."""
        pf = "%22s" + "%11i" * 2 + "%11.3g" * len(self.metrics.keys)  # print format
-       LOGGER.info(pf % ("all", self.seen, self.nt_per_class.sum(), *self.metrics.mean_results()))
-       if self.nt_per_class.sum() == 0:
+       LOGGER.info(pf % ("all", self.seen, self.metrics.nt_per_class.sum(), *self.metrics.mean_results()))
+       if self.metrics.nt_per_class.sum() == 0:
            LOGGER.warning(f"no labels found in {self.args.task} set, can not compute metrics without labels")

        # Print results per class
-       if self.args.verbose and not self.training and self.nc > 1 and len(self.stats):
+       if self.args.verbose and not self.training and self.nc > 1 and len(self.metrics.stats):
            for i, c in enumerate(self.metrics.ap_class_index):
                LOGGER.info(
-                   pf % (self.names[c], self.nt_per_image[c], self.nt_per_class[c], *self.metrics.class_result(i))
+                   pf
+                   % (
+                       self.names[c],
+                       self.metrics.nt_per_image[c],
+                       self.metrics.nt_per_class[c],
+                       *self.metrics.class_result(i),
+                   )
                )

-   def _process_batch(self, detections: torch.Tensor, gt_bboxes: torch.Tensor, gt_cls: torch.Tensor) -> torch.Tensor:
+   def _process_batch(self, preds: Dict[str, torch.Tensor], batch: Dict[str, Any]) -> Dict[str, np.ndarray]:
        """
        Return correct prediction matrix.

        Args:
-           detections (torch.Tensor): Tensor of shape (N, 6) representing detections where each detection is
-               (x1, y1, x2, y2, conf, class).
-           gt_bboxes (torch.Tensor): Tensor of shape (M, 4) representing ground-truth bounding box coordinates. Each
-               bounding box is of the format: (x1, y1, x2, y2).
-           gt_cls (torch.Tensor): Tensor of shape (M,) representing target class indices.
+           preds (Dict[str, torch.Tensor]): Dictionary containing prediction data with 'bboxes' and 'cls' keys.
+           batch (Dict[str, Any]): Batch dictionary containing ground truth data with 'bboxes' and 'cls' keys.

        Returns:
-           (torch.Tensor): Correct prediction matrix of shape (N, 10) for 10 IoU levels.
+           (Dict[str, np.ndarray]): Dictionary containing 'tp' key with correct prediction matrix of shape (N, 10) for 10 IoU levels.
        """
-       iou = box_iou(gt_bboxes, detections[:, :4])
-       return self.match_predictions(detections[:, 5], gt_cls, iou)
+       if len(batch["cls"]) == 0 or len(preds["cls"]) == 0:
+           return {"tp": np.zeros((len(preds["cls"]), self.niou), dtype=bool)}
+       iou = box_iou(batch["bboxes"], preds["bboxes"])
+       return {"tp": self.match_predictions(preds["cls"], batch["cls"], iou).cpu().numpy()}

    def build_dataset(self, img_path: str, mode: str = "val", batch: Optional[int] = None) -> torch.utils.data.Dataset:
        """
@@ -317,42 +306,50 @@ class DetectionValidator(BaseValidator):
            ni (int): Batch index.
        """
        plot_images(
-           batch["img"],
-           batch["batch_idx"],
-           batch["cls"].squeeze(-1),
-           batch["bboxes"],
+           labels=batch,
            paths=batch["im_file"],
            fname=self.save_dir / f"val_batch{ni}_labels.jpg",
            names=self.names,
            on_plot=self.on_plot,
        )

-   def plot_predictions(self, batch: Dict[str, Any], preds: List[torch.Tensor], ni: int) -> None:
+   def plot_predictions(
+       self, batch: Dict[str, Any], preds: List[Dict[str, torch.Tensor]], ni: int, max_det: Optional[int] = None
+   ) -> None:
        """
        Plot predicted bounding boxes on input images and save the result.

        Args:
            batch (Dict[str, Any]): Batch containing images and annotations.
-           preds (List[torch.Tensor]): List of predictions from the model.
+           preds (List[Dict[str, torch.Tensor]]): List of predictions from the model.
            ni (int): Batch index.
-       """
+           max_det (Optional[int]): Maximum number of detections to plot.
+       """
+       # TODO: optimize this
+       for i, pred in enumerate(preds):
+           pred["batch_idx"] = torch.ones_like(pred["conf"]) * i  # add batch index to predictions
+       keys = preds[0].keys()
+       max_det = max_det or self.args.max_det
+       batched_preds = {k: torch.cat([x[k][:max_det] for x in preds], dim=0) for k in keys}
+       # TODO: fix this
+       batched_preds["bboxes"][:, :4] = ops.xyxy2xywh(batched_preds["bboxes"][:, :4])  # convert to xywh format
        plot_images(
-           batch["img"],
-           *output_to_target(preds, max_det=self.args.max_det),
+           images=batch["img"],
+           labels=batched_preds,
            paths=batch["im_file"],
            fname=self.save_dir / f"val_batch{ni}_pred.jpg",
            names=self.names,
            on_plot=self.on_plot,
        )  # pred

-   def save_one_txt(self, predn: torch.Tensor, save_conf: bool, shape: Tuple[int, int], file: Path) -> None:
+   def save_one_txt(self, predn: Dict[str, torch.Tensor], save_conf: bool, shape: Tuple[int, int], file: Path) -> None:
        """
        Save YOLO detections to a txt file in normalized coordinates in a specific format.

        Args:
-           predn (torch.Tensor): Predictions in the format (x1, y1, x2, y2, conf, class).
+           predn (Dict[str, torch.Tensor]): Dictionary containing predictions with keys 'bboxes', 'conf', and 'cls'.
            save_conf (bool): Whether to save confidence scores.
-           shape (Tuple[int, int]): Shape of the original image.
+           shape (Tuple[int, int]): Shape of the original image (height, width).
            file (Path): File path to save the detections.
        """
        from ultralytics.engine.results import Results
@@ -361,28 +358,29 @@ class DetectionValidator(BaseValidator):
            np.zeros((shape[0], shape[1]), dtype=np.uint8),
            path=None,
            names=self.names,
-           boxes=predn[:, :6],
+           boxes=torch.cat([predn["bboxes"], predn["conf"].unsqueeze(-1), predn["cls"].unsqueeze(-1)], dim=1),
        ).save_txt(file, save_conf=save_conf)

-   def pred_to_json(self, predn: torch.Tensor, filename: str) -> None:
+   def pred_to_json(self, predn: Dict[str, torch.Tensor], filename: str) -> None:
        """
        Serialize YOLO predictions to COCO json format.

        Args:
-           predn (torch.Tensor): Predictions in the format (x1, y1, x2, y2, conf, class).
+           predn (Dict[str, torch.Tensor]): Predictions dictionary containing 'bboxes', 'conf', and 'cls' keys
+               with bounding box coordinates, confidence scores, and class predictions.
            filename (str): Image filename.
        """
        stem = Path(filename).stem
        image_id = int(stem) if stem.isnumeric() else stem
-       box = ops.xyxy2xywh(predn[:, :4])  # xywh
+       box = ops.xyxy2xywh(predn["bboxes"])  # xywh
        box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
-       for p, b in zip(predn.tolist(), box.tolist()):
+       for b, s, c in zip(box.tolist(), predn["conf"].tolist(), predn["cls"].tolist()):
            self.jdict.append(
                {
                    "image_id": image_id,
-                   "category_id": self.class_map[int(p[5])],
+                   "category_id": self.class_map[int(c)],
                    "bbox": [round(x, 3) for x in b],
-                   "score": round(p[4], 5),
+                   "score": round(s, 5),
                }
            )

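pred_to_json targets the COCO box convention of top-left (x, y, width, height), hence the center-to-corner shift above. A worked sketch of one resulting entry, assuming a hypothetical COCO-style filename 000000000139.jpg:

```python
# xyxy (10, 20, 50, 80) -> center-xywh (30, 50, 40, 60) -> COCO top-left (10, 20, 40, 60)
entry = {
    "image_id": 139,                   # numeric stem of "000000000139.jpg"
    "category_id": 1,                  # class index mapped through self.class_map
    "bbox": [10.0, 20.0, 40.0, 60.0],  # [x_top_left, y_top_left, width, height]
    "score": 0.92,
}
```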
ultralytics/models/yolo/obb/val.py

@@ -3,12 +3,12 @@
 from pathlib import Path
 from typing import Any, Dict, List, Tuple, Union
 
+import numpy as np
 import torch
 
 from ultralytics.models.yolo.detect import DetectionValidator
 from ultralytics.utils import LOGGER, ops
 from ultralytics.utils.metrics import OBBMetrics, batch_probiou
-from ultralytics.utils.plotting import output_to_rotated_target, plot_images
 
 
 class OBBValidator(DetectionValidator):
@@ -55,7 +55,7 @@ class OBBValidator(DetectionValidator):
        """
        super().__init__(dataloader, save_dir, args, _callbacks)
        self.args.task = "obb"
-       self.metrics = OBBMetrics(save_dir=self.save_dir, plot=True)
+       self.metrics = OBBMetrics()

    def init_metrics(self, model: torch.nn.Module) -> None:
        """
@@ -68,20 +68,20 @@ class OBBValidator(DetectionValidator):
        val = self.data.get(self.args.split, "")  # validation path
        self.is_dota = isinstance(val, str) and "DOTA" in val  # check if dataset is DOTA format

-   def _process_batch(self, detections: torch.Tensor, gt_bboxes: torch.Tensor, gt_cls: torch.Tensor) -> torch.Tensor:
+   def _process_batch(self, preds: Dict[str, torch.Tensor], batch: Dict[str, torch.Tensor]) -> Dict[str, np.ndarray]:
        """
        Compute the correct prediction matrix for a batch of detections and ground truth bounding boxes.

        Args:
-           detections (torch.Tensor): Detected bounding boxes and associated data with shape (N, 7) where each
-               detection is represented as (x1, y1, x2, y2, conf, class, angle).
-           gt_bboxes (torch.Tensor): Ground truth bounding boxes with shape (M, 5) where each box is represented
-               as (x1, y1, x2, y2, angle).
-           gt_cls (torch.Tensor): Class labels for the ground truth bounding boxes with shape (M,).
+           preds (Dict[str, torch.Tensor]): Prediction dictionary containing 'cls' and 'bboxes' keys with detected
+               class labels and bounding boxes.
+           batch (Dict[str, torch.Tensor]): Batch dictionary containing 'cls' and 'bboxes' keys with ground truth
+               class labels and bounding boxes.

        Returns:
-           (torch.Tensor): The correct prediction matrix with shape (N, 10), which includes 10 IoU levels for each
-               detection, indicating the accuracy of predictions compared to the ground truth.
+           (Dict[str, np.ndarray]): Dictionary containing 'tp' key with the correct prediction matrix as a numpy
+               array with shape (N, 10), which includes 10 IoU levels for each detection, indicating the accuracy
+               of predictions compared to the ground truth.

        Examples:
            >>> detections = torch.rand(100, 7)  # 100 sample detections
@@ -89,10 +89,25 @@ class OBBValidator(DetectionValidator):
            >>> gt_cls = torch.randint(0, 5, (50,))  # 50 ground truth class labels
            >>> correct_matrix = validator._process_batch(detections, gt_bboxes, gt_cls)
        """
-       iou = batch_probiou(gt_bboxes, torch.cat([detections[:, :4], detections[:, -1:]], dim=-1))
-       return self.match_predictions(detections[:, 5], gt_cls, iou)
+       if len(batch["cls"]) == 0 or len(preds["cls"]) == 0:
+           return {"tp": np.zeros((len(preds["cls"]), self.niou), dtype=bool)}
+       iou = batch_probiou(batch["bboxes"], preds["bboxes"])
+       return {"tp": self.match_predictions(preds["cls"], batch["cls"], iou).cpu().numpy()}

-   def _prepare_batch(self, si: int, batch: Dict) -> Dict:
+   def postprocess(self, preds: torch.Tensor) -> List[Dict[str, torch.Tensor]]:
+       """
+       Args:
+           preds (torch.Tensor): Raw predictions from the model.
+
+       Returns:
+           (List[Dict[str, torch.Tensor]]): Processed predictions with angle information concatenated to bboxes.
+       """
+       preds = super().postprocess(preds)
+       for pred in preds:
+           pred["bboxes"] = torch.cat([pred["bboxes"], pred.pop("extra")], dim=-1)  # concatenate angle
+       return preds
+
+   def _prepare_batch(self, si: int, batch: Dict[str, Any]) -> Dict[str, Any]:
        """
        Prepare batch data for OBB validation with proper scaling and formatting.

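For OBB the generic 'extra' slot carries the rotation angle, so after this override each pred['bboxes'] is an (N, 5) xywhr tensor. A sketch with one rotated detection and illustrative values:

```python
import torch

pred = {
    "bboxes": torch.tensor([[100.0, 60.0, 40.0, 20.0]]),  # (x, y, w, h), center format
    "conf": torch.tensor([0.88]),
    "cls": torch.tensor([3.0]),
    "extra": torch.tensor([[0.35]]),  # rotation angle in radians
}
pred["bboxes"] = torch.cat([pred["bboxes"], pred.pop("extra")], dim=-1)
print(pred["bboxes"].shape)  # torch.Size([1, 5]) -> (x, y, w, h, r)
```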
@@ -118,9 +133,9 @@ class OBBValidator(DetectionValidator):
        if len(cls):
            bbox[..., :4].mul_(torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]])  # target boxes
            ops.scale_boxes(imgsz, bbox, ori_shape, ratio_pad=ratio_pad, xywh=True)  # native-space labels
-       return {"cls": cls, "bbox": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}
+       return {"cls": cls, "bboxes": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}

-   def _prepare_pred(self, pred: torch.Tensor, pbatch: Dict[str, Any]) -> torch.Tensor:
+   def _prepare_pred(self, pred: Dict[str, torch.Tensor], pbatch: Dict[str, Any]) -> Dict[str, torch.Tensor]:
        """
        Prepare predictions by scaling bounding boxes to original image dimensions.

@@ -128,20 +143,22 @@ class OBBValidator(DetectionValidator):
        input dimensions to the original image dimensions using the provided batch information.

        Args:
-           pred (torch.Tensor): Prediction tensor containing bounding box coordinates and other information.
+           pred (Dict[str, torch.Tensor]): Prediction dictionary containing bounding box coordinates and other information.
            pbatch (Dict[str, Any]): Dictionary containing batch information with keys:
                - imgsz (tuple): Model input image size.
                - ori_shape (tuple): Original image shape.
                - ratio_pad (tuple): Ratio and padding information for scaling.

        Returns:
-           (torch.Tensor): Scaled prediction tensor with bounding boxes in original image dimensions.
+           (Dict[str, torch.Tensor]): Scaled prediction dictionary with bounding boxes in original image dimensions.
        """
-       predn = pred.clone()
-       ops.scale_boxes(
-           pbatch["imgsz"], predn[:, :4], pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"], xywh=True
+       cls = pred["cls"]
+       if self.args.single_cls:
+           cls *= 0
+       bboxes = ops.scale_boxes(
+           pbatch["imgsz"], pred["bboxes"].clone(), pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"], xywh=True
        )  # native-space pred
-       return predn
+       return {"bboxes": bboxes, "conf": pred["conf"], "cls": cls}

    def plot_predictions(self, batch: Dict[str, Any], preds: List[torch.Tensor], ni: int) -> None:
        """
@@ -158,22 +175,18 @@ class OBBValidator(DetectionValidator):
            >>> preds = [torch.rand(10, 7)]  # Example predictions for one image
            >>> validator.plot_predictions(batch, preds, 0)
        """
-       plot_images(
-           batch["img"],
-           *output_to_rotated_target(preds, max_det=self.args.max_det),
-           paths=batch["im_file"],
-           fname=self.save_dir / f"val_batch{ni}_pred.jpg",
-           names=self.names,
-           on_plot=self.on_plot,
-       )  # pred
+       for p in preds:
+           # TODO: fix this duplicated `xywh2xyxy`
+           p["bboxes"][:, :4] = ops.xywh2xyxy(p["bboxes"][:, :4])  # convert to xyxy format for plotting
+       super().plot_predictions(batch, preds, ni)  # plot bboxes

-   def pred_to_json(self, predn: torch.Tensor, filename: Union[str, Path]) -> None:
+   def pred_to_json(self, predn: Dict[str, torch.Tensor], filename: Union[str, Path]) -> None:
        """
        Convert YOLO predictions to COCO JSON format with rotated bounding box information.

        Args:
-           predn (torch.Tensor): Prediction tensor containing bounding box coordinates, confidence scores,
-               class predictions, and rotation angles with shape (N, 6+) where the last column is the angle.
+           predn (Dict[str, torch.Tensor]): Prediction dictionary containing 'bboxes', 'conf', and 'cls' keys
+               with bounding box coordinates, confidence scores, and class predictions.
            filename (str | Path): Path to the image file for which predictions are being processed.

        Notes:
@@ -183,22 +196,20 @@ class OBBValidator(DetectionValidator):
        """
        stem = Path(filename).stem
        image_id = int(stem) if stem.isnumeric() else stem
-       rbox = torch.cat([predn[:, :4], predn[:, -1:]], dim=-1)
+       rbox = predn["bboxes"]
        poly = ops.xywhr2xyxyxyxy(rbox).view(-1, 8)
-       for i, (r, b) in enumerate(zip(rbox.tolist(), poly.tolist())):
+       for r, b, s, c in zip(rbox.tolist(), poly.tolist(), predn["conf"].tolist(), predn["cls"].tolist()):
            self.jdict.append(
                {
                    "image_id": image_id,
-                   "category_id": self.class_map[int(predn[i, 5].item())],
-                   "score": round(predn[i, 4].item(), 5),
+                   "category_id": self.class_map[int(c)],
+                   "score": round(s, 5),
                    "rbox": [round(x, 3) for x in r],
                    "poly": [round(x, 3) for x in b],
                }
            )

-   def save_one_txt(
-       self, predn: torch.Tensor, save_conf: bool, shape: Tuple[int, int], file: Union[Path, str]
-   ) -> None:
+   def save_one_txt(self, predn: Dict[str, torch.Tensor], save_conf: bool, shape: Tuple[int, int], file: Path) -> None:
        """
        Save YOLO OBB detections to a text file in normalized coordinates.

@@ -207,7 +218,7 @@ class OBBValidator(DetectionValidator):
                class predictions, and angles in format (x, y, w, h, conf, cls, angle).
            save_conf (bool): Whether to save confidence scores in the text file.
            shape (Tuple[int, int]): Original image shape in format (height, width).
-           file (Path | str): Output file path to save detections.
+           file (Path): Output file path to save detections.

        Examples:
            >>> validator = OBBValidator()
@@ -218,14 +229,11 @@ class OBBValidator(DetectionValidator):

        from ultralytics.engine.results import Results

-       rboxes = torch.cat([predn[:, :4], predn[:, -1:]], dim=-1)
-       # xywh, r, conf, cls
-       obb = torch.cat([rboxes, predn[:, 4:6]], dim=-1)
        Results(
            np.zeros((shape[0], shape[1]), dtype=np.uint8),
            path=None,
            names=self.names,
-           obb=obb,
+           obb=torch.cat([predn["bboxes"], predn["conf"].unsqueeze(-1), predn["cls"].unsqueeze(-1)], dim=1),
        ).save_txt(file, save_conf=save_conf)

    def eval_json(self, stats: Dict[str, Any]) -> Dict[str, Any]:
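Since predn['bboxes'] is already (N, 5) xywhr after the OBB postprocess, pred_to_json can feed it straight into ops.xywhr2xyxyxyxy for the 8-value polygon. A sketch with an unrotated box whose corners are easy to verify by hand:

```python
import torch

from ultralytics.utils import ops

rbox = torch.tensor([[100.0, 60.0, 40.0, 20.0, 0.0]])  # (x, y, w, h, r) with r = 0
poly = ops.xywhr2xyxyxyxy(rbox).view(-1, 8)
print(poly.view(-1, 4, 2))  # four corners of the 40x20 box centered at (100, 60):
# some ordering of (120, 70), (120, 50), (80, 50), (80, 70)
```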
ultralytics/models/yolo/pose/train.py

@@ -7,7 +7,7 @@ from typing import Any, Dict, Optional, Union
 from ultralytics.models import yolo
 from ultralytics.nn.tasks import PoseModel
 from ultralytics.utils import DEFAULT_CFG, LOGGER
-from ultralytics.utils.plotting import plot_images, plot_results
+from ultralytics.utils.plotting import plot_results
 
 
 class PoseTrainer(yolo.detect.DetectionTrainer):
@@ -108,40 +108,6 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
            self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
        )

-   def plot_training_samples(self, batch: Dict[str, Any], ni: int):
-       """
-       Plot a batch of training samples with annotated class labels, bounding boxes, and keypoints.
-
-       Args:
-           batch (dict): Dictionary containing batch data with the following keys:
-               - img (torch.Tensor): Batch of images
-               - keypoints (torch.Tensor): Keypoints coordinates for pose estimation
-               - cls (torch.Tensor): Class labels
-               - bboxes (torch.Tensor): Bounding box coordinates
-               - im_file (list): List of image file paths
-               - batch_idx (torch.Tensor): Batch indices for each instance
-           ni (int): Current training iteration number used for filename
-
-       The function saves the plotted batch as an image in the trainer's save directory with the filename
-       'train_batch{ni}.jpg', where ni is the iteration number.
-       """
-       images = batch["img"]
-       kpts = batch["keypoints"]
-       cls = batch["cls"].squeeze(-1)
-       bboxes = batch["bboxes"]
-       paths = batch["im_file"]
-       batch_idx = batch["batch_idx"]
-       plot_images(
-           images,
-           batch_idx,
-           cls,
-           bboxes,
-           kpts=kpts,
-           paths=paths,
-           fname=self.save_dir / f"train_batch{ni}.jpg",
-           on_plot=self.on_plot,
-       )
-
    def plot_metrics(self):
        """Plot training/validation metrics."""
        plot_results(file=self.csv, pose=True, on_plot=self.on_plot)  # save results.png
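With the override deleted, PoseTrainer presumably falls back to the base DetectionTrainer implementation, which, judging by the keyword-style plot_images calls elsewhere in this diff, can pass the whole batch dict (keypoints included) as labels. A hedged sketch of what the inherited behavior would look like:

```python
from typing import Any, Dict

from ultralytics.utils.plotting import plot_images


def plot_training_samples(self, batch: Dict[str, Any], ni: int):
    """Assumed inherited behavior: plot the whole batch dict, keypoints included."""
    plot_images(
        labels=batch,  # batch dict already carries img, cls, bboxes, keypoints, batch_idx
        paths=batch["im_file"],
        fname=self.save_dir / f"train_batch{ni}.jpg",
        on_plot=self.on_plot,
    )
```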