ultralytics 8.1.6__py3-none-any.whl → 8.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ultralytics was flagged as possibly problematic by the registry.
Files changed (43)
  1. ultralytics/__init__.py +1 -1
  2. ultralytics/cfg/__init__.py +1 -1
  3. ultralytics/data/converter.py +5 -2
  4. ultralytics/data/dataset.py +9 -4
  5. ultralytics/data/explorer/explorer.py +5 -2
  6. ultralytics/engine/exporter.py +17 -3
  7. ultralytics/engine/model.py +355 -81
  8. ultralytics/engine/results.py +94 -43
  9. ultralytics/engine/trainer.py +7 -3
  10. ultralytics/hub/__init__.py +6 -3
  11. ultralytics/hub/auth.py +2 -2
  12. ultralytics/hub/session.py +2 -2
  13. ultralytics/models/sam/amg.py +4 -2
  14. ultralytics/models/sam/modules/decoders.py +1 -1
  15. ultralytics/models/sam/modules/tiny_encoder.py +1 -1
  16. ultralytics/models/yolo/segment/predict.py +1 -1
  17. ultralytics/models/yolo/segment/val.py +6 -2
  18. ultralytics/nn/autobackend.py +6 -6
  19. ultralytics/nn/modules/head.py +11 -10
  20. ultralytics/nn/tasks.py +11 -2
  21. ultralytics/solutions/distance_calculation.py +5 -17
  22. ultralytics/solutions/heatmap.py +2 -1
  23. ultralytics/solutions/object_counter.py +1 -2
  24. ultralytics/solutions/speed_estimation.py +1 -1
  25. ultralytics/trackers/utils/gmc.py +10 -12
  26. ultralytics/utils/__init__.py +78 -7
  27. ultralytics/utils/benchmarks.py +1 -2
  28. ultralytics/utils/callbacks/mlflow.py +6 -2
  29. ultralytics/utils/checks.py +2 -2
  30. ultralytics/utils/loss.py +7 -2
  31. ultralytics/utils/metrics.py +4 -4
  32. ultralytics/utils/ops.py +0 -1
  33. ultralytics/utils/plotting.py +63 -5
  34. ultralytics/utils/tal.py +2 -2
  35. ultralytics/utils/torch_utils.py +2 -2
  36. ultralytics/utils/triton.py +1 -1
  37. ultralytics/utils/tuner.py +1 -1
  38. {ultralytics-8.1.6.dist-info → ultralytics-8.1.12.dist-info}/METADATA +4 -4
  39. {ultralytics-8.1.6.dist-info → ultralytics-8.1.12.dist-info}/RECORD +43 -43
  40. {ultralytics-8.1.6.dist-info → ultralytics-8.1.12.dist-info}/LICENSE +0 -0
  41. {ultralytics-8.1.6.dist-info → ultralytics-8.1.12.dist-info}/WHEEL +0 -0
  42. {ultralytics-8.1.6.dist-info → ultralytics-8.1.12.dist-info}/entry_points.txt +0 -0
  43. {ultralytics-8.1.6.dist-info → ultralytics-8.1.12.dist-info}/top_level.txt +0 -0
ultralytics/engine/results.py CHANGED
@@ -67,30 +67,45 @@ class Results(SimpleClass):
     """
     A class for storing and manipulating inference results.
 
-    Args:
-        orig_img (numpy.ndarray): The original image as a numpy array.
-        path (str): The path to the image file.
-        names (dict): A dictionary of class names.
-        boxes (torch.tensor, optional): A 2D tensor of bounding box coordinates for each detection.
-        masks (torch.tensor, optional): A 3D tensor of detection masks, where each mask is a binary image.
-        probs (torch.tensor, optional): A 1D tensor of probabilities of each class for classification task.
-        keypoints (List[List[float]], optional): A list of detected keypoints for each object.
-
     Attributes:
-        orig_img (numpy.ndarray): The original image as a numpy array.
-        orig_shape (tuple): The original image shape in (height, width) format.
-        boxes (Boxes, optional): A Boxes object containing the detection bounding boxes.
-        masks (Masks, optional): A Masks object containing the detection masks.
-        probs (Probs, optional): A Probs object containing probabilities of each class for classification task.
-        keypoints (Keypoints, optional): A Keypoints object containing detected keypoints for each object.
-        speed (dict): A dictionary of preprocess, inference, and postprocess speeds in milliseconds per image.
-        names (dict): A dictionary of class names.
-        path (str): The path to the image file.
-        _keys (tuple): A tuple of attribute names for non-empty attributes.
+        orig_img (numpy.ndarray): Original image as a numpy array.
+        orig_shape (tuple): Original image shape in (height, width) format.
+        boxes (Boxes, optional): Object containing detection bounding boxes.
+        masks (Masks, optional): Object containing detection masks.
+        probs (Probs, optional): Object containing class probabilities for classification tasks.
+        keypoints (Keypoints, optional): Object containing detected keypoints for each object.
+        speed (dict): Dictionary of preprocess, inference, and postprocess speeds (ms/image).
+        names (dict): Dictionary of class names.
+        path (str): Path to the image file.
+
+    Methods:
+        update(boxes=None, masks=None, probs=None, obb=None): Updates object attributes with new detection results.
+        cpu(): Returns a copy of the Results object with all tensors on CPU memory.
+        numpy(): Returns a copy of the Results object with all tensors as numpy arrays.
+        cuda(): Returns a copy of the Results object with all tensors on GPU memory.
+        to(*args, **kwargs): Returns a copy of the Results object with tensors on a specified device and dtype.
+        new(): Returns a new Results object with the same image, path, and names.
+        plot(...): Plots detection results on an input image, returning an annotated image.
+        verbose(): Returns a log string for each task, detailing detections and classifications.
+        save_txt(txt_file, save_conf=False): Saves detection results to a text file.
+        save_crop(save_dir, file_name=Path("im.jpg")): Saves cropped detection images.
+        tojson(normalize=False): Converts detection results to JSON format.
     """
 
     def __init__(self, orig_img, path, names, boxes=None, masks=None, probs=None, keypoints=None, obb=None) -> None:
-        """Initialize the Results class."""
+        """
+        Initialize the Results class.
+
+        Args:
+            orig_img (numpy.ndarray): The original image as a numpy array.
+            path (str): The path to the image file.
+            names (dict): A dictionary of class names.
+            boxes (torch.tensor, optional): A 2D tensor of bounding box coordinates for each detection.
+            masks (torch.tensor, optional): A 3D tensor of detection masks, where each mask is a binary image.
+            probs (torch.tensor, optional): A 1D tensor of probabilities of each class for classification task.
+            keypoints (torch.tensor, optional): A 2D tensor of keypoint coordinates for each detection.
+            obb (torch.tensor, optional): A 2D tensor of oriented bounding box coordinates for each detection.
+        """
         self.orig_img = orig_img
         self.orig_shape = orig_img.shape[:2]
         self.boxes = Boxes(boxes, self.orig_shape) if boxes is not None else None  # native size boxes
@@ -181,6 +196,9 @@ class Results(SimpleClass):
         boxes=True,
         masks=True,
         probs=True,
+        show=False,
+        save=False,
+        filename=None,
     ):
         """
         Plots the detection results on an input RGB image. Accepts a numpy array (cv2) or a PIL Image.
@@ -199,6 +217,9 @@ class Results(SimpleClass):
             boxes (bool): Whether to plot the bounding boxes.
             masks (bool): Whether to plot the masks.
             probs (bool): Whether to plot classification probability
+            show (bool): Whether to display the annotated image directly.
+            save (bool): Whether to save the annotated image to `filename`.
+            filename (str): Filename to save image to if save is True.
 
         Returns:
             (numpy.ndarray): A numpy array of the annotated image.
@@ -268,8 +289,27 @@ class Results(SimpleClass):
             for k in reversed(self.keypoints.data):
                 annotator.kpts(k, self.orig_shape, radius=kpt_radius, kpt_line=kpt_line)
 
+        # Show results
+        if show:
+            annotator.show(self.path)
+
+        # Save results
+        if save:
+            annotator.save(filename)
+
         return annotator.result()
 
+    def show(self, *args, **kwargs):
+        """Show annotated results image."""
+        self.plot(show=True, *args, **kwargs)
+
+    def save(self, filename=None, *args, **kwargs):
+        """Save annotated results image."""
+        if not filename:
+            filename = f"results_{Path(self.path).name}"
+        self.plot(save=True, filename=filename, *args, **kwargs)
+        return filename
+
     def verbose(self):
         """Return log string for each task."""
         log_string = ""
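The new `show`/`save` plot arguments and the `Results.show()` and `Results.save()` helpers reduce the display-and-save workflow to one call each. A minimal usage sketch (assuming a local `yolov8n.pt` weights file and a `bus.jpg` test image):

    from ultralytics import YOLO

    model = YOLO("yolov8n.pt")  # assumed local weights
    results = model("bus.jpg")  # inference returns a list of Results

    for r in results:
        r.show()          # equivalent to r.plot(show=True)
        saved = r.save()  # defaults to "results_bus.jpg" when filename is omitted
        print(f"annotated image written to {saved}")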
@@ -377,33 +417,41 @@ class Results(SimpleClass):
 
 class Boxes(BaseTensor):
     """
-    A class for storing and manipulating detection boxes.
-
-    Args:
-        boxes (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the detection boxes,
-            with shape (num_boxes, 6) or (num_boxes, 7). The last two columns contain confidence and class values.
-            If present, the third last column contains track IDs.
-        orig_shape (tuple): Original image size, in the format (height, width).
+    Manages detection boxes, providing easy access and manipulation of box coordinates, confidence scores, class
+    identifiers, and optional tracking IDs. Supports multiple formats for box coordinates, including both absolute and
+    normalized forms.
 
     Attributes:
-        xyxy (torch.Tensor | numpy.ndarray): The boxes in xyxy format.
-        conf (torch.Tensor | numpy.ndarray): The confidence values of the boxes.
-        cls (torch.Tensor | numpy.ndarray): The class values of the boxes.
-        id (torch.Tensor | numpy.ndarray): The track IDs of the boxes (if available).
-        xywh (torch.Tensor | numpy.ndarray): The boxes in xywh format.
-        xyxyn (torch.Tensor | numpy.ndarray): The boxes in xyxy format normalized by original image size.
-        xywhn (torch.Tensor | numpy.ndarray): The boxes in xywh format normalized by original image size.
-        data (torch.Tensor): The raw bboxes tensor (alias for `boxes`).
+        data (torch.Tensor): The raw tensor containing detection boxes and their associated data.
+        orig_shape (tuple): The original image size as a tuple (height, width), used for normalization.
+        is_track (bool): Indicates whether tracking IDs are included in the box data.
+
+    Properties:
+        xyxy (torch.Tensor | numpy.ndarray): Boxes in [x1, y1, x2, y2] format.
+        conf (torch.Tensor | numpy.ndarray): Confidence scores for each box.
+        cls (torch.Tensor | numpy.ndarray): Class labels for each box.
+        id (torch.Tensor | numpy.ndarray, optional): Tracking IDs for each box, if available.
+        xywh (torch.Tensor | numpy.ndarray): Boxes in [x, y, width, height] format, calculated on demand.
+        xyxyn (torch.Tensor | numpy.ndarray): Normalized [x1, y1, x2, y2] boxes, relative to `orig_shape`.
+        xywhn (torch.Tensor | numpy.ndarray): Normalized [x, y, width, height] boxes, relative to `orig_shape`.
 
     Methods:
-        cpu(): Move the object to CPU memory.
-        numpy(): Convert the object to a numpy array.
-        cuda(): Move the object to CUDA memory.
-        to(*args, **kwargs): Move the object to the specified device.
+        cpu(): Moves the boxes to CPU memory.
+        numpy(): Converts the boxes to a numpy array format.
+        cuda(): Moves the boxes to CUDA (GPU) memory.
+        to(device, dtype=None): Moves the boxes to the specified device.
     """
 
     def __init__(self, boxes, orig_shape) -> None:
-        """Initialize the Boxes class."""
+        """
+        Initialize the Boxes class.
+
+        Args:
+            boxes (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the detection boxes, with
+                shape (num_boxes, 6) or (num_boxes, 7). The last two columns contain confidence and class values.
+                If present, the third last column contains track IDs.
+            orig_shape (tuple): Original image size, in the format (height, width).
+        """
        if boxes.ndim == 1:
            boxes = boxes[None, :]
        n = boxes.shape[-1]
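To make the documented properties concrete, a small sketch with a hypothetical single detection (a 6-column row is (x1, y1, x2, y2, conf, cls); a 7-column row would add a track ID):

    import torch
    from ultralytics.engine.results import Boxes

    # One box on a 480x640 image: x1, y1, x2, y2, confidence, class
    b = Boxes(torch.tensor([[10.0, 20.0, 110.0, 220.0, 0.9, 0.0]]), orig_shape=(480, 640))
    print(b.xyxy)   # tensor([[ 10.,  20., 110., 220.]])
    print(b.conf)   # tensor([0.9000])
    print(b.cls)    # tensor([0.])
    print(b.id)     # None -- is_track is False for 6-column data
    print(b.xywhn)  # center x/y and width/height, normalized by orig_shape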
@@ -607,7 +655,7 @@ class OBB(BaseTensor):
         conf (torch.Tensor | numpy.ndarray): The confidence values of the boxes.
         cls (torch.Tensor | numpy.ndarray): The class values of the boxes.
         id (torch.Tensor | numpy.ndarray): The track IDs of the boxes (if available).
-        xyxyxyxyn (torch.Tensor | numpy.ndarray): The rotated boxes in xyxyxyxy format normalized by original image size.
+        xyxyxyxyn (torch.Tensor | numpy.ndarray): The rotated boxes in xyxyxyxy format normalized by orig image size.
         xyxyxyxy (torch.Tensor | numpy.ndarray): The rotated boxes in xyxyxyxy format.
         xyxy (torch.Tensor | numpy.ndarray): The horizontal boxes in xyxyxyxy format.
         data (torch.Tensor): The raw OBB tensor (alias for `boxes`).
@@ -667,8 +715,11 @@ class OBB(BaseTensor):
     @property
     @lru_cache(maxsize=2)
     def xyxy(self):
-        """Return the horizontal boxes in xyxy format, (N, 4)."""
-        # This way to fit both torch and numpy version
+        """
+        Return the horizontal boxes in xyxy format, (N, 4).
+
+        Accepts both torch and numpy boxes.
+        """
         x1 = self.xyxyxyxy[..., 0].min(1).values
         x2 = self.xyxyxyxy[..., 0].max(1).values
         y1 = self.xyxyxyxy[..., 1].min(1).values
ultralytics/engine/trainer.py CHANGED
@@ -400,7 +400,7 @@ class BaseTrainer:
 
                 # Log
                 mem = f"{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G"  # (GB)
-                loss_len = self.tloss.shape[0] if len(self.tloss.size()) else 1
+                loss_len = self.tloss.shape[0] if len(self.tloss.shape) else 1
                 losses = self.tloss if loss_len > 1 else torch.unsqueeze(self.tloss, 0)
                 if RANK in (-1, 0):
                     pbar.set_description(
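The `size()` → `shape` change standardizes on the attribute form; the surrounding logic relies on a single-component `tloss` being a 0-dim tensor whose shape is empty. A quick illustration:

    import torch

    scalar_loss = torch.tensor(1.25)             # 0-dim tensor, shape == torch.Size([])
    vector_loss = torch.tensor([1.0, 2.0, 3.0])  # 1-dim tensor of three loss components

    print(len(scalar_loss.shape))  # 0 -> loss_len falls back to 1
    print(len(vector_loss.shape))  # 1 -> loss_len = vector_loss.shape[0] == 3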
@@ -563,8 +563,12 @@ class BaseTrainer:
         raise NotImplementedError("build_dataset function not implemented in trainer")
 
     def label_loss_items(self, loss_items=None, prefix="train"):
-        """Returns a loss dict with labelled training loss items tensor."""
-        # Not needed for classification but necessary for segmentation & detection
+        """
+        Returns a loss dict with labelled training loss items tensor.
+
+        Note:
+            This is not needed for classification but necessary for segmentation & detection
+        """
         return {"loss": loss_items} if loss_items is not None else ["loss"]
 
     def set_model_attributes(self):
ultralytics/hub/__init__.py CHANGED
@@ -12,13 +12,16 @@ def login(api_key: str = None, save=True) -> bool:
     """
     Log in to the Ultralytics HUB API using the provided API key.
 
-    The session is not stored; a new session is created when needed using the saved SETTINGS or the HUB_API_KEY environment variable if successfully authenticated.
+    The session is not stored; a new session is created when needed using the saved SETTINGS or the HUB_API_KEY
+    environment variable if successfully authenticated.
 
     Args:
-        api_key (str, optional): The API key to use for authentication. If not provided, it will be retrieved from SETTINGS or HUB_API_KEY environment variable.
+        api_key (str, optional): API key to use for authentication.
+            If not provided, it will be retrieved from SETTINGS or HUB_API_KEY environment variable.
         save (bool, optional): Whether to save the API key to SETTINGS if authentication is successful.
+
     Returns:
-        bool: True if authentication is successful, False otherwise.
+        (bool): True if authentication is successful, False otherwise.
     """
     checks.check_requirements("hub-sdk>=0.0.2")
     from hub_sdk import HUBClient
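A minimal sketch of the documented login flow (the key is a placeholder):

    from ultralytics import hub

    # Explicit key; with save=True a successful key is persisted to SETTINGS
    ok = hub.login(api_key="YOUR_API_KEY", save=True)

    # Or omit the key to fall back to saved SETTINGS or the HUB_API_KEY env variable
    ok = hub.login()
    print(ok)  # True if authentication succeeded, False otherwise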
ultralytics/hub/auth.py CHANGED
@@ -87,7 +87,7 @@ class Auth:
         Attempt to authenticate with the server using either id_token or API key.
 
         Returns:
-            bool: True if authentication is successful, False otherwise.
+            (bool): True if authentication is successful, False otherwise.
         """
         try:
             if header := self.get_auth_header():
@@ -107,7 +107,7 @@ class Auth:
         supported browser.
 
         Returns:
-            bool: True if authentication is successful, False otherwise.
+            (bool): True if authentication is successful, False otherwise.
         """
         if not is_colab():
             return False  # Currently only works with Colab
ultralytics/hub/session.py CHANGED
@@ -277,7 +277,7 @@ class HUBTrainingSession:
             timeout: The maximum timeout duration.
 
         Returns:
-            str: The retry message.
+            (str): The retry message.
         """
         if self._should_retry(response.status_code):
             return f"Retrying {retry}x for {timeout}s." if retry else ""
@@ -341,7 +341,7 @@ class HUBTrainingSession:
             response (requests.Response): The response object from the file download request.
 
         Returns:
-            (None)
+            None
         """
         with TQDM(total=content_length, unit="B", unit_scale=True, unit_divisor=1024) as pbar:
             for data in response.iter_content(chunk_size=1024):
ultralytics/models/sam/amg.py CHANGED
@@ -35,9 +35,11 @@ def calculate_stability_score(masks: torch.Tensor, mask_threshold: float, thresh
 
     The stability score is the IoU between the binary masks obtained by thresholding the predicted mask logits at high
     and low values.
+
+    Notes:
+        - One mask is always contained inside the other.
+        - Save memory by preventing unnecessary cast to torch.int64
     """
-    # One mask is always contained inside the other.
-    # Save memory by preventing unnecessary cast to torch.int64
     intersections = (masks > (mask_threshold + threshold_offset)).sum(-1, dtype=torch.int16).sum(-1, dtype=torch.int32)
     unions = (masks > (mask_threshold - threshold_offset)).sum(-1, dtype=torch.int16).sum(-1, dtype=torch.int32)
     return intersections / unions
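A small numeric sketch of the computation above: the same logits are thresholded at mask_threshold ± offset, and since the strict mask is contained in the loose one, the strict count is the intersection:

    import torch

    logits = torch.tensor([[[-2.0, 0.5, 1.5, 3.0]]])  # one mask, one row of logits
    mask_threshold, offset = 0.0, 1.0

    strict = logits > (mask_threshold + offset)  # [False, False, True, True]
    loose = logits > (mask_threshold - offset)   # [False, True,  True, True]

    intersections = strict.sum(-1, dtype=torch.int16).sum(-1, dtype=torch.int32)  # 2
    unions = loose.sum(-1, dtype=torch.int16).sum(-1, dtype=torch.int32)          # 3
    print(intersections / unions)  # tensor([0.6667]) -- the stability score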
ultralytics/models/sam/modules/decoders.py CHANGED
@@ -121,7 +121,7 @@ class MaskDecoder(nn.Module):
         """
         # Concatenate output tokens
         output_tokens = torch.cat([self.iou_token.weight, self.mask_tokens.weight], dim=0)
-        output_tokens = output_tokens.unsqueeze(0).expand(sparse_prompt_embeddings.size(0), -1, -1)
+        output_tokens = output_tokens.unsqueeze(0).expand(sparse_prompt_embeddings.shape[0], -1, -1)
         tokens = torch.cat((output_tokens, sparse_prompt_embeddings), dim=1)
 
         # Expand per-image data in batch direction to be per-mask
ultralytics/models/sam/modules/tiny_encoder.py CHANGED
@@ -732,7 +732,7 @@ class TinyViT(nn.Module):
         for i in range(start_i, len(self.layers)):
             layer = self.layers[i]
             x = layer(x)
-        B, _, C = x.size()
+        B, _, C = x.shape
         x = x.view(B, 64, 64, C)
         x = x.permute(0, 3, 1, 2)
         return self.neck(x)
ultralytics/models/yolo/segment/predict.py CHANGED
@@ -41,7 +41,7 @@ class SegmentationPredictor(DetectionPredictor):
             orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
 
         results = []
-        proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
+        proto = preds[1][-1] if isinstance(preds[1], tuple) else preds[1]  # tuple if PyTorch model or array if exported
         for i, pred in enumerate(p):
             orig_img = orig_imgs[i]
             img_path = self.batch[0][i]
ultralytics/models/yolo/segment/val.py CHANGED
@@ -215,8 +215,12 @@ class SegmentationValidator(DetectionValidator):
         self.plot_masks.clear()
 
     def pred_to_json(self, predn, filename, pred_masks):
-        """Save one JSON result."""
-        # Example result = {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
+        """
+        Save one JSON result.
+
+        Examples:
+            >>> result = {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
+        """
         from pycocotools.mask import encode  # noqa
 
         def single_encode(x):
ultralytics/nn/autobackend.py CHANGED
@@ -508,9 +508,6 @@ class AutoBackend(nn.Module):
 
         Args:
             imgsz (tuple): The shape of the dummy input tensor in the format (batch_size, channels, height, width)
-
-        Returns:
-            (None): This method runs the forward pass and don't return any value
         """
         warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton, self.nn_module
         if any(warmup_types) and (self.device.type != "cpu" or self.triton):
@@ -521,13 +518,16 @@ class AutoBackend(nn.Module):
     @staticmethod
     def _model_type(p="path/to/model.pt"):
         """
-        This function takes a path to a model file and returns the model type.
+        This function takes a path to a model file and returns the model type. Possibles types are pt, jit, onnx, xml,
+        engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, ncnn or paddle.
 
         Args:
             p: path to the model file. Defaults to path/to/model.pt
+
+        Examples:
+            >>> model = AutoBackend(weights="path/to/model.onnx")
+            >>> model_type = model._model_type()  # returns "onnx"
         """
-        # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
-        # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
         from ultralytics.engine.exporter import export_formats
 
         sf = list(export_formats().Suffix)  # export suffixes
ultralytics/nn/modules/head.py CHANGED
@@ -59,16 +59,17 @@ class Detect(nn.Module):
             cls = x_cat[:, self.reg_max * 4 :]
         else:
             box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
-        dbox = self.decode_bboxes(box)
 
         if self.export and self.format in ("tflite", "edgetpu"):
             # Precompute normalization factor to increase numerical stability
             # See https://github.com/ultralytics/ultralytics/issues/7371
-            img_h = shape[2]
-            img_w = shape[3]
-            img_size = torch.tensor([img_w, img_h, img_w, img_h], device=box.device).reshape(1, 4, 1)
-            norm = self.strides / (self.stride[0] * img_size)
-            dbox = dist2bbox(self.dfl(box) * norm, self.anchors.unsqueeze(0) * norm[:, :2], xywh=True, dim=1)
+            grid_h = shape[2]
+            grid_w = shape[3]
+            grid_size = torch.tensor([grid_w, grid_h, grid_w, grid_h], device=box.device).reshape(1, 4, 1)
+            norm = self.strides / (self.stride[0] * grid_size)
+            dbox = self.decode_bboxes(self.dfl(box) * norm, self.anchors.unsqueeze(0) * norm[:, :2])
+        else:
+            dbox = self.decode_bboxes(self.dfl(box), self.anchors.unsqueeze(0)) * self.strides
 
         y = torch.cat((dbox, cls.sigmoid()), 1)
         return y if self.export else (y, x)
@@ -82,9 +83,9 @@ class Detect(nn.Module):
             a[-1].bias.data[:] = 1.0  # box
             b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
 
-    def decode_bboxes(self, bboxes):
+    def decode_bboxes(self, bboxes, anchors):
         """Decode bounding boxes."""
-        return dist2bbox(self.dfl(bboxes), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
+        return dist2bbox(bboxes, anchors, xywh=True, dim=1)
 
 
 class Segment(Detect):
@@ -139,9 +140,9 @@ class OBB(Detect):
             return x, angle
         return torch.cat([x, angle], 1) if self.export else (torch.cat([x[0], angle], 1), (x[1], angle))
 
-    def decode_bboxes(self, bboxes):
+    def decode_bboxes(self, bboxes, anchors):
         """Decode rotated bounding boxes."""
-        return dist2rbox(self.dfl(bboxes), self.angle, self.anchors.unsqueeze(0), dim=1) * self.strides
+        return dist2rbox(bboxes, self.angle, anchors, dim=1)
 
 
 class Pose(Detect):
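For context on the refactor: `decode_bboxes` now receives already-DFL-decoded distances plus explicit anchors, and the stride multiplication moved to the caller so the tflite/edgetpu branch can fold it into `norm`. A simplified sketch of the `dist2bbox` conversion it delegates to (consistent with, but not copied from, ultralytics/utils/tal.py):

    import torch

    def dist2bbox_sketch(distance, anchor_points, xywh=True, dim=-1):
        # Turn (left, top, right, bottom) distances from anchor points into boxes
        lt, rb = distance.chunk(2, dim)
        x1y1 = anchor_points - lt
        x2y2 = anchor_points + rb
        if xywh:
            c_xy = (x1y1 + x2y2) / 2  # box center
            wh = x2y2 - x1y1          # box width/height
            return torch.cat((c_xy, wh), dim)
        return torch.cat((x1y1, x2y2), dim)  # xyxy

    anchor = torch.tensor([[8.0, 8.0]])          # grid-cell center
    dist = torch.tensor([[2.0, 3.0, 4.0, 5.0]])  # l, t, r, b
    print(dist2bbox_sketch(dist, anchor))        # tensor([[9., 9., 6., 8.]]) -> cx, cy, w, h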
ultralytics/nn/tasks.py CHANGED
@@ -631,7 +631,7 @@ def torch_safe_load(weight):
                 "ultralytics.yolo.data": "ultralytics.data",
             }
         ):  # for legacy 8.0 Classify and Pose models
-            return torch.load(file, map_location="cpu"), file  # load
+            ckpt = torch.load(file, map_location="cpu")
 
     except ModuleNotFoundError as e:  # e.name is missing module name
         if e.name == "models":
@@ -651,8 +651,17 @@ def torch_safe_load(weight):
                 f"run a command with an official YOLOv8 model, i.e. 'yolo predict model=yolov8n.pt'"
             )
         check_requirements(e.name)  # install missing module
+        ckpt = torch.load(file, map_location="cpu")
 
-    return torch.load(file, map_location="cpu"), file  # load
+    if not isinstance(ckpt, dict):
+        # File is likely a YOLO instance saved with i.e. torch.save(model, "saved_model.pt")
+        LOGGER.warning(
+            f"WARNING ⚠️ The file '{weight}' appears to be improperly saved or formatted. "
+            f"For optimal results, use model.save('filename.pt') to correctly save YOLO models."
+        )
+        ckpt = {"model": ckpt.model}
+
+    return ckpt, file  # load
 
 
 def attempt_load_weights(weights, device=None, inplace=True, fuse=False):
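The new guard targets checkpoints produced by pickling the whole model object instead of saving a checkpoint dict; a hedged sketch of the two paths the warning distinguishes:

    import torch
    from ultralytics import YOLO

    model = YOLO("yolov8n.pt")

    # Recommended: writes the dict-style checkpoint torch_safe_load expects
    model.save("correctly_saved.pt")

    # Discouraged: pickles the YOLO instance itself; loading this file now logs the
    # warning above and wraps the checkpoint as {"model": ckpt.model}
    torch.save(model, "improperly_saved.pt")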
ultralytics/solutions/distance_calculation.py CHANGED
@@ -121,21 +121,7 @@ class DistanceCalculation:
             centroid2 (point): Second bounding box data
         """
         pixel_distance = math.sqrt((centroid1[0] - centroid2[0]) ** 2 + (centroid1[1] - centroid2[1]) ** 2)
-        return pixel_distance / self.pixel_per_meter
-
-    def plot_distance_and_line(self, distance):
-        """
-        Plot the distance and line on frame
-        Args:
-            distance (float): Distance between two centroids
-        """
-        cv2.rectangle(self.im0, (15, 25), (280, 70), (255, 255, 255), -1)
-        cv2.putText(
-            self.im0, f"Distance : {distance:.2f}m", (20, 55), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2, cv2.LINE_AA
-        )
-        cv2.line(self.im0, self.centroids[0], self.centroids[1], self.line_color, 3)
-        cv2.circle(self.im0, self.centroids[0], 6, self.centroid_color, -1)
-        cv2.circle(self.im0, self.centroids[1], 6, self.centroid_color, -1)
+        return pixel_distance / self.pixel_per_meter, (pixel_distance / self.pixel_per_meter) * 1000
 
     def start_process(self, im0, tracks):
         """
@@ -166,8 +152,10 @@ class DistanceCalculation:
                 centroid = self.calculate_centroid(self.selected_boxes[trk_id])
                 self.centroids.append(centroid)
 
-            distance = self.calculate_distance(self.centroids[0], self.centroids[1])
-            self.plot_distance_and_line(distance)
+            distance_m, distance_mm = self.calculate_distance(self.centroids[0], self.centroids[1])
+            self.annotator.plot_distance_and_line(
+                distance_m, distance_mm, self.centroids, self.line_color, self.centroid_color
+            )
 
 
         self.centroids = []
ultralytics/solutions/heatmap.py CHANGED
@@ -167,9 +167,10 @@ class Heatmap:
         """
         self.im0 = im0
         if tracks[0].boxes.id is None:
+            self.heatmap = np.zeros((int(self.imh), int(self.imw)), dtype=np.float32)
             if self.view_img and self.env_check:
                 self.display_frames()
-            return
+            return im0
         self.heatmap *= self.decay_factor  # decay factor
         self.extract_results(tracks)
         self.annotator = Annotator(self.im0, self.count_txt_thickness, None)
ultralytics/solutions/object_counter.py CHANGED
@@ -136,7 +136,6 @@ class ObjectCounter:
                 cv2.EVENT_FLAG_SHIFTKEY, etc.).
             params (dict): Additional parameters you may want to pass to the function.
         """
-        # global is_drawing, selected_point
         if event == cv2.EVENT_LBUTTONDOWN:
             for i, point in enumerate(self.reg_pts):
                 if (
@@ -257,7 +256,7 @@ class ObjectCounter:
         if tracks[0].boxes.id is None:
             if self.view_img:
                 self.display_frames()
-            return
+            return im0
         self.extract_and_process_tracks(tracks)
 
         if self.view_img:
ultralytics/solutions/speed_estimation.py CHANGED
@@ -164,7 +164,7 @@ class SpeedEstimator:
         if tracks[0].boxes.id is None:
             if self.view_img and self.env_check:
                 self.display_frames()
-            return
+            return im0
         self.extract_tracks(tracks)
 
         self.annotator = Annotator(self.im0, line_width=2)
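With heatmap, object counting, and speed estimation all returning `im0` on the no-track path, a video-writing loop can unconditionally write whatever the solution returns. A sketch with ObjectCounter (file names, region points, and the `set_args`/`start_counting` calls reflect the 8.1-era solutions interface and are illustrative):

    import cv2
    from ultralytics import YOLO
    from ultralytics.solutions import object_counter

    model = YOLO("yolov8n.pt")
    counter = object_counter.ObjectCounter()
    counter.set_args(view_img=False, reg_pts=[(20, 400), (1080, 400)], classes_names=model.names)

    cap = cv2.VideoCapture("traffic.mp4")
    w, h, fps = (int(cap.get(p)) for p in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
    writer = cv2.VideoWriter("out.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h))
    while cap.isOpened():
        ok, im0 = cap.read()
        if not ok:
            break
        tracks = model.track(im0, persist=True, show=False)
        im0 = counter.start_counting(im0, tracks)  # now always returns a frame
        writer.write(im0)
    cap.release()
    writer.release()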
ultralytics/trackers/utils/gmc.py CHANGED
@@ -97,19 +97,18 @@ class GMC:
         if self.method in ["orb", "sift"]:
             return self.applyFeatures(raw_frame, detections)
         elif self.method == "ecc":
-            return self.applyEcc(raw_frame, detections)
+            return self.applyEcc(raw_frame)
         elif self.method == "sparseOptFlow":
-            return self.applySparseOptFlow(raw_frame, detections)
+            return self.applySparseOptFlow(raw_frame)
         else:
             return np.eye(2, 3)
 
-    def applyEcc(self, raw_frame: np.array, detections: list = None) -> np.array:
+    def applyEcc(self, raw_frame: np.array) -> np.array:
         """
         Apply ECC algorithm to a raw frame.
 
         Args:
             raw_frame (np.array): The raw frame to be processed.
-            detections (list): List of detections to be used in the processing.
 
         Returns:
             (np.array): Processed frame.
@@ -144,7 +143,7 @@ class GMC:
         # Run the ECC algorithm. The results are stored in warp_matrix.
         # (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria)
         try:
-            (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria, None, 1)
+            (_, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria, None, 1)
         except Exception as e:
             LOGGER.warning(f"WARNING: find transform failed. Set warp as identity {e}")
 
@@ -258,7 +257,7 @@ class GMC:
         # import matplotlib.pyplot as plt
         # matches_img = np.hstack((self.prevFrame, frame))
         # matches_img = cv2.cvtColor(matches_img, cv2.COLOR_GRAY2BGR)
-        # W = np.size(self.prevFrame, 1)
+        # W = self.prevFrame.shape[1]
         # for m in goodMatches:
         #     prev_pt = np.array(self.prevKeyPoints[m.queryIdx].pt, dtype=np.int_)
         #     curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_)
@@ -275,7 +274,7 @@ class GMC:
         # plt.show()
 
         # Find rigid matrix
-        if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(prevPoints, 0)):
+        if prevPoints.shape[0] > 4:
             H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)
 
             # Handle downscale
@@ -292,13 +291,12 @@ class GMC:
 
         return H
 
-    def applySparseOptFlow(self, raw_frame: np.array, detections: list = None) -> np.array:
+    def applySparseOptFlow(self, raw_frame: np.array) -> np.array:
         """
         Apply Sparse Optical Flow method to a raw frame.
 
         Args:
             raw_frame (np.array): The raw frame to be processed.
-            detections (list): List of detections to be used in the processing.
 
         Returns:
             (np.array): Processed frame.
@@ -328,7 +326,7 @@ class GMC:
             return H
 
         # Find correspondences
-        matchedKeypoints, status, err = cv2.calcOpticalFlowPyrLK(self.prevFrame, frame, self.prevKeyPoints, None)
+        matchedKeypoints, status, _ = cv2.calcOpticalFlowPyrLK(self.prevFrame, frame, self.prevKeyPoints, None)
 
         # Leave good correspondences only
         prevPoints = []
@@ -343,8 +341,8 @@ class GMC:
         currPoints = np.array(currPoints)
 
         # Find rigid matrix
-        if np.size(prevPoints, 0) > 4 and np.size(prevPoints, 0) == np.size(prevPoints, 0):
-            H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)
+        if (prevPoints.shape[0] > 4) and (prevPoints.shape[0] == prevPoints.shape[0]):
+            H, _ = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)
 
         if self.downscale > 1.0:
             H[0, 2] *= self.downscale
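Since `detections` was dropped from the ECC and sparse-optical-flow paths, only the feature-based methods still accept it via `apply`. A minimal sketch (synthetic frames; constructor arguments are illustrative, and the first call only initializes state and returns the identity warp):

    import numpy as np
    from ultralytics.trackers.utils.gmc import GMC

    gmc = GMC(method="sparseOptFlow", downscale=2)

    frame0 = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
    frame1 = np.roll(frame0, shift=3, axis=1)  # simulate a slight camera pan

    H = gmc.apply(frame0)  # first frame: returns np.eye(2, 3)
    H = gmc.apply(frame1)  # later frames: estimated 2x3 affine warp
    print(H.shape)         # (2, 3)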