ultralytics 8.3.163__py3-none-any.whl → 8.3.164__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. ultralytics/__init__.py +1 -1
  2. ultralytics/data/augment.py +182 -153
  3. ultralytics/data/build.py +23 -3
  4. ultralytics/data/dataset.py +6 -2
  5. ultralytics/data/loaders.py +2 -2
  6. ultralytics/data/utils.py +9 -7
  7. ultralytics/engine/exporter.py +7 -3
  8. ultralytics/engine/results.py +42 -42
  9. ultralytics/models/fastsam/model.py +1 -1
  10. ultralytics/models/fastsam/predict.py +1 -1
  11. ultralytics/models/sam/model.py +4 -4
  12. ultralytics/models/sam/modules/blocks.py +5 -5
  13. ultralytics/models/sam/modules/memory_attention.py +19 -19
  14. ultralytics/models/sam/modules/transformer.py +24 -22
  15. ultralytics/models/yolo/detect/val.py +2 -2
  16. ultralytics/models/yolo/world/train_world.py +9 -1
  17. ultralytics/solutions/distance_calculation.py +1 -1
  18. ultralytics/solutions/instance_segmentation.py +2 -2
  19. ultralytics/solutions/object_blurrer.py +2 -2
  20. ultralytics/solutions/object_counter.py +2 -2
  21. ultralytics/solutions/object_cropper.py +1 -1
  22. ultralytics/solutions/queue_management.py +1 -1
  23. ultralytics/solutions/security_alarm.py +2 -2
  24. ultralytics/solutions/templates/similarity-search.html +0 -24
  25. ultralytics/solutions/vision_eye.py +1 -1
  26. ultralytics/utils/benchmarks.py +2 -2
  27. ultralytics/utils/export.py +0 -2
  28. ultralytics/utils/instance.py +32 -25
  29. ultralytics/utils/ops.py +8 -8
  30. {ultralytics-8.3.163.dist-info → ultralytics-8.3.164.dist-info}/METADATA +1 -1
  31. {ultralytics-8.3.163.dist-info → ultralytics-8.3.164.dist-info}/RECORD +35 -35
  32. {ultralytics-8.3.163.dist-info → ultralytics-8.3.164.dist-info}/WHEEL +0 -0
  33. {ultralytics-8.3.163.dist-info → ultralytics-8.3.164.dist-info}/entry_points.txt +0 -0
  34. {ultralytics-8.3.163.dist-info → ultralytics-8.3.164.dist-info}/licenses/LICENSE +0 -0
  35. {ultralytics-8.3.163.dist-info → ultralytics-8.3.164.dist-info}/top_level.txt +0 -0
ultralytics/models/sam/modules/memory_attention.py CHANGED
@@ -4,7 +4,7 @@ import copy
 from typing import Optional
 
 import torch
-from torch import Tensor, nn
+from torch import nn
 
 from .blocks import RoPEAttention
 
@@ -103,7 +103,7 @@ class MemoryAttentionLayer(nn.Module):
         self.pos_enc_at_cross_attn_queries = pos_enc_at_cross_attn_queries
         self.pos_enc_at_cross_attn_keys = pos_enc_at_cross_attn_keys
 
-    def _forward_sa(self, tgt: Tensor, query_pos: Optional[Tensor]) -> Tensor:
+    def _forward_sa(self, tgt: torch.Tensor, query_pos: Optional[torch.Tensor]) -> torch.Tensor:
         """Perform self-attention on input tensor using positional encoding and RoPE attention mechanism."""
         tgt2 = self.norm1(tgt)
         q = k = tgt2 + query_pos if self.pos_enc_at_attn else tgt2
@@ -113,12 +113,12 @@ class MemoryAttentionLayer(nn.Module):
 
     def _forward_ca(
         self,
-        tgt: Tensor,
-        memory: Tensor,
-        query_pos: Optional[Tensor],
-        pos: Optional[Tensor],
+        tgt: torch.Tensor,
+        memory: torch.Tensor,
+        query_pos: Optional[torch.Tensor],
+        pos: Optional[torch.Tensor],
         num_k_exclude_rope: int = 0,
-    ) -> Tensor:
+    ) -> torch.Tensor:
         """Perform cross-attention between target and memory tensors using RoPEAttention mechanism."""
         kwds = {}
         if num_k_exclude_rope > 0:
@@ -138,20 +138,20 @@ class MemoryAttentionLayer(nn.Module):
 
     def forward(
         self,
-        tgt: Tensor,
-        memory: Tensor,
-        pos: Optional[Tensor] = None,
-        query_pos: Optional[Tensor] = None,
+        tgt: torch.Tensor,
+        memory: torch.Tensor,
+        pos: Optional[torch.Tensor] = None,
+        query_pos: Optional[torch.Tensor] = None,
         num_k_exclude_rope: int = 0,
     ) -> torch.Tensor:
         """
         Process input tensors through self-attention, cross-attention, and feedforward network layers.
 
         Args:
-            tgt (Tensor): Target tensor for self-attention with shape (N, L, D).
-            memory (Tensor): Memory tensor for cross-attention with shape (N, S, D).
-            pos (Optional[Tensor]): Positional encoding for memory tensor.
-            query_pos (Optional[Tensor]): Positional encoding for target tensor.
+            tgt (torch.Tensor): Target tensor for self-attention with shape (N, L, D).
+            memory (torch.Tensor): Memory tensor for cross-attention with shape (N, S, D).
+            pos (Optional[torch.Tensor]): Positional encoding for memory tensor.
+            query_pos (Optional[torch.Tensor]): Positional encoding for target tensor.
             num_k_exclude_rope (int): Number of keys to exclude from rotary position embedding.
 
         Returns:
@@ -242,8 +242,8 @@ class MemoryAttention(nn.Module):
         self,
         curr: torch.Tensor,  # self-attention inputs
         memory: torch.Tensor,  # cross-attention inputs
-        curr_pos: Optional[Tensor] = None,  # pos_enc for self-attention inputs
-        memory_pos: Optional[Tensor] = None,  # pos_enc for cross-attention inputs
+        curr_pos: Optional[torch.Tensor] = None,  # pos_enc for self-attention inputs
+        memory_pos: Optional[torch.Tensor] = None,  # pos_enc for cross-attention inputs
         num_obj_ptr_tokens: int = 0,  # number of object pointer *tokens*
     ) -> torch.Tensor:
         """
@@ -252,8 +252,8 @@ class MemoryAttention(nn.Module):
         Args:
             curr (torch.Tensor): Self-attention input tensor, representing the current state.
             memory (torch.Tensor): Cross-attention input tensor, representing memory information.
-            curr_pos (Optional[Tensor]): Positional encoding for self-attention inputs.
-            memory_pos (Optional[Tensor]): Positional encoding for cross-attention inputs.
+            curr_pos (Optional[torch.Tensor]): Positional encoding for self-attention inputs.
+            memory_pos (Optional[torch.Tensor]): Positional encoding for cross-attention inputs.
             num_obj_ptr_tokens (int): Number of object pointer tokens to exclude from rotary position embedding.
 
         Returns:
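Note: these hunks only swap the bare `Tensor` alias for fully qualified `torch.Tensor` annotations, which lets the module drop one import. A minimal illustrative check (not from the package) that the two spellings name the same class, using the annotation style now in place:

    # Illustrative only: the removed alias and the qualified name are the same type,
    # so this refactor is purely cosmetic.
    from typing import Optional

    import torch
    from torch import Tensor

    assert Tensor is torch.Tensor


    def add_pos(tgt: torch.Tensor, query_pos: Optional[torch.Tensor] = None) -> torch.Tensor:
        """Mirror the annotation style now used by the MemoryAttentionLayer helpers."""
        return tgt if query_pos is None else tgt + query_pos


    print(add_pos(torch.zeros(2, 4, 8), torch.ones(2, 4, 8)).shape)  # torch.Size([2, 4, 8])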
ultralytics/models/sam/modules/transformer.py CHANGED
@@ -82,21 +82,21 @@ class TwoWayTransformer(nn.Module):
 
     def forward(
         self,
-        image_embedding: Tensor,
-        image_pe: Tensor,
-        point_embedding: Tensor,
-    ) -> Tuple[Tensor, Tensor]:
+        image_embedding: torch.Tensor,
+        image_pe: torch.Tensor,
+        point_embedding: torch.Tensor,
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
         """
         Process image and point embeddings through the Two-Way Transformer.
 
         Args:
-            image_embedding (Tensor): Image to attend to, with shape (B, embedding_dim, H, W).
-            image_pe (Tensor): Positional encoding to add to the image, with same shape as image_embedding.
-            point_embedding (Tensor): Embedding to add to query points, with shape (B, N_points, embedding_dim).
+            image_embedding (torch.Tensor): Image to attend to, with shape (B, embedding_dim, H, W).
+            image_pe (torch.Tensor): Positional encoding to add to the image, with same shape as image_embedding.
+            point_embedding (torch.Tensor): Embedding to add to query points, with shape (B, N_points, embedding_dim).
 
         Returns:
-            queries (Tensor): Processed point embeddings with shape (B, N_points, embedding_dim).
-            keys (Tensor): Processed image embeddings with shape (B, H*W, embedding_dim).
+            queries (torch.Tensor): Processed point embeddings with shape (B, N_points, embedding_dim).
+            keys (torch.Tensor): Processed image embeddings with shape (B, H*W, embedding_dim).
         """
         # BxCxHxW -> BxHWxC == B x N_image_tokens x C
         image_embedding = image_embedding.flatten(2).permute(0, 2, 1)
@@ -196,19 +196,21 @@ class TwoWayAttentionBlock(nn.Module):
 
         self.skip_first_layer_pe = skip_first_layer_pe
 
-    def forward(self, queries: Tensor, keys: Tensor, query_pe: Tensor, key_pe: Tensor) -> Tuple[Tensor, Tensor]:
+    def forward(
+        self, queries: torch.Tensor, keys: torch.Tensor, query_pe: torch.Tensor, key_pe: torch.Tensor
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
         """
         Apply two-way attention to process query and key embeddings in a transformer block.
 
         Args:
-            queries (Tensor): Query embeddings with shape (B, N_queries, embedding_dim).
-            keys (Tensor): Key embeddings with shape (B, N_keys, embedding_dim).
-            query_pe (Tensor): Positional encodings for queries with same shape as queries.
-            key_pe (Tensor): Positional encodings for keys with same shape as keys.
+            queries (torch.Tensor): Query embeddings with shape (B, N_queries, embedding_dim).
+            keys (torch.Tensor): Key embeddings with shape (B, N_keys, embedding_dim).
+            query_pe (torch.Tensor): Positional encodings for queries with same shape as queries.
+            key_pe (torch.Tensor): Positional encodings for keys with same shape as keys.
 
         Returns:
-            queries (Tensor): Processed query embeddings with shape (B, N_queries, embedding_dim).
-            keys (Tensor): Processed key embeddings with shape (B, N_keys, embedding_dim).
+            queries (torch.Tensor): Processed query embeddings with shape (B, N_queries, embedding_dim).
+            keys (torch.Tensor): Processed key embeddings with shape (B, N_keys, embedding_dim).
         """
         # Self attention block
         if self.skip_first_layer_pe:
@@ -304,7 +306,7 @@ class Attention(nn.Module):
         self.out_proj = nn.Linear(self.internal_dim, embedding_dim)
 
     @staticmethod
-    def _separate_heads(x: Tensor, num_heads: int) -> Tensor:
+    def _separate_heads(x: torch.Tensor, num_heads: int) -> torch.Tensor:
         """Separate the input tensor into the specified number of attention heads."""
         b, n, c = x.shape
         x = x.reshape(b, n, num_heads, c // num_heads)
@@ -317,17 +319,17 @@ class Attention(nn.Module):
         x = x.transpose(1, 2)
         return x.reshape(b, n_tokens, n_heads * c_per_head)  # B x N_tokens x C
 
-    def forward(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor:
+    def forward(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
         """
         Apply multi-head attention to query, key, and value tensors with optional downsampling.
 
         Args:
-            q (Tensor): Query tensor with shape (B, N_q, embedding_dim).
-            k (Tensor): Key tensor with shape (B, N_k, embedding_dim).
-            v (Tensor): Value tensor with shape (B, N_k, embedding_dim).
+            q (torch.Tensor): Query tensor with shape (B, N_q, embedding_dim).
+            k (torch.Tensor): Key tensor with shape (B, N_k, embedding_dim).
+            v (torch.Tensor): Value tensor with shape (B, N_k, embedding_dim).
 
         Returns:
-            (Tensor): Output tensor after attention with shape (B, N_q, embedding_dim).
+            (torch.Tensor): Output tensor after attention with shape (B, N_q, embedding_dim).
         """
         # Input projections
         q = self.q_proj(q)
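The shape bookkeeping in these hunks (image embeddings flattened to tokens, attention heads separated and recombined) can be sketched with plain torch ops; dimensions below are placeholders, not values from the package:

    # Minimal sketch of the shape handling shown above (not the package implementation).
    import torch

    b, c, h, w, num_heads = 2, 256, 64, 64, 8
    image_embedding = torch.randn(b, c, h, w)

    # BxCxHxW -> BxHWxC, i.e. one token per spatial location
    tokens = image_embedding.flatten(2).permute(0, 2, 1)  # (2, 4096, 256)

    # _separate_heads: (B, N, C) -> (B, num_heads, N, C // num_heads)
    x = tokens.reshape(b, tokens.shape[1], num_heads, c // num_heads).transpose(1, 2)

    # _recombine_heads: back to (B, N, C)
    x = x.transpose(1, 2).reshape(b, tokens.shape[1], c)
    assert x.shape == tokens.shape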
ultralytics/models/yolo/detect/val.py CHANGED
@@ -450,8 +450,8 @@ class DetectionValidator(BaseValidator):
                 val.summarize()
 
                 # update mAP50-95 and mAP50
-                stats[f"metrics/mAP50({suffix[i][0]})"] = val.stats_as_dict["AP_all"]
-                stats[f"metrics/mAP50-95({suffix[i][0]})"] = val.stats_as_dict["AP_50"]
+                stats[f"metrics/mAP50({suffix[i][0]})"] = val.stats_as_dict["AP_50"]
+                stats[f"metrics/mAP50-95({suffix[i][0]})"] = val.stats_as_dict["AP_all"]
 
                 if self.is_lvis:
                     stats[f"metrics/APr({suffix[i][0]})"] = val.stats_as_dict["APr"]
ultralytics/models/yolo/world/train_world.py CHANGED
@@ -107,7 +107,15 @@ class WorldTrainerFromScratch(WorldTrainer):
         datasets = [
             build_yolo_dataset(self.args, im_path, batch, self.training_data[im_path], stride=gs, multi_modal=True)
             if isinstance(im_path, str)
-            else build_grounding(self.args, im_path["img_path"], im_path["json_file"], batch, stride=gs)
+            else build_grounding(
+                # assign `nc` from validation set to max number of text samples for training consistency
+                self.args,
+                im_path["img_path"],
+                im_path["json_file"],
+                batch,
+                stride=gs,
+                max_samples=self.data["nc"],
+            )
             for im_path in img_path
         ]
         self.set_text_embeddings(datasets, batch)  # cache text embeddings to accelerate training
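The grounding branch now forwards the validation set's class count as `max_samples`, capping the number of text samples drawn per grounding batch so the training vocabulary stays consistent with validation. A hedged sketch of that parameter flow (all paths and counts are placeholders, not values from the diff):

    # Hedged sketch: how the validation-set class count feeds the new keyword.
    data = {"nc": 80}  # class count read from the validation dataset config
    grounding_source = {"img_path": "flickr30k/images", "json_file": "annotations.json"}  # placeholders

    build_kwargs = dict(
        img_path=grounding_source["img_path"],
        json_file=grounding_source["json_file"],
        batch=16,
        stride=32,
        max_samples=data["nc"],  # new in 8.3.164
    )
    print(build_kwargs)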
ultralytics/solutions/distance_calculation.py CHANGED
@@ -76,7 +76,7 @@ class DistanceCalculation(BaseSolution):
         between two user-selected objects if they have been chosen.
 
         Args:
-            im0 (numpy.ndarray): The input image frame to process.
+            im0 (np.ndarray): The input image frame to process.
 
         Returns:
             (SolutionResults): Contains processed image `plot_im`, `total_tracks` (int) representing the total number
ultralytics/solutions/instance_segmentation.py CHANGED
@@ -19,7 +19,7 @@ class InstanceSegmentation(BaseSolution):
         names (Dict[int, str]): Dictionary mapping class indices to class names.
         clss (List[int]): List of detected class indices.
         track_ids (List[int]): List of track IDs for detected instances.
-        masks (List[numpy.ndarray]): List of segmentation masks for detected instances.
+        masks (List[np.ndarray]): List of segmentation masks for detected instances.
         show_conf (bool): Whether to display confidence scores.
         show_labels (bool): Whether to display class labels.
         show_boxes (bool): Whether to display bounding boxes.
@@ -55,7 +55,7 @@ class InstanceSegmentation(BaseSolution):
         Perform instance segmentation on the input image and annotate the results.
 
         Args:
-            im0 (numpy.ndarray): The input image for segmentation.
+            im0 (np.ndarray): The input image for segmentation.
 
         Returns:
             (SolutionResults): Object containing the annotated image and total number of tracked instances.
ultralytics/solutions/object_blurrer.py CHANGED
@@ -56,11 +56,11 @@ class ObjectBlurrer(BaseSolution):
         and annotates the image with bounding boxes.
 
         Args:
-            im0 (numpy.ndarray): The input image containing detected objects.
+            im0 (np.ndarray): The input image containing detected objects.
 
         Returns:
             (SolutionResults): Object containing the processed image and number of tracked objects.
-                - plot_im (numpy.ndarray): The annotated output image with blurred objects.
+                - plot_im (np.ndarray): The annotated output image with blurred objects.
                 - total_tracks (int): The total number of tracked objects in the frame.
 
         Examples:
ultralytics/solutions/object_counter.py CHANGED
@@ -122,7 +122,7 @@ class ObjectCounter(BaseSolution):
         Display object counts on the input image or frame.
 
         Args:
-            plot_im (numpy.ndarray): The image or frame to display counts on.
+            plot_im (np.ndarray): The image or frame to display counts on.
 
         Examples:
             >>> counter = ObjectCounter()
@@ -146,7 +146,7 @@ class ObjectCounter(BaseSolution):
         object counts, and displays the results on the input image.
 
         Args:
-            im0 (numpy.ndarray): The input image or frame to be processed.
+            im0 (np.ndarray): The input image or frame to be processed.
 
         Returns:
             (SolutionResults): Contains processed image `im0`, 'in_count' (int, count of objects entering the region),
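The solutions hunks in this release are docstring-only, standardizing the frame argument `im0` on `np.ndarray`. A hedged usage sketch of the frame-in, SolutionResults-out pattern these docstrings describe; the region, model, and video path arguments are assumptions for illustration, not taken from the diff:

    # Hedged usage sketch of the im0 -> SolutionResults pattern described above.
    import cv2
    from ultralytics import solutions

    counter = solutions.ObjectCounter(region=[(20, 400), (1260, 400)], model="yolo11n.pt")  # assumed args

    cap = cv2.VideoCapture("traffic.mp4")  # placeholder video path
    while cap.isOpened():
        ok, im0 = cap.read()  # im0 (np.ndarray): the input frame
        if not ok:
            break
        results = counter.process(im0)  # SolutionResults with plot_im, in_count, out_count
        cv2.imshow("counts", results.plot_im)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    cap.release()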
ultralytics/solutions/object_cropper.py CHANGED
@@ -57,7 +57,7 @@ class ObjectCropper(BaseSolution):
         Crop detected objects from the input image and save them as separate images.
 
         Args:
-            im0 (numpy.ndarray): The input image containing detected objects.
+            im0 (np.ndarray): The input image containing detected objects.
 
         Returns:
             (SolutionResults): A SolutionResults object containing the total number of cropped objects and processed
ultralytics/solutions/queue_management.py CHANGED
@@ -50,7 +50,7 @@ class QueueManager(BaseSolution):
         Process queue management for a single frame of video.
 
         Args:
-            im0 (numpy.ndarray): Input image for processing, typically a frame from a video stream.
+            im0 (np.ndarray): Input image for processing, typically a frame from a video stream.
 
         Returns:
             (SolutionResults): Contains processed image `im0`, 'queue_count' (int, number of objects in the queue) and
ultralytics/solutions/security_alarm.py CHANGED
@@ -76,7 +76,7 @@ class SecurityAlarm(BaseSolution):
         Send an email notification with an image attachment indicating the number of objects detected.
 
         Args:
-            im0 (numpy.ndarray): The input image or frame to be attached to the email.
+            im0 (np.ndarray): The input image or frame to be attached to the email.
             records (int, optional): The number of detected objects to be included in the email message.
 
         This method encodes the input image, composes the email message with details about the detection, and sends it
@@ -121,7 +121,7 @@ class SecurityAlarm(BaseSolution):
         Monitor the frame, process object detections, and trigger alerts if thresholds are exceeded.
 
         Args:
-            im0 (numpy.ndarray): The input image or frame to be processed and annotated.
+            im0 (np.ndarray): The input image or frame to be processed and annotated.
 
         Returns:
             (SolutionResults): Contains processed image `plot_im`, 'total_tracks' (total number of tracked objects) and
ultralytics/solutions/templates/similarity-search.html CHANGED
@@ -35,7 +35,6 @@
         align-items: center;
         gap: 1rem;
         margin-bottom: 3rem;
-        animation: fadeIn 1s ease-in-out;
       }
 
       input[type="text"] {
@@ -78,7 +77,6 @@
         gap: 1.5rem;
         max-width: 1600px;
         margin: auto;
-        animation: fadeInUp 1s ease-in-out;
       }
 
       .card {
@@ -102,28 +100,6 @@
         object-fit: cover;
         display: block;
       }
-
-      @keyframes fadeIn {
-        0% {
-          opacity: 0;
-          transform: scale(0.95);
-        }
-        100% {
-          opacity: 1;
-          transform: scale(1);
-        }
-      }
-
-      @keyframes fadeInUp {
-        0% {
-          opacity: 0;
-          transform: translateY(20px);
-        }
-        100% {
-          opacity: 1;
-          transform: translateY(0);
-        }
-      }
     </style>
   </head>
   <script>
ultralytics/solutions/vision_eye.py CHANGED
@@ -42,7 +42,7 @@ class VisionEye(BaseSolution):
         Perform object detection, vision mapping, and annotation on the input image.
 
         Args:
-            im0 (numpy.ndarray): The input image for detection and annotation.
+            im0 (np.ndarray): The input image for detection and annotation.
 
         Returns:
             (SolutionResults): Object containing the annotated image and tracking statistics.
ultralytics/utils/benchmarks.py CHANGED
@@ -520,12 +520,12 @@ class ProfileModels:
         Apply iterative sigma clipping to data to remove outliers.
 
         Args:
-            data (numpy.ndarray): Input data array.
+            data (np.ndarray): Input data array.
             sigma (float): Number of standard deviations to use for clipping.
             max_iters (int): Maximum number of iterations for the clipping process.
 
         Returns:
-            (numpy.ndarray): Clipped data array with outliers removed.
+            (np.ndarray): Clipped data array with outliers removed.
         """
         data = np.array(data)
         for _ in range(max_iters):
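The docstring above describes iterative sigma clipping: repeatedly discard points farther than `sigma` standard deviations from the mean, for at most `max_iters` rounds. A hedged numpy sketch of that idea (not the package's exact implementation):

    # Hedged sketch of iterative sigma clipping as described in the docstring above.
    import numpy as np


    def sigma_clip(data, sigma: float = 2.0, max_iters: int = 3) -> np.ndarray:
        """Repeatedly drop points farther than `sigma` standard deviations from the mean."""
        data = np.array(data)
        for _ in range(max_iters):
            keep = np.abs(data - data.mean()) <= sigma * data.std()
            if keep.all():
                break
            data = data[keep]
        return data


    print(sigma_clip([9.8, 10.1, 10.0, 9.9, 42.0]))  # the 42.0 outlier is removed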
ultralytics/utils/export.py CHANGED
@@ -135,8 +135,6 @@ def export_engine(
         LOGGER.info(f'{prefix} output "{out.name}" with shape{out.shape} {out.dtype}')
 
     if dynamic:
-        if shape[0] <= 1:
-            LOGGER.warning(f"{prefix} 'dynamic=True' model requires max batch size, i.e. 'batch=16'")
         profile = builder.create_optimization_profile()
         min_shape = (1, shape[1], 32, 32)  # minimum input shape
         max_shape = (*shape[:2], *(int(max(2, workspace or 2) * d) for d in shape[2:]))  # max input shape
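With the batch-size warning removed, the dynamic branch goes straight to building a TensorRT optimization profile from min/opt/max shapes. A hedged sketch of that step (assumes `builder`, `config`, and `network` already exist as they do inside `export_engine`; the example shape and the doubled max dimensions are placeholders):

    # Hedged sketch of registering a dynamic-shape profile with the TensorRT Python API.
    shape = (1, 3, 640, 640)                                       # example input shape
    min_shape = (1, shape[1], 32, 32)                              # smallest allowed input
    opt_shape = shape                                              # shape TensorRT optimizes for
    max_shape = (shape[0], shape[1], shape[2] * 2, shape[3] * 2)   # largest allowed input (simplified)

    profile = builder.create_optimization_profile()
    for inp in (network.get_input(i) for i in range(network.num_inputs)):
        profile.set_shape(inp.name, min=min_shape, opt=opt_shape, max=max_shape)
    config.add_optimization_profile(profile)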
ultralytics/utils/instance.py CHANGED
@@ -3,7 +3,7 @@
 from collections import abc
 from itertools import repeat
 from numbers import Number
-from typing import List
+from typing import List, Union
 
 import numpy as np
 
@@ -59,7 +59,7 @@ class Bboxes:
     This class does not handle normalization or denormalization of bounding boxes.
     """
 
-    def __init__(self, bboxes, format="xyxy") -> None:
+    def __init__(self, bboxes: np.ndarray, format: str = "xyxy") -> None:
         """
         Initialize the Bboxes class with bounding box data in a specified format.
 
@@ -74,7 +74,7 @@ class Bboxes:
         self.bboxes = bboxes
         self.format = format
 
-    def convert(self, format):
+    def convert(self, format: str) -> None:
         """
         Convert bounding box format from one type to another.
 
@@ -93,7 +93,7 @@ class Bboxes:
         self.bboxes = func(self.bboxes)
         self.format = format
 
-    def areas(self):
+    def areas(self) -> np.ndarray:
         """Calculate the area of bounding boxes."""
         return (
             (self.bboxes[:, 2] - self.bboxes[:, 0]) * (self.bboxes[:, 3] - self.bboxes[:, 1])  # format xyxy
@@ -101,7 +101,7 @@ class Bboxes:
             else self.bboxes[:, 3] * self.bboxes[:, 2]  # format xywh or ltwh
         )
 
-    def mul(self, scale):
+    def mul(self, scale: Union[int, tuple, list]) -> None:
         """
         Multiply bounding box coordinates by scale factor(s).
 
@@ -118,7 +118,7 @@ class Bboxes:
         self.bboxes[:, 2] *= scale[2]
         self.bboxes[:, 3] *= scale[3]
 
-    def add(self, offset):
+    def add(self, offset: Union[int, tuple, list]) -> None:
         """
         Add offset to bounding box coordinates.
 
@@ -135,12 +135,12 @@ class Bboxes:
         self.bboxes[:, 2] += offset[2]
         self.bboxes[:, 3] += offset[3]
 
-    def __len__(self):
+    def __len__(self) -> int:
         """Return the number of bounding boxes."""
         return len(self.bboxes)
 
     @classmethod
-    def concatenate(cls, boxes_list: List["Bboxes"], axis=0) -> "Bboxes":
+    def concatenate(cls, boxes_list: List["Bboxes"], axis: int = 0) -> "Bboxes":
         """
         Concatenate a list of Bboxes objects into a single Bboxes object.
 
@@ -163,7 +163,7 @@ class Bboxes:
             return boxes_list[0]
         return cls(np.concatenate([b.bboxes for b in boxes_list], axis=axis))
 
-    def __getitem__(self, index) -> "Bboxes":
+    def __getitem__(self, index: Union[int, np.ndarray, slice]) -> "Bboxes":
         """
         Retrieve a specific bounding box or a set of bounding boxes using indexing.
 
@@ -220,13 +220,20 @@ class Instances:
         ... )
     """
 
-    def __init__(self, bboxes, segments=None, keypoints=None, bbox_format="xywh", normalized=True) -> None:
+    def __init__(
+        self,
+        bboxes: np.ndarray,
+        segments: np.ndarray = None,
+        keypoints: np.ndarray = None,
+        bbox_format: str = "xywh",
+        normalized: bool = True,
+    ) -> None:
         """
         Initialize the Instances object with bounding boxes, segments, and keypoints.
 
         Args:
             bboxes (np.ndarray): Bounding boxes with shape (N, 4).
-            segments (List | np.ndarray, optional): Segmentation masks.
+            segments (np.ndarray, optional): Segmentation masks.
             keypoints (np.ndarray, optional): Keypoints with shape (N, 17, 3) in format (x, y, visible).
             bbox_format (str): Format of bboxes.
             normalized (bool): Whether the coordinates are normalized.
@@ -236,7 +243,7 @@ class Instances:
         self.normalized = normalized
         self.segments = segments
 
-    def convert_bbox(self, format):
+    def convert_bbox(self, format: str) -> None:
         """
         Convert bounding box format.
 
@@ -246,11 +253,11 @@ class Instances:
         self._bboxes.convert(format=format)
 
     @property
-    def bbox_areas(self):
+    def bbox_areas(self) -> np.ndarray:
         """Calculate the area of bounding boxes."""
         return self._bboxes.areas()
 
-    def scale(self, scale_w, scale_h, bbox_only=False):
+    def scale(self, scale_w: float, scale_h: float, bbox_only: bool = False):
         """
         Scale coordinates by given factors.
 
@@ -268,7 +275,7 @@ class Instances:
             self.keypoints[..., 0] *= scale_w
             self.keypoints[..., 1] *= scale_h
 
-    def denormalize(self, w, h):
+    def denormalize(self, w: int, h: int) -> None:
         """
         Convert normalized coordinates to absolute coordinates.
 
@@ -286,7 +293,7 @@ class Instances:
             self.keypoints[..., 1] *= h
         self.normalized = False
 
-    def normalize(self, w, h):
+    def normalize(self, w: int, h: int) -> None:
         """
         Convert absolute coordinates to normalized coordinates.
 
@@ -304,7 +311,7 @@ class Instances:
             self.keypoints[..., 1] /= h
         self.normalized = True
 
-    def add_padding(self, padw, padh):
+    def add_padding(self, padw: int, padh: int) -> None:
         """
         Add padding to coordinates.
 
@@ -320,7 +327,7 @@ class Instances:
             self.keypoints[..., 0] += padw
             self.keypoints[..., 1] += padh
 
-    def __getitem__(self, index) -> "Instances":
+    def __getitem__(self, index: Union[int, np.ndarray, slice]) -> "Instances":
         """
         Retrieve a specific instance or a set of instances using indexing.
 
@@ -346,7 +353,7 @@ class Instances:
             normalized=self.normalized,
         )
 
-    def flipud(self, h):
+    def flipud(self, h: int) -> None:
         """
         Flip coordinates vertically.
 
@@ -364,7 +371,7 @@ class Instances:
         if self.keypoints is not None:
             self.keypoints[..., 1] = h - self.keypoints[..., 1]
 
-    def fliplr(self, w):
+    def fliplr(self, w: int) -> None:
         """
         Flip coordinates horizontally.
 
@@ -382,7 +389,7 @@ class Instances:
         if self.keypoints is not None:
             self.keypoints[..., 0] = w - self.keypoints[..., 0]
 
-    def clip(self, w, h):
+    def clip(self, w: int, h: int) -> None:
         """
         Clip coordinates to stay within image boundaries.
 
@@ -409,7 +416,7 @@ class Instances:
             self.keypoints[..., 0] = self.keypoints[..., 0].clip(0, w)
             self.keypoints[..., 1] = self.keypoints[..., 1].clip(0, h)
 
-    def remove_zero_area_boxes(self):
+    def remove_zero_area_boxes(self) -> np.ndarray:
         """
         Remove zero-area boxes, i.e. after clipping some boxes may have zero width or height.
 
@@ -425,7 +432,7 @@ class Instances:
             self.keypoints = self.keypoints[good]
         return good
 
-    def update(self, bboxes, segments=None, keypoints=None):
+    def update(self, bboxes: np.ndarray, segments: np.ndarray = None, keypoints: np.ndarray = None):
         """
         Update instance variables.
 
@@ -440,7 +447,7 @@ class Instances:
         if keypoints is not None:
             self.keypoints = keypoints
 
-    def __len__(self):
+    def __len__(self) -> int:
         """Return the number of instances."""
         return len(self.bboxes)
 
@@ -492,6 +499,6 @@ class Instances:
         return cls(cat_boxes, cat_segments, cat_keypoints, bbox_format, normalized)
 
     @property
-    def bboxes(self):
+    def bboxes(self) -> np.ndarray:
         """Return bounding boxes."""
         return self._bboxes.bboxes
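These hunks only add type hints; behavior is unchanged. A hedged usage sketch exercising the newly annotated Bboxes API, using the signatures shown in the diff (the box values are placeholders):

    # Hedged usage sketch of the annotated Bboxes API.
    import numpy as np
    from ultralytics.utils.instance import Bboxes

    boxes = Bboxes(np.array([[10.0, 10.0, 50.0, 80.0]], dtype=np.float32), format="xyxy")
    print(len(boxes), boxes.areas())   # 1, [2800.]

    boxes.convert("xywh")              # in-place format conversion
    boxes.mul((2, 2, 2, 2))            # scale x, y, w, h
    boxes.add((5, 5, 0, 0))            # offset coordinates

    merged = Bboxes.concatenate([boxes, boxes])
    print(merged.bboxes.shape)         # (2, 4)
    first = merged[np.array([0])]      # indexing returns a new Bboxes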
ultralytics/utils/ops.py CHANGED
@@ -343,11 +343,11 @@ def clip_boxes(boxes, shape):
     Clip bounding boxes to image boundaries.
 
     Args:
-        boxes (torch.Tensor | numpy.ndarray): Bounding boxes to clip.
+        boxes (torch.Tensor | np.ndarray): Bounding boxes to clip.
         shape (tuple): Image shape as (height, width).
 
     Returns:
-        (torch.Tensor | numpy.ndarray): Clipped bounding boxes.
+        (torch.Tensor | np.ndarray): Clipped bounding boxes.
     """
     if isinstance(boxes, torch.Tensor):  # faster individually (WARNING: inplace .clamp_() Apple MPS bug)
         boxes[..., 0] = boxes[..., 0].clamp(0, shape[1])  # x1
@@ -365,11 +365,11 @@ def clip_coords(coords, shape):
     Clip line coordinates to image boundaries.
 
     Args:
-        coords (torch.Tensor | numpy.ndarray): Line coordinates to clip.
+        coords (torch.Tensor | np.ndarray): Line coordinates to clip.
         shape (tuple): Image shape as (height, width).
 
     Returns:
-        (torch.Tensor | numpy.ndarray): Clipped coordinates.
+        (torch.Tensor | np.ndarray): Clipped coordinates.
     """
     if isinstance(coords, torch.Tensor):  # faster individually (WARNING: inplace .clamp_() Apple MPS bug)
         coords[..., 0] = coords[..., 0].clamp(0, shape[1])  # x
@@ -564,10 +564,10 @@ def xyxyxyxy2xywhr(x):
     Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh, rotation] format.
 
     Args:
-        x (numpy.ndarray | torch.Tensor): Input box corners with shape (N, 8) in [xy1, xy2, xy3, xy4] format.
+        x (np.ndarray | torch.Tensor): Input box corners with shape (N, 8) in [xy1, xy2, xy3, xy4] format.
 
     Returns:
-        (numpy.ndarray | torch.Tensor): Converted data in [cx, cy, w, h, rotation] format with shape (N, 5).
+        (np.ndarray | torch.Tensor): Converted data in [cx, cy, w, h, rotation] format with shape (N, 5).
             Rotation values are in radians from 0 to pi/2.
     """
     is_torch = isinstance(x, torch.Tensor)
@@ -587,11 +587,11 @@ def xywhr2xyxyxyxy(x):
     Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, xy3, xy4] format.
 
     Args:
-        x (numpy.ndarray | torch.Tensor): Boxes in [cx, cy, w, h, rotation] format with shape (N, 5) or (B, N, 5).
+        x (np.ndarray | torch.Tensor): Boxes in [cx, cy, w, h, rotation] format with shape (N, 5) or (B, N, 5).
             Rotation values should be in radians from 0 to pi/2.
 
     Returns:
-        (numpy.ndarray | torch.Tensor): Converted corner points with shape (N, 4, 2) or (B, N, 4, 2).
+        (np.ndarray | torch.Tensor): Converted corner points with shape (N, 4, 2) or (B, N, 4, 2).
     """
     cos, sin, cat, stack = (
         (torch.cos, torch.sin, torch.cat, torch.stack)
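The ops.py changes are also docstring-only, abbreviating numpy.ndarray as np.ndarray. A hedged sketch exercising the documented shapes of these helpers (input values are placeholders):

    # Hedged sketch: exercising the documented shapes of the ops above.
    import numpy as np
    from ultralytics.utils.ops import clip_boxes, xywhr2xyxyxyxy, xyxyxyxy2xywhr

    obb = np.array([[100.0, 100.0, 60.0, 30.0, 0.4]], dtype=np.float32)  # (N, 5): cx, cy, w, h, r
    corners = xywhr2xyxyxyxy(obb)                   # (N, 4, 2) corner points
    restored = xyxyxyxy2xywhr(corners.reshape(-1, 8))  # back to (N, 5), rotation in [0, pi/2)
    print(corners.shape, restored.shape)

    boxes = np.array([[-5.0, 10.0, 700.0, 300.0]])
    print(clip_boxes(boxes, (480, 640)))            # clipped to a 640x480 image -> [[0., 10., 640., 300.]]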