ultralytics 8.2.61__py3-none-any.whl → 8.2.62__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ultralytics might be problematic. Click here for more details.

@@ -19,7 +19,28 @@ from ultralytics.utils.torch_utils import smart_inference_mode
19
19
 
20
20
 
21
21
  class BaseTensor(SimpleClass):
22
- """Base tensor class with additional methods for easy manipulation and device handling."""
22
+ """
23
+ Base tensor class with additional methods for easy manipulation and device handling.
24
+
25
+ Attributes:
26
+ data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints.
27
+ orig_shape (Tuple[int, int]): Original shape of the image, typically in the format (height, width).
28
+
29
+ Methods:
30
+ cpu: Return a copy of the tensor stored in CPU memory.
31
+ numpy: Returns a copy of the tensor as a numpy array.
32
+ cuda: Moves the tensor to GPU memory, returning a new instance if necessary.
33
+ to: Return a copy of the tensor with the specified device and dtype.
34
+
35
+ Examples:
36
+ >>> import torch
37
+ >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
38
+ >>> orig_shape = (720, 1280)
39
+ >>> base_tensor = BaseTensor(data, orig_shape)
40
+ >>> cpu_tensor = base_tensor.cpu()
41
+ >>> numpy_array = base_tensor.numpy()
42
+ >>> gpu_tensor = base_tensor.cuda()
43
+ """
23
44
 
24
45
  def __init__(self, data, orig_shape) -> None:
25
46
  """
@@ -27,20 +48,13 @@ class BaseTensor(SimpleClass):
27
48
 
28
49
  Args:
29
50
  data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints.
30
- orig_shape (tuple): Original shape of the image, typically in the format (height, width).
31
-
32
- Returns:
33
- (None)
34
-
35
- Example:
36
- ```python
37
- import torch
38
- from ultralytics.engine.results import BaseTensor
51
+ orig_shape (Tuple[int, int]): Original shape of the image in (height, width) format.
39
52
 
40
- data = torch.tensor([[1, 2, 3], [4, 5, 6]])
41
- orig_shape = (720, 1280)
42
- base_tensor = BaseTensor(data, orig_shape)
43
- ```
53
+ Examples:
54
+ >>> import torch
55
+ >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
56
+ >>> orig_shape = (720, 1280)
57
+ >>> base_tensor = BaseTensor(data, orig_shape)
44
58
  """
45
59
  assert isinstance(data, (torch.Tensor, np.ndarray)), "data must be torch.Tensor or np.ndarray"
46
60
  self.data = data
@@ -48,31 +62,124 @@ class BaseTensor(SimpleClass):
48
62
 
49
63
  @property
50
64
  def shape(self):
51
- """Returns the shape of the underlying data tensor for easier manipulation and device handling."""
65
+ """
66
+ Returns the shape of the underlying data tensor.
67
+
68
+ Returns:
69
+ (Tuple[int, ...]): The shape of the data tensor.
70
+
71
+ Examples:
72
+ >>> data = torch.rand(100, 4)
73
+ >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280))
74
+ >>> print(base_tensor.shape)
75
+ torch.Size([100, 4])
76
+ """
52
77
  return self.data.shape
53
78
 
54
79
  def cpu(self):
55
- """Return a copy of the tensor stored in CPU memory."""
80
+ """
81
+ Returns a copy of the tensor stored in CPU memory.
82
+
83
+ Returns:
84
+ (BaseTensor): A new BaseTensor object with the data tensor moved to CPU memory.
85
+
86
+ Examples:
87
+ >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]]).cuda()
88
+ >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280))
89
+ >>> cpu_tensor = base_tensor.cpu()
90
+ >>> isinstance(cpu_tensor, BaseTensor)
91
+ True
92
+ >>> cpu_tensor.data.device
93
+ device(type='cpu')
94
+ """
56
95
  return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.cpu(), self.orig_shape)
57
96
 
58
97
  def numpy(self):
59
- """Returns a copy of the tensor as a numpy array for efficient numerical operations."""
98
+ """
99
+ Returns a copy of the tensor as a numpy array.
100
+
101
+ Returns:
102
+ (BaseTensor): A BaseTensor whose data is a numpy array; returns self if the data is already a numpy array.
103
+
104
+ Examples:
105
+ >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
106
+ >>> orig_shape = (720, 1280)
107
+ >>> base_tensor = BaseTensor(data, orig_shape)
108
+ >>> numpy_array = base_tensor.numpy()
109
+ >>> print(type(numpy_array.data))
110
+ <class 'numpy.ndarray'>
111
+ """
60
112
  return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.numpy(), self.orig_shape)
61
113
 
62
114
  def cuda(self):
63
- """Moves the tensor to GPU memory, returning a new instance if necessary."""
115
+ """
116
+ Moves the tensor to GPU memory.
117
+
118
+ Returns:
119
+ (BaseTensor): A new BaseTensor instance with the data moved to GPU memory; numpy array data is
120
+ first converted to a torch.Tensor.
121
+
122
+ Examples:
123
+ >>> import torch
124
+ >>> from ultralytics.engine.results import BaseTensor
125
+ >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
126
+ >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280))
127
+ >>> gpu_tensor = base_tensor.cuda()
128
+ >>> print(gpu_tensor.data.device)
129
+ cuda:0
130
+ """
64
131
  return self.__class__(torch.as_tensor(self.data).cuda(), self.orig_shape)
65
132
 
66
133
  def to(self, *args, **kwargs):
67
- """Return a copy of the tensor with the specified device and dtype."""
134
+ """
135
+ Return a copy of the tensor with the specified device and dtype.
136
+
137
+ Args:
138
+ *args (Any): Variable length argument list to be passed to torch.Tensor.to().
139
+ **kwargs (Any): Arbitrary keyword arguments to be passed to torch.Tensor.to().
140
+
141
+ Returns:
142
+ (BaseTensor): A new BaseTensor instance with the data moved to the specified device and/or dtype.
143
+
144
+ Examples:
145
+ >>> base_tensor = BaseTensor(torch.randn(3, 4), orig_shape=(480, 640))
146
+ >>> cuda_tensor = base_tensor.to('cuda')
147
+ >>> float16_tensor = base_tensor.to(dtype=torch.float16)
148
+ """
68
149
  return self.__class__(torch.as_tensor(self.data).to(*args, **kwargs), self.orig_shape)
69
150
 
70
151
  def __len__(self): # override len(results)
71
- """Return the length of the underlying data tensor."""
152
+ """
153
+ Returns the length of the underlying data tensor.
154
+
155
+ Returns:
156
+ (int): The number of elements in the first dimension of the data tensor.
157
+
158
+ Examples:
159
+ >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
160
+ >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280))
161
+ >>> len(base_tensor)
162
+ 2
163
+ """
72
164
  return len(self.data)
73
165
 
74
166
  def __getitem__(self, idx):
75
- """Return a new BaseTensor instance containing the specified indexed elements of the data tensor."""
167
+ """
168
+ Returns a new BaseTensor instance containing the specified indexed elements of the data tensor.
169
+
170
+ Args:
171
+ idx (int | List[int] | torch.Tensor): Index or indices to select from the data tensor.
172
+
173
+ Returns:
174
+ (BaseTensor): A new BaseTensor instance containing the indexed data.
175
+
176
+ Examples:
177
+ >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
178
+ >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280))
179
+ >>> result = base_tensor[0] # Select the first row
180
+ >>> print(result.data)
181
+ tensor([1, 2, 3])
182
+ """
76
183
  return self.__class__(self.data[idx], self.orig_shape)
77
184
 
78
185
 
@@ -80,31 +187,43 @@ class Results(SimpleClass):
80
187
  """
81
188
  A class for storing and manipulating inference results.
82
189
 
190
+ This class encapsulates the functionality for handling detection, segmentation, pose estimation,
191
+ and classification results from YOLO models.
192
+
83
193
  Attributes:
84
194
  orig_img (numpy.ndarray): Original image as a numpy array.
85
- orig_shape (tuple): Original image shape in (height, width) format.
86
- boxes (Boxes, optional): Object containing detection bounding boxes.
87
- masks (Masks, optional): Object containing detection masks.
88
- probs (Probs, optional): Object containing class probabilities for classification tasks.
89
- keypoints (Keypoints, optional): Object containing detected keypoints for each object.
90
- speed (dict): Dictionary of preprocess, inference, and postprocess speeds (ms/image).
91
- names (dict): Dictionary of class names.
195
+ orig_shape (Tuple[int, int]): Original image shape in (height, width) format.
196
+ boxes (Boxes | None): Object containing detection bounding boxes.
197
+ masks (Masks | None): Object containing detection masks.
198
+ probs (Probs | None): Object containing class probabilities for classification tasks.
199
+ keypoints (Keypoints | None): Object containing detected keypoints for each object.
200
+ obb (OBB | None): Object containing oriented bounding boxes.
201
+ speed (Dict[str, float | None]): Dictionary of preprocess, inference, and postprocess speeds.
202
+ names (Dict[int, str]): Dictionary mapping class IDs to class names.
92
203
  path (str): Path to the image file.
204
+ _keys (Tuple[str, ...]): Tuple of attribute names for internal use.
93
205
 
94
206
  Methods:
95
- update(boxes=None, masks=None, probs=None, obb=None): Updates object attributes with new detection results.
96
- cpu(): Returns a copy of the Results object with all tensors on CPU memory.
97
- numpy(): Returns a copy of the Results object with all tensors as numpy arrays.
98
- cuda(): Returns a copy of the Results object with all tensors on GPU memory.
99
- to(*args, **kwargs): Returns a copy of the Results object with tensors on a specified device and dtype.
100
- new(): Returns a new Results object with the same image, path, and names.
101
- plot(...): Plots detection results on an input image, returning an annotated image.
102
- show(): Show annotated results to screen.
103
- save(filename): Save annotated results to file.
104
- verbose(): Returns a log string for each task, detailing detections and classifications.
105
- save_txt(txt_file, save_conf=False): Saves detection results to a text file.
106
- save_crop(save_dir, file_name=Path("im.jpg")): Saves cropped detection images.
107
- tojson(normalize=False): Converts detection results to JSON format.
207
+ update: Updates object attributes with new detection results.
208
+ cpu: Returns a copy of the Results object with all tensors on CPU memory.
209
+ numpy: Returns a copy of the Results object with all tensors as numpy arrays.
210
+ cuda: Returns a copy of the Results object with all tensors on GPU memory.
211
+ to: Returns a copy of the Results object with tensors on a specified device and dtype.
212
+ new: Returns a new Results object with the same image, path, and names.
213
+ plot: Plots detection results on an input image, returning an annotated image.
214
+ show: Shows annotated results on screen.
215
+ save: Saves annotated results to file.
216
+ verbose: Returns a log string for each task, detailing detections and classifications.
217
+ save_txt: Saves detection results to a text file.
218
+ save_crop: Saves cropped detection images.
219
+ tojson: Converts detection results to JSON format.
220
+
221
+ Examples:
222
+ >>> results = model("path/to/image.jpg")
223
+ >>> for result in results:
224
+ ... print(result.boxes) # Print detection boxes
225
+ ... result.show() # Display the annotated image
226
+ ... result.save(filename='result.jpg') # Save annotated image
108
227
  """
109
228
 
110
229
  def __init__(
@@ -116,26 +235,26 @@ class Results(SimpleClass):
116
235
  Args:
117
236
  orig_img (numpy.ndarray): The original image as a numpy array.
118
237
  path (str): The path to the image file.
119
- names (dict): A dictionary of class names.
120
- boxes (torch.tensor, optional): A 2D tensor of bounding box coordinates for each detection.
121
- masks (torch.tensor, optional): A 3D tensor of detection masks, where each mask is a binary image.
122
- probs (torch.tensor, optional): A 1D tensor of probabilities of each class for classification task.
123
- keypoints (torch.tensor, optional): A 2D tensor of keypoint coordinates for each detection. For default pose
124
- model, Keypoint indices for human body pose estimation are:
125
- 0: Nose, 1: Left Eye, 2: Right Eye, 3: Left Ear, 4: Right Ear
126
- 5: Left Shoulder, 6: Right Shoulder, 7: Left Elbow, 8: Right Elbow
127
- 9: Left Wrist, 10: Right Wrist, 11: Left Hip, 12: Right Hip
128
- 13: Left Knee, 14: Right Knee, 15: Left Ankle, 16: Right Ankle
129
- obb (torch.tensor, optional): A 2D tensor of oriented bounding box coordinates for each detection.
130
- speed (dict, optional): A dictionary containing preprocess, inference, and postprocess speeds (ms/image).
131
-
132
- Returns:
133
- None
134
-
135
- Example:
136
- ```python
137
- results = model("path/to/image.jpg")
138
- ```
238
+ names (Dict): A dictionary of class names.
239
+ boxes (torch.Tensor | None): A 2D tensor of bounding box coordinates for each detection.
240
+ masks (torch.Tensor | None): A 3D tensor of detection masks, where each mask is a binary image.
241
+ probs (torch.Tensor | None): A 1D tensor of probabilities of each class for classification task.
242
+ keypoints (torch.Tensor | None): A 2D tensor of keypoint coordinates for each detection.
243
+ obb (torch.Tensor | None): A 2D tensor of oriented bounding box coordinates for each detection.
244
+ speed (Dict | None): A dictionary containing preprocess, inference, and postprocess speeds (ms/image).
245
+
246
+ Examples:
247
+ >>> results = model("path/to/image.jpg")
248
+ >>> result = results[0] # Get the first result
249
+ >>> boxes = result.boxes # Get the boxes for the first result
250
+ >>> masks = result.masks # Get the masks for the first result
251
+
252
+ Notes:
253
+ For the default pose model, keypoint indices for human body pose estimation are:
254
+ 0: Nose, 1: Left Eye, 2: Right Eye, 3: Left Ear, 4: Right Ear
255
+ 5: Left Shoulder, 6: Right Shoulder, 7: Left Elbow, 8: Right Elbow
256
+ 9: Left Wrist, 10: Right Wrist, 11: Left Hip, 12: Right Hip
257
+ 13: Left Knee, 14: Right Knee, 15: Left Ankle, 16: Right Ankle
139
258
  """
140
259
  self.orig_img = orig_img
141
260
  self.orig_shape = orig_img.shape[:2]
@@ -151,18 +270,59 @@ class Results(SimpleClass):
151
270
  self._keys = "boxes", "masks", "probs", "keypoints", "obb"
152
271
 
153
272
  def __getitem__(self, idx):
154
- """Return a Results object for a specific index of inference results."""
273
+ """
274
+ Return a Results object for a specific index of inference results.
275
+
276
+ Args:
277
+ idx (int | slice): Index or slice to retrieve from the Results object.
278
+
279
+ Returns:
280
+ (Results): A new Results object containing the specified subset of inference results.
281
+
282
+ Examples:
283
+ >>> results = model('path/to/image.jpg') # Perform inference
284
+ >>> single_result = results[0] # Get the first result
285
+ >>> subset_results = results[1:4] # Get a slice of results
286
+ """
155
287
  return self._apply("__getitem__", idx)
156
288
 
157
289
  def __len__(self):
158
- """Return the number of detections in the Results object from a non-empty attribute set (boxes, masks, etc.)."""
290
+ """
291
+ Return the number of detections in the Results object.
292
+
293
+ Returns:
294
+ (int): The number of detections, determined by the length of the first non-empty attribute
295
+ (boxes, masks, probs, keypoints, or obb).
296
+
297
+ Examples:
298
+ >>> results = Results(orig_img, path, names, boxes=torch.rand(5, 4))
299
+ >>> len(results)
300
+ 5
301
+ """
159
302
  for k in self._keys:
160
303
  v = getattr(self, k)
161
304
  if v is not None:
162
305
  return len(v)
163
306
 
164
307
  def update(self, boxes=None, masks=None, probs=None, obb=None):
165
- """Updates detection results attributes including boxes, masks, probs, and obb with new data."""
308
+ """
309
+ Updates the Results object with new detection data.
310
+
311
+ This method allows updating the boxes, masks, probabilities, and oriented bounding boxes (OBB) of the
312
+ Results object. It ensures that boxes are clipped to the original image shape.
313
+
314
+ Args:
315
+ boxes (torch.Tensor | None): A tensor of shape (N, 6) containing bounding box coordinates and
316
+ confidence scores. The format is (x1, y1, x2, y2, conf, class).
317
+ masks (torch.Tensor | None): A tensor of shape (N, H, W) containing segmentation masks.
318
+ probs (torch.Tensor | None): A tensor of shape (num_classes,) containing class probabilities.
319
+ obb (torch.Tensor | None): A tensor of shape (N, 5) containing oriented bounding box coordinates.
320
+
321
+ Examples:
322
+ >>> results = model('image.jpg')
323
+ >>> new_boxes = torch.tensor([[100, 100, 200, 200, 0.9, 0]])
324
+ >>> results[0].update(boxes=new_boxes)
325
+ """
166
326
  if boxes is not None:
167
327
  self.boxes = Boxes(ops.clip_boxes(boxes, self.orig_shape), self.orig_shape)
168
328
  if masks is not None:
@@ -174,24 +334,23 @@ class Results(SimpleClass):
174
334
 
175
335
  def _apply(self, fn, *args, **kwargs):
176
336
  """
177
- Applies a function to all non-empty attributes and returns a new Results object with modified attributes. This
178
- function is internally called by methods like .to(), .cuda(), .cpu(), etc.
337
+ Applies a function to all non-empty attributes and returns a new Results object with modified attributes.
338
+
339
+ This method is internally called by methods like .to(), .cuda(), .cpu(), etc.
179
340
 
180
341
  Args:
181
342
  fn (str): The name of the function to apply.
182
- *args: Variable length argument list to pass to the function.
183
- **kwargs: Arbitrary keyword arguments to pass to the function.
343
+ *args (Any): Variable length argument list to pass to the function.
344
+ **kwargs (Any): Arbitrary keyword arguments to pass to the function.
184
345
 
185
346
  Returns:
186
347
  (Results): A new Results object with attributes modified by the applied function.
187
348
 
188
- Example:
189
- ```python
190
- results = model("path/to/image.jpg")
191
- for result in results:
192
- result_cuda = result.cuda()
193
- result_cpu = result.cpu()
194
- ```
349
+ Examples:
350
+ >>> results = model("path/to/image.jpg")
351
+ >>> for result in results:
352
+ ... result_cuda = result.cuda()
353
+ ... result_cpu = result.cpu()
195
354
  """
196
355
  r = self.new()
197
356
  for k in self._keys:
@@ -201,23 +360,86 @@ class Results(SimpleClass):
201
360
  return r
202
361
 
203
362
  def cpu(self):
204
- """Returns a copy of the Results object with all its tensors moved to CPU memory."""
363
+ """
364
+ Returns a copy of the Results object with all its tensors moved to CPU memory.
365
+
366
+ This method creates a new Results object with all tensor attributes (boxes, masks, probs, keypoints, obb)
367
+ transferred to CPU memory. It's useful for moving data from GPU to CPU for further processing or saving.
368
+
369
+ Returns:
370
+ (Results): A new Results object with all tensor attributes on CPU memory.
371
+
372
+ Examples:
373
+ >>> results = model('path/to/image.jpg') # Perform inference
374
+ >>> cpu_result = results[0].cpu() # Move the first result to CPU
375
+ >>> print(cpu_result.boxes.device) # Output: cpu
376
+ """
205
377
  return self._apply("cpu")
206
378
 
207
379
  def numpy(self):
208
- """Returns a copy of the Results object with all tensors as numpy arrays."""
380
+ """
381
+ Converts all tensors in the Results object to numpy arrays.
382
+
383
+ Returns:
384
+ (Results): A new Results object with all tensors converted to numpy arrays.
385
+
386
+ Examples:
387
+ >>> results = model('path/to/image.jpg')
388
+ >>> numpy_result = results[0].numpy()
389
+ >>> type(numpy_result.boxes.data)
390
+ <class 'numpy.ndarray'>
391
+
392
+ Notes:
393
+ This method creates a new Results object, leaving the original unchanged. It's useful for
394
+ interoperability with numpy-based libraries or when CPU-based operations are required.
395
+ """
209
396
  return self._apply("numpy")
210
397
 
211
398
  def cuda(self):
212
- """Moves all tensors in the Results object to GPU memory."""
399
+ """
400
+ Moves all tensors in the Results object to GPU memory.
401
+
402
+ Returns:
403
+ (Results): A new Results object with all tensors moved to CUDA device.
404
+
405
+ Examples:
406
+ >>> results = model("path/to/image.jpg")
407
+ >>> cuda_results = results[0].cuda() # Move first result to GPU
408
+ >>> for result in results:
409
+ ... result_cuda = result.cuda() # Move each result to GPU
410
+ """
213
411
  return self._apply("cuda")
214
412
 
215
413
  def to(self, *args, **kwargs):
216
- """Moves all tensors in the Results object to the specified device and dtype."""
414
+ """
415
+ Moves all tensors in the Results object to the specified device and dtype.
416
+
417
+ Args:
418
+ *args (Any): Variable length argument list to be passed to torch.Tensor.to().
419
+ **kwargs (Any): Arbitrary keyword arguments to be passed to torch.Tensor.to().
420
+
421
+ Returns:
422
+ (Results): A new Results object with all tensors moved to the specified device and dtype.
423
+
424
+ Examples:
425
+ >>> results = model("path/to/image.jpg")
426
+ >>> result_cuda = results[0].to("cuda") # Move first result to GPU
427
+ >>> result_cpu = results[0].to("cpu") # Move first result to CPU
428
+ >>> result_half = results[0].to(dtype=torch.float16) # Convert first result to half precision
429
+ """
217
430
  return self._apply("to", *args, **kwargs)
218
431
 
219
432
  def new(self):
220
- """Returns a new Results object with the same image, path, names, and speed attributes."""
433
+ """
434
+ Creates a new Results object with the same image, path, names, and speed attributes.
435
+
436
+ Returns:
437
+ (Results): A new Results object with copied attributes from the original instance.
438
+
439
+ Examples:
440
+ >>> results = model("path/to/image.jpg")
441
+ >>> new_result = results[0].new()
442
+ """
221
443
  return Results(orig_img=self.orig_img, path=self.path, names=self.names, speed=self.speed)
222
444
 
223
445
  def plot(
@@ -240,42 +462,34 @@ class Results(SimpleClass):
240
462
  filename=None,
241
463
  ):
242
464
  """
243
- Plots the detection results on an input RGB image. Accepts a numpy array (cv2) or a PIL Image.
465
+ Plots detection results on an input RGB image.
244
466
 
245
467
  Args:
246
- conf (bool): Whether to plot the detection confidence score.
247
- line_width (float, optional): The line width of the bounding boxes. If None, it is scaled to the image size.
248
- font_size (float, optional): The font size of the text. If None, it is scaled to the image size.
249
- font (str): The font to use for the text.
468
+ conf (bool): Whether to plot detection confidence scores.
469
+ line_width (float | None): Line width of bounding boxes. If None, scaled to image size.
470
+ font_size (float | None): Font size for text. If None, scaled to image size.
471
+ font (str): Font to use for text.
250
472
  pil (bool): Whether to return the image as a PIL Image.
251
- img (numpy.ndarray): Plot to another image. if not, plot to original image.
252
- im_gpu (torch.Tensor): Normalized image in gpu with shape (1, 3, 640, 640), for faster mask plotting.
253
- kpt_radius (int, optional): Radius of the drawn keypoints. Default is 5.
473
+ img (np.ndarray | None): Image to plot on. If None, uses original image.
474
+ im_gpu (torch.Tensor | None): Normalized image on GPU for faster mask plotting.
475
+ kpt_radius (int): Radius of drawn keypoints.
254
476
  kpt_line (bool): Whether to draw lines connecting keypoints.
255
- labels (bool): Whether to plot the label of bounding boxes.
256
- boxes (bool): Whether to plot the bounding boxes.
257
- masks (bool): Whether to plot the masks.
258
- probs (bool): Whether to plot classification probability.
259
- show (bool): Whether to display the annotated image directly.
260
- save (bool): Whether to save the annotated image to `filename`.
261
- filename (str): Filename to save image to if save is True.
262
-
263
- Returns:
264
- (numpy.ndarray): A numpy array of the annotated image.
265
-
266
- Example:
267
- ```python
268
- from PIL import Image
269
- from ultralytics import YOLO
270
-
271
- model = YOLO('yolov8n.pt')
272
- results = model('bus.jpg') # results list
273
- for r in results:
274
- im_array = r.plot() # plot a BGR numpy array of predictions
275
- im = Image.fromarray(im_array[..., ::-1]) # RGB PIL image
276
- im.show() # show image
277
- im.save('results.jpg') # save image
278
- ```
477
+ labels (bool): Whether to plot labels of bounding boxes.
478
+ boxes (bool): Whether to plot bounding boxes.
479
+ masks (bool): Whether to plot masks.
480
+ probs (bool): Whether to plot classification probabilities.
481
+ show (bool): Whether to display the annotated image.
482
+ save (bool): Whether to save the annotated image.
483
+ filename (str | None): Filename to save image if save is True.
484
+
485
+ Returns:
486
+ (np.ndarray): Annotated image as a numpy array.
487
+
488
+ Examples:
489
+ >>> results = model('image.jpg')
490
+ >>> for result in results:
491
+ ... im = result.plot()
492
+ ... print(im.shape) # plot() returns a numpy array, which has no .show() method
279
493
  """
280
494
  if img is None and isinstance(self.orig_img, torch.Tensor):
281
495
  img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).to(torch.uint8).cpu().numpy()
@@ -339,18 +553,73 @@ class Results(SimpleClass):
339
553
  return annotator.result()
340
554
 
341
555
  def show(self, *args, **kwargs):
342
- """Show the image with annotated inference results."""
556
+ """
557
+ Display the image with annotated inference results.
558
+
559
+ This method plots the detection results on the original image and displays it. It's a convenient way to
560
+ visualize the model's predictions directly.
561
+
562
+ Args:
563
+ *args (Any): Variable length argument list to be passed to the `plot()` method.
564
+ **kwargs (Any): Arbitrary keyword arguments to be passed to the `plot()` method.
565
+
566
+ Examples:
567
+ >>> results = model('path/to/image.jpg')
568
+ >>> results[0].show() # Display the first result
569
+ >>> for result in results:
570
+ ... result.show() # Display all results
571
+ """
343
572
  self.plot(show=True, *args, **kwargs)
344
573
 
345
574
  def save(self, filename=None, *args, **kwargs):
346
- """Save annotated inference results image to file."""
575
+ """
576
+ Saves annotated inference results image to file.
577
+
578
+ This method plots the detection results on the original image and saves the annotated image to a file. It
579
+ utilizes the `plot` method to generate the annotated image and then saves it to the specified filename.
580
+
581
+ Args:
582
+ filename (str | Path | None): The filename to save the annotated image. If None, a default filename
583
+ is generated based on the original image path.
584
+ *args (Any): Variable length argument list to be passed to the `plot` method.
585
+ **kwargs (Any): Arbitrary keyword arguments to be passed to the `plot` method.
586
+
587
+ Examples:
588
+ >>> results = model('path/to/image.jpg')
589
+ >>> for result in results:
590
+ ... result.save('annotated_image.jpg')
591
+ >>> # Or with custom plot arguments
592
+ >>> for result in results:
593
+ ... result.save('annotated_image.jpg', conf=False, line_width=2)
594
+ """
347
595
  if not filename:
348
596
  filename = f"results_{Path(self.path).name}"
349
597
  self.plot(save=True, filename=filename, *args, **kwargs)
350
598
  return filename
351
599
 
352
600
  def verbose(self):
353
- """Returns a log string for each task in the results, detailing detection and classification outcomes."""
601
+ """
602
+ Returns a log string for each task in the results, detailing detection and classification outcomes.
603
+
604
+ This method generates a human-readable string summarizing the detection and classification results. It includes
605
+ the number of detections for each class and the top probabilities for classification tasks.
606
+
607
+ Returns:
608
+ (str): A formatted string containing a summary of the results. For detection tasks, it includes the
609
+ number of detections per class. For classification tasks, it includes the top 5 class probabilities.
610
+
611
+ Examples:
612
+ >>> results = model('path/to/image.jpg')
613
+ >>> for result in results:
614
+ ... print(result.verbose())
615
+ 2 persons, 1 car, 3 traffic lights,
616
+ dog 0.92, cat 0.78, horse 0.64,
617
+
618
+ Notes:
619
+ - If there are no detections, the method returns "(no detections), " for detection tasks.
620
+ - For classification tasks, it returns the top 5 class probabilities and their corresponding class names.
621
+ - The returned string is comma-separated and ends with a comma and a space.
622
+ """
354
623
  log_string = ""
355
624
  probs = self.probs
356
625
  boxes = self.boxes
@@ -369,31 +638,26 @@ class Results(SimpleClass):
369
638
  Save detection results to a text file.
370
639
 
371
640
  Args:
372
- txt_file (str): Path to the output text file.
641
+ txt_file (str | Path): Path to the output text file.
373
642
  save_conf (bool): Whether to include confidence scores in the output.
374
643
 
375
644
  Returns:
376
645
  (str): Path to the saved text file.
377
646
 
378
- Example:
379
- ```python
380
- from ultralytics import YOLO
381
-
382
- model = YOLO('yolov8n.pt')
383
- results = model("path/to/image.jpg")
384
- for result in results:
385
- result.save_txt("output.txt")
386
- ```
647
+ Examples:
648
+ >>> from ultralytics import YOLO
649
+ >>> model = YOLO('yolov8n.pt')
650
+ >>> results = model("path/to/image.jpg")
651
+ >>> for result in results:
652
+ ... result.save_txt("output.txt")
387
653
 
388
654
  Notes:
389
655
  - The file will contain one line per detection or classification with the following structure:
390
- - For detections: `class confidence x_center y_center width height`
391
- - For classifications: `confidence class_name`
392
- - For masks and keypoints, the specific formats will vary accordingly.
393
-
656
+ - For detections: `class confidence x_center y_center width height`
657
+ - For classifications: `confidence class_name`
658
+ - For masks and keypoints, the specific formats will vary accordingly.
394
659
  - The function will create the output directory if it does not exist.
395
660
  - If save_conf is False, the confidence scores will be excluded from the output.
396
-
397
661
  - Existing contents of the file will not be overwritten; new results will be appended.
398
662
  """
399
663
  is_obb = self.obb is not None
@@ -426,27 +690,25 @@ class Results(SimpleClass):
426
690
 
427
691
  def save_crop(self, save_dir, file_name=Path("im.jpg")):
428
692
  """
429
- Save cropped detection images to `save_dir/cls/file_name.jpg`.
693
+ Saves cropped detection images to specified directory.
694
+
695
+ This method saves cropped images of detected objects to a specified directory. Each crop is saved in a
696
+ subdirectory named after the object's class, with the filename based on the input file_name.
430
697
 
431
698
  Args:
432
- save_dir (str | pathlib.Path): Directory path where the cropped images should be saved.
433
- file_name (str | pathlib.Path): Filename for the saved cropped image.
699
+ save_dir (str | Path): Directory path where cropped images will be saved.
700
+ file_name (str | Path): Base filename for the saved cropped images. Default is Path("im.jpg").
434
701
 
435
702
  Notes:
436
- This function does not support Classify or Oriented Bounding Box (OBB) tasks. It will warn and exit if
437
- called for such tasks.
438
-
439
- Example:
440
- ```python
441
- from ultralytics import YOLO
442
-
443
- model = YOLO("yolov8n.pt")
444
- results = model("path/to/image.jpg")
445
-
446
- # Save cropped images to the specified directory
447
- for result in results:
448
- result.save_crop(save_dir="path/to/save/crops", file_name="crop")
449
- ```
703
+ - This method does not support Classify or Oriented Bounding Box (OBB) tasks.
704
+ - Crops are saved as 'save_dir/class_name/file_name.jpg'.
705
+ - The method will create necessary subdirectories if they don't exist.
706
+ - Original image is copied before cropping to avoid modifying the original.
707
+
708
+ Examples:
709
+ >>> results = model("path/to/image.jpg")
710
+ >>> for result in results:
711
+ ... result.save_crop(save_dir="path/to/crops", file_name="detection")
450
712
  """
451
713
  if self.probs is not None:
452
714
  LOGGER.warning("WARNING ⚠️ Classify task do not support `save_crop`.")
@@ -463,7 +725,28 @@ class Results(SimpleClass):
463
725
  )
464
726
 
465
727
  def summary(self, normalize=False, decimals=5):
466
- """Convert inference results to a summarized dictionary with optional normalization for box coordinates."""
728
+ """
729
+ Converts inference results to a summarized dictionary with optional normalization for box coordinates.
730
+
731
+ This method creates a list of detection dictionaries, each containing information about a single
732
+ detection or classification result. For classification tasks, it returns the top class and its
733
+ confidence. For detection tasks, it includes class information, bounding box coordinates, and
734
+ optionally mask segments and keypoints.
735
+
736
+ Args:
737
+ normalize (bool): Whether to normalize bounding box coordinates by image dimensions. Defaults to False.
738
+ decimals (int): Number of decimal places to round the output values to. Defaults to 5.
739
+
740
+ Returns:
741
+ (List[Dict]): A list of dictionaries, each containing summarized information for a single
742
+ detection or classification result. The structure of each dictionary varies based on the
743
+ task type (classification or detection) and available information (boxes, masks, keypoints).
744
+
745
+ Examples:
746
+ >>> results = model('image.jpg')
747
+ >>> summary = results[0].summary()
748
+ >>> print(summary)
749
+ """
467
750
  # Create list of detection dictionaries
468
751
  results = []
469
752
  if self.probs is not None:
@@ -507,7 +790,34 @@ class Results(SimpleClass):
507
790
  return results
508
791
 
509
792
  def tojson(self, normalize=False, decimals=5):
510
- """Converts detection results to JSON format."""
793
+ """
794
+ Converts detection results to JSON format.
795
+
796
+ This method serializes the detection results into a JSON-compatible format. It includes information
797
+ about detected objects such as bounding boxes, class names, confidence scores, and optionally
798
+ segmentation masks and keypoints.
799
+
800
+ Args:
801
+ normalize (bool): Whether to normalize the bounding box coordinates by the image dimensions.
802
+ If True, coordinates will be returned as float values between 0 and 1. Defaults to False.
803
+ decimals (int): Number of decimal places to round the output values to. Defaults to 5.
804
+
805
+ Returns:
806
+ (str): A JSON string containing the serialized detection results.
807
+
808
+ Examples:
809
+ >>> results = model("path/to/image.jpg")
810
+ >>> json_result = results[0].tojson()
811
+ >>> print(json_result)
812
+
813
+ Notes:
814
+ - For classification tasks, the JSON will contain the top class and its confidence instead of bounding boxes.
815
+ - For object detection tasks, the JSON will include bounding box coordinates, class names, and
816
+ confidence scores.
817
+ - If available, segmentation masks and keypoints will also be included in the JSON output.
818
+ - The method uses the `summary` method internally to generate the data structure before
819
+ converting it to JSON.
820
+ """
511
821
  import json
512
822
 
513
823
  return json.dumps(self.summary(normalize=normalize, decimals=decimals), indent=2)
@@ -515,43 +825,67 @@ class Results(SimpleClass):
515
825
 
516
826
  class Boxes(BaseTensor):
517
827
  """
518
- Manages detection boxes, providing easy access and manipulation of box coordinates, confidence scores, class
519
- identifiers, and optional tracking IDs. Supports multiple formats for box coordinates, including both absolute and
520
- normalized forms.
828
+ A class for managing and manipulating detection boxes.
521
829
 
522
- Attributes:
523
- data (torch.Tensor): The raw tensor containing detection boxes and their associated data.
524
- orig_shape (tuple): The original image size as a tuple (height, width), used for normalization.
525
- is_track (bool): Indicates whether tracking IDs are included in the box data.
830
+ This class provides functionality for handling detection boxes, including their coordinates, confidence scores,
831
+ class labels, and optional tracking IDs. It supports various box formats and offers methods for easy manipulation
832
+ and conversion between different coordinate systems.
526
833
 
527
834
  Attributes:
835
+ data (torch.Tensor | numpy.ndarray): The raw tensor containing detection boxes and associated data.
836
+ orig_shape (Tuple[int, int]): The original image dimensions (height, width).
837
+ is_track (bool): Indicates whether tracking IDs are included in the box data.
528
838
  xyxy (torch.Tensor | numpy.ndarray): Boxes in [x1, y1, x2, y2] format.
529
839
  conf (torch.Tensor | numpy.ndarray): Confidence scores for each box.
530
840
  cls (torch.Tensor | numpy.ndarray): Class labels for each box.
531
- id (torch.Tensor | numpy.ndarray, optional): Tracking IDs for each box, if available.
532
- xywh (torch.Tensor | numpy.ndarray): Boxes in [x, y, width, height] format, calculated on demand.
533
- xyxyn (torch.Tensor | numpy.ndarray): Normalized [x1, y1, x2, y2] boxes, relative to `orig_shape`.
534
- xywhn (torch.Tensor | numpy.ndarray): Normalized [x, y, width, height] boxes, relative to `orig_shape`.
841
+ id (torch.Tensor | numpy.ndarray): Tracking IDs for each box (if available).
842
+ xywh (torch.Tensor | numpy.ndarray): Boxes in [x, y, width, height] format.
843
+ xyxyn (torch.Tensor | numpy.ndarray): Normalized [x1, y1, x2, y2] boxes relative to orig_shape.
844
+ xywhn (torch.Tensor | numpy.ndarray): Normalized [x, y, width, height] boxes relative to orig_shape.
535
845
 
536
846
  Methods:
537
- cpu(): Moves the boxes to CPU memory.
538
- numpy(): Converts the boxes to a numpy array format.
539
- cuda(): Moves the boxes to CUDA (GPU) memory.
540
- to(device, dtype=None): Moves the boxes to the specified device.
847
+ cpu(): Returns a copy of the object with all tensors on CPU memory.
848
+ numpy(): Returns a copy of the object with all tensors as numpy arrays.
849
+ cuda(): Returns a copy of the object with all tensors on GPU memory.
850
+ to(*args, **kwargs): Returns a copy of the object with tensors on specified device and dtype.
851
+
852
+ Examples:
853
+ >>> import torch
854
+ >>> boxes_data = torch.tensor([[100, 50, 150, 100, 0.9, 0], [200, 150, 300, 250, 0.8, 1]])
855
+ >>> orig_shape = (480, 640) # height, width
856
+ >>> boxes = Boxes(boxes_data, orig_shape)
857
+ >>> print(boxes.xyxy)
858
+ >>> print(boxes.conf)
859
+ >>> print(boxes.cls)
860
+ >>> print(boxes.xywhn)
541
861
  """
542
862
 
543
863
  def __init__(self, boxes, orig_shape) -> None:
544
864
  """
545
865
  Initialize the Boxes class with detection box data and the original image shape.
546
866
 
547
- Args:
548
- boxes (torch.Tensor | np.ndarray): A tensor or numpy array with detection boxes of shape (num_boxes, 6)
549
- or (num_boxes, 7). Columns should contain [x1, y1, x2, y2, confidence, class, (optional) track_id].
550
- The track ID column is included if present.
551
- orig_shape (tuple): The original image shape as (height, width). Used for normalization.
867
+ This class manages detection boxes, providing easy access and manipulation of box coordinates,
868
+ confidence scores, class identifiers, and optional tracking IDs. It supports multiple formats
869
+ for box coordinates, including both absolute and normalized forms.
552
870
 
553
- Returns:
554
- (None)
871
+ Args:
872
+ boxes (torch.Tensor | np.ndarray): A tensor or numpy array with detection boxes of shape
873
+ (num_boxes, 6) or (num_boxes, 7). Columns should contain
874
+ [x1, y1, x2, y2, confidence, class, (optional) track_id].
875
+ orig_shape (Tuple[int, int]): The original image shape as (height, width). Used for normalization.
876
+
877
+ Attributes:
878
+ data (torch.Tensor): The raw tensor containing detection boxes and their associated data.
879
+ orig_shape (Tuple[int, int]): The original image size, used for normalization.
880
+ is_track (bool): Indicates whether tracking IDs are included in the box data.
881
+
882
+ Examples:
883
+ >>> import torch
884
+ >>> boxes = torch.tensor([[100, 50, 150, 100, 0.9, 0]])
885
+ >>> orig_shape = (480, 640)
886
+ >>> detection_boxes = Boxes(boxes, orig_shape)
887
+ >>> print(detection_boxes.xyxy)
888
+ tensor([[100., 50., 150., 100.]])
555
889
  """
556
890
  if boxes.ndim == 1:
557
891
  boxes = boxes[None, :]
@@ -563,34 +897,119 @@ class Boxes(BaseTensor):
563
897
 
564
898
  @property
565
899
  def xyxy(self):
566
- """Returns bounding boxes in [x1, y1, x2, y2] format."""
900
+ """
901
+ Returns bounding boxes in [x1, y1, x2, y2] format.
902
+
903
+ Returns:
904
+ (torch.Tensor | numpy.ndarray): A tensor or numpy array of shape (n, 4) containing bounding box
905
+ coordinates in [x1, y1, x2, y2] format, where n is the number of boxes.
906
+
907
+ Examples:
908
+ >>> results = model('image.jpg')
909
+ >>> boxes = results[0].boxes
910
+ >>> xyxy = boxes.xyxy
911
+ >>> print(xyxy)
912
+ """
567
913
  return self.data[:, :4]
568
914
 
569
915
  @property
570
916
  def conf(self):
571
- """Returns the confidence scores for each detection box."""
917
+ """
918
+ Returns the confidence scores for each detection box.
919
+
920
+ Returns:
921
+ (torch.Tensor | numpy.ndarray): A 1D tensor or array containing confidence scores for each detection,
922
+ with shape (N,) where N is the number of detections.
923
+
924
+ Examples:
925
+ >>> boxes = Boxes(torch.tensor([[10, 20, 30, 40, 0.9, 0]]), orig_shape=(100, 100))
926
+ >>> conf_scores = boxes.conf
927
+ >>> print(conf_scores)
928
+ tensor([0.9000])
929
+ """
572
930
  return self.data[:, -2]
573
931
 
574
932
  @property
575
933
  def cls(self):
576
- """Class ID tensor representing category predictions for each bounding box."""
934
+ """
935
+ Returns the class ID tensor representing category predictions for each bounding box.
936
+
937
+ Returns:
938
+ (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the class IDs for each detection box.
939
+ The shape is (N,), where N is the number of boxes.
940
+
941
+ Examples:
942
+ >>> results = model('image.jpg')
943
+ >>> boxes = results[0].boxes
944
+ >>> class_ids = boxes.cls
945
+ >>> print(class_ids) # tensor([0., 2., 1.])
946
+ """
577
947
  return self.data[:, -1]
578
948
 
579
949
  @property
580
950
  def id(self):
581
- """Return the tracking IDs for each box if available."""
951
+ """
952
+ Returns the tracking IDs for each detection box if available.
953
+
954
+ Returns:
955
+ (torch.Tensor | None): A tensor containing tracking IDs for each box if tracking is enabled,
956
+ otherwise None. Shape is (N,) where N is the number of boxes.
957
+
958
+ Examples:
959
+ >>> results = model.track('path/to/video.mp4')
960
+ >>> for result in results:
961
+ ... boxes = result.boxes
962
+ ... if boxes.is_track:
963
+ ... track_ids = boxes.id
964
+ ... print(f"Tracking IDs: {track_ids}")
965
+ ... else:
966
+ ... print("Tracking is not enabled for these boxes.")
967
+
968
+ Notes:
969
+ - This property returns None when tracking is not enabled (i.e., when `is_track` is False).
970
+ - The tracking IDs are typically used to associate detections across multiple frames in video analysis.
971
+ """
582
972
  return self.data[:, -3] if self.is_track else None
583
973
 
584
974
  @property
585
975
  @lru_cache(maxsize=2) # maxsize 1 should suffice
586
976
  def xywh(self):
587
- """Returns boxes in [x, y, width, height] format."""
977
+ """
978
+ Convert bounding boxes from [x1, y1, x2, y2] format to [x, y, width, height] format.
979
+
980
+ Returns:
981
+ (torch.Tensor | numpy.ndarray): Boxes in [x, y, width, height] format, where x, y are the coordinates of
982
+ the center of the bounding box, width, height are the dimensions of the bounding box and the
983
+ shape of the returned tensor is (N, 4), where N is the number of boxes.
984
+
985
+ Examples:
986
+ >>> boxes = Boxes(torch.tensor([[100, 50, 150, 100, 0.9, 0], [200, 150, 300, 250, 0.8, 1]]), orig_shape=(480, 640))
987
+ >>> xywh = boxes.xywh
988
+ >>> print(xywh)
989
+ tensor([[125., 75., 50., 50.],
990
+ [250., 200., 100., 100.]])
991
+ """
588
992
  return ops.xyxy2xywh(self.xyxy)
589
993
 
590
994
  @property
591
995
  @lru_cache(maxsize=2)
592
996
  def xyxyn(self):
593
- """Normalize box coordinates to [x1, y1, x2, y2] relative to the original image size."""
997
+ """
998
+ Returns normalized bounding box coordinates relative to the original image size.
999
+
1000
+ This property calculates and returns the bounding box coordinates in [x1, y1, x2, y2] format,
1001
+ normalized to the range [0, 1] based on the original image dimensions.
1002
+
1003
+ Returns:
1004
+ (torch.Tensor | numpy.ndarray): Normalized bounding box coordinates with shape (N, 4), where N is
1005
+ the number of boxes. Each row contains [x1, y1, x2, y2] values normalized to [0, 1].
1006
+
1007
+ Examples:
1008
+ >>> boxes = Boxes(torch.tensor([[100, 50, 300, 400, 0.9, 0]]), orig_shape=(480, 640))
1009
+ >>> normalized = boxes.xyxyn
1010
+ >>> print(normalized)
1011
+ tensor([[0.1562, 0.1042, 0.4688, 0.8333]])
1012
+ """
594
1013
  xyxy = self.xyxy.clone() if isinstance(self.xyxy, torch.Tensor) else np.copy(self.xyxy)
595
1014
  xyxy[..., [0, 2]] /= self.orig_shape[1]
596
1015
  xyxy[..., [1, 3]] /= self.orig_shape[0]
@@ -599,7 +1018,23 @@ class Boxes(BaseTensor):
599
1018
  @property
600
1019
  @lru_cache(maxsize=2)
601
1020
  def xywhn(self):
602
- """Returns normalized bounding boxes in [x, y, width, height] format."""
1021
+ """
1022
+ Returns normalized bounding boxes in [x, y, width, height] format.
1023
+
1024
+ This property calculates and returns the normalized bounding box coordinates in the format
1025
+ [x_center, y_center, width, height], where all values are relative to the original image dimensions.
1026
+
1027
+ Returns:
1028
+ (torch.Tensor | numpy.ndarray): Normalized bounding boxes with shape (N, 4), where N is the
1029
+ number of boxes. Each row contains [x_center, y_center, width, height] values normalized
1030
+ to [0, 1] based on the original image dimensions.
1031
+
1032
+ Examples:
1033
+ >>> boxes = Boxes(torch.tensor([[100, 50, 150, 100, 0.9, 0]]), orig_shape=(480, 640))
1034
+ >>> normalized = boxes.xywhn
1035
+ >>> print(normalized)
1036
+ tensor([[0.1953, 0.1562, 0.0781, 0.1042]])
1037
+ """
603
1038
  xywh = ops.xyxy2xywh(self.xyxy)
604
1039
  xywh[..., [0, 2]] /= self.orig_shape[1]
605
1040
  xywh[..., [1, 3]] /= self.orig_shape[0]
@@ -610,19 +1045,44 @@ class Masks(BaseTensor):
610
1045
  """
611
1046
  A class for storing and manipulating detection masks.
612
1047
 
1048
+ This class extends BaseTensor and provides functionality for handling segmentation masks,
1049
+ including methods for converting between pixel and normalized coordinates.
1050
+
613
1051
  Attributes:
614
- xy (list): A list of segments in pixel coordinates.
615
- xyn (list): A list of normalized segments.
1052
+ data (torch.Tensor | numpy.ndarray): The raw tensor or array containing mask data.
1053
+ orig_shape (tuple): Original image shape in (height, width) format.
1054
+ xy (List[numpy.ndarray]): A list of segments in pixel coordinates.
1055
+ xyn (List[numpy.ndarray]): A list of normalized segments.
616
1056
 
617
1057
  Methods:
618
- cpu(): Returns the masks tensor on CPU memory.
619
- numpy(): Returns the masks tensor as a numpy array.
620
- cuda(): Returns the masks tensor on GPU memory.
621
- to(device, dtype): Returns the masks tensor with the specified device and dtype.
1058
+ cpu(): Returns a copy of the Masks object with the mask tensor on CPU memory.
1059
+ numpy(): Returns a copy of the Masks object with the mask tensor as a numpy array.
1060
+ cuda(): Returns a copy of the Masks object with the mask tensor on GPU memory.
1061
+ to(*args, **kwargs): Returns a copy of the Masks object with the mask tensor on specified device and dtype.
1062
+
1063
+ Examples:
1064
+ >>> masks_data = torch.rand(1, 160, 160)
1065
+ >>> orig_shape = (720, 1280)
1066
+ >>> masks = Masks(masks_data, orig_shape)
1067
+ >>> pixel_coords = masks.xy
1068
+ >>> normalized_coords = masks.xyn
622
1069
  """
623
1070
 
624
1071
  def __init__(self, masks, orig_shape) -> None:
625
- """Initializes the Masks class with a masks tensor and original image shape."""
1072
+ """
1073
+ Initialize the Masks class with detection mask data and the original image shape.
1074
+
1075
+ Args:
1076
+ masks (torch.Tensor | np.ndarray): Detection masks with shape (num_masks, height, width).
1077
+ orig_shape (tuple): The original image shape as (height, width). Used for normalization.
1078
+
1079
+ Examples:
1080
+ >>> import torch
1081
+ >>> from ultralytics.engine.results import Masks
1082
+ >>> masks = torch.rand(10, 160, 160) # 10 masks of 160x160 resolution
1083
+ >>> orig_shape = (720, 1280) # Original image shape
1084
+ >>> mask_obj = Masks(masks, orig_shape)
1085
+ """
626
1086
  if masks.ndim == 2:
627
1087
  masks = masks[None, :]
628
1088
  super().__init__(masks, orig_shape)
@@ -630,7 +1090,23 @@ class Masks(BaseTensor):
630
1090
  @property
631
1091
  @lru_cache(maxsize=1)
632
1092
  def xyn(self):
633
- """Return normalized xy-coordinates of the segmentation masks."""
1093
+ """
1094
+ Returns normalized xy-coordinates of the segmentation masks.
1095
+
1096
+ This property calculates and caches the normalized xy-coordinates of the segmentation masks. The coordinates
1097
+ are normalized relative to the original image shape.
1098
+
1099
+ Returns:
1100
+ (List[numpy.ndarray]): A list of numpy arrays, where each array contains the normalized xy-coordinates
1101
+ of a single segmentation mask. Each array has shape (N, 2), where N is the number of points in the
1102
+ mask contour.
1103
+
1104
+ Examples:
1105
+ >>> results = model('image.jpg')
1106
+ >>> masks = results[0].masks
1107
+ >>> normalized_coords = masks.xyn
1108
+ >>> print(normalized_coords[0]) # Normalized coordinates of the first mask
1109
+ """
634
1110
  return [
635
1111
  ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=True)
636
1112
  for x in ops.masks2segments(self.data)
@@ -639,7 +1115,24 @@ class Masks(BaseTensor):
639
1115
  @property
640
1116
  @lru_cache(maxsize=1)
641
1117
  def xy(self):
642
- """Returns the [x, y] normalized mask coordinates for each segment in the mask tensor."""
1118
+ """
1119
+ Returns the [x, y] pixel coordinates for each segment in the mask tensor.
1120
+
1121
+ This property calculates and returns a list of pixel coordinates for each segmentation mask in the
1122
+ Masks object. The coordinates are scaled to match the original image dimensions.
1123
+
1124
+ Returns:
1125
+ (List[numpy.ndarray]): A list of numpy arrays, where each array contains the [x, y] pixel
1126
+ coordinates for a single segmentation mask. Each array has shape (N, 2), where N is the
1127
+ number of points in the segment.
1128
+
1129
+ Examples:
1130
+ >>> results = model('image.jpg')
1131
+ >>> masks = results[0].masks
1132
+ >>> xy_coords = masks.xy
1133
+ >>> print(len(xy_coords)) # Number of masks
1134
+ >>> print(xy_coords[0].shape) # Shape of first mask's coordinates
1135
+ """
643
1136
  return [
644
1137
  ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=False)
645
1138
  for x in ops.masks2segments(self.data)
@@ -650,21 +1143,53 @@ class Keypoints(BaseTensor):
650
1143
  """
651
1144
  A class for storing and manipulating detection keypoints.
652
1145
 
653
- Attributes
654
- xy (torch.Tensor): A collection of keypoints containing x, y coordinates for each detection.
655
- xyn (torch.Tensor): A normalized version of xy with coordinates in the range [0, 1].
656
- conf (torch.Tensor): Confidence values associated with keypoints if available, otherwise None.
1146
+ This class encapsulates functionality for handling keypoint data, including coordinate manipulation,
1147
+ normalization, and confidence values.
1148
+
1149
+ Attributes:
1150
+ data (torch.Tensor): The raw tensor containing keypoint data.
1151
+ orig_shape (Tuple[int, int]): The original image dimensions (height, width).
1152
+ has_visible (bool): Indicates whether visibility information is available for keypoints.
1153
+ xy (torch.Tensor): Keypoint coordinates in [x, y] format.
1154
+ xyn (torch.Tensor): Normalized keypoint coordinates in [x, y] format, relative to orig_shape.
1155
+ conf (torch.Tensor): Confidence values for each keypoint, if available.
657
1156
 
658
1157
  Methods:
659
1158
  cpu(): Returns a copy of the keypoints tensor on CPU memory.
660
1159
  numpy(): Returns a copy of the keypoints tensor as a numpy array.
661
1160
  cuda(): Returns a copy of the keypoints tensor on GPU memory.
662
- to(device, dtype): Returns a copy of the keypoints tensor with the specified device and dtype.
1161
+ to(*args, **kwargs): Returns a copy of the keypoints tensor with specified device and dtype.
1162
+
1163
+ Examples:
1164
+ >>> import torch
1165
+ >>> from ultralytics.engine.results import Keypoints
1166
+ >>> keypoints_data = torch.rand(1, 17, 3) # 1 detection, 17 keypoints, (x, y, conf)
1167
+ >>> orig_shape = (480, 640) # Original image shape (height, width)
1168
+ >>> keypoints = Keypoints(keypoints_data, orig_shape)
1169
+ >>> print(keypoints.xy.shape) # Access xy coordinates
1170
+ >>> print(keypoints.conf) # Access confidence values
1171
+ >>> keypoints_cpu = keypoints.cpu() # Move keypoints to CPU
663
1172
  """
664
1173
 
665
1174
  @smart_inference_mode() # avoid keypoints < conf in-place error
666
1175
  def __init__(self, keypoints, orig_shape) -> None:
667
- """Initializes the Keypoints object with detection keypoints and original image dimensions."""
1176
+ """
1177
+ Initializes the Keypoints object with detection keypoints and original image dimensions.
1178
+
1179
+ This method processes the input keypoints tensor, handling both 2D and 3D formats. For 3D tensors
1180
+ (x, y, confidence), it masks out low-confidence keypoints by setting their coordinates to zero.
1181
+
1182
+ Args:
1183
+ keypoints (torch.Tensor): A tensor containing keypoint data. Shape can be either:
1184
+ - (num_objects, num_keypoints, 2) for x, y coordinates only
1185
+ - (num_objects, num_keypoints, 3) for x, y coordinates and confidence scores
1186
+ orig_shape (Tuple[int, int]): The original image dimensions (height, width).
1187
+
1188
+ Examples:
1189
+ >>> kpts = torch.rand(1, 17, 3) # 1 object, 17 keypoints (COCO format), x,y,conf
1190
+ >>> orig_shape = (720, 1280) # Original image height, width
1191
+ >>> keypoints = Keypoints(kpts, orig_shape)
1192
+ """
668
1193
  if keypoints.ndim == 2:
669
1194
  keypoints = keypoints[None, :]
670
1195
  if keypoints.shape[2] == 3: # x, y, conf
@@ -676,13 +1201,44 @@ class Keypoints(BaseTensor):
676
1201
  @property
677
1202
  @lru_cache(maxsize=1)
678
1203
  def xy(self):
679
- """Returns x, y coordinates of keypoints."""
1204
+ """
1205
+ Returns x, y coordinates of keypoints.
1206
+
1207
+ Returns:
1208
+ (torch.Tensor): A tensor containing the x, y coordinates of keypoints with shape (N, K, 2), where N is
1209
+ the number of detections and K is the number of keypoints per detection.
1210
+
1211
+ Examples:
1212
+ >>> results = model('image.jpg')
1213
+ >>> keypoints = results[0].keypoints
1214
+ >>> xy = keypoints.xy
1215
+ >>> print(xy.shape) # (N, K, 2)
1216
+ >>> print(xy[0]) # x, y coordinates of keypoints for first detection
1217
+
1218
+ Notes:
1219
+ - The returned coordinates are in pixel units relative to the original image dimensions.
1220
+ - If keypoints were initialized with confidence values, keypoints with confidence < 0.5 have their coordinates set to zero.
1221
+ - This property uses LRU caching to improve performance on repeated access.
1222
+ """
680
1223
  return self.data[..., :2]
681
1224
 
682
1225
  @property
683
1226
  @lru_cache(maxsize=1)
684
1227
  def xyn(self):
685
- """Returns normalized coordinates (x, y) of keypoints relative to the original image size."""
1228
+ """
1229
+ Returns normalized coordinates (x, y) of keypoints relative to the original image size.
1230
+
1231
+ Returns:
1232
+ (torch.Tensor | numpy.ndarray): A tensor or array of shape (N, K, 2) containing normalized keypoint
1233
+ coordinates, where N is the number of instances, K is the number of keypoints, and the last
1234
+ dimension contains [x, y] values in the range [0, 1].
1235
+
1236
+ Examples:
1237
+ >>> keypoints = Keypoints(torch.rand(1, 17, 2), orig_shape=(480, 640))
1238
+ >>> normalized_kpts = keypoints.xyn
1239
+ >>> print(normalized_kpts.shape)
1240
+ torch.Size([1, 17, 2])
1241
+ """
686
1242
  xy = self.xy.clone() if isinstance(self.xy, torch.Tensor) else np.copy(self.xy)
687
1243
  xy[..., 0] /= self.orig_shape[1]
688
1244
  xy[..., 1] /= self.orig_shape[0]
@@ -691,53 +1247,160 @@ class Keypoints(BaseTensor):
691
1247
  @property
692
1248
  @lru_cache(maxsize=1)
693
1249
  def conf(self):
694
- """Returns confidence values for each keypoint."""
1250
+ """
1251
+ Returns confidence values for each keypoint.
1252
+
1253
+ Returns:
1254
+ (torch.Tensor | None): A tensor containing confidence scores for each keypoint if available,
1255
+ otherwise None. Shape is (num_detections, num_keypoints); a single detection is stored with a
1256
+ leading batch dimension, giving shape (1, num_keypoints).
1257
+
1258
+ Examples:
1259
+ >>> keypoints = Keypoints(torch.rand(1, 17, 3), orig_shape=(640, 640)) # 1 detection, 17 keypoints
1260
+ >>> conf = keypoints.conf
1261
+ >>> print(conf.shape) # torch.Size([1, 17])
1262
+ """
695
1263
  return self.data[..., 2] if self.has_visible else None
696
1264
 
697
1265
 
698
1266
  class Probs(BaseTensor):
699
1267
  """
700
- A class for storing and manipulating classification predictions.
1268
+ A class for storing and manipulating classification probabilities.
701
1269
 
702
- Attributes
703
- top1 (int): Index of the top 1 class.
704
- top5 (list[int]): Indices of the top 5 classes.
705
- top1conf (torch.Tensor): Confidence of the top 1 class.
706
- top5conf (torch.Tensor): Confidences of the top 5 classes.
1270
+ This class extends BaseTensor and provides methods for accessing and manipulating
1271
+ classification probabilities, including top-1 and top-5 predictions.
1272
+
1273
+ Attributes:
1274
+ data (torch.Tensor | numpy.ndarray): The raw tensor or array containing classification probabilities.
1275
+ orig_shape (tuple | None): The original image shape as (height, width). Not used in this class.
1276
+ top1 (int): Index of the class with the highest probability.
1277
+ top5 (List[int]): Indices of the top 5 classes by probability.
1278
+ top1conf (torch.Tensor | numpy.ndarray): Confidence score of the top 1 class.
1279
+ top5conf (torch.Tensor | numpy.ndarray): Confidence scores of the top 5 classes.
707
1280
 
708
1281
  Methods:
709
- cpu(): Returns a copy of the probs tensor on CPU memory.
710
- numpy(): Returns a copy of the probs tensor as a numpy array.
711
- cuda(): Returns a copy of the probs tensor on GPU memory.
712
- to(): Returns a copy of the probs tensor with the specified device and dtype.
1282
+ cpu(): Returns a copy of the probabilities tensor on CPU memory.
1283
+ numpy(): Returns a copy of the probabilities tensor as a numpy array.
1284
+ cuda(): Returns a copy of the probabilities tensor on GPU memory.
1285
+ to(*args, **kwargs): Returns a copy of the probabilities tensor with specified device and dtype.
1286
+
1287
+ Examples:
1288
+ >>> probs = torch.tensor([0.1, 0.3, 0.6])
1289
+ >>> p = Probs(probs)
1290
+ >>> print(p.top1)
1291
+ 2
1292
+ >>> print(p.top5)
1293
+ [2, 1, 0]
1294
+ >>> print(p.top1conf)
1295
+ tensor(0.6000)
1296
+ >>> print(p.top5conf)
1297
+ tensor([0.6000, 0.3000, 0.1000])
713
1298
  """
714
1299
 
715
1300
  def __init__(self, probs, orig_shape=None) -> None:
716
- """Initialize Probs with classification probabilities and optional original image shape."""
1301
+ """
1302
+ Initialize the Probs class with classification probabilities.
1303
+
1304
+ This class stores and manages classification probabilities, providing easy access to top predictions and their
1305
+ confidences.
1306
+
1307
+ Args:
1308
+ probs (torch.Tensor | np.ndarray): A 1D tensor or array of classification probabilities.
1309
+ orig_shape (tuple | None): The original image shape as (height, width). Not used in this class but kept for
1310
+ consistency with other result classes.
1311
+
1312
+ Attributes:
1313
+ data (torch.Tensor | np.ndarray): The raw tensor or array containing classification probabilities.
1314
+ top1 (int): Index of the top 1 class.
1315
+ top5 (List[int]): Indices of the top 5 classes.
1316
+ top1conf (torch.Tensor | np.ndarray): Confidence of the top 1 class.
1317
+ top5conf (torch.Tensor | np.ndarray): Confidences of the top 5 classes.
1318
+
1319
+ Examples:
1320
+ >>> import torch
1321
+ >>> probs = torch.tensor([0.1, 0.3, 0.2, 0.4])
1322
+ >>> p = Probs(probs)
1323
+ >>> print(p.top1)
1324
+ 3
1325
+ >>> print(p.top1conf)
1326
+ tensor(0.4000)
1327
+ >>> print(p.top5)
1328
+ [3, 1, 2, 0]
1329
+ """
717
1330
  super().__init__(probs, orig_shape)
718
1331
 
719
1332
  @property
720
1333
  @lru_cache(maxsize=1)
721
1334
  def top1(self):
722
- """Return the index of the class with the highest probability."""
1335
+ """
1336
+ Returns the index of the class with the highest probability.
1337
+
1338
+ Returns:
1339
+ (int): Index of the class with the highest probability.
1340
+
1341
+ Examples:
1342
+ >>> probs = Probs(torch.tensor([0.1, 0.3, 0.6]))
1343
+ >>> probs.top1
1344
+ 2
1345
+ """
723
1346
  return int(self.data.argmax())
724
1347
 
725
1348
  @property
726
1349
  @lru_cache(maxsize=1)
727
1350
  def top5(self):
728
- """Return the indices of the top 5 class probabilities."""
1351
+ """
1352
+ Returns the indices of the top 5 class probabilities.
1353
+
1354
+ Returns:
1355
+ (List[int]): A list containing the indices of the top 5 class probabilities, sorted in descending order.
1356
+
1357
+ Examples:
1358
+ >>> probs = Probs(torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5]))
1359
+ >>> print(probs.top5)
1360
+ [4, 3, 2, 1, 0]
1361
+ """
729
1362
  return (-self.data).argsort(0)[:5].tolist() # this way works with both torch and numpy.
730
1363
 
731
1364
  @property
732
1365
  @lru_cache(maxsize=1)
733
1366
  def top1conf(self):
734
- """Retrieves the confidence score of the highest probability class."""
1367
+ """
1368
+ Returns the confidence score of the highest probability class.
1369
+
1370
+ This property retrieves the confidence score (probability) of the class with the highest predicted probability
1371
+ from the classification results.
1372
+
1373
+ Returns:
1374
+ (torch.Tensor | numpy.ndarray): A tensor containing the confidence score of the top 1 class.
1375
+
1376
+ Examples:
1377
+ >>> results = model('image.jpg') # classify an image
1378
+ >>> probs = results[0].probs # get classification probabilities
1379
+ >>> top1_confidence = probs.top1conf # get confidence of top 1 class
1380
+ >>> print(f"Top 1 class confidence: {top1_confidence.item():.4f}")
1381
+ """
735
1382
  return self.data[self.top1]
736
1383
 
737
1384
  @property
738
1385
  @lru_cache(maxsize=1)
739
1386
  def top5conf(self):
740
- """Returns confidence scores for the top 5 classification predictions."""
1387
+ """
1388
+ Returns confidence scores for the top 5 classification predictions.
1389
+
1390
+ This property retrieves the confidence scores corresponding to the top 5 class probabilities
1391
+ predicted by the model. It provides a quick way to access the most likely class predictions
1392
+ along with their associated confidence levels.
1393
+
1394
+ Returns:
1395
+ (torch.Tensor | numpy.ndarray): A tensor or array containing the confidence scores for the
1396
+ top 5 predicted classes, sorted in descending order of probability.
1397
+
1398
+ Examples:
1399
+ >>> results = model('image.jpg')
1400
+ >>> probs = results[0].probs
1401
+ >>> top5_conf = probs.top5conf
1402
+ >>> print(top5_conf) # Prints confidence scores for top 5 classes
1403
+ """
741
1404
  return self.data[self.top5]
742
1405
 
743
1406
 
@@ -745,31 +1408,63 @@ class OBB(BaseTensor):
745
1408
  """
746
1409
  A class for storing and manipulating Oriented Bounding Boxes (OBB).
747
1410
 
748
- Args:
749
- boxes (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the detection boxes,
750
- with shape (num_boxes, 7) or (num_boxes, 8). The last two columns contain confidence and class values.
751
- If present, the third last column contains track IDs, and the fifth column from the left contains rotation.
752
- orig_shape (tuple): Original image size, in the format (height, width).
753
-
754
- Attributes
755
- xywhr (torch.Tensor | numpy.ndarray): The boxes in [x_center, y_center, width, height, rotation] format.
756
- conf (torch.Tensor | numpy.ndarray): The confidence values of the boxes.
757
- cls (torch.Tensor | numpy.ndarray): The class values of the boxes.
758
- id (torch.Tensor | numpy.ndarray): The track IDs of the boxes (if available).
759
- xyxyxyxyn (torch.Tensor | numpy.ndarray): The rotated boxes in xyxyxyxy format normalized by orig image size.
760
- xyxyxyxy (torch.Tensor | numpy.ndarray): The rotated boxes in xyxyxyxy format.
761
- xyxy (torch.Tensor | numpy.ndarray): The horizontal boxes in xyxyxyxy format.
762
- data (torch.Tensor): The raw OBB tensor (alias for `boxes`).
1411
+ This class provides functionality to handle oriented bounding boxes, including conversion between
1412
+ different formats, normalization, and access to various properties of the boxes.
1413
+
1414
+ Attributes:
1415
+ data (torch.Tensor): The raw OBB tensor containing box coordinates and associated data.
1416
+ orig_shape (tuple): Original image size as (height, width).
1417
+ is_track (bool): Indicates whether tracking IDs are included in the box data.
1418
+ xywhr (torch.Tensor | numpy.ndarray): Boxes in [x_center, y_center, width, height, rotation] format.
1419
+ conf (torch.Tensor | numpy.ndarray): Confidence scores for each box.
1420
+ cls (torch.Tensor | numpy.ndarray): Class labels for each box.
1421
+ id (torch.Tensor | numpy.ndarray): Tracking IDs for each box, if available.
1422
+ xyxyxyxy (torch.Tensor | numpy.ndarray): Boxes in 8-point [x1, y1, x2, y2, x3, y3, x4, y4] format.
1423
+ xyxyxyxyn (torch.Tensor | numpy.ndarray): Normalized 8-point coordinates relative to orig_shape.
1424
+ xyxy (torch.Tensor | numpy.ndarray): Axis-aligned bounding boxes in [x1, y1, x2, y2] format.
763
1425
 
764
1426
  Methods:
765
- cpu(): Move the object to CPU memory.
766
- numpy(): Convert the object to a numpy array.
767
- cuda(): Move the object to CUDA memory.
768
- to(*args, **kwargs): Move the object to the specified device.
1427
+ cpu(): Returns a copy of the OBB object with all tensors on CPU memory.
1428
+ numpy(): Returns a copy of the OBB object with all tensors as numpy arrays.
1429
+ cuda(): Returns a copy of the OBB object with all tensors on GPU memory.
1430
+ to(*args, **kwargs): Returns a copy of the OBB object with tensors on specified device and dtype.
1431
+
1432
+ Examples:
1433
+ >>> boxes = torch.tensor([[100, 50, 150, 100, 30, 0.9, 0]]) # xywhr, conf, cls
1434
+ >>> obb = OBB(boxes, orig_shape=(480, 640))
1435
+ >>> print(obb.xyxyxyxy)
1436
+ >>> print(obb.conf)
1437
+ >>> print(obb.cls)
769
1438
  """
770
1439
 
771
1440
  def __init__(self, boxes, orig_shape) -> None:
772
- """Initialize an OBB instance with oriented bounding box data and original image shape."""
1441
+ """
1442
+ Initialize an OBB (Oriented Bounding Box) instance with oriented bounding box data and original image shape.
1443
+
1444
+ This class stores and manipulates Oriented Bounding Boxes (OBB) for object detection tasks. It provides
1445
+ various properties and methods to access and transform the OBB data.
1446
+
1447
+ Args:
1448
+ boxes (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the detection boxes,
1449
+ with shape (num_boxes, 7) or (num_boxes, 8). The last two columns contain confidence and class values.
1450
+ If present, the third last column contains track IDs, and the fifth column contains rotation.
1451
+ orig_shape (Tuple[int, int]): Original image size, in the format (height, width).
1452
+
1453
+ Attributes:
1454
+ data (torch.Tensor | numpy.ndarray): The raw OBB tensor.
1455
+ orig_shape (Tuple[int, int]): The original image shape.
1456
+ is_track (bool): Whether the boxes include tracking IDs.
1457
+
1458
+ Raises:
1459
+ AssertionError: If the number of values per box is not 7 or 8.
1460
+
1461
+ Examples:
1462
+ >>> import torch
1463
+ >>> boxes = torch.rand(3, 7) # 3 boxes with 7 values each
1464
+ >>> orig_shape = (640, 480)
1465
+ >>> obb = OBB(boxes, orig_shape)
1466
+ >>> print(obb.xywhr) # Access the boxes in xywhr format
1467
+ """
773
1468
  if boxes.ndim == 1:
774
1469
  boxes = boxes[None, :]
775
1470
  n = boxes.shape[-1]
@@ -780,34 +1475,115 @@ class OBB(BaseTensor):
780
1475
 
781
1476
  @property
782
1477
  def xywhr(self):
783
- """Return boxes in [x_center, y_center, width, height, rotation] format."""
1478
+ """
1479
+ Returns boxes in [x_center, y_center, width, height, rotation] format.
1480
+
1481
+ Returns:
1482
+ (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the oriented bounding boxes with format
1483
+ [x_center, y_center, width, height, rotation]. The shape is (N, 5) where N is the number of boxes.
1484
+
1485
+ Examples:
1486
+ >>> results = model('image.jpg')
1487
+ >>> obb = results[0].obb
1488
+ >>> xywhr = obb.xywhr
1489
+ >>> print(xywhr.shape)
1490
+ torch.Size([3, 5])
1491
+ """
784
1492
  return self.data[:, :5]
785
1493
 
786
1494
  @property
787
1495
  def conf(self):
788
- """Gets the confidence values of Oriented Bounding Boxes (OBBs)."""
1496
+ """
1497
+ Returns the confidence scores for Oriented Bounding Boxes (OBBs).
1498
+
1499
+ This property retrieves the confidence values associated with each OBB detection. The confidence score
1500
+ represents the model's certainty in the detection.
1501
+
1502
+ Returns:
1503
+ (torch.Tensor | numpy.ndarray): A tensor or numpy array of shape (N,) containing confidence scores
1504
+ for N detections, where each score is in the range [0, 1].
1505
+
1506
+ Examples:
1507
+ >>> results = model('image.jpg')
1508
+ >>> obb_result = results[0].obb
1509
+ >>> confidence_scores = obb_result.conf
1510
+ >>> print(confidence_scores)
1511
+ """
789
1512
  return self.data[:, -2]
790
1513
 
791
1514
  @property
792
1515
  def cls(self):
793
- """Returns the class values of the oriented bounding boxes."""
1516
+ """
1517
+ Returns the class values of the oriented bounding boxes.
1518
+
1519
+ Returns:
1520
+ (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the class values for each oriented
1521
+ bounding box. The shape is (N,), where N is the number of boxes.
1522
+
1523
+ Examples:
1524
+ >>> results = model('image.jpg')
1525
+ >>> result = results[0]
1526
+ >>> obb = result.obb
1527
+ >>> class_values = obb.cls
1528
+ >>> print(class_values)
1529
+ """
794
1530
  return self.data[:, -1]
795
1531
 
796
1532
  @property
797
1533
  def id(self):
798
- """Return the tracking IDs of the oriented bounding boxes (if available)."""
1534
+ """
1535
+ Returns the tracking IDs of the oriented bounding boxes (if available).
1536
+
1537
+ Returns:
1538
+ (torch.Tensor | numpy.ndarray | None): A tensor or numpy array containing the tracking IDs for each
1539
+ oriented bounding box. Returns None if tracking IDs are not available.
1540
+
1541
+ Examples:
1542
+ >>> results = model.track('image.jpg') # Run inference with tracking
1543
+ >>> for result in results:
1544
+ ... if result.obb is not None:
1545
+ ... track_ids = result.obb.id
1546
+ ... if track_ids is not None:
1547
+ ... print(f"Tracking IDs: {track_ids}")
1548
+ """
799
1549
  return self.data[:, -3] if self.is_track else None
800
1550
 
801
1551
  @property
802
1552
  @lru_cache(maxsize=2)
803
1553
  def xyxyxyxy(self):
804
- """Convert OBB format to 8-point (xyxyxyxy) coordinate format of shape (N, 4, 2) for rotated bounding boxes."""
1554
+ """
1555
+ Converts OBB format to 8-point (xyxyxyxy) coordinate format for rotated bounding boxes.
1556
+
1557
+ Returns:
1558
+ (torch.Tensor | numpy.ndarray): Rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2), where N is
1559
+ the number of boxes. Each box is represented by 4 points (x, y), starting from the top-left corner and
1560
+ moving clockwise.
1561
+
1562
+ Examples:
1563
+ >>> obb = OBB(torch.tensor([[100, 100, 50, 30, 0.5, 0.9, 0]]), orig_shape=(640, 640))
1564
+ >>> xyxyxyxy = obb.xyxyxyxy
1565
+ >>> print(xyxyxyxy.shape)
1566
+ torch.Size([1, 4, 2])
1567
+ """
805
1568
  return ops.xywhr2xyxyxyxy(self.xywhr)
806
1569
 
807
1570
  @property
808
1571
  @lru_cache(maxsize=2)
809
1572
  def xyxyxyxyn(self):
810
- """Converts rotated bounding boxes to normalized xyxyxyxy format of shape (N, 4, 2)."""
1573
+ """
1574
+ Converts rotated bounding boxes to normalized xyxyxyxy format.
1575
+
1576
+ Returns:
1577
+ (torch.Tensor | numpy.ndarray): Normalized rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2),
1578
+ where N is the number of boxes. Each box is represented by 4 points (x, y), normalized relative to
1579
+ the original image dimensions.
1580
+
1581
+ Examples:
1582
+ >>> obb = OBB(torch.rand(10, 7), orig_shape=(640, 480)) # 10 random OBBs
1583
+ >>> normalized_boxes = obb.xyxyxyxyn
1584
+ >>> print(normalized_boxes.shape)
1585
+ torch.Size([10, 4, 2])
1586
+ """
811
1587
  xyxyxyxyn = self.xyxyxyxy.clone() if isinstance(self.xyxyxyxy, torch.Tensor) else np.copy(self.xyxyxyxy)
812
1588
  xyxyxyxyn[..., 0] /= self.orig_shape[1]
813
1589
  xyxyxyxyn[..., 1] /= self.orig_shape[0]
@@ -817,28 +1593,31 @@ class OBB(BaseTensor):
817
1593
  @lru_cache(maxsize=2)
818
1594
  def xyxy(self):
819
1595
  """
820
- Convert the oriented bounding boxes (OBB) to axis-aligned bounding boxes in xyxy format (x1, y1, x2, y2).
1596
+ Converts oriented bounding boxes (OBB) to axis-aligned bounding boxes in xyxy format.
821
1597
 
822
- Returns:
823
- (torch.Tensor | numpy.ndarray): Axis-aligned bounding boxes in xyxy format with shape (num_boxes, 4).
824
-
825
- Example:
826
- ```python
827
- import torch
828
- from ultralytics import YOLO
1598
+ This property calculates the minimal enclosing rectangle for each oriented bounding box and returns it in
1599
+ xyxy format (x1, y1, x2, y2). This is useful for operations that require axis-aligned bounding boxes, such
1600
+ as IoU calculation with non-rotated boxes.
829
1601
 
830
- model = YOLO('yolov8n.pt')
831
- results = model('path/to/image.jpg')
832
- for result in results:
833
- obb = result.obb
834
- if obb is not None:
835
- xyxy_boxes = obb.xyxy
836
- # Do something with xyxy_boxes
837
- ```
1602
+ Returns:
1603
+ (torch.Tensor | numpy.ndarray): Axis-aligned bounding boxes in xyxy format with shape (N, 4), where N
1604
+ is the number of boxes. Each row contains [x1, y1, x2, y2] coordinates.
1605
+
1606
+ Examples:
1607
+ >>> import torch
1608
+ >>> from ultralytics import YOLO
1609
+ >>> model = YOLO('yolov8n-obb.pt')
1610
+ >>> results = model('path/to/image.jpg')
1611
+ >>> for result in results:
1612
+ ... obb = result.obb
1613
+ ... if obb is not None:
1614
+ ... xyxy_boxes = obb.xyxy
1615
+ ... print(xyxy_boxes.shape) # (N, 4)
838
1616
 
839
- Note:
840
- This method is useful to perform operations that require axis-aligned bounding boxes, such as IoU
841
- calculation with non-rotated boxes. The conversion approximates the OBB by the minimal enclosing rectangle.
1617
+ Notes:
1618
+ - This method approximates the OBB by its minimal enclosing rectangle.
1619
+ - The returned format is compatible with standard object detection metrics and visualization tools.
1620
+ - The property uses caching to improve performance for repeated access.
842
1621
  """
843
1622
  x = self.xyxyxyxy[..., 0]
844
1623
  y = self.xyxyxyxy[..., 1]