ultralytics 8.0.65__py3-none-any.whl → 8.0.66__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (41)
  1. ultralytics/__init__.py +1 -1
  2. ultralytics/datasets/coco-pose.yaml +38 -0
  3. ultralytics/datasets/coco8-pose.yaml +25 -0
  4. ultralytics/models/v8/yolov8-pose-p6.yaml +57 -0
  5. ultralytics/models/v8/yolov8-pose.yaml +47 -0
  6. ultralytics/nn/autobackend.py +7 -2
  7. ultralytics/nn/modules.py +33 -2
  8. ultralytics/nn/tasks.py +24 -7
  9. ultralytics/tracker/track.py +2 -3
  10. ultralytics/yolo/cfg/__init__.py +4 -4
  11. ultralytics/yolo/cfg/default.yaml +2 -0
  12. ultralytics/yolo/data/augment.py +24 -19
  13. ultralytics/yolo/data/build.py +4 -4
  14. ultralytics/yolo/data/dataset.py +9 -3
  15. ultralytics/yolo/data/utils.py +108 -33
  16. ultralytics/yolo/engine/exporter.py +9 -7
  17. ultralytics/yolo/engine/model.py +5 -4
  18. ultralytics/yolo/engine/predictor.py +1 -0
  19. ultralytics/yolo/engine/results.py +70 -56
  20. ultralytics/yolo/utils/benchmarks.py +4 -2
  21. ultralytics/yolo/utils/downloads.py +3 -3
  22. ultralytics/yolo/utils/instance.py +1 -1
  23. ultralytics/yolo/utils/loss.py +14 -0
  24. ultralytics/yolo/utils/metrics.py +111 -13
  25. ultralytics/yolo/utils/ops.py +30 -50
  26. ultralytics/yolo/utils/plotting.py +79 -4
  27. ultralytics/yolo/utils/torch_utils.py +11 -9
  28. ultralytics/yolo/v8/__init__.py +2 -2
  29. ultralytics/yolo/v8/detect/train.py +1 -1
  30. ultralytics/yolo/v8/detect/val.py +2 -2
  31. ultralytics/yolo/v8/pose/__init__.py +7 -0
  32. ultralytics/yolo/v8/pose/predict.py +103 -0
  33. ultralytics/yolo/v8/pose/train.py +170 -0
  34. ultralytics/yolo/v8/pose/val.py +213 -0
  35. ultralytics/yolo/v8/segment/val.py +3 -4
  36. {ultralytics-8.0.65.dist-info → ultralytics-8.0.66.dist-info}/METADATA +27 -2
  37. {ultralytics-8.0.65.dist-info → ultralytics-8.0.66.dist-info}/RECORD +41 -33
  38. {ultralytics-8.0.65.dist-info → ultralytics-8.0.66.dist-info}/LICENSE +0 -0
  39. {ultralytics-8.0.65.dist-info → ultralytics-8.0.66.dist-info}/WHEEL +0 -0
  40. {ultralytics-8.0.65.dist-info → ultralytics-8.0.66.dist-info}/entry_points.txt +0 -0
  41. {ultralytics-8.0.65.dist-info → ultralytics-8.0.66.dist-info}/top_level.txt +0 -0
--- a/ultralytics/yolo/utils/metrics.py
+++ b/ultralytics/yolo/utils/metrics.py
@@ -13,6 +13,8 @@ import torch.nn as nn
 
 from ultralytics.yolo.utils import LOGGER, SimpleClass, TryExcept
 
+OKS_SIGMA = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0
+
 
 # boxes
 def box_area(box):
@@ -108,8 +110,8 @@ def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7
 
 def mask_iou(mask1, mask2, eps=1e-7):
     """
-    mask1: [N, n] m1 means number of predicted objects
-    mask2: [M, n] m2 means number of gt objects
+    mask1: [N, n] m1 means number of gt objects
+    mask2: [M, n] m2 means number of predicted objects
     Note: n means image_w x image_h
     Returns: masks iou, [N, M]
     """
@@ -118,16 +120,18 @@ def mask_iou(mask1, mask2, eps=1e-7):
     return intersection / (union + eps)
 
 
-def masks_iou(mask1, mask2, eps=1e-7):
-    """
-    mask1: [N, n] m1 means number of predicted objects
-    mask2: [N, n] m2 means number of gt objects
-    Note: n means image_w x image_h
-    Returns: masks iou, (N, )
+def kpt_iou(kpt1, kpt2, area, sigma, eps=1e-7):
+    """OKS
+    kpt1: [N, 17, 3], gt
+    kpt2: [M, 17, 3], pred
+    area: [N], areas from gt
     """
-    intersection = (mask1 * mask2).sum(1).clamp(0)  # (N, )
-    union = (mask1.sum(1) + mask2.sum(1))[None] - intersection  # (area1 + area2) - intersection
-    return intersection / (union + eps)
+    d = (kpt1[:, None, :, 0] - kpt2[..., 0]) ** 2 + (kpt1[:, None, :, 1] - kpt2[..., 1]) ** 2  # (N, M, 17)
+    sigma = torch.tensor(sigma, device=kpt1.device, dtype=kpt1.dtype)  # (17, )
+    kpt_mask = kpt1[..., 2] != 0  # (N, 17)
+    e = d / (2 * sigma) ** 2 / (area[:, None, None] + eps) / 2  # from cocoeval
+    # e = d / ((area[None, :, None] + eps) * sigma) ** 2 / 2  # from formula
+    return (torch.exp(-e) * kpt_mask[:, None]).sum(-1) / (kpt_mask.sum(-1)[:, None] + eps)
 
 
 def smooth_BCE(eps=0.1):  # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
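The new kpt_iou implements COCO-style Object Keypoint Similarity (OKS), with OKS_SIGMA holding the 17 per-keypoint sigmas from cocoeval. A hedged sketch of how it could be called, assuming both names are importable from ultralytics.yolo.utils.metrics as the hunk above suggests (shapes and values are illustrative, not from the diff):

    # illustrative sketch, not part of the release diff
    import torch
    from ultralytics.yolo.utils.metrics import OKS_SIGMA, kpt_iou

    gt_kpts = torch.rand(4, 17, 3)       # [N, 17, (x, y, visibility)]; visibility > 0 marks labelled points
    gt_kpts[..., :2] *= 640              # x, y in pixel space
    pred_kpts = torch.rand(6, 17, 3)     # [M, 17, (x, y, conf)]
    pred_kpts[..., :2] *= 640
    gt_areas = torch.full((4,), 6000.0)  # [N] ground-truth box areas used to normalize OKS

    oks = kpt_iou(gt_kpts, pred_kpts, area=gt_areas, sigma=OKS_SIGMA)  # -> (4, 6) OKS matrix in (0, 1]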
@@ -649,13 +653,13 @@ class SegmentMetrics(SimpleClass):
         self.seg = Metric()
         self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
 
-    def process(self, tp_m, tp_b, conf, pred_cls, target_cls):
+    def process(self, tp_b, tp_m, conf, pred_cls, target_cls):
         """
         Processes the detection and segmentation metrics over the given set of predictions.
 
         Args:
-            tp_m (list): List of True Positive masks.
             tp_b (list): List of True Positive boxes.
+            tp_m (list): List of True Positive masks.
             conf (list): List of confidence scores.
             pred_cls (list): List of predicted classes.
             target_cls (list): List of target classes.
@@ -712,6 +716,100 @@ class SegmentMetrics(SimpleClass):
         return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness]))
 
 
+class PoseMetrics(SegmentMetrics):
+    """
+    Calculates and aggregates detection and pose metrics over a given set of classes.
+
+    Args:
+        save_dir (Path): Path to the directory where the output plots should be saved. Default is the current directory.
+        plot (bool): Whether to save the detection and segmentation plots. Default is False.
+        names (list): List of class names. Default is an empty list.
+
+    Attributes:
+        save_dir (Path): Path to the directory where the output plots should be saved.
+        plot (bool): Whether to save the detection and segmentation plots.
+        names (list): List of class names.
+        box (Metric): An instance of the Metric class to calculate box detection metrics.
+        pose (Metric): An instance of the Metric class to calculate mask segmentation metrics.
+        speed (dict): Dictionary to store the time taken in different phases of inference.
+
+    Methods:
+        process(tp_m, tp_b, conf, pred_cls, target_cls): Processes metrics over the given set of predictions.
+        mean_results(): Returns the mean of the detection and segmentation metrics over all the classes.
+        class_result(i): Returns the detection and segmentation metrics of class `i`.
+        maps: Returns the mean Average Precision (mAP) scores for IoU thresholds ranging from 0.50 to 0.95.
+        fitness: Returns the fitness scores, which are a single weighted combination of metrics.
+        ap_class_index: Returns the list of indices of classes used to compute Average Precision (AP).
+        results_dict: Returns the dictionary containing all the detection and segmentation metrics and fitness score.
+    """
+
+    def __init__(self, save_dir=Path('.'), plot=False, names=()) -> None:
+        super().__init__(save_dir, plot, names)
+        self.save_dir = save_dir
+        self.plot = plot
+        self.names = names
+        self.box = Metric()
+        self.pose = Metric()
+        self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
+
+    def __getattr__(self, attr):
+        name = self.__class__.__name__
+        raise AttributeError(f"'{name}' object has no attribute '{attr}'. See valid attributes below.\n{self.__doc__}")
+
+    def process(self, tp_b, tp_p, conf, pred_cls, target_cls):
+        """
+        Processes the detection and pose metrics over the given set of predictions.
+
+        Args:
+            tp_b (list): List of True Positive boxes.
+            tp_p (list): List of True Positive keypoints.
+            conf (list): List of confidence scores.
+            pred_cls (list): List of predicted classes.
+            target_cls (list): List of target classes.
+        """
+
+        results_pose = ap_per_class(tp_p,
+                                    conf,
+                                    pred_cls,
+                                    target_cls,
+                                    plot=self.plot,
+                                    save_dir=self.save_dir,
+                                    names=self.names,
+                                    prefix='Pose')[2:]
+        self.pose.nc = len(self.names)
+        self.pose.update(results_pose)
+        results_box = ap_per_class(tp_b,
+                                   conf,
+                                   pred_cls,
+                                   target_cls,
+                                   plot=self.plot,
+                                   save_dir=self.save_dir,
+                                   names=self.names,
+                                   prefix='Box')[2:]
+        self.box.nc = len(self.names)
+        self.box.update(results_box)
+
+    @property
+    def keys(self):
+        return [
+            'metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)',
+            'metrics/precision(P)', 'metrics/recall(P)', 'metrics/mAP50(P)', 'metrics/mAP50-95(P)']
+
+    def mean_results(self):
+        return self.box.mean_results() + self.pose.mean_results()
+
+    def class_result(self, i):
+        return self.box.class_result(i) + self.pose.class_result(i)
+
+    @property
+    def maps(self):
+        return self.box.maps + self.pose.maps
+
+    @property
+    def fitness(self):
+        return self.pose.fitness() + self.box.fitness()
+
+
 class ClassifyMetrics(SimpleClass):
     """
     Class for computing classification metrics including top-1 and top-5 accuracy.
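PoseMetrics mirrors SegmentMetrics but aggregates box AP and keypoint (OKS-based) AP. A hedged usage sketch with synthetic matcher output; the tp_b/tp_p/conf arrays below are illustrative stand-ins, not values from the diff:

    import numpy as np
    from pathlib import Path
    from ultralytics.yolo.utils.metrics import PoseMetrics

    n, n_iou = 50, 10                                 # 50 predictions, 10 IoU/OKS thresholds (0.50:0.95)
    metrics = PoseMetrics(save_dir=Path('.'), plot=False, names=('person',))
    tp_b = np.random.rand(n, n_iou) > 0.5             # box true positives per threshold
    tp_p = np.random.rand(n, n_iou) > 0.5             # keypoint (OKS) true positives per threshold
    conf, pred_cls, target_cls = np.random.rand(n), np.zeros(n), np.zeros(n)

    metrics.process(tp_b, tp_p, conf, pred_cls, target_cls)
    print(metrics.keys)            # '(B)' box and '(P)' pose metric names
    print(metrics.mean_results())  # mP, mR, mAP50, mAP50-95 for boxes, then for keypoints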
--- a/ultralytics/yolo/utils/ops.py
+++ b/ultralytics/yolo/utils/ops.py
@@ -281,28 +281,23 @@ def clip_boxes(boxes, shape):
         boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2
 
 
-def clip_coords(boxes, shape):
+def clip_coords(coords, shape):
     """
-    Clip bounding xyxy bounding boxes to image shape (height, width).
+    Clip line coordinates to the image boundaries.
 
     Args:
-        boxes (torch.Tensor or numpy.ndarray): Bounding boxes to be clipped.
-        shape (tuple): The shape of the image. (height, width)
+        coords (torch.Tensor) or (numpy.ndarray): A list of line coordinates.
+        shape (tuple): A tuple of integers representing the size of the image in the format (height, width).
 
     Returns:
-        None
-
-    Note:
-        The input `boxes` is modified in-place, there is no return value.
+        (None): The function modifies the input `coordinates` in place, by clipping each coordinate to the image boundaries.
     """
-    if isinstance(boxes, torch.Tensor):  # faster individually
-        boxes[:, 0].clamp_(0, shape[1])  # x1
-        boxes[:, 1].clamp_(0, shape[0])  # y1
-        boxes[:, 2].clamp_(0, shape[1])  # x2
-        boxes[:, 3].clamp_(0, shape[0])  # y2
+    if isinstance(coords, torch.Tensor):  # faster individually
+        coords[..., 0].clamp_(0, shape[1])  # x
+        coords[..., 1].clamp_(0, shape[0])  # y
     else:  # np.array (faster grouped)
-        boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1])  # x1, x2
-        boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0])  # y1, y2
+        coords[..., 0] = coords[..., 0].clip(0, shape[1])  # x
+        coords[..., 1] = coords[..., 1].clip(0, shape[0])  # y
 
 
 def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):
@@ -577,17 +572,18 @@ def process_mask_upsample(protos, masks_in, bboxes, shape):
 
 def process_mask(protos, masks_in, bboxes, shape, upsample=False):
     """
-    It takes the output of the mask head, and applies the mask to the bounding boxes. This is faster but produces
-    downsampled quality of mask
+    Apply masks to bounding boxes using the output of the mask head.
 
     Args:
-        protos (torch.Tensor): [mask_dim, mask_h, mask_w]
-        masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms
-        bboxes (torch.Tensor): [n, 4], n is number of masks after nms
-        shape (tuple): the size of the input image (h,w)
+        protos (torch.Tensor): A tensor of shape [mask_dim, mask_h, mask_w].
+        masks_in (torch.Tensor): A tensor of shape [n, mask_dim], where n is the number of masks after NMS.
+        bboxes (torch.Tensor): A tensor of shape [n, 4], where n is the number of masks after NMS.
+        shape (tuple): A tuple of integers representing the size of the input image in the format (h, w).
+        upsample (bool): A flag to indicate whether to upsample the mask to the original image size. Default is False.
 
     Returns:
-        (torch.Tensor): The processed masks.
+        (torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w
+            are the height and width of the input image. The mask is applied to the bounding boxes.
     """
 
     c, mh, mw = protos.shape  # CHW
@@ -632,19 +628,19 @@ def process_mask_native(protos, masks_in, bboxes, shape):
     return masks.gt_(0.5)
 
 
-def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=False):
+def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize=False):
     """
     Rescale segment coordinates (xyxy) from img1_shape to img0_shape
 
     Args:
-        img1_shape (tuple): The shape of the image that the segments are from.
-        segments (torch.Tensor): the segments to be scaled
+        img1_shape (tuple): The shape of the image that the coords are from.
+        coords (torch.Tensor): the coords to be scaled
         img0_shape (tuple): the shape of the image that the segmentation is being applied to
         ratio_pad (tuple): the ratio of the image size to the padded image size.
        normalize (bool): If True, the coordinates will be normalized to the range [0, 1]. Defaults to False
 
     Returns:
-        segments (torch.Tensor): the segmented image.
+        coords (torch.Tensor): the segmented image.
     """
     if ratio_pad is None:  # calculate from img0_shape
         gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
@@ -653,14 +649,15 @@ def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=F
         gain = ratio_pad[0][0]
         pad = ratio_pad[1]
 
-    segments[:, 0] -= pad[0]  # x padding
-    segments[:, 1] -= pad[1]  # y padding
-    segments /= gain
-    clip_segments(segments, img0_shape)
+    coords[..., 0] -= pad[0]  # x padding
+    coords[..., 1] -= pad[1]  # y padding
+    coords[..., 0] /= gain
+    coords[..., 1] /= gain
+    clip_coords(coords, img0_shape)
     if normalize:
-        segments[:, 0] /= img0_shape[1]  # width
-        segments[:, 1] /= img0_shape[0]  # height
-    return segments
+        coords[..., 0] /= img0_shape[1]  # width
+        coords[..., 1] /= img0_shape[0]  # height
+    return coords
 
 
 def masks2segments(masks, strategy='largest'):
@@ -688,23 +685,6 @@ def masks2segments(masks, strategy='largest'):
     return segments
 
 
-def clip_segments(segments, shape):
-    """
-    It takes a list of line segments (x1,y1,x2,y2) and clips them to the image shape (height, width)
-
-    Args:
-        segments (list): a list of segments, each segment is a list of points, each point is a list of x,y
-            coordinates
-        shape (tuple): the shape of the image
-    """
-    if isinstance(segments, torch.Tensor):  # faster individually
-        segments[:, 0].clamp_(0, shape[1])  # x
-        segments[:, 1].clamp_(0, shape[0])  # y
-    else:  # np.array (faster grouped)
-        segments[:, 0] = segments[:, 0].clip(0, shape[1])  # x
-        segments[:, 1] = segments[:, 1].clip(0, shape[0])  # y
-
-
 def clean_str(s):
     """
     Cleans a string by replacing special characters with underscore _
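With clip_segments folded into clip_coords and scale_segments generalized into scale_coords, these helpers now operate on the x/y channels of arbitrary coordinate arrays such as keypoints. A minimal sketch, mirroring the ops.scale_coords call used by the new pose predictor further below (shapes are illustrative):

    import torch
    from ultralytics.yolo.utils import ops

    pred_kpts = torch.rand(8, 17, 3)  # [n, 17, (x, y, conf)] in 640x640 letterboxed space
    pred_kpts[..., :2] *= 640
    img1_shape = (640, 640)           # inference (letterboxed) shape
    img0_shape = (1080, 810)          # original image shape (h, w)

    pred_kpts = ops.scale_coords(img1_shape, pred_kpts, img0_shape)  # rescales x/y and clips in place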
--- a/ultralytics/yolo/utils/plotting.py
+++ b/ultralytics/yolo/utils/plotting.py
@@ -16,7 +16,7 @@ from ultralytics.yolo.utils import LOGGER, TryExcept, threaded
 
 from .checks import check_font, check_version, is_ascii
 from .files import increment_path
-from .ops import clip_coords, scale_image, xywh2xyxy, xyxy2xywh
+from .ops import clip_boxes, scale_image, xywh2xyxy, xyxy2xywh
 
 matplotlib.rc('font', **{'size': 11})
 matplotlib.use('Agg')  # for writing to files only
@@ -30,6 +30,11 @@ class Colors:
                 '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
         self.palette = [self.hex2rgb(f'#{c}') for c in hexs]
         self.n = len(self.palette)
+        self.pose_palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102], [230, 230, 0], [255, 153, 255],
+                                      [153, 204, 255], [255, 102, 255], [255, 51, 255], [102, 178, 255], [51, 153, 255],
+                                      [255, 153, 153], [255, 102, 102], [255, 51, 51], [153, 255, 153], [102, 255, 102],
+                                      [51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0], [255, 255, 255]],
+                                     dtype=np.uint8)
 
     def __call__(self, i, bgr=False):
         c = self.palette[int(i) % self.n]
@@ -62,6 +67,12 @@ class Annotator:
         else:  # use cv2
             self.im = im
         self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2)  # line width
+        # pose
+        self.skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], [7, 9],
+                         [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]
+
+        self.limb_color = colors.pose_palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]]
+        self.kpt_color = colors.pose_palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]]
 
     def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
         # Add one xyxy box to image with label
@@ -132,6 +143,49 @@ class Annotator:
             # convert im back to PIL and update draw
             self.fromarray(self.im)
 
+    def kpts(self, kpts, shape=(640, 640), radius=5, kpt_line=True):
+        """Plot keypoints.
+        Args:
+            kpts (tensor): predicted kpts, shape: [17, 3]
+            shape (tuple): image shape, (h, w)
+            steps (int): keypoints step
+            radius (int): size of drawing points
+        """
+        if self.pil:
+            # convert to numpy first
+            self.im = np.asarray(self.im).copy()
+        nkpt, ndim = kpts.shape
+        is_pose = nkpt == 17 and ndim == 3
+        kpt_line &= is_pose  # `kpt_line=True` for now only supports human pose plotting
+        for i, k in enumerate(kpts):
+            color_k = [int(x) for x in self.kpt_color[i]] if is_pose else colors(i)
+            x_coord, y_coord = k[0], k[1]
+            if x_coord % shape[1] != 0 and y_coord % shape[0] != 0:
+                if len(k) == 3:
+                    conf = k[2]
+                    if conf < 0.5:
+                        continue
+                cv2.circle(self.im, (int(x_coord), int(y_coord)), radius, color_k, -1)
+
+        if kpt_line:
+            ndim = kpts.shape[-1]
+            for sk_id, sk in enumerate(self.skeleton):
+                pos1 = (int(kpts[(sk[0] - 1), 0]), int(kpts[(sk[0] - 1), 1]))
+                pos2 = (int(kpts[(sk[1] - 1), 0]), int(kpts[(sk[1] - 1), 1]))
+                if ndim == 3:
+                    conf1 = kpts[(sk[0] - 1), 2]
+                    conf2 = kpts[(sk[1] - 1), 2]
+                    if conf1 < 0.5 or conf2 < 0.5:
+                        continue
+                if pos1[0] % shape[1] == 0 or pos1[1] % shape[0] == 0 or pos1[0] < 0 or pos1[1] < 0:
+                    continue
+                if pos2[0] % shape[1] == 0 or pos2[1] % shape[0] == 0 or pos2[0] < 0 or pos2[1] < 0:
+                    continue
+                cv2.line(self.im, pos1, pos2, [int(x) for x in self.limb_color[sk_id]], thickness=2)
+        if self.pil:
+            # convert im back to PIL and update draw
+            self.fromarray(self.im)
+
 
     def rectangle(self, xy, fill=None, outline=None, width=1):
         # Add rectangle to image (PIL-only)
@@ -213,7 +267,7 @@ def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False,
         b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # attempt rectangle to square
     b[:, 2:] = b[:, 2:] * gain + pad  # box wh * gain + pad
     xyxy = xywh2xyxy(b).long()
-    clip_coords(xyxy, im.shape)
+    clip_boxes(xyxy, im.shape)
     crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)]
     if save:
         file.parent.mkdir(parents=True, exist_ok=True)  # make directory
@@ -229,6 +283,7 @@ plot_images(images,
                 cls,
                 bboxes,
                 masks=np.zeros(0, dtype=np.uint8),
+                kpts=np.zeros((0, 51), dtype=np.float32),
                 paths=None,
                 fname='images.jpg',
                 names=None):
@@ -241,6 +296,8 @@ def plot_images(images,
        bboxes = bboxes.cpu().numpy()
     if isinstance(masks, torch.Tensor):
         masks = masks.cpu().numpy().astype(int)
+    if isinstance(kpts, torch.Tensor):
+        kpts = kpts.cpu().numpy()
     if isinstance(batch_idx, torch.Tensor):
         batch_idx = batch_idx.cpu().numpy()
 
@@ -300,6 +357,21 @@ def plot_images(images,
                        label = f'{c}' if labels else f'{c} {conf[j]:.1f}'
                        annotator.box_label(box, label, color=color)
 
+            # Plot keypoints
+            if len(kpts):
+                kpts_ = kpts[idx].copy()
+                if len(kpts_):
+                    if kpts_[..., 0].max() <= 1.01 or kpts_[..., 1].max() <= 1.01:  # if normalized with tolerance .01
+                        kpts_[..., 0] *= w  # scale to pixels
+                        kpts_[..., 1] *= h
+                    elif scale < 1:  # absolute coords need scale if image scales
+                        kpts_ *= scale
+                kpts_[..., 0] += x
+                kpts_[..., 1] += y
+                for j in range(len(kpts_)):
+                    if labels or conf[j] > 0.25:  # 0.25 conf thresh
+                        annotator.kpts(kpts_[j])
+
             # Plot masks
             if len(masks):
                 if idx.shape[0] == masks.shape[0]:  # overlap_masks=False
@@ -307,7 +379,7 @@ def plot_images(images,
                 else:  # overlap_masks=True
                     image_masks = masks[[i]]  # (1, 640, 640)
                     nl = idx.sum()
-                    index = np.arange(nl).reshape(nl, 1, 1) + 1
+                    index = np.arange(nl).reshape((nl, 1, 1)) + 1
                     image_masks = np.repeat(image_masks, nl, axis=0)
                     image_masks = np.where(image_masks == index, 1.0, 0.0)
 
@@ -328,13 +400,16 @@ def plot_images(images,
     annotator.im.save(fname)  # save
 
 
-def plot_results(file='path/to/results.csv', dir='', segment=False):
+def plot_results(file='path/to/results.csv', dir='', segment=False, pose=False):
     # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv')
     import pandas as pd
     save_dir = Path(file).parent if file else Path(dir)
     if segment:
         fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True)
         index = [1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]
+    elif pose:
+        fig, ax = plt.subplots(2, 9, figsize=(21, 6), tight_layout=True)
+        index = [1, 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 16, 17, 18, 8, 9, 12, 13]
     else:
         fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True)
         index = [1, 2, 3, 4, 5, 8, 9, 10, 6, 7]
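plot_results gains a pose layout (18 metric curves on a 2×9 grid). A hedged usage sketch; the CSV path below is illustrative and must point at an existing pose-training results.csv:

    from ultralytics.yolo.utils.plotting import plot_results

    plot_results(file='runs/pose/train/results.csv', pose=True)  # writes a results plot next to the CSV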
--- a/ultralytics/yolo/utils/torch_utils.py
+++ b/ultralytics/yolo/utils/torch_utils.py
@@ -240,8 +240,8 @@ def copy_attr(a, b, include=(), exclude=()):
 
 
 def get_latest_opset():
-    # Return max supported ONNX opset by this version of torch
-    return max(int(k[14:]) for k in vars(torch.onnx) if 'symbolic_opset' in k)  # opset
+    # Return second-most (for maturity) recently supported ONNX opset by this version of torch
+    return max(int(k[14:]) for k in vars(torch.onnx) if 'symbolic_opset' in k) - 1  # opset
 
 
 def intersect_dicts(da, db, exclude=()):
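The ONNX export default now backs off one opset from the newest one torch advertises. A sketch of the discovery logic, assuming torch.onnx still exposes symbolic_opset<N> attributes as this helper relies on:

    import torch

    opsets = [int(k[14:]) for k in vars(torch.onnx) if 'symbolic_opset' in k]  # len('symbolic_opset') == 14
    print('newest:', max(opsets), '-> export default:', max(opsets) - 1)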
@@ -318,18 +318,18 @@ def strip_optimizer(f: Union[str, Path] = 'best.pt', s: str = '') -> None:
     """
     Strip optimizer from 'f' to finalize training, optionally save as 's'.
 
-    Usage:
-        from ultralytics.yolo.utils.torch_utils import strip_optimizer
-        from pathlib import Path
-        for f in Path('/Users/glennjocher/Downloads/weights').glob('*.pt'):
-            strip_optimizer(f)
-
     Args:
         f (str): file path to model to strip the optimizer from. Default is 'best.pt'.
         s (str): file path to save the model with stripped optimizer to. If not provided, 'f' will be overwritten.
 
     Returns:
         None
+
+    Usage:
+        from pathlib import Path
+        from ultralytics.yolo.utils.torch_utils import strip_optimizer
+        for f in Path('/Users/glennjocher/Downloads/weights').rglob('*.pt'):
+            strip_optimizer(f)
     """
     x = torch.load(f, map_location=torch.device('cpu'))
     args = {**DEFAULT_CFG_DICT, **x['train_args']}  # combine model args with default args, preferring model args
@@ -349,7 +349,9 @@ def strip_optimizer(f: Union[str, Path] = 'best.pt', s: str = '') -> None:
 
 
 def profile(input, ops, n=10, device=None):
-    """ YOLOv8 speed/memory/FLOPs profiler
+    """
+    YOLOv8 speed/memory/FLOPs profiler
+
     Usage:
         input = torch.randn(16, 3, 640, 640)
         m1 = lambda x: x * torch.sigmoid(x)
--- a/ultralytics/yolo/v8/__init__.py
+++ b/ultralytics/yolo/v8/__init__.py
@@ -1,5 +1,5 @@
 # Ultralytics YOLO 🚀, GPL-3.0 license
 
-from ultralytics.yolo.v8 import classify, detect, segment
+from ultralytics.yolo.v8 import classify, detect, pose, segment
 
-__all__ = 'classify', 'segment', 'detect'
+__all__ = 'classify', 'segment', 'detect', 'pose'
--- a/ultralytics/yolo/v8/detect/train.py
+++ b/ultralytics/yolo/v8/detect/train.py
@@ -41,7 +41,7 @@ class DetectionTrainer(BaseTrainer):
                                  shuffle=mode == 'train',
                                  seed=self.args.seed)[0] if self.args.v5loader else \
             build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, rank=rank, mode=mode,
-                             rect=mode == 'val', names=self.data['names'])[0]
+                             rect=mode == 'val', data_info=self.data)[0]
 
     def preprocess_batch(self, batch):
         batch['img'] = batch['img'].to(self.device, non_blocking=True).float() / 255
--- a/ultralytics/yolo/v8/detect/val.py
+++ b/ultralytics/yolo/v8/detect/val.py
@@ -41,7 +41,7 @@ class DetectionValidator(BaseValidator):
 
     def init_metrics(self, model):
         val = self.data.get(self.args.split, '')  # validation path
-        self.is_coco = isinstance(val, str) and val.endswith(f'coco{os.sep}val2017.txt')  # is COCO dataset
+        self.is_coco = isinstance(val, str) and 'coco' in val and val.endswith(f'{os.sep}val2017.txt')  # is COCO
         self.class_map = ops.coco80_to_coco91_class() if self.is_coco else list(range(1000))
         self.args.save_json |= self.is_coco and not self.training  # run on final val if training COCO
         self.names = model.names
@@ -179,7 +179,7 @@ class DetectionValidator(BaseValidator):
                                  prefix=colorstr(f'{self.args.mode}: '),
                                  shuffle=False,
                                  seed=self.args.seed)[0] if self.args.v5loader else \
-            build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, names=self.data['names'],
+            build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, data_info=self.data,
                              mode='val')[0]
 
     def plot_val_samples(self, batch, ni):
--- /dev/null
+++ b/ultralytics/yolo/v8/pose/__init__.py
@@ -0,0 +1,7 @@
+# Ultralytics YOLO 🚀, GPL-3.0 license
+
+from .predict import PosePredictor, predict
+from .train import PoseTrainer, train
+from .val import PoseValidator, val
+
+__all__ = 'PoseTrainer', 'train', 'PoseValidator', 'val', 'PosePredictor', 'predict'
--- /dev/null
+++ b/ultralytics/yolo/v8/pose/predict.py
@@ -0,0 +1,103 @@
+# Ultralytics YOLO 🚀, GPL-3.0 license
+
+from ultralytics.yolo.engine.results import Results
+from ultralytics.yolo.utils import DEFAULT_CFG, ROOT, ops
+from ultralytics.yolo.utils.plotting import colors, save_one_box
+from ultralytics.yolo.v8.detect.predict import DetectionPredictor
+
+
+class PosePredictor(DetectionPredictor):
+
+    def postprocess(self, preds, img, orig_img):
+        preds = ops.non_max_suppression(preds,
+                                        self.args.conf,
+                                        self.args.iou,
+                                        agnostic=self.args.agnostic_nms,
+                                        max_det=self.args.max_det,
+                                        classes=self.args.classes,
+                                        nc=len(self.model.names))
+
+        results = []
+        for i, pred in enumerate(preds):
+            orig_img = orig_img[i] if isinstance(orig_img, list) else orig_img
+            shape = orig_img.shape
+            pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], shape).round()
+            pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape) if len(pred) else pred[:, 6:]
+            pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, shape)
+            path, _, _, _, _ = self.batch
+            img_path = path[i] if isinstance(path, list) else path
+            results.append(
+                Results(orig_img=orig_img,
+                        path=img_path,
+                        names=self.model.names,
+                        boxes=pred[:, :6],
+                        keypoints=pred_kpts))
+        return results
+
+    def write_results(self, idx, results, batch):
+        p, im, im0 = batch
+        log_string = ''
+        if len(im.shape) == 3:
+            im = im[None]  # expand for batch dim
+        self.seen += 1
+        imc = im0.copy() if self.args.save_crop else im0
+        if self.source_type.webcam or self.source_type.from_img:  # batch_size >= 1
+            log_string += f'{idx}: '
+            frame = self.dataset.count
+        else:
+            frame = getattr(self.dataset, 'frame', 0)
+        self.data_path = p
+        self.txt_path = str(self.save_dir / 'labels' / p.stem) + ('' if self.dataset.mode == 'image' else f'_{frame}')
+        log_string += '%gx%g ' % im.shape[2:]  # print string
+        self.annotator = self.get_annotator(im0)
+
+        det = results[idx].boxes  # TODO: make boxes inherit from tensors
+        if len(det) == 0:
+            return f'{log_string}(no detections), '
+        for c in det.cls.unique():
+            n = (det.cls == c).sum()  # detections per class
+            log_string += f"{n} {self.model.names[int(c)]}{'s' * (n > 1)}, "
+
+        kpts = reversed(results[idx].keypoints)
+        for k in kpts:
+            self.annotator.kpts(k, shape=results[idx].orig_shape)
+
+        # write
+        for j, d in enumerate(reversed(det)):
+            c, conf, id = int(d.cls), float(d.conf), None if d.id is None else int(d.id.item())
+            if self.args.save_txt:  # Write to file
+                kpt = (kpts[j][:, :2] / d.orig_shape[[1, 0]]).reshape(-1).tolist()
+                box = d.xywhn.view(-1).tolist()
+                line = (c, *box, *kpt) + (conf, ) * self.args.save_conf + (() if id is None else (id, ))
+                with open(f'{self.txt_path}.txt', 'a') as f:
+                    f.write(('%g ' * len(line)).rstrip() % line + '\n')
+            if self.args.save or self.args.show:  # Add bbox to image
+                name = ('' if id is None else f'id:{id} ') + self.model.names[c]
+                label = (f'{name} {conf:.2f}' if self.args.show_conf else name) if self.args.show_labels else None
+                if self.args.boxes:
+                    self.annotator.box_label(d.xyxy.squeeze(), label, color=colors(c, True))
+            if self.args.save_crop:
+                save_one_box(d.xyxy,
+                             imc,
+                             file=self.save_dir / 'crops' / self.model.model.names[c] / f'{self.data_path.stem}.jpg',
+                             BGR=True)
+
+        return log_string
+
+
+def predict(cfg=DEFAULT_CFG, use_python=False):
+    model = cfg.model or 'yolov8n-pose.pt'
+    source = cfg.source if cfg.source is not None else ROOT / 'assets' if (ROOT / 'assets').exists() \
+        else 'https://ultralytics.com/images/bus.jpg'
+
+    args = dict(model=model, source=source)
+    if use_python:
+        from ultralytics import YOLO
+        YOLO(model)(**args)
+    else:
+        predictor = PosePredictor(overrides=args)
+        predictor.predict_cli()
+
+
+if __name__ == '__main__':
+    predict()
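End to end, the new pose task can be exercised from Python. A hedged sketch using the default weights and sample image named in predict() above; the keypoints attribute follows the Results(..., keypoints=pred_kpts) call in postprocess():

    from ultralytics import YOLO

    model = YOLO('yolov8n-pose.pt')                   # pose weights referenced by predict() above
    results = model('https://ultralytics.com/images/bus.jpg')
    for r in results:
        print(r.boxes.xyxy.shape, r.keypoints.shape)  # per-person boxes and [n, 17, 3] keypoints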