ultralytics 8.0.64__py3-none-any.whl → 8.0.66__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ultralytics might be problematic.
- ultralytics/__init__.py +1 -1
- ultralytics/datasets/coco-pose.yaml +38 -0
- ultralytics/datasets/coco8-pose.yaml +25 -0
- ultralytics/models/v8/yolov8-pose-p6.yaml +57 -0
- ultralytics/models/v8/yolov8-pose.yaml +47 -0
- ultralytics/nn/autobackend.py +7 -2
- ultralytics/nn/modules.py +33 -2
- ultralytics/nn/tasks.py +24 -7
- ultralytics/tracker/track.py +2 -3
- ultralytics/yolo/cfg/__init__.py +4 -4
- ultralytics/yolo/cfg/default.yaml +2 -0
- ultralytics/yolo/data/augment.py +24 -19
- ultralytics/yolo/data/build.py +4 -4
- ultralytics/yolo/data/dataset.py +9 -3
- ultralytics/yolo/data/utils.py +110 -34
- ultralytics/yolo/engine/exporter.py +9 -7
- ultralytics/yolo/engine/model.py +5 -4
- ultralytics/yolo/engine/predictor.py +1 -0
- ultralytics/yolo/engine/results.py +70 -56
- ultralytics/yolo/utils/benchmarks.py +4 -2
- ultralytics/yolo/utils/downloads.py +3 -3
- ultralytics/yolo/utils/instance.py +1 -1
- ultralytics/yolo/utils/loss.py +14 -0
- ultralytics/yolo/utils/metrics.py +111 -13
- ultralytics/yolo/utils/ops.py +30 -50
- ultralytics/yolo/utils/plotting.py +79 -4
- ultralytics/yolo/utils/torch_utils.py +11 -9
- ultralytics/yolo/v8/__init__.py +2 -2
- ultralytics/yolo/v8/detect/train.py +1 -1
- ultralytics/yolo/v8/detect/val.py +2 -2
- ultralytics/yolo/v8/pose/__init__.py +7 -0
- ultralytics/yolo/v8/pose/predict.py +103 -0
- ultralytics/yolo/v8/pose/train.py +170 -0
- ultralytics/yolo/v8/pose/val.py +213 -0
- ultralytics/yolo/v8/segment/val.py +3 -4
- {ultralytics-8.0.64.dist-info → ultralytics-8.0.66.dist-info}/METADATA +27 -2
- {ultralytics-8.0.64.dist-info → ultralytics-8.0.66.dist-info}/RECORD +41 -33
- {ultralytics-8.0.64.dist-info → ultralytics-8.0.66.dist-info}/LICENSE +0 -0
- {ultralytics-8.0.64.dist-info → ultralytics-8.0.66.dist-info}/WHEEL +0 -0
- {ultralytics-8.0.64.dist-info → ultralytics-8.0.66.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.0.64.dist-info → ultralytics-8.0.66.dist-info}/top_level.txt +0 -0
ultralytics/__init__.py
CHANGED

ultralytics/datasets/coco-pose.yaml
ADDED
@@ -0,0 +1,38 @@
+# Ultralytics YOLO 🚀, GPL-3.0 license
+# COCO 2017 dataset http://cocodataset.org by Microsoft
+# Example usage: yolo train data=coco-pose.yaml
+# parent
+# ├── ultralytics
+# └── datasets
+#     └── coco-pose  ← downloads here (20.1 GB)
+
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/coco-pose  # dataset root dir
+train: train2017.txt  # train images (relative to 'path') 118287 images
+val: val2017.txt  # val images (relative to 'path') 5000 images
+test: test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
+
+# Keypoints
+kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+
+# Classes
+names:
+  0: person
+
+# Download script/URL (optional)
+download: |
+  from ultralytics.yolo.utils.downloads import download
+  from pathlib import Path
+
+  # Download labels
+  dir = Path(yaml['path'])  # dataset root dir
+  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
+  urls = [url + 'coco2017labels-pose.zip']  # labels
+  download(urls, dir=dir.parent)
+  # Download data
+  urls = ['http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
+          'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
+          'http://images.cocodataset.org/zips/test2017.zip']  # 7G, 41k images (optional)
+  download(urls, dir=dir / 'images', threads=3)
ultralytics/datasets/coco8-pose.yaml
ADDED
@@ -0,0 +1,25 @@
+# Ultralytics YOLO 🚀, GPL-3.0 license
+# COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics
+# Example usage: yolo train data=coco8-pose.yaml
+# parent
+# ├── ultralytics
+# └── datasets
+#     └── coco8-pose  ← downloads here (1 MB)
+
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/coco8-pose  # dataset root dir
+train: images/train  # train images (relative to 'path') 4 images
+val: images/val  # val images (relative to 'path') 4 images
+test:  # test images (optional)
+
+# Keypoints
+kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+
+# Classes
+names:
+  0: person
+
+# Download script/URL (optional)
+download: https://ultralytics.com/assets/coco8-pose.zip
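The two dataset YAMLs above are plain configuration files. A minimal training sketch follows, assuming the standard `ultralytics.YOLO` Python API of the 8.0.x series and that `yolov8n-pose.yaml` and `coco8-pose.yaml` resolve the same way the other task YAMLs do; the call itself is not part of this diff.

    # Minimal sketch: train a pose model on the small coco8-pose dataset
    from ultralytics import YOLO

    model = YOLO('yolov8n-pose.yaml')  # build a pose model from the new model YAML
    model.train(data='coco8-pose.yaml', epochs=10, imgsz=640)  # coco8-pose.zip is fetched via the 'download' URL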
ultralytics/models/v8/yolov8-pose-p6.yaml
ADDED
@@ -0,0 +1,57 @@
+# Ultralytics YOLO 🚀, GPL-3.0 license
+# YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 1  # number of classes
+kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024]
+  s: [0.33, 0.50, 1024]
+  m: [0.67, 0.75, 768]
+  l: [1.00, 1.00, 512]
+  x: [1.00, 1.25, 512]
+
+# YOLOv8.0x6 backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, Conv, [768, 3, 2]]  # 7-P5/32
+  - [-1, 3, C2f, [768, True]]
+  - [-1, 1, Conv, [1024, 3, 2]]  # 9-P6/64
+  - [-1, 3, C2f, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]]  # 11
+
+# YOLOv8.0x6 head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 8], 1, Concat, [1]]  # cat backbone P5
+  - [-1, 3, C2, [768, False]]  # 14
+
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
+  - [-1, 3, C2, [512, False]]  # 17
+
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
+  - [-1, 3, C2, [256, False]]  # 20 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 17], 1, Concat, [1]]  # cat head P4
+  - [-1, 3, C2, [512, False]]  # 23 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 14], 1, Concat, [1]]  # cat head P5
+  - [-1, 3, C2, [768, False]]  # 26 (P5/32-large)
+
+  - [-1, 1, Conv, [768, 3, 2]]
+  - [[-1, 11], 1, Concat, [1]]  # cat head P6
+  - [-1, 3, C2, [1024, False]]  # 29 (P6/64-xlarge)
+
+  - [[20, 23, 26, 29], 1, Pose, [nc, kpt_shape]]  # Pose(P3, P4, P5, P6)
ultralytics/models/v8/yolov8-pose.yaml
ADDED
@@ -0,0 +1,47 @@
+# Ultralytics YOLO 🚀, GPL-3.0 license
+# YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose
+
+# Parameters
+nc: 1  # number of classes
+kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will call yolov8-pose.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024]
+  s: [0.33, 0.50, 1024]
+  m: [0.67, 0.75, 768]
+  l: [1.00, 1.00, 512]
+  x: [1.00, 1.25, 512]
+
+# YOLOv8.0n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
+  - [-1, 3, C2f, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]]  # 9
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
+  - [-1, 3, C2f, [512]]  # 12
+
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
+  - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 12], 1, Concat, [1]]  # cat head P4
+  - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 9], 1, Concat, [1]]  # cat head P5
+  - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
+
+  - [[15, 18, 21], 1, Pose, [nc, kpt_shape]]  # Pose(P3, P4, P5)
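Both model YAMLs reuse the YOLOv8 `scales:` convention: the depth multiple scales block repeats and the width multiple scales channel counts, capped by max_channels. The small sketch below shows how those constants are conventionally applied when a model YAML is parsed; the rounding helpers follow `parse_model`/`make_divisible` behaviour and are stated here as assumptions, not copied from this diff.

    # Sketch: applying the 'n' scale [depth=0.33, width=0.25, max_channels=1024] to one layer spec
    import math

    def make_divisible(x, divisor=8):  # round channels up to the nearest multiple of `divisor`
        return math.ceil(x / divisor) * divisor

    depth, width, max_channels = 0.33, 0.25, 1024
    repeats = max(round(3 * depth), 1)                            # a 3-repeat C2f block -> 1 repeat
    channels = make_divisible(min(512, max_channels) * width, 8)  # a 512-channel layer -> 128 channels
    print(repeats, channels)  # 1 128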
ultralytics/nn/autobackend.py
CHANGED
@@ -91,8 +91,10 @@ class AutoBackend(nn.Module):
         if nn_module:
             model = weights.to(device)
             model = model.fuse(verbose=verbose) if fuse else model
-
+            if hasattr(model, 'kpt_shape'):
+                kpt_shape = model.kpt_shape  # pose-only
             stride = max(int(model.stride.max()), 32)  # model stride
+            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
             model.half() if fp16 else model.float()
             self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
             pt = True
@@ -102,6 +104,8 @@ class AutoBackend(nn.Module):
                                          device=device,
                                          inplace=True,
                                          fuse=fuse)
+            if hasattr(model, 'kpt_shape'):
+                kpt_shape = model.kpt_shape  # pose-only
             stride = max(int(model.stride.max()), 32)  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
             model.half() if fp16 else model.float()
@@ -268,13 +272,14 @@ class AutoBackend(nn.Module):
             for k, v in metadata.items():
                 if k in ('stride', 'batch'):
                     metadata[k] = int(v)
-                elif k in ('imgsz', 'names') and isinstance(v, str):
+                elif k in ('imgsz', 'names', 'kpt_shape') and isinstance(v, str):
                     metadata[k] = eval(v)
             stride = metadata['stride']
             task = metadata['task']
             batch = metadata['batch']
             imgsz = metadata['imgsz']
             names = metadata['names']
+            kpt_shape = metadata.get('kpt_shape')
         elif not (pt or triton or nn_module):
             LOGGER.warning(f"WARNING ⚠️ Metadata not found for 'model={weights}'")
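Export metadata is stored as strings, so `kpt_shape` (like `imgsz` and `names`) has to be converted back to a Python object when a non-PyTorch backend is loaded; the diff does this with `eval`. The sketch below reproduces the idea with `ast.literal_eval` as a safe stand-in and is not taken from the library.

    import ast

    metadata = {'stride': '32', 'imgsz': '[640, 640]', 'kpt_shape': '[17, 3]'}
    metadata['stride'] = int(metadata['stride'])
    for k in ('imgsz', 'names', 'kpt_shape'):
        if k in metadata and isinstance(metadata[k], str):
            metadata[k] = ast.literal_eval(metadata[k])  # "[17, 3]" -> [17, 3]
    kpt_shape = metadata.get('kpt_shape')
    print(kpt_shape)  # [17, 3]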
ultralytics/nn/modules.py
CHANGED
@@ -378,7 +378,9 @@ class Ensemble(nn.ModuleList):
         return y, None  # inference, train output


-# heads
+# Model heads below ----------------------------------------------------------------------------------------------------
+
+
 class Detect(nn.Module):
     # YOLOv8 Detect head for detection models
     dynamic = False  # force grid reconstruction
@@ -394,7 +396,6 @@ class Detect(nn.Module):
         self.reg_max = 16  # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x)
         self.no = nc + self.reg_max * 4  # number of outputs per anchor
         self.stride = torch.zeros(self.nl)  # strides computed during build
-
         c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc)  # channels
         self.cv2 = nn.ModuleList(
             nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch)
@@ -454,6 +455,36 @@ class Segment(Detect):
         return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))


+class Pose(Detect):
+    # YOLOv8 Pose head for keypoints models
+    def __init__(self, nc=80, kpt_shape=(17, 3), ch=()):
+        super().__init__(nc, ch)
+        self.kpt_shape = kpt_shape  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+        self.nk = kpt_shape[0] * kpt_shape[1]  # number of keypoints total
+        self.detect = Detect.forward
+
+        c4 = max(ch[0] // 4, self.nk)
+        self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch)
+
+    def forward(self, x):
+        bs = x[0].shape[0]  # batch size
+        kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1)  # (bs, 17*3, h*w)
+        x = self.detect(self, x)
+        if self.training:
+            return x, kpt
+        pred_kpt = self.kpts_decode(kpt)
+        return torch.cat([x, pred_kpt], 1) if self.export else (torch.cat([x[0], pred_kpt], 1), (x[1], kpt))
+
+    def kpts_decode(self, kpts):
+        ndim = self.kpt_shape[1]
+        y = kpts.clone()
+        if ndim == 3:
+            y[:, 2::3].sigmoid_()  # inplace sigmoid
+        y[:, 0::ndim] = (y[:, 0::ndim] * 2.0 + (self.anchors[0] - 0.5)) * self.strides
+        y[:, 1::ndim] = (y[:, 1::ndim] * 2.0 + (self.anchors[1] - 0.5)) * self.strides
+        return y
+
+
 class Classify(nn.Module):
     # YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
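The heart of the new head is `kpts_decode`, which applies the same grid decoding as the box branch: each raw offset is mapped to pixels as (offset * 2 + anchor - 0.5) * stride, and the visibility channel is passed through a sigmoid. A standalone sketch on dummy tensors follows; the toy anchor grid and shapes are assumptions for illustration only.

    import torch

    bs, nkpt, ndim, na = 1, 17, 3, 4           # batch, keypoints, dims (x, y, visible), anchor cells
    kpts = torch.randn(bs, nkpt * ndim, na)    # raw head output, (bs, 51, num_anchors)
    anchor_x = torch.arange(na).float() + 0.5  # toy single-row grid of anchor centres
    anchor_y = torch.zeros(na) + 0.5
    strides = torch.full((1, na), 8.0)         # stride per anchor cell

    y = kpts.clone()
    y[:, 2::ndim] = y[:, 2::ndim].sigmoid()                             # visibility -> probability
    y[:, 0::ndim] = (y[:, 0::ndim] * 2.0 + (anchor_x - 0.5)) * strides  # x offsets -> pixels
    y[:, 1::ndim] = (y[:, 1::ndim] * 2.0 + (anchor_y - 0.5)) * strides  # y offsets -> pixels
    print(y.shape)  # torch.Size([1, 51, 4])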
ultralytics/nn/tasks.py
CHANGED
@@ -10,7 +10,7 @@ import torch.nn as nn

 from ultralytics.nn.modules import (C1, C2, C3, C3TR, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, Classify,
                                     Concat, Conv, ConvTranspose, Detect, DWConv, DWConvTranspose2d, Ensemble, Focus,
-                                    GhostBottleneck, GhostConv, Segment)
+                                    GhostBottleneck, GhostConv, Pose, Segment)
 from ultralytics.yolo.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, colorstr, emojis, yaml_load
 from ultralytics.yolo.utils.checks import check_requirements, check_suffix, check_yaml
 from ultralytics.yolo.utils.torch_utils import (fuse_conv_and_bn, fuse_deconv_and_bn, initialize_weights,
@@ -183,10 +183,10 @@ class DetectionModel(BaseModel):

         # Build strides
         m = self.model[-1]  # Detect()
-        if isinstance(m, (Detect, Segment)):
+        if isinstance(m, (Detect, Segment, Pose)):
             s = 256  # 2x min stride
             m.inplace = self.inplace
-            forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
+            forward = lambda x: self.forward(x)[0] if isinstance(m, (Segment, Pose)) else self.forward(x)
             m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))])  # forward
             self.stride = m.stride
             m.bias_init()  # only run once
@@ -242,12 +242,23 @@
 class SegmentationModel(DetectionModel):
     # YOLOv8 segmentation model
     def __init__(self, cfg='yolov8n-seg.yaml', ch=3, nc=None, verbose=True):
-        super().__init__(cfg, ch, nc, verbose)
+        super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)

     def _forward_augment(self, x):
         raise NotImplementedError(emojis('WARNING ⚠️ SegmentationModel has not supported augment inference yet!'))


+class PoseModel(DetectionModel):
+    # YOLOv8 pose model
+    def __init__(self, cfg='yolov8n-pose.yaml', ch=3, nc=None, data_kpt_shape=(None, None), verbose=True):
+        if not isinstance(cfg, dict):
+            cfg = yaml_model_load(cfg)  # load model YAML
+        if any(data_kpt_shape) and list(data_kpt_shape) != list(cfg['kpt_shape']):
+            LOGGER.info(f"Overriding model.yaml kpt_shape={cfg['kpt_shape']} with kpt_shape={data_kpt_shape}")
+            cfg['kpt_shape'] = data_kpt_shape
+        super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
+
+
 class ClassificationModel(BaseModel):
     # YOLOv8 classification model
     def __init__(self,
@@ -425,7 +436,7 @@ def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
     # Args
     max_channels = float('inf')
     nc, act, scales = (d.get(x) for x in ('nc', 'act', 'scales'))
-    depth, width = (d.get(x, 1.0) for x in ('depth_multiple', 'width_multiple'))
+    depth, width, kpt_shape = (d.get(x, 1.0) for x in ('depth_multiple', 'width_multiple', 'kpt_shape'))
     if scales:
         scale = d.get('scale')
         if not scale:
@@ -464,7 +475,7 @@ def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
             args = [ch[f]]
         elif m is Concat:
             c2 = sum(ch[x] for x in f)
-        elif m in (Detect, Segment):
+        elif m in (Detect, Segment, Pose):
             args.append([ch[x] for x in f])
             if m is Segment:
                 args[2] = make_divisible(min(args[2], max_channels) * width, 8)
@@ -543,6 +554,8 @@ def guess_model_task(model):
             return 'detect'
         if m == 'segment':
             return 'segment'
+        if m == 'pose':
+            return 'pose'

     # Guess from model cfg
     if isinstance(model, dict):
@@ -565,6 +578,8 @@ def guess_model_task(model):
                 return 'segment'
             elif isinstance(m, Classify):
                 return 'classify'
+            elif isinstance(m, Pose):
+                return 'pose'

     # Guess from model filename
     if isinstance(model, (str, Path)):
@@ -573,10 +588,12 @@ def guess_model_task(model):
             return 'segment'
         elif '-cls' in model.stem or 'classify' in model.parts:
             return 'classify'
+        elif '-pose' in model.stem or 'pose' in model.parts:
+            return 'pose'
         elif 'detect' in model.parts:
             return 'detect'

     # Unable to determine task from model
     LOGGER.warning("WARNING ⚠️ Unable to automatically guess model task, assuming 'task=detect'. "
-                   "Explicitly define task for your model, i.e. 'task=detect', '
+                   "Explicitly define task for your model, i.e. 'task=detect', 'segment', 'classify', or 'pose'.")
     return 'detect'  # assume detect
ultralytics/tracker/track.py
CHANGED
@@ -33,10 +33,9 @@ def on_predict_postprocess_end(predictor):
         tracks = predictor.trackers[i].update(det, im0s[i])
         if len(tracks) == 0:
             continue
+        idx = tracks[:, -1].tolist()
+        predictor.results[i] = predictor.results[i][idx]
         predictor.results[i].update(boxes=torch.as_tensor(tracks[:, :-1]))
-        if predictor.results[i].masks is not None:
-            idx = tracks[:, -1].tolist()
-            predictor.results[i].masks = predictor.results[i].masks[idx]


 def register_tracker(model):
ultralytics/yolo/cfg/__init__.py
CHANGED
@@ -18,13 +18,13 @@ TASKS = 'detect', 'segment', 'classify', 'pose'
 TASK2DATA = {
     'detect': 'coco128.yaml',
     'segment': 'coco128-seg.yaml',
-    '
-    '
+    'classify': 'imagenet100',
+    'pose': 'coco128-pose.yaml'}
 TASK2MODEL = {
     'detect': 'yolov8n.pt',
     'segment': 'yolov8n-seg.pt',
-    '
-    '
+    'classify': 'yolov8n-cls.pt',
+    'pose': 'yolov8n-pose.yaml'}

 CLI_HELP_MSG = \
     f"""

ultralytics/yolo/cfg/default.yaml
CHANGED
@@ -88,6 +88,8 @@ warmup_bias_lr: 0.1  # warmup initial bias lr
 box: 7.5  # box loss gain
 cls: 0.5  # cls loss gain (scale with pixels)
 dfl: 1.5  # dfl loss gain
+pose: 12.0  # pose loss gain
+kobj: 1.0  # keypoint obj loss gain
 fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
 label_smoothing: 0.0  # label smoothing (fraction)
 nbs: 64  # nominal batch size
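The two new default.yaml keys weight the keypoint terms of the pose loss: `pose` scales the keypoint-location loss and `kobj` the keypoint-objectness loss. A hedged override sketch is below, assuming default.yaml keys are accepted as `train()` keyword overrides the same way `box`, `cls` and `dfl` are.

    from ultralytics import YOLO

    model = YOLO('yolov8n-pose.yaml')
    model.train(data='coco8-pose.yaml', epochs=10, pose=12.0, kobj=1.0)  # defaults written out explicitly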
ultralytics/yolo/data/augment.py
CHANGED
@@ -16,6 +16,8 @@ from ..utils.metrics import bbox_ioa
 from ..utils.ops import segment2box
 from .utils import polygons2masks, polygons2masks_overlap

+POSE_FLIPLR_INDEX = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+

 # TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic
 class BaseTransform:
@@ -309,27 +311,22 @@ class RandomPerspective:
         """apply affine to keypoints.

         Args:
-            keypoints(ndarray): keypoints, [N, 17,
+            keypoints(ndarray): keypoints, [N, 17, 3].
             M(ndarray): affine matrix.
         Return:
-            new_keypoints(ndarray): keypoints after affine, [N, 17,
+            new_keypoints(ndarray): keypoints after affine, [N, 17, 3].
         """
-        n =
+        n, nkpt = keypoints.shape[:2]
         if n == 0:
             return keypoints
-
-
-
-
-
-
-
-
-        x_kpts[np.logical_or.reduce((x_kpts < 0, x_kpts > self.size[0], y_kpts < 0, y_kpts > self.size[1]))] = 0
-        y_kpts[np.logical_or.reduce((x_kpts < 0, x_kpts > self.size[0], y_kpts < 0, y_kpts > self.size[1]))] = 0
-        new_keypoints[:, list(range(0, 34, 2))] = x_kpts
-        new_keypoints[:, list(range(1, 34, 2))] = y_kpts
-        return new_keypoints.reshape(n, 17, 2)
+        xy = np.ones((n * nkpt, 3))
+        visible = keypoints[..., 2].reshape(n * nkpt, 1)
+        xy[:, :2] = keypoints[..., :2].reshape(n * nkpt, 2)
+        xy = xy @ M.T  # transform
+        xy = xy[:, :2] / xy[:, 2:3]  # perspective rescale or affine
+        out_mask = (xy[:, 0] < 0) | (xy[:, 1] < 0) | (xy[:, 0] > self.size[0]) | (xy[:, 1] > self.size[1])
+        visible[out_mask] = 0
+        return np.concatenate([xy, visible], axis=-1).reshape(n, nkpt, 3)

     def __call__(self, labels):
         """
@@ -415,12 +412,13 @@ class RandomHSV:

 class RandomFlip:

-    def __init__(self, p=0.5, direction='horizontal') -> None:
+    def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None:
         assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}'
         assert 0 <= p <= 1.0

         self.p = p
         self.direction = direction
+        self.flip_idx = flip_idx

     def __call__(self, labels):
         img = labels['img']
@@ -437,6 +435,9 @@ class RandomFlip:
         if self.direction == 'horizontal' and random.random() < self.p:
             img = np.fliplr(img)
             instances.fliplr(w)
+            # for keypoints
+            if self.flip_idx is not None and instances.keypoints is not None:
+                instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :])
         labels['img'] = np.ascontiguousarray(img)
         labels['instances'] = instances
         return labels
@@ -633,7 +634,7 @@ class Format:
         labels['cls'] = torch.from_numpy(cls) if nl else torch.zeros(nl)
         labels['bboxes'] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
         if self.return_keypoint:
-            labels['keypoints'] = torch.from_numpy(instances.keypoints)
+            labels['keypoints'] = torch.from_numpy(instances.keypoints)
         # then we can use collate_fn
         if self.batch_idx:
             labels['batch_idx'] = torch.zeros(nl)
@@ -672,13 +673,17 @@ def v8_transforms(dataset, imgsz, hyp):
         perspective=hyp.perspective,
         pre_transform=LetterBox(new_shape=(imgsz, imgsz)),
     )])
+    flip_idx = dataset.data.get('flip_idx', None)  # for keypoints augmentation
+    if dataset.use_keypoints and flip_idx is None and hyp.fliplr > 0.0:
+        hyp.fliplr = 0.0
+        LOGGER.warning("WARNING ⚠️ No `flip_idx` provided while training keypoints, setting augmentation 'fliplr=0.0'")
     return Compose([
         pre_transform,
         MixUp(dataset, pre_transform=pre_transform, p=hyp.mixup),
        Albumentations(p=1.0),
         RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v),
         RandomFlip(direction='vertical', p=hyp.flipud),
-        RandomFlip(direction='horizontal', p=hyp.fliplr)])  # transforms
+        RandomFlip(direction='horizontal', p=hyp.fliplr, flip_idx=flip_idx)])  # transforms


 # Classification augmentations -----------------------------------------------------------------------------------------
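A horizontal flip must both mirror keypoint x-coordinates and swap left/right keypoints, which is what `flip_idx` encodes; `RandomFlip` now performs the swap whenever a `flip_idx` is supplied. The numpy sketch below shows the combined step in isolation (simplified; in the library the mirroring itself is done by `Instances.fliplr`, as in the diff context above).

    import numpy as np

    flip_idx = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]  # COCO left/right pairs
    w = 640                                             # image width
    keypoints = np.random.rand(2, 17, 3) * [w, 480, 1]  # (instances, 17, [x, y, visible])

    keypoints[..., 0] = w - keypoints[..., 0]           # mirror x about the image width
    keypoints = np.ascontiguousarray(keypoints[:, flip_idx, :])  # swap left/right keypoints
    print(keypoints.shape)  # (2, 17, 3)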
ultralytics/yolo/data/build.py
CHANGED
@@ -61,7 +61,7 @@ def seed_worker(worker_id):  # noqa
     random.seed(worker_seed)


-def build_dataloader(cfg, batch, img_path, stride=32, rect=False,
+def build_dataloader(cfg, batch, img_path, data_info, stride=32, rect=False, rank=-1, mode='train'):
     assert mode in ['train', 'val']
     shuffle = mode == 'train'
     if cfg.rect and shuffle:
@@ -81,9 +81,9 @@ def build_dataloader(cfg, batch, img_path, stride=32, rect=False, names=None, ra
         pad=0.0 if mode == 'train' else 0.5,
         prefix=colorstr(f'{mode}: '),
         use_segments=cfg.task == 'segment',
-        use_keypoints=cfg.task == '
-
-
+        use_keypoints=cfg.task == 'pose',
+        classes=cfg.classes,
+        data=data_info)

     batch = min(batch, len(dataset))
     nd = torch.cuda.device_count()  # number of CUDA devices
ultralytics/yolo/data/dataset.py
CHANGED
@@ -57,11 +57,11 @@ class YOLODataset(BaseDataset):
                  single_cls=False,
                  use_segments=False,
                  use_keypoints=False,
-
+                 data=None,
                  classes=None):
         self.use_segments = use_segments
         self.use_keypoints = use_keypoints
-        self.
+        self.data = data
         assert not (self.use_segments and self.use_keypoints), 'Can not use both segments and keypoints.'
         super().__init__(img_path, imgsz, cache, augment, hyp, prefix, rect, batch_size, stride, pad, single_cls,
                          classes)
@@ -77,10 +77,16 @@ class YOLODataset(BaseDataset):
         nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
         desc = f'{self.prefix}Scanning {path.parent / path.stem}...'
         total = len(self.im_files)
+        nc = len(self.data['names'])
+        nkpt, ndim = self.data.get('kpt_shape', (0, 0))
+        if self.use_keypoints and (nkpt <= 0 or ndim not in (2, 3)):
+            raise ValueError("'kpt_shape' in data.yaml missing or incorrect. Should be a list with [number of "
+                             "keypoints, number of dims (2 for x,y or 3 for x,y,visible)], i.e. 'kpt_shape: [17, 3]'")
         with ThreadPool(NUM_THREADS) as pool:
             results = pool.imap(func=verify_image_label,
                                 iterable=zip(self.im_files, self.label_files, repeat(self.prefix),
-                                             repeat(self.use_keypoints), repeat(len(self.names)))
+                                             repeat(self.use_keypoints), repeat(len(self.data['names'])), repeat(nkpt),
+                                             repeat(ndim)))
             pbar = tqdm(results, desc=desc, total=total, bar_format=TQDM_BAR_FORMAT)
             for im_file, lb, shape, segments, keypoint, nm_f, nf_f, ne_f, nc_f, msg in pbar:
                 nm += nm_f