ultralytics 8.0.64__py3-none-any.whl → 8.0.66__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ultralytics might be problematic.

Files changed (41)
  1. ultralytics/__init__.py +1 -1
  2. ultralytics/datasets/coco-pose.yaml +38 -0
  3. ultralytics/datasets/coco8-pose.yaml +25 -0
  4. ultralytics/models/v8/yolov8-pose-p6.yaml +57 -0
  5. ultralytics/models/v8/yolov8-pose.yaml +47 -0
  6. ultralytics/nn/autobackend.py +7 -2
  7. ultralytics/nn/modules.py +33 -2
  8. ultralytics/nn/tasks.py +24 -7
  9. ultralytics/tracker/track.py +2 -3
  10. ultralytics/yolo/cfg/__init__.py +4 -4
  11. ultralytics/yolo/cfg/default.yaml +2 -0
  12. ultralytics/yolo/data/augment.py +24 -19
  13. ultralytics/yolo/data/build.py +4 -4
  14. ultralytics/yolo/data/dataset.py +9 -3
  15. ultralytics/yolo/data/utils.py +110 -34
  16. ultralytics/yolo/engine/exporter.py +9 -7
  17. ultralytics/yolo/engine/model.py +5 -4
  18. ultralytics/yolo/engine/predictor.py +1 -0
  19. ultralytics/yolo/engine/results.py +70 -56
  20. ultralytics/yolo/utils/benchmarks.py +4 -2
  21. ultralytics/yolo/utils/downloads.py +3 -3
  22. ultralytics/yolo/utils/instance.py +1 -1
  23. ultralytics/yolo/utils/loss.py +14 -0
  24. ultralytics/yolo/utils/metrics.py +111 -13
  25. ultralytics/yolo/utils/ops.py +30 -50
  26. ultralytics/yolo/utils/plotting.py +79 -4
  27. ultralytics/yolo/utils/torch_utils.py +11 -9
  28. ultralytics/yolo/v8/__init__.py +2 -2
  29. ultralytics/yolo/v8/detect/train.py +1 -1
  30. ultralytics/yolo/v8/detect/val.py +2 -2
  31. ultralytics/yolo/v8/pose/__init__.py +7 -0
  32. ultralytics/yolo/v8/pose/predict.py +103 -0
  33. ultralytics/yolo/v8/pose/train.py +170 -0
  34. ultralytics/yolo/v8/pose/val.py +213 -0
  35. ultralytics/yolo/v8/segment/val.py +3 -4
  36. {ultralytics-8.0.64.dist-info → ultralytics-8.0.66.dist-info}/METADATA +27 -2
  37. {ultralytics-8.0.64.dist-info → ultralytics-8.0.66.dist-info}/RECORD +41 -33
  38. {ultralytics-8.0.64.dist-info → ultralytics-8.0.66.dist-info}/LICENSE +0 -0
  39. {ultralytics-8.0.64.dist-info → ultralytics-8.0.66.dist-info}/WHEEL +0 -0
  40. {ultralytics-8.0.64.dist-info → ultralytics-8.0.66.dist-info}/entry_points.txt +0 -0
  41. {ultralytics-8.0.64.dist-info → ultralytics-8.0.66.dist-info}/top_level.txt +0 -0
ultralytics/__init__.py CHANGED
@@ -1,6 +1,6 @@
  # Ultralytics YOLO 🚀, GPL-3.0 license
 
- __version__ = '8.0.64'
+ __version__ = '8.0.66'
 
  from ultralytics.hub import start
  from ultralytics.yolo.engine.model import YOLO
ultralytics/datasets/coco-pose.yaml ADDED
@@ -0,0 +1,38 @@
+ # Ultralytics YOLO 🚀, GPL-3.0 license
+ # COCO 2017 dataset http://cocodataset.org by Microsoft
+ # Example usage: yolo train data=coco-pose.yaml
+ # parent
+ # ├── ultralytics
+ # └── datasets
+ #     └── coco-pose  ← downloads here (20.1 GB)
+
+
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+ path: ../datasets/coco-pose  # dataset root dir
+ train: train2017.txt  # train images (relative to 'path') 118287 images
+ val: val2017.txt  # val images (relative to 'path') 5000 images
+ test: test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
+
+ # Keypoints
+ kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+ flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+
+ # Classes
+ names:
+   0: person
+
+ # Download script/URL (optional)
+ download: |
+   from ultralytics.yolo.utils.downloads import download
+   from pathlib import Path
+
+   # Download labels
+   dir = Path(yaml['path'])  # dataset root dir
+   url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
+   urls = [url + 'coco2017labels-pose.zip']  # labels
+   download(urls, dir=dir.parent)
+   # Download data
+   urls = ['http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
+           'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
+           'http://images.cocodataset.org/zips/test2017.zip']  # 7G, 41k images (optional)
+   download(urls, dir=dir / 'images', threads=3)
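Note that the download: block above is plain Python which the dataset checker runs with the parsed YAML in scope (that is what the yaml['path'] reference relies on). A rough standalone adaptation for manual use, supplying that context by hand and skipping the optional test split:

from pathlib import Path

from ultralytics.yolo.utils.downloads import download

# Outside the dataset checker nothing injects 'yaml', so provide it ourselves.
yaml = {'path': '../datasets/coco-pose'}
dir = Path(yaml['path'])  # dataset root dir

# Labels first, then the COCO image zips (test2017 omitted to keep the example small).
download(['https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels-pose.zip'], dir=dir.parent)
download(['http://images.cocodataset.org/zips/train2017.zip',
          'http://images.cocodataset.org/zips/val2017.zip'], dir=dir / 'images', threads=2)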
ultralytics/datasets/coco8-pose.yaml ADDED
@@ -0,0 +1,25 @@
+ # Ultralytics YOLO 🚀, GPL-3.0 license
+ # COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics
+ # Example usage: yolo train data=coco8-pose.yaml
+ # parent
+ # ├── ultralytics
+ # └── datasets
+ #     └── coco8-pose  ← downloads here (1 MB)
+
+
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+ path: ../datasets/coco8-pose  # dataset root dir
+ train: images/train  # train images (relative to 'path') 4 images
+ val: images/val  # val images (relative to 'path') 4 images
+ test:  # test images (optional)
+
+ # Keypoints
+ kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+ flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+
+ # Classes
+ names:
+   0: person
+
+ # Download script/URL (optional)
+ download: https://ultralytics.com/assets/coco8-pose.zip
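The coco8-pose dataset is a smoke-test companion to the new pose model YAMLs below. A minimal sketch of the Python API for this release (the model is built from the YAML config, since TASK2MODEL still maps 'pose' to a config rather than a published .pt; the keypoints attribute on results is an assumption based on the new pose predictor in this release):

from ultralytics import YOLO

# Build a pose model from the new config and overfit briefly on the 8-image dataset.
model = YOLO('yolov8n-pose.yaml')
model.train(data='coco8-pose.yaml', epochs=3, imgsz=640)

# Run inference; each result is expected to carry per-person keypoints alongside boxes,
# e.g. results[0].keypoints with shape (num_persons, 17, 3) (attribute name assumed).
results = model('https://ultralytics.com/images/bus.jpg')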
ultralytics/models/v8/yolov8-pose-p6.yaml ADDED
@@ -0,0 +1,57 @@
+ # Ultralytics YOLO 🚀, GPL-3.0 license
+ # YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+
+ # Parameters
+ nc: 1  # number of classes
+ kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
+   # [depth, width, max_channels]
+   n: [0.33, 0.25, 1024]
+   s: [0.33, 0.50, 1024]
+   m: [0.67, 0.75, 768]
+   l: [1.00, 1.00, 512]
+   x: [1.00, 1.25, 512]
+
+ # YOLOv8.0x6 backbone
+ backbone:
+   # [from, repeats, module, args]
+   - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
+   - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+   - [-1, 3, C2f, [128, True]]
+   - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+   - [-1, 6, C2f, [256, True]]
+   - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+   - [-1, 6, C2f, [512, True]]
+   - [-1, 1, Conv, [768, 3, 2]]  # 7-P5/32
+   - [-1, 3, C2f, [768, True]]
+   - [-1, 1, Conv, [1024, 3, 2]]  # 9-P6/64
+   - [-1, 3, C2f, [1024, True]]
+   - [-1, 1, SPPF, [1024, 5]]  # 11
+
+ # YOLOv8.0x6 head
+ head:
+   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+   - [[-1, 8], 1, Concat, [1]]  # cat backbone P5
+   - [-1, 3, C2, [768, False]]  # 14
+
+   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+   - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
+   - [-1, 3, C2, [512, False]]  # 17
+
+   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+   - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
+   - [-1, 3, C2, [256, False]]  # 20 (P3/8-small)
+
+   - [-1, 1, Conv, [256, 3, 2]]
+   - [[-1, 17], 1, Concat, [1]]  # cat head P4
+   - [-1, 3, C2, [512, False]]  # 23 (P4/16-medium)
+
+   - [-1, 1, Conv, [512, 3, 2]]
+   - [[-1, 14], 1, Concat, [1]]  # cat head P5
+   - [-1, 3, C2, [768, False]]  # 26 (P5/32-large)
+
+   - [-1, 1, Conv, [768, 3, 2]]
+   - [[-1, 11], 1, Concat, [1]]  # cat head P6
+   - [-1, 3, C2, [1024, False]]  # 29 (P6/64-xlarge)
+
+   - [[20, 23, 26, 29], 1, Pose, [nc, kpt_shape]]  # Pose(P3, P4, P5, P6)
ultralytics/models/v8/yolov8-pose.yaml ADDED
@@ -0,0 +1,47 @@
+ # Ultralytics YOLO 🚀, GPL-3.0 license
+ # YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose
+
+ # Parameters
+ nc: 1  # number of classes
+ kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will call yolov8-pose.yaml with scale 'n'
+   # [depth, width, max_channels]
+   n: [0.33, 0.25, 1024]
+   s: [0.33, 0.50, 1024]
+   m: [0.67, 0.75, 768]
+   l: [1.00, 1.00, 512]
+   x: [1.00, 1.25, 512]
+
+ # YOLOv8.0n backbone
+ backbone:
+   # [from, repeats, module, args]
+   - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
+   - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+   - [-1, 3, C2f, [128, True]]
+   - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+   - [-1, 6, C2f, [256, True]]
+   - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+   - [-1, 6, C2f, [512, True]]
+   - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
+   - [-1, 3, C2f, [1024, True]]
+   - [-1, 1, SPPF, [1024, 5]]  # 9
+
+ # YOLOv8.0n head
+ head:
+   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+   - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
+   - [-1, 3, C2f, [512]]  # 12
+
+   - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+   - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
+   - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
+
+   - [-1, 1, Conv, [256, 3, 2]]
+   - [[-1, 12], 1, Concat, [1]]  # cat head P4
+   - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
+
+   - [-1, 1, Conv, [512, 3, 2]]
+   - [[-1, 9], 1, Concat, [1]]  # cat head P5
+   - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
+
+   - [[15, 18, 21], 1, Pose, [nc, kpt_shape]]  # Pose(P3, P4, P5)
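The scales block is resolved in parse_model (see the tasks.py hunks below): the chosen letter supplies depth, width and max_channels multipliers that shrink block repeats and channel counts. A worked sketch of that arithmetic, using a local make_divisible helper rather than importing the library's:

import math

def make_divisible(x, divisor=8):
    # Round a channel count up to the nearest multiple of the divisor, as parse_model does.
    return math.ceil(x / divisor) * divisor

depth, width, max_channels = 0.33, 0.25, 1024  # the 'n' entry of the scales block

# A backbone entry '- [-1, 6, C2f, [512, True]]' at scale 'n':
repeats = max(round(6 * depth), 1)                            # 6 -> 2 C2f bottlenecks
channels = make_divisible(min(512, max_channels) * width, 8)  # 512 -> 128 output channels
print(repeats, channels)  # 2 128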
ultralytics/nn/autobackend.py CHANGED
@@ -91,8 +91,10 @@ class AutoBackend(nn.Module):
  if nn_module:
      model = weights.to(device)
      model = model.fuse(verbose=verbose) if fuse else model
-     names = model.module.names if hasattr(model, 'module') else model.names  # get class names
+     if hasattr(model, 'kpt_shape'):
+         kpt_shape = model.kpt_shape  # pose-only
      stride = max(int(model.stride.max()), 32)  # model stride
+     names = model.module.names if hasattr(model, 'module') else model.names  # get class names
      model.half() if fp16 else model.float()
      self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
      pt = True
@@ -102,6 +104,8 @@ class AutoBackend(nn.Module):
                               device=device,
                               inplace=True,
                               fuse=fuse)
+ if hasattr(model, 'kpt_shape'):
+     kpt_shape = model.kpt_shape  # pose-only
  stride = max(int(model.stride.max()), 32)  # model stride
  names = model.module.names if hasattr(model, 'module') else model.names  # get class names
  model.half() if fp16 else model.float()
@@ -268,13 +272,14 @@ class AutoBackend(nn.Module):
      for k, v in metadata.items():
          if k in ('stride', 'batch'):
              metadata[k] = int(v)
-         elif k in ('imgsz', 'names') and isinstance(v, str):
+         elif k in ('imgsz', 'names', 'kpt_shape') and isinstance(v, str):
              metadata[k] = eval(v)
      stride = metadata['stride']
      task = metadata['task']
      batch = metadata['batch']
      imgsz = metadata['imgsz']
      names = metadata['names']
+     kpt_shape = metadata.get('kpt_shape')
  elif not (pt or triton or nn_module):
      LOGGER.warning(f"WARNING ⚠️ Metadata not found for 'model={weights}'")
 
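Exported-model metadata can arrive as strings, and AutoBackend now evals kpt_shape back into a list alongside imgsz and names. A self-contained sketch of that round-trip with hypothetical metadata values:

# Hypothetical metadata dict as read back from an exported pose model.
metadata = {'stride': '32', 'batch': '1', 'task': 'pose', 'imgsz': '[640, 640]',
            'names': "{0: 'person'}", 'kpt_shape': '[17, 3]'}
for k, v in metadata.items():
    if k in ('stride', 'batch'):
        metadata[k] = int(v)
    elif k in ('imgsz', 'names', 'kpt_shape') and isinstance(v, str):
        metadata[k] = eval(v)  # string -> list/dict, mirroring the AutoBackend change above
print(metadata['kpt_shape'], metadata['names'])  # [17, 3] {0: 'person'}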
ultralytics/nn/modules.py CHANGED
@@ -378,7 +378,9 @@ class Ensemble(nn.ModuleList):
  return y, None  # inference, train output
 
 
- # heads
+ # Model heads below ----------------------------------------------------------------------------------------------------
+
+
  class Detect(nn.Module):
      # YOLOv8 Detect head for detection models
      dynamic = False  # force grid reconstruction
@@ -394,7 +396,6 @@ class Detect(nn.Module):
  self.reg_max = 16  # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x)
  self.no = nc + self.reg_max * 4  # number of outputs per anchor
  self.stride = torch.zeros(self.nl)  # strides computed during build
-
  c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc)  # channels
  self.cv2 = nn.ModuleList(
      nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch)
@@ -454,6 +455,36 @@ class Segment(Detect):
  return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))
 
 
+ class Pose(Detect):
+     # YOLOv8 Pose head for keypoints models
+     def __init__(self, nc=80, kpt_shape=(17, 3), ch=()):
+         super().__init__(nc, ch)
+         self.kpt_shape = kpt_shape  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+         self.nk = kpt_shape[0] * kpt_shape[1]  # number of keypoints total
+         self.detect = Detect.forward
+
+         c4 = max(ch[0] // 4, self.nk)
+         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch)
+
+     def forward(self, x):
+         bs = x[0].shape[0]  # batch size
+         kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1)  # (bs, 17*3, h*w)
+         x = self.detect(self, x)
+         if self.training:
+             return x, kpt
+         pred_kpt = self.kpts_decode(kpt)
+         return torch.cat([x, pred_kpt], 1) if self.export else (torch.cat([x[0], pred_kpt], 1), (x[1], kpt))
+
+     def kpts_decode(self, kpts):
+         ndim = self.kpt_shape[1]
+         y = kpts.clone()
+         if ndim == 3:
+             y[:, 2::3].sigmoid_()  # inplace sigmoid
+         y[:, 0::ndim] = (y[:, 0::ndim] * 2.0 + (self.anchors[0] - 0.5)) * self.strides
+         y[:, 1::ndim] = (y[:, 1::ndim] * 2.0 + (self.anchors[1] - 0.5)) * self.strides
+         return y
+
+
  class Classify(nn.Module):
      # YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)
      def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
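To make the decode step concrete: in Pose.kpts_decode, raw x/y outputs are mapped to input-image pixels as (pred * 2 + anchor - 0.5) * stride, and when ndim is 3 the third channel is squashed with a sigmoid as a visibility score. A toy calculation for a single keypoint with invented values:

import torch

pred_xy = torch.tensor([0.40, 0.55])  # raw network output for one keypoint at one location
anchor = torch.tensor([12.0, 7.0])    # grid-cell centre, as stored in self.anchors
stride = 8.0                          # P3 feature-map stride

xy_pixels = (pred_xy * 2.0 + (anchor - 0.5)) * stride
print(xy_pixels)  # tensor([98.4000, 60.8000]) -> keypoint position in input-image pixels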
ultralytics/nn/tasks.py CHANGED
@@ -10,7 +10,7 @@ import torch.nn as nn
 
  from ultralytics.nn.modules import (C1, C2, C3, C3TR, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, Classify,
                                      Concat, Conv, ConvTranspose, Detect, DWConv, DWConvTranspose2d, Ensemble, Focus,
-                                     GhostBottleneck, GhostConv, Segment)
+                                     GhostBottleneck, GhostConv, Pose, Segment)
  from ultralytics.yolo.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, colorstr, emojis, yaml_load
  from ultralytics.yolo.utils.checks import check_requirements, check_suffix, check_yaml
  from ultralytics.yolo.utils.torch_utils import (fuse_conv_and_bn, fuse_deconv_and_bn, initialize_weights,
@@ -183,10 +183,10 @@ class DetectionModel(BaseModel):
 
  # Build strides
  m = self.model[-1]  # Detect()
- if isinstance(m, (Detect, Segment)):
+ if isinstance(m, (Detect, Segment, Pose)):
      s = 256  # 2x min stride
      m.inplace = self.inplace
-     forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
+     forward = lambda x: self.forward(x)[0] if isinstance(m, (Segment, Pose)) else self.forward(x)
      m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))])  # forward
      self.stride = m.stride
      m.bias_init()  # only run once
@@ -242,12 +242,23 @@
  class SegmentationModel(DetectionModel):
      # YOLOv8 segmentation model
      def __init__(self, cfg='yolov8n-seg.yaml', ch=3, nc=None, verbose=True):
-         super().__init__(cfg, ch, nc, verbose)
+         super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
 
      def _forward_augment(self, x):
          raise NotImplementedError(emojis('WARNING ⚠️ SegmentationModel has not supported augment inference yet!'))
 
 
+ class PoseModel(DetectionModel):
+     # YOLOv8 pose model
+     def __init__(self, cfg='yolov8n-pose.yaml', ch=3, nc=None, data_kpt_shape=(None, None), verbose=True):
+         if not isinstance(cfg, dict):
+             cfg = yaml_model_load(cfg)  # load model YAML
+         if any(data_kpt_shape) and list(data_kpt_shape) != list(cfg['kpt_shape']):
+             LOGGER.info(f"Overriding model.yaml kpt_shape={cfg['kpt_shape']} with kpt_shape={data_kpt_shape}")
+             cfg['kpt_shape'] = data_kpt_shape
+         super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
+
+
  class ClassificationModel(BaseModel):
      # YOLOv8 classification model
      def __init__(self,
@@ -425,7 +436,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
  # Args
  max_channels = float('inf')
  nc, act, scales = (d.get(x) for x in ('nc', 'act', 'scales'))
- depth, width = (d.get(x, 1.0) for x in ('depth_multiple', 'width_multiple'))
+ depth, width, kpt_shape = (d.get(x, 1.0) for x in ('depth_multiple', 'width_multiple', 'kpt_shape'))
  if scales:
      scale = d.get('scale')
      if not scale:
@@ -464,7 +475,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
      args = [ch[f]]
  elif m is Concat:
      c2 = sum(ch[x] for x in f)
- elif m in (Detect, Segment):
+ elif m in (Detect, Segment, Pose):
      args.append([ch[x] for x in f])
      if m is Segment:
          args[2] = make_divisible(min(args[2], max_channels) * width, 8)
@@ -543,6 +554,8 @@
      return 'detect'
  if m == 'segment':
      return 'segment'
+ if m == 'pose':
+     return 'pose'
 
  # Guess from model cfg
  if isinstance(model, dict):
@@ -565,6 +578,8 @@
      return 'segment'
  elif isinstance(m, Classify):
      return 'classify'
+ elif isinstance(m, Pose):
+     return 'pose'
 
  # Guess from model filename
  if isinstance(model, (str, Path)):
@@ -573,10 +588,12 @@
      return 'segment'
  elif '-cls' in model.stem or 'classify' in model.parts:
      return 'classify'
+ elif '-pose' in model.stem or 'pose' in model.parts:
+     return 'pose'
  elif 'detect' in model.parts:
      return 'detect'
 
  # Unable to determine task from model
  LOGGER.warning("WARNING ⚠️ Unable to automatically guess model task, assuming 'task=detect'. "
-                "Explicitly define task for your model, i.e. 'task=detect', 'task=segment' or 'task=classify'.")
+                "Explicitly define task for your model, i.e. 'task=detect', 'segment', 'classify', or 'pose'.")
  return 'detect'  # assume detect
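A short sketch of the new PoseModel in isolation: data_kpt_shape is the override used when a data YAML declares a kpt_shape different from the model YAML (the print of model.yaml assumes the parsed config is stored on the model, as it is for the other task models):

from ultralytics.nn.tasks import PoseModel

# Build the pose graph directly from the new config; verbose=False keeps the layer table quiet.
model = PoseModel(cfg='yolov8n-pose.yaml', ch=3, nc=1, data_kpt_shape=(17, 3), verbose=False)
print(model.stride)             # strides derived through the Pose head, e.g. tensor([ 8., 16., 32.])
print(model.yaml['kpt_shape'])  # [17, 3]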
ultralytics/tracker/track.py CHANGED
@@ -33,10 +33,9 @@ def on_predict_postprocess_end(predictor):
  tracks = predictor.trackers[i].update(det, im0s[i])
  if len(tracks) == 0:
      continue
+ idx = tracks[:, -1].tolist()
+ predictor.results[i] = predictor.results[i][idx]
  predictor.results[i].update(boxes=torch.as_tensor(tracks[:, :-1]))
- if predictor.results[i].masks is not None:
-     idx = tracks[:, -1].tolist()
-     predictor.results[i].masks = predictor.results[i].masks[idx]
 
 
  def register_tracker(model):
ultralytics/yolo/cfg/__init__.py CHANGED
@@ -18,13 +18,13 @@ TASKS = 'detect', 'segment', 'classify', 'pose'
  TASK2DATA = {
      'detect': 'coco128.yaml',
      'segment': 'coco128-seg.yaml',
-     'pose': 'coco128-pose.yaml',
-     'classify': 'imagenet100'}
+     'classify': 'imagenet100',
+     'pose': 'coco128-pose.yaml'}
  TASK2MODEL = {
      'detect': 'yolov8n.pt',
      'segment': 'yolov8n-seg.pt',
-     'pose': 'yolov8n-pose.yaml',
-     'classify': 'yolov8n-cls.pt'}  # temp
+     'classify': 'yolov8n-cls.pt',
+     'pose': 'yolov8n-pose.yaml'}
 
  CLI_HELP_MSG = \
      f"""
ultralytics/yolo/cfg/default.yaml CHANGED
@@ -88,6 +88,8 @@ warmup_bias_lr: 0.1 # warmup initial bias lr
  box: 7.5  # box loss gain
  cls: 0.5  # cls loss gain (scale with pixels)
  dfl: 1.5  # dfl loss gain
+ pose: 12.0  # pose loss gain
+ kobj: 1.0  # keypoint obj loss gain
  fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
  label_smoothing: 0.0  # label smoothing (fraction)
  nbs: 64  # nominal batch size
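These two gains presumably weight the keypoint-location and keypoint-objectness terms of the new pose loss (cf. yolo/utils/loss.py and v8/pose/train.py in the file list). Like other entries in default.yaml they should be overridable per run; a hedged sketch via the Python API, assuming training kwargs override the config as for the other hyperparameters:

from ultralytics import YOLO

# Train with custom keypoint loss gains; kwargs override the defaults from default.yaml.
model = YOLO('yolov8n-pose.yaml')
model.train(data='coco8-pose.yaml', epochs=1, imgsz=640, pose=12.0, kobj=2.0)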
ultralytics/yolo/data/augment.py CHANGED
@@ -16,6 +16,8 @@ from ..utils.metrics import bbox_ioa
  from ..utils.ops import segment2box
  from .utils import polygons2masks, polygons2masks_overlap
 
+ POSE_FLIPLR_INDEX = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+
 
  # TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic
  class BaseTransform:
@@ -309,27 +311,22 @@ class RandomPerspective:
  """apply affine to keypoints.
 
  Args:
-     keypoints(ndarray): keypoints, [N, 17, 2].
+     keypoints(ndarray): keypoints, [N, 17, 3].
      M(ndarray): affine matrix.
  Return:
-     new_keypoints(ndarray): keypoints after affine, [N, 17, 2].
+     new_keypoints(ndarray): keypoints after affine, [N, 17, 3].
  """
- n = len(keypoints)
+ n, nkpt = keypoints.shape[:2]
  if n == 0:
      return keypoints
- new_keypoints = np.ones((n * 17, 3))
- new_keypoints[:, :2] = keypoints.reshape(n * 17, 2)  # num_kpt is hardcoded to 17
- new_keypoints = new_keypoints @ M.T  # transform
- new_keypoints = (new_keypoints[:, :2] / new_keypoints[:, 2:3]).reshape(n, 34)  # perspective rescale or affine
- new_keypoints[keypoints.reshape(-1, 34) == 0] = 0
- x_kpts = new_keypoints[:, list(range(0, 34, 2))]
- y_kpts = new_keypoints[:, list(range(1, 34, 2))]
-
- x_kpts[np.logical_or.reduce((x_kpts < 0, x_kpts > self.size[0], y_kpts < 0, y_kpts > self.size[1]))] = 0
- y_kpts[np.logical_or.reduce((x_kpts < 0, x_kpts > self.size[0], y_kpts < 0, y_kpts > self.size[1]))] = 0
- new_keypoints[:, list(range(0, 34, 2))] = x_kpts
- new_keypoints[:, list(range(1, 34, 2))] = y_kpts
- return new_keypoints.reshape(n, 17, 2)
+ xy = np.ones((n * nkpt, 3))
+ visible = keypoints[..., 2].reshape(n * nkpt, 1)
+ xy[:, :2] = keypoints[..., :2].reshape(n * nkpt, 2)
+ xy = xy @ M.T  # transform
+ xy = xy[:, :2] / xy[:, 2:3]  # perspective rescale or affine
+ out_mask = (xy[:, 0] < 0) | (xy[:, 1] < 0) | (xy[:, 0] > self.size[0]) | (xy[:, 1] > self.size[1])
+ visible[out_mask] = 0
+ return np.concatenate([xy, visible], axis=-1).reshape(n, nkpt, 3)
 
  def __call__(self, labels):
      """
@@ -415,12 +412,13 @@ class RandomHSV:
 
  class RandomFlip:
 
-     def __init__(self, p=0.5, direction='horizontal') -> None:
+     def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None:
          assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}'
          assert 0 <= p <= 1.0
 
          self.p = p
          self.direction = direction
+         self.flip_idx = flip_idx
 
      def __call__(self, labels):
          img = labels['img']
@@ -437,6 +435,9 @@ class RandomFlip:
  if self.direction == 'horizontal' and random.random() < self.p:
      img = np.fliplr(img)
      instances.fliplr(w)
+     # for keypoints
+     if self.flip_idx is not None and instances.keypoints is not None:
+         instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :])
  labels['img'] = np.ascontiguousarray(img)
  labels['instances'] = instances
  return labels
@@ -633,7 +634,7 @@ class Format:
  labels['cls'] = torch.from_numpy(cls) if nl else torch.zeros(nl)
  labels['bboxes'] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
  if self.return_keypoint:
-     labels['keypoints'] = torch.from_numpy(instances.keypoints) if nl else torch.zeros((nl, 17, 2))
+     labels['keypoints'] = torch.from_numpy(instances.keypoints)
  # then we can use collate_fn
  if self.batch_idx:
      labels['batch_idx'] = torch.zeros(nl)
@@ -672,13 +673,17 @@ def v8_transforms(dataset, imgsz, hyp):
      perspective=hyp.perspective,
      pre_transform=LetterBox(new_shape=(imgsz, imgsz)),
  )])
+ flip_idx = dataset.data.get('flip_idx', None)  # for keypoints augmentation
+ if dataset.use_keypoints and flip_idx is None and hyp.fliplr > 0.0:
+     hyp.fliplr = 0.0
+     LOGGER.warning("WARNING ⚠️ No `flip_idx` provided while training keypoints, setting augmentation 'fliplr=0.0'")
  return Compose([
      pre_transform,
      MixUp(dataset, pre_transform=pre_transform, p=hyp.mixup),
      Albumentations(p=1.0),
      RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v),
      RandomFlip(direction='vertical', p=hyp.flipud),
-     RandomFlip(direction='horizontal', p=hyp.fliplr)])  # transforms
+     RandomFlip(direction='horizontal', p=hyp.fliplr, flip_idx=flip_idx)])  # transforms
 
 
  # Classification augmentations -----------------------------------------------------------------------------------------
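Worth spelling out why flip_idx matters: np.fliplr mirrors the image and instances.fliplr(w) mirrors the x coordinates, but left/right joints must also swap identity, which is what the index remap in RandomFlip does. A small self-contained illustration with dummy keypoint labels:

import numpy as np

flip_idx = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]  # COCO left/right pairs

# Dummy (num_instances, nkpt, 3) keypoints; each keypoint carries (x, y, visible).
keypoints = np.arange(17 * 3, dtype=np.float32).reshape(1, 17, 3)
flipped = np.ascontiguousarray(keypoints[:, flip_idx, :])

print(np.array_equal(flipped[0, 1], keypoints[0, 2]))  # True: joint 1 now holds what was joint 2
print(np.array_equal(flipped[0, 0], keypoints[0, 0]))  # True: the nose (index 0) maps to itself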
ultralytics/yolo/data/build.py CHANGED
@@ -61,7 +61,7 @@ def seed_worker(worker_id): # noqa
  random.seed(worker_seed)
 
 
- def build_dataloader(cfg, batch, img_path, stride=32, rect=False, names=None, rank=-1, mode='train'):
+ def build_dataloader(cfg, batch, img_path, data_info, stride=32, rect=False, rank=-1, mode='train'):
      assert mode in ['train', 'val']
      shuffle = mode == 'train'
      if cfg.rect and shuffle:
@@ -81,9 +81,9 @@ def build_dataloader(cfg, batch, img_path, stride=32, rect=False, names=None, ra
  pad=0.0 if mode == 'train' else 0.5,
  prefix=colorstr(f'{mode}: '),
  use_segments=cfg.task == 'segment',
- use_keypoints=cfg.task == 'keypoint',
- names=names,
- classes=cfg.classes)
+ use_keypoints=cfg.task == 'pose',
+ classes=cfg.classes,
+ data=data_info)
 
  batch = min(batch, len(dataset))
  nd = torch.cuda.device_count()  # number of CUDA devices
ultralytics/yolo/data/dataset.py CHANGED
@@ -57,11 +57,11 @@ class YOLODataset(BaseDataset):
  single_cls=False,
  use_segments=False,
  use_keypoints=False,
- names=None,
+ data=None,
  classes=None):
  self.use_segments = use_segments
  self.use_keypoints = use_keypoints
- self.names = names
+ self.data = data
  assert not (self.use_segments and self.use_keypoints), 'Can not use both segments and keypoints.'
  super().__init__(img_path, imgsz, cache, augment, hyp, prefix, rect, batch_size, stride, pad, single_cls,
                   classes)
@@ -77,10 +77,16 @@ class YOLODataset(BaseDataset):
  nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
  desc = f'{self.prefix}Scanning {path.parent / path.stem}...'
  total = len(self.im_files)
+ nc = len(self.data['names'])
+ nkpt, ndim = self.data.get('kpt_shape', (0, 0))
+ if self.use_keypoints and (nkpt <= 0 or ndim not in (2, 3)):
+     raise ValueError("'kpt_shape' in data.yaml missing or incorrect. Should be a list with [number of "
+                      "keypoints, number of dims (2 for x,y or 3 for x,y,visible)], i.e. 'kpt_shape: [17, 3]'")
  with ThreadPool(NUM_THREADS) as pool:
      results = pool.imap(func=verify_image_label,
                          iterable=zip(self.im_files, self.label_files, repeat(self.prefix),
-                                      repeat(self.use_keypoints), repeat(len(self.names))))
+                                      repeat(self.use_keypoints), repeat(len(self.data['names'])), repeat(nkpt),
+                                      repeat(ndim)))
      pbar = tqdm(results, desc=desc, total=total, bar_format=TQDM_BAR_FORMAT)
      for im_file, lb, shape, segments, keypoint, nm_f, nf_f, ne_f, nc_f, msg in pbar:
          nm += nm_f