ultralytics 8.0.64__py3-none-any.whl → 8.0.66__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ultralytics might be problematic.
- ultralytics/__init__.py +1 -1
- ultralytics/datasets/coco-pose.yaml +38 -0
- ultralytics/datasets/coco8-pose.yaml +25 -0
- ultralytics/models/v8/yolov8-pose-p6.yaml +57 -0
- ultralytics/models/v8/yolov8-pose.yaml +47 -0
- ultralytics/nn/autobackend.py +7 -2
- ultralytics/nn/modules.py +33 -2
- ultralytics/nn/tasks.py +24 -7
- ultralytics/tracker/track.py +2 -3
- ultralytics/yolo/cfg/__init__.py +4 -4
- ultralytics/yolo/cfg/default.yaml +2 -0
- ultralytics/yolo/data/augment.py +24 -19
- ultralytics/yolo/data/build.py +4 -4
- ultralytics/yolo/data/dataset.py +9 -3
- ultralytics/yolo/data/utils.py +110 -34
- ultralytics/yolo/engine/exporter.py +9 -7
- ultralytics/yolo/engine/model.py +5 -4
- ultralytics/yolo/engine/predictor.py +1 -0
- ultralytics/yolo/engine/results.py +70 -56
- ultralytics/yolo/utils/benchmarks.py +4 -2
- ultralytics/yolo/utils/downloads.py +3 -3
- ultralytics/yolo/utils/instance.py +1 -1
- ultralytics/yolo/utils/loss.py +14 -0
- ultralytics/yolo/utils/metrics.py +111 -13
- ultralytics/yolo/utils/ops.py +30 -50
- ultralytics/yolo/utils/plotting.py +79 -4
- ultralytics/yolo/utils/torch_utils.py +11 -9
- ultralytics/yolo/v8/__init__.py +2 -2
- ultralytics/yolo/v8/detect/train.py +1 -1
- ultralytics/yolo/v8/detect/val.py +2 -2
- ultralytics/yolo/v8/pose/__init__.py +7 -0
- ultralytics/yolo/v8/pose/predict.py +103 -0
- ultralytics/yolo/v8/pose/train.py +170 -0
- ultralytics/yolo/v8/pose/val.py +213 -0
- ultralytics/yolo/v8/segment/val.py +3 -4
- {ultralytics-8.0.64.dist-info → ultralytics-8.0.66.dist-info}/METADATA +27 -2
- {ultralytics-8.0.64.dist-info → ultralytics-8.0.66.dist-info}/RECORD +41 -33
- {ultralytics-8.0.64.dist-info → ultralytics-8.0.66.dist-info}/LICENSE +0 -0
- {ultralytics-8.0.64.dist-info → ultralytics-8.0.66.dist-info}/WHEEL +0 -0
- {ultralytics-8.0.64.dist-info → ultralytics-8.0.66.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.0.64.dist-info → ultralytics-8.0.66.dist-info}/top_level.txt +0 -0
ultralytics/__init__.py
CHANGED

ultralytics/datasets/coco-pose.yaml
ADDED
@@ -0,0 +1,38 @@
+# Ultralytics YOLO 🚀, GPL-3.0 license
+# COCO 2017 dataset http://cocodataset.org by Microsoft
+# Example usage: yolo train data=coco-pose.yaml
+# parent
+# ├── ultralytics
+# └── datasets
+#     └── coco-pose  ← downloads here (20.1 GB)
+
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/coco-pose  # dataset root dir
+train: train2017.txt  # train images (relative to 'path') 118287 images
+val: val2017.txt  # val images (relative to 'path') 5000 images
+test: test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
+
+# Keypoints
+kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+
+# Classes
+names:
+  0: person
+
+# Download script/URL (optional)
+download: |
+  from ultralytics.yolo.utils.downloads import download
+  from pathlib import Path
+
+  # Download labels
+  dir = Path(yaml['path'])  # dataset root dir
+  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
+  urls = [url + 'coco2017labels-pose.zip']  # labels
+  download(urls, dir=dir.parent)
+  # Download data
+  urls = ['http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
+          'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
+          'http://images.cocodataset.org/zips/test2017.zip']  # 7G, 41k images (optional)
+  download(urls, dir=dir / 'images', threads=3)
ultralytics/datasets/coco8-pose.yaml
ADDED
@@ -0,0 +1,25 @@
+# Ultralytics YOLO 🚀, GPL-3.0 license
+# COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics
+# Example usage: yolo train data=coco8-pose.yaml
+# parent
+# ├── ultralytics
+# └── datasets
+#     └── coco8-pose  ← downloads here (1 MB)
+
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/coco8-pose  # dataset root dir
+train: images/train  # train images (relative to 'path') 4 images
+val: images/val  # val images (relative to 'path') 4 images
+test:  # test images (optional)
+
+# Keypoints
+kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+
+# Classes
+names:
+  0: person
+
+# Download script/URL (optional)
+download: https://ultralytics.com/assets/coco8-pose.zip
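The two dataset YAMLs above are plain configuration files. A minimal training sketch follows, assuming the standard `ultralytics.YOLO` Python API of the 8.0.x series and that `yolov8n-pose.yaml` and `coco8-pose.yaml` resolve the same way the other task YAMLs do; the call itself is not part of this diff.

    # Minimal sketch: train a pose model on the small coco8-pose dataset
    from ultralytics import YOLO

    model = YOLO('yolov8n-pose.yaml')  # build a pose model from the new model YAML
    model.train(data='coco8-pose.yaml', epochs=10, imgsz=640)  # coco8-pose.zip is fetched via the 'download' URL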
ultralytics/models/v8/yolov8-pose-p6.yaml
ADDED
@@ -0,0 +1,57 @@
+# Ultralytics YOLO 🚀, GPL-3.0 license
+# YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 1  # number of classes
+kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024]
+  s: [0.33, 0.50, 1024]
+  m: [0.67, 0.75, 768]
+  l: [1.00, 1.00, 512]
+  x: [1.00, 1.25, 512]
+
+# YOLOv8.0x6 backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, Conv, [768, 3, 2]]  # 7-P5/32
+  - [-1, 3, C2f, [768, True]]
+  - [-1, 1, Conv, [1024, 3, 2]]  # 9-P6/64
+  - [-1, 3, C2f, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]]  # 11
+
+# YOLOv8.0x6 head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 8], 1, Concat, [1]]  # cat backbone P5
+  - [-1, 3, C2, [768, False]]  # 14
+
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
+  - [-1, 3, C2, [512, False]]  # 17
+
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
+  - [-1, 3, C2, [256, False]]  # 20 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 17], 1, Concat, [1]]  # cat head P4
+  - [-1, 3, C2, [512, False]]  # 23 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 14], 1, Concat, [1]]  # cat head P5
+  - [-1, 3, C2, [768, False]]  # 26 (P5/32-large)
+
+  - [-1, 1, Conv, [768, 3, 2]]
+  - [[-1, 11], 1, Concat, [1]]  # cat head P6
+  - [-1, 3, C2, [1024, False]]  # 29 (P6/64-xlarge)
+
+  - [[20, 23, 26, 29], 1, Pose, [nc, kpt_shape]]  # Pose(P3, P4, P5, P6)
ultralytics/models/v8/yolov8-pose.yaml
ADDED
@@ -0,0 +1,47 @@
+# Ultralytics YOLO 🚀, GPL-3.0 license
+# YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose
+
+# Parameters
+nc: 1  # number of classes
+kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will call yolov8-pose.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024]
+  s: [0.33, 0.50, 1024]
+  m: [0.67, 0.75, 768]
+  l: [1.00, 1.00, 512]
+  x: [1.00, 1.25, 512]
+
+# YOLOv8.0n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
+  - [-1, 3, C2f, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]]  # 9
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
+  - [-1, 3, C2f, [512]]  # 12
+
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
+  - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 12], 1, Concat, [1]]  # cat head P4
+  - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 9], 1, Concat, [1]]  # cat head P5
+  - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
+
+  - [[15, 18, 21], 1, Pose, [nc, kpt_shape]]  # Pose(P3, P4, P5)
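Both model YAMLs reuse the YOLOv8 `scales:` convention: the depth multiple scales block repeats and the width multiple scales channel counts, capped by max_channels. The small sketch below shows how those constants are conventionally applied when a model YAML is parsed; the rounding helpers follow `parse_model`/`make_divisible` behaviour and are stated here as assumptions, not copied from this diff.

    # Sketch: applying the 'n' scale [depth=0.33, width=0.25, max_channels=1024] to one layer spec
    import math

    def make_divisible(x, divisor=8):  # round channels up to the nearest multiple of `divisor`
        return math.ceil(x / divisor) * divisor

    depth, width, max_channels = 0.33, 0.25, 1024
    repeats = max(round(3 * depth), 1)                            # a 3-repeat C2f block -> 1 repeat
    channels = make_divisible(min(512, max_channels) * width, 8)  # a 512-channel layer -> 128 channels
    print(repeats, channels)  # 1 128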
ultralytics/nn/autobackend.py
CHANGED
@@ -91,8 +91,10 @@ class AutoBackend(nn.Module):
         if nn_module:
             model = weights.to(device)
             model = model.fuse(verbose=verbose) if fuse else model
-
+            if hasattr(model, 'kpt_shape'):
+                kpt_shape = model.kpt_shape  # pose-only
             stride = max(int(model.stride.max()), 32)  # model stride
+            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
             model.half() if fp16 else model.float()
             self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
             pt = True
@@ -102,6 +104,8 @@ class AutoBackend(nn.Module):
                                          device=device,
                                          inplace=True,
                                          fuse=fuse)
+            if hasattr(model, 'kpt_shape'):
+                kpt_shape = model.kpt_shape  # pose-only
             stride = max(int(model.stride.max()), 32)  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
             model.half() if fp16 else model.float()
@@ -268,13 +272,14 @@ class AutoBackend(nn.Module):
             for k, v in metadata.items():
                 if k in ('stride', 'batch'):
                     metadata[k] = int(v)
-                elif k in ('imgsz', 'names') and isinstance(v, str):
+                elif k in ('imgsz', 'names', 'kpt_shape') and isinstance(v, str):
                     metadata[k] = eval(v)
             stride = metadata['stride']
             task = metadata['task']
             batch = metadata['batch']
             imgsz = metadata['imgsz']
             names = metadata['names']
+            kpt_shape = metadata.get('kpt_shape')
         elif not (pt or triton or nn_module):
             LOGGER.warning(f"WARNING ⚠️ Metadata not found for 'model={weights}'")
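Export metadata is stored as strings, so `kpt_shape` (like `imgsz` and `names`) has to be converted back to a Python object when a non-PyTorch backend is loaded; the diff does this with `eval`. The sketch below reproduces the idea with `ast.literal_eval` as a safe stand-in and is not taken from the library.

    import ast

    metadata = {'stride': '32', 'imgsz': '[640, 640]', 'kpt_shape': '[17, 3]'}
    metadata['stride'] = int(metadata['stride'])
    for k in ('imgsz', 'names', 'kpt_shape'):
        if k in metadata and isinstance(metadata[k], str):
            metadata[k] = ast.literal_eval(metadata[k])  # "[17, 3]" -> [17, 3]
    kpt_shape = metadata.get('kpt_shape')
    print(kpt_shape)  # [17, 3]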
ultralytics/nn/modules.py
CHANGED
@@ -378,7 +378,9 @@ class Ensemble(nn.ModuleList):
         return y, None  # inference, train output


-# heads
+# Model heads below ----------------------------------------------------------------------------------------------------
+
+
 class Detect(nn.Module):
     # YOLOv8 Detect head for detection models
     dynamic = False  # force grid reconstruction
@@ -394,7 +396,6 @@ class Detect(nn.Module):
         self.reg_max = 16  # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x)
         self.no = nc + self.reg_max * 4  # number of outputs per anchor
         self.stride = torch.zeros(self.nl)  # strides computed during build
-
         c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc)  # channels
         self.cv2 = nn.ModuleList(
             nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch)
@@ -454,6 +455,36 @@ class Segment(Detect):
         return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))


+class Pose(Detect):
+    # YOLOv8 Pose head for keypoints models
+    def __init__(self, nc=80, kpt_shape=(17, 3), ch=()):
+        super().__init__(nc, ch)
+        self.kpt_shape = kpt_shape  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+        self.nk = kpt_shape[0] * kpt_shape[1]  # number of keypoints total
+        self.detect = Detect.forward
+
+        c4 = max(ch[0] // 4, self.nk)
+        self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch)
+
+    def forward(self, x):
+        bs = x[0].shape[0]  # batch size
+        kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1)  # (bs, 17*3, h*w)
+        x = self.detect(self, x)
+        if self.training:
+            return x, kpt
+        pred_kpt = self.kpts_decode(kpt)
+        return torch.cat([x, pred_kpt], 1) if self.export else (torch.cat([x[0], pred_kpt], 1), (x[1], kpt))
+
+    def kpts_decode(self, kpts):
+        ndim = self.kpt_shape[1]
+        y = kpts.clone()
+        if ndim == 3:
+            y[:, 2::3].sigmoid_()  # inplace sigmoid
+        y[:, 0::ndim] = (y[:, 0::ndim] * 2.0 + (self.anchors[0] - 0.5)) * self.strides
+        y[:, 1::ndim] = (y[:, 1::ndim] * 2.0 + (self.anchors[1] - 0.5)) * self.strides
+        return y
+
+
 class Classify(nn.Module):
     # YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
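The heart of the new head is `kpts_decode`, which applies the same grid decoding as the box branch: each raw offset is mapped to pixels as (offset * 2 + anchor - 0.5) * stride, and the visibility channel is passed through a sigmoid. A standalone sketch on dummy tensors follows; the toy anchor grid and shapes are assumptions for illustration only.

    import torch

    bs, nkpt, ndim, na = 1, 17, 3, 4           # batch, keypoints, dims (x, y, visible), anchor cells
    kpts = torch.randn(bs, nkpt * ndim, na)    # raw head output, (bs, 51, num_anchors)
    anchor_x = torch.arange(na).float() + 0.5  # toy single-row grid of anchor centres
    anchor_y = torch.zeros(na) + 0.5
    strides = torch.full((1, na), 8.0)         # stride per anchor cell

    y = kpts.clone()
    y[:, 2::ndim] = y[:, 2::ndim].sigmoid()                             # visibility -> probability
    y[:, 0::ndim] = (y[:, 0::ndim] * 2.0 + (anchor_x - 0.5)) * strides  # x offsets -> pixels
    y[:, 1::ndim] = (y[:, 1::ndim] * 2.0 + (anchor_y - 0.5)) * strides  # y offsets -> pixels
    print(y.shape)  # torch.Size([1, 51, 4])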
ultralytics/nn/tasks.py
CHANGED
@@ -10,7 +10,7 @@ import torch.nn as nn

 from ultralytics.nn.modules import (C1, C2, C3, C3TR, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, Classify,
                                     Concat, Conv, ConvTranspose, Detect, DWConv, DWConvTranspose2d, Ensemble, Focus,
-                                    GhostBottleneck, GhostConv, Segment)
+                                    GhostBottleneck, GhostConv, Pose, Segment)
 from ultralytics.yolo.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, colorstr, emojis, yaml_load
 from ultralytics.yolo.utils.checks import check_requirements, check_suffix, check_yaml
 from ultralytics.yolo.utils.torch_utils import (fuse_conv_and_bn, fuse_deconv_and_bn, initialize_weights,
@@ -183,10 +183,10 @@ class DetectionModel(BaseModel):

         # Build strides
         m = self.model[-1]  # Detect()
-        if isinstance(m, (Detect, Segment)):
+        if isinstance(m, (Detect, Segment, Pose)):
             s = 256  # 2x min stride
             m.inplace = self.inplace
-            forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
+            forward = lambda x: self.forward(x)[0] if isinstance(m, (Segment, Pose)) else self.forward(x)
             m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))])  # forward
             self.stride = m.stride
             m.bias_init()  # only run once
@@ -242,12 +242,23 @@
 class SegmentationModel(DetectionModel):
     # YOLOv8 segmentation model
     def __init__(self, cfg='yolov8n-seg.yaml', ch=3, nc=None, verbose=True):
-        super().__init__(cfg, ch, nc, verbose)
+        super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)

     def _forward_augment(self, x):
         raise NotImplementedError(emojis('WARNING ⚠️ SegmentationModel has not supported augment inference yet!'))


+class PoseModel(DetectionModel):
+    # YOLOv8 pose model
+    def __init__(self, cfg='yolov8n-pose.yaml', ch=3, nc=None, data_kpt_shape=(None, None), verbose=True):
+        if not isinstance(cfg, dict):
+            cfg = yaml_model_load(cfg)  # load model YAML
+        if any(data_kpt_shape) and list(data_kpt_shape) != list(cfg['kpt_shape']):
+            LOGGER.info(f"Overriding model.yaml kpt_shape={cfg['kpt_shape']} with kpt_shape={data_kpt_shape}")
+            cfg['kpt_shape'] = data_kpt_shape
+        super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
+
+
 class ClassificationModel(BaseModel):
     # YOLOv8 classification model
     def __init__(self,
@@ -425,7 +436,7 @@ def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
     # Args
     max_channels = float('inf')
     nc, act, scales = (d.get(x) for x in ('nc', 'act', 'scales'))
-    depth, width = (d.get(x, 1.0) for x in ('depth_multiple', 'width_multiple'))
+    depth, width, kpt_shape = (d.get(x, 1.0) for x in ('depth_multiple', 'width_multiple', 'kpt_shape'))
     if scales:
         scale = d.get('scale')
         if not scale:
@@ -464,7 +475,7 @@ def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
             args = [ch[f]]
         elif m is Concat:
             c2 = sum(ch[x] for x in f)
-        elif m in (Detect, Segment):
+        elif m in (Detect, Segment, Pose):
             args.append([ch[x] for x in f])
             if m is Segment:
                 args[2] = make_divisible(min(args[2], max_channels) * width, 8)
@@ -543,6 +554,8 @@ def guess_model_task(model):
             return 'detect'
         if m == 'segment':
             return 'segment'
+        if m == 'pose':
+            return 'pose'

     # Guess from model cfg
     if isinstance(model, dict):
@@ -565,6 +578,8 @@ def guess_model_task(model):
                 return 'segment'
             elif isinstance(m, Classify):
                 return 'classify'
+            elif isinstance(m, Pose):
+                return 'pose'

     # Guess from model filename
     if isinstance(model, (str, Path)):
@@ -573,10 +588,12 @@ def guess_model_task(model):
             return 'segment'
         elif '-cls' in model.stem or 'classify' in model.parts:
             return 'classify'
+        elif '-pose' in model.stem or 'pose' in model.parts:
+            return 'pose'
         elif 'detect' in model.parts:
             return 'detect'

     # Unable to determine task from model
     LOGGER.warning("WARNING ⚠️ Unable to automatically guess model task, assuming 'task=detect'. "
-                   "Explicitly define task for your model, i.e. 'task=detect', '
+                   "Explicitly define task for your model, i.e. 'task=detect', 'segment', 'classify', or 'pose'.")
     return 'detect'  # assume detect
ultralytics/tracker/track.py
CHANGED
@@ -33,10 +33,9 @@ def on_predict_postprocess_end(predictor):
         tracks = predictor.trackers[i].update(det, im0s[i])
         if len(tracks) == 0:
             continue
+        idx = tracks[:, -1].tolist()
+        predictor.results[i] = predictor.results[i][idx]
         predictor.results[i].update(boxes=torch.as_tensor(tracks[:, :-1]))
-        if predictor.results[i].masks is not None:
-            idx = tracks[:, -1].tolist()
-            predictor.results[i].masks = predictor.results[i].masks[idx]


 def register_tracker(model):
ultralytics/yolo/cfg/__init__.py
CHANGED
@@ -18,13 +18,13 @@ TASKS = 'detect', 'segment', 'classify', 'pose'
 TASK2DATA = {
     'detect': 'coco128.yaml',
     'segment': 'coco128-seg.yaml',
-    '
-    '
+    'classify': 'imagenet100',
+    'pose': 'coco128-pose.yaml'}
 TASK2MODEL = {
     'detect': 'yolov8n.pt',
     'segment': 'yolov8n-seg.pt',
-    '
-    '
+    'classify': 'yolov8n-cls.pt',
+    'pose': 'yolov8n-pose.yaml'}

 CLI_HELP_MSG = \
     f"""

ultralytics/yolo/cfg/default.yaml
CHANGED
@@ -88,6 +88,8 @@ warmup_bias_lr: 0.1  # warmup initial bias lr
 box: 7.5  # box loss gain
 cls: 0.5  # cls loss gain (scale with pixels)
 dfl: 1.5  # dfl loss gain
+pose: 12.0  # pose loss gain
+kobj: 1.0  # keypoint obj loss gain
 fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
 label_smoothing: 0.0  # label smoothing (fraction)
 nbs: 64  # nominal batch size
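The two new default.yaml keys weight the keypoint terms of the pose loss: `pose` scales the keypoint-location loss and `kobj` the keypoint-objectness loss. A hedged override sketch is below, assuming default.yaml keys are accepted as `train()` keyword overrides the same way `box`, `cls` and `dfl` are.

    from ultralytics import YOLO

    model = YOLO('yolov8n-pose.yaml')
    model.train(data='coco8-pose.yaml', epochs=10, pose=12.0, kobj=1.0)  # defaults written out explicitly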
ultralytics/yolo/data/augment.py
CHANGED
@@ -16,6 +16,8 @@ from ..utils.metrics import bbox_ioa
 from ..utils.ops import segment2box
 from .utils import polygons2masks, polygons2masks_overlap

+POSE_FLIPLR_INDEX = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+

 # TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic
 class BaseTransform:
@@ -309,27 +311,22 @@ class RandomPerspective:
         """apply affine to keypoints.

         Args:
-            keypoints(ndarray): keypoints, [N, 17,
+            keypoints(ndarray): keypoints, [N, 17, 3].
             M(ndarray): affine matrix.
         Return:
-            new_keypoints(ndarray): keypoints after affine, [N, 17,
+            new_keypoints(ndarray): keypoints after affine, [N, 17, 3].
         """
-        n =
+        n, nkpt = keypoints.shape[:2]
         if n == 0:
             return keypoints
-
-
-
-
-
-
-
-
-        x_kpts[np.logical_or.reduce((x_kpts < 0, x_kpts > self.size[0], y_kpts < 0, y_kpts > self.size[1]))] = 0
-        y_kpts[np.logical_or.reduce((x_kpts < 0, x_kpts > self.size[0], y_kpts < 0, y_kpts > self.size[1]))] = 0
-        new_keypoints[:, list(range(0, 34, 2))] = x_kpts
-        new_keypoints[:, list(range(1, 34, 2))] = y_kpts
-        return new_keypoints.reshape(n, 17, 2)
+        xy = np.ones((n * nkpt, 3))
+        visible = keypoints[..., 2].reshape(n * nkpt, 1)
+        xy[:, :2] = keypoints[..., :2].reshape(n * nkpt, 2)
+        xy = xy @ M.T  # transform
+        xy = xy[:, :2] / xy[:, 2:3]  # perspective rescale or affine
+        out_mask = (xy[:, 0] < 0) | (xy[:, 1] < 0) | (xy[:, 0] > self.size[0]) | (xy[:, 1] > self.size[1])
+        visible[out_mask] = 0
+        return np.concatenate([xy, visible], axis=-1).reshape(n, nkpt, 3)

     def __call__(self, labels):
         """
@@ -415,12 +412,13 @@ class RandomHSV:

 class RandomFlip:

-    def __init__(self, p=0.5, direction='horizontal') -> None:
+    def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None:
         assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}'
         assert 0 <= p <= 1.0

         self.p = p
         self.direction = direction
+        self.flip_idx = flip_idx

     def __call__(self, labels):
         img = labels['img']
@@ -437,6 +435,9 @@ class RandomFlip:
         if self.direction == 'horizontal' and random.random() < self.p:
             img = np.fliplr(img)
             instances.fliplr(w)
+            # for keypoints
+            if self.flip_idx is not None and instances.keypoints is not None:
+                instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :])
         labels['img'] = np.ascontiguousarray(img)
         labels['instances'] = instances
         return labels
@@ -633,7 +634,7 @@ class Format:
         labels['cls'] = torch.from_numpy(cls) if nl else torch.zeros(nl)
         labels['bboxes'] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
         if self.return_keypoint:
-            labels['keypoints'] = torch.from_numpy(instances.keypoints)
+            labels['keypoints'] = torch.from_numpy(instances.keypoints)
         # then we can use collate_fn
         if self.batch_idx:
             labels['batch_idx'] = torch.zeros(nl)
@@ -672,13 +673,17 @@ def v8_transforms(dataset, imgsz, hyp):
         perspective=hyp.perspective,
         pre_transform=LetterBox(new_shape=(imgsz, imgsz)),
     )])
+    flip_idx = dataset.data.get('flip_idx', None)  # for keypoints augmentation
+    if dataset.use_keypoints and flip_idx is None and hyp.fliplr > 0.0:
+        hyp.fliplr = 0.0
+        LOGGER.warning("WARNING ⚠️ No `flip_idx` provided while training keypoints, setting augmentation 'fliplr=0.0'")
     return Compose([
         pre_transform,
         MixUp(dataset, pre_transform=pre_transform, p=hyp.mixup),
        Albumentations(p=1.0),
         RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v),
         RandomFlip(direction='vertical', p=hyp.flipud),
-        RandomFlip(direction='horizontal', p=hyp.fliplr)])  # transforms
+        RandomFlip(direction='horizontal', p=hyp.fliplr, flip_idx=flip_idx)])  # transforms


 # Classification augmentations -----------------------------------------------------------------------------------------
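A horizontal flip must both mirror keypoint x-coordinates and swap left/right keypoints, which is what `flip_idx` encodes; `RandomFlip` now performs the swap whenever a `flip_idx` is supplied. The numpy sketch below shows the combined step in isolation (simplified; in the library the mirroring itself is done by `Instances.fliplr`, as in the diff context above).

    import numpy as np

    flip_idx = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]  # COCO left/right pairs
    w = 640                                             # image width
    keypoints = np.random.rand(2, 17, 3) * [w, 480, 1]  # (instances, 17, [x, y, visible])

    keypoints[..., 0] = w - keypoints[..., 0]           # mirror x about the image width
    keypoints = np.ascontiguousarray(keypoints[:, flip_idx, :])  # swap left/right keypoints
    print(keypoints.shape)  # (2, 17, 3)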
ultralytics/yolo/data/build.py
CHANGED
@@ -61,7 +61,7 @@ def seed_worker(worker_id):  # noqa
     random.seed(worker_seed)


-def build_dataloader(cfg, batch, img_path, stride=32, rect=False,
+def build_dataloader(cfg, batch, img_path, data_info, stride=32, rect=False, rank=-1, mode='train'):
     assert mode in ['train', 'val']
     shuffle = mode == 'train'
     if cfg.rect and shuffle:
@@ -81,9 +81,9 @@ def build_dataloader(cfg, batch, img_path, stride=32, rect=False, names=None, ra
         pad=0.0 if mode == 'train' else 0.5,
         prefix=colorstr(f'{mode}: '),
         use_segments=cfg.task == 'segment',
-        use_keypoints=cfg.task == '
-
-
+        use_keypoints=cfg.task == 'pose',
+        classes=cfg.classes,
+        data=data_info)

     batch = min(batch, len(dataset))
     nd = torch.cuda.device_count()  # number of CUDA devices
ultralytics/yolo/data/dataset.py
CHANGED
@@ -57,11 +57,11 @@ class YOLODataset(BaseDataset):
                  single_cls=False,
                  use_segments=False,
                  use_keypoints=False,
-
+                 data=None,
                  classes=None):
         self.use_segments = use_segments
         self.use_keypoints = use_keypoints
-        self.
+        self.data = data
         assert not (self.use_segments and self.use_keypoints), 'Can not use both segments and keypoints.'
         super().__init__(img_path, imgsz, cache, augment, hyp, prefix, rect, batch_size, stride, pad, single_cls,
                          classes)
@@ -77,10 +77,16 @@ class YOLODataset(BaseDataset):
         nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
         desc = f'{self.prefix}Scanning {path.parent / path.stem}...'
         total = len(self.im_files)
+        nc = len(self.data['names'])
+        nkpt, ndim = self.data.get('kpt_shape', (0, 0))
+        if self.use_keypoints and (nkpt <= 0 or ndim not in (2, 3)):
+            raise ValueError("'kpt_shape' in data.yaml missing or incorrect. Should be a list with [number of "
+                             "keypoints, number of dims (2 for x,y or 3 for x,y,visible)], i.e. 'kpt_shape: [17, 3]'")
         with ThreadPool(NUM_THREADS) as pool:
             results = pool.imap(func=verify_image_label,
                                 iterable=zip(self.im_files, self.label_files, repeat(self.prefix),
-                                             repeat(self.use_keypoints), repeat(len(self.names)))
+                                             repeat(self.use_keypoints), repeat(len(self.data['names'])), repeat(nkpt),
+                                             repeat(ndim)))
             pbar = tqdm(results, desc=desc, total=total, bar_format=TQDM_BAR_FORMAT)
             for im_file, lb, shape, segments, keypoint, nm_f, nf_f, ne_f, nc_f, msg in pbar:
                 nm += nm_f