ultralytics 8.1.2__py3-none-any.whl → 8.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ultralytics might be problematic. Click here for more details.
- ultralytics/__init__.py +15 -3
- ultralytics/cfg/datasets/Argoverse.yaml +7 -7
- ultralytics/cfg/datasets/DOTAv1.5.yaml +4 -4
- ultralytics/cfg/datasets/DOTAv1.yaml +4 -4
- ultralytics/cfg/datasets/GlobalWheat2020.yaml +1 -3
- ultralytics/cfg/datasets/ImageNet.yaml +4 -6
- ultralytics/cfg/datasets/Objects365.yaml +3 -5
- ultralytics/cfg/datasets/SKU-110K.yaml +4 -6
- ultralytics/cfg/datasets/VOC.yaml +0 -2
- ultralytics/cfg/datasets/VisDrone.yaml +4 -6
- ultralytics/cfg/datasets/coco-pose.yaml +5 -6
- ultralytics/cfg/datasets/coco.yaml +4 -6
- ultralytics/cfg/datasets/coco128-seg.yaml +4 -6
- ultralytics/cfg/datasets/coco128.yaml +4 -6
- ultralytics/cfg/datasets/coco8-pose.yaml +5 -6
- ultralytics/cfg/datasets/coco8-seg.yaml +4 -6
- ultralytics/cfg/datasets/coco8.yaml +4 -6
- ultralytics/cfg/datasets/dota8.yaml +3 -3
- ultralytics/cfg/datasets/open-images-v7.yaml +4 -6
- ultralytics/cfg/datasets/tiger-pose.yaml +4 -5
- ultralytics/cfg/datasets/xView.yaml +3 -5
- ultralytics/cfg/default.yaml +103 -103
- ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +27 -27
- ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +23 -23
- ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +23 -23
- ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +27 -27
- ultralytics/cfg/models/v3/yolov3-spp.yaml +18 -18
- ultralytics/cfg/models/v3/yolov3-tiny.yaml +16 -16
- ultralytics/cfg/models/v3/yolov3.yaml +18 -18
- ultralytics/cfg/models/v5/yolov5-p6.yaml +24 -24
- ultralytics/cfg/models/v5/yolov5.yaml +18 -19
- ultralytics/cfg/models/v6/yolov6.yaml +17 -17
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +25 -0
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +25 -0
- ultralytics/cfg/models/v8/yolov8-cls.yaml +7 -7
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +26 -26
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +27 -27
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +23 -23
- ultralytics/cfg/models/v8/yolov8-obb.yaml +23 -23
- ultralytics/cfg/models/v8/yolov8-p2.yaml +23 -23
- ultralytics/cfg/models/v8/yolov8-p6.yaml +24 -24
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +25 -25
- ultralytics/cfg/models/v8/yolov8-pose.yaml +19 -19
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +23 -23
- ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +24 -24
- ultralytics/cfg/models/v8/yolov8-seg.yaml +18 -18
- ultralytics/cfg/models/v8/yolov8.yaml +23 -23
- ultralytics/cfg/trackers/botsort.yaml +7 -7
- ultralytics/cfg/trackers/bytetrack.yaml +6 -6
- ultralytics/data/build.py +1 -1
- ultralytics/engine/model.py +11 -7
- ultralytics/engine/trainer.py +1 -4
- ultralytics/hub/session.py +1 -1
- ultralytics/nn/modules/head.py +1 -1
- ultralytics/nn/modules/transformer.py +3 -3
- ultralytics/utils/callbacks/tensorboard.py +38 -15
- ultralytics/utils/ops.py +2 -2
- ultralytics/utils/plotting.py +1 -1
- {ultralytics-8.1.2.dist-info → ultralytics-8.1.4.dist-info}/METADATA +2 -2
- {ultralytics-8.1.2.dist-info → ultralytics-8.1.4.dist-info}/RECORD +64 -62
- {ultralytics-8.1.2.dist-info → ultralytics-8.1.4.dist-info}/LICENSE +0 -0
- {ultralytics-8.1.2.dist-info → ultralytics-8.1.4.dist-info}/WHEEL +0 -0
- {ultralytics-8.1.2.dist-info → ultralytics-8.1.4.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.1.2.dist-info → ultralytics-8.1.4.dist-info}/top_level.txt +0 -0
ultralytics/cfg/default.yaml
CHANGED
|
@@ -1,125 +1,125 @@
|
|
|
1
1
|
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
|
2
2
|
# Default training settings and hyperparameters for medium-augmentation COCO training
|
|
3
3
|
|
|
4
|
-
task: detect
|
|
5
|
-
mode: train
|
|
4
|
+
task: detect # (str) YOLO task, i.e. detect, segment, classify, pose
|
|
5
|
+
mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark
|
|
6
6
|
|
|
7
7
|
# Train settings -------------------------------------------------------------------------------------------------------
|
|
8
|
-
model:
|
|
9
|
-
data:
|
|
10
|
-
epochs: 100
|
|
11
|
-
time:
|
|
12
|
-
patience: 50
|
|
13
|
-
batch: 16
|
|
14
|
-
imgsz: 640
|
|
15
|
-
save: True
|
|
8
|
+
model: # (str, optional) path to model file, i.e. yolov8n.pt, yolov8n.yaml
|
|
9
|
+
data: # (str, optional) path to data file, i.e. coco128.yaml
|
|
10
|
+
epochs: 100 # (int) number of epochs to train for
|
|
11
|
+
time: # (float, optional) number of hours to train for, overrides epochs if supplied
|
|
12
|
+
patience: 50 # (int) epochs to wait for no observable improvement for early stopping of training
|
|
13
|
+
batch: 16 # (int) number of images per batch (-1 for AutoBatch)
|
|
14
|
+
imgsz: 640 # (int | list) input images size as int for train and val modes, or list[w,h] for predict and export modes
|
|
15
|
+
save: True # (bool) save train checkpoints and predict results
|
|
16
16
|
save_period: -1 # (int) Save checkpoint every x epochs (disabled if < 1)
|
|
17
|
-
cache: False
|
|
18
|
-
device:
|
|
19
|
-
workers: 8
|
|
20
|
-
project:
|
|
21
|
-
name:
|
|
22
|
-
exist_ok: False
|
|
23
|
-
pretrained: True
|
|
24
|
-
optimizer: auto
|
|
25
|
-
verbose: True
|
|
26
|
-
seed: 0
|
|
27
|
-
deterministic: True
|
|
28
|
-
single_cls: False
|
|
29
|
-
rect: False
|
|
30
|
-
cos_lr: False
|
|
31
|
-
close_mosaic: 10
|
|
32
|
-
resume: False
|
|
33
|
-
amp: True
|
|
34
|
-
fraction: 1.0
|
|
35
|
-
profile: False
|
|
36
|
-
freeze: None
|
|
37
|
-
multi_scale: False
|
|
17
|
+
cache: False # (bool) True/ram, disk or False. Use cache for data loading
|
|
18
|
+
device: # (int | str | list, optional) device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
|
|
19
|
+
workers: 8 # (int) number of worker threads for data loading (per RANK if DDP)
|
|
20
|
+
project: # (str, optional) project name
|
|
21
|
+
name: # (str, optional) experiment name, results saved to 'project/name' directory
|
|
22
|
+
exist_ok: False # (bool) whether to overwrite existing experiment
|
|
23
|
+
pretrained: True # (bool | str) whether to use a pretrained model (bool) or a model to load weights from (str)
|
|
24
|
+
optimizer: auto # (str) optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto]
|
|
25
|
+
verbose: True # (bool) whether to print verbose output
|
|
26
|
+
seed: 0 # (int) random seed for reproducibility
|
|
27
|
+
deterministic: True # (bool) whether to enable deterministic mode
|
|
28
|
+
single_cls: False # (bool) train multi-class data as single-class
|
|
29
|
+
rect: False # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
|
|
30
|
+
cos_lr: False # (bool) use cosine learning rate scheduler
|
|
31
|
+
close_mosaic: 10 # (int) disable mosaic augmentation for final epochs (0 to disable)
|
|
32
|
+
resume: False # (bool) resume training from last checkpoint
|
|
33
|
+
amp: True # (bool) Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
|
|
34
|
+
fraction: 1.0 # (float) dataset fraction to train on (default is 1.0, all images in train set)
|
|
35
|
+
profile: False # (bool) profile ONNX and TensorRT speeds during training for loggers
|
|
36
|
+
freeze: None # (int | list, optional) freeze first n layers, or freeze list of layer indices during training
|
|
37
|
+
multi_scale: False # (bool) Whether to use multi-scale during training
|
|
38
38
|
# Segmentation
|
|
39
|
-
overlap_mask: True
|
|
40
|
-
mask_ratio: 4
|
|
39
|
+
overlap_mask: True # (bool) masks should overlap during training (segment train only)
|
|
40
|
+
mask_ratio: 4 # (int) mask downsample ratio (segment train only)
|
|
41
41
|
# Classification
|
|
42
|
-
dropout: 0.0
|
|
42
|
+
dropout: 0.0 # (float) use dropout regularization (classify train only)
|
|
43
43
|
|
|
44
44
|
# Val/Test settings ----------------------------------------------------------------------------------------------------
|
|
45
|
-
val: True
|
|
46
|
-
split: val
|
|
47
|
-
save_json: False
|
|
48
|
-
save_hybrid: False
|
|
49
|
-
conf:
|
|
50
|
-
iou: 0.7
|
|
51
|
-
max_det: 300
|
|
52
|
-
half: False
|
|
53
|
-
dnn: False
|
|
54
|
-
plots: True
|
|
45
|
+
val: True # (bool) validate/test during training
|
|
46
|
+
split: val # (str) dataset split to use for validation, i.e. 'val', 'test' or 'train'
|
|
47
|
+
save_json: False # (bool) save results to JSON file
|
|
48
|
+
save_hybrid: False # (bool) save hybrid version of labels (labels + additional predictions)
|
|
49
|
+
conf: # (float, optional) object confidence threshold for detection (default 0.25 predict, 0.001 val)
|
|
50
|
+
iou: 0.7 # (float) intersection over union (IoU) threshold for NMS
|
|
51
|
+
max_det: 300 # (int) maximum number of detections per image
|
|
52
|
+
half: False # (bool) use half precision (FP16)
|
|
53
|
+
dnn: False # (bool) use OpenCV DNN for ONNX inference
|
|
54
|
+
plots: True # (bool) save plots and images during train/val
|
|
55
55
|
|
|
56
56
|
# Predict settings -----------------------------------------------------------------------------------------------------
|
|
57
|
-
source:
|
|
58
|
-
vid_stride: 1
|
|
59
|
-
stream_buffer: False
|
|
60
|
-
visualize: False
|
|
61
|
-
augment: False
|
|
62
|
-
agnostic_nms: False
|
|
63
|
-
classes:
|
|
64
|
-
retina_masks: False
|
|
65
|
-
embed:
|
|
57
|
+
source: # (str, optional) source directory for images or videos
|
|
58
|
+
vid_stride: 1 # (int) video frame-rate stride
|
|
59
|
+
stream_buffer: False # (bool) buffer all streaming frames (True) or return the most recent frame (False)
|
|
60
|
+
visualize: False # (bool) visualize model features
|
|
61
|
+
augment: False # (bool) apply image augmentation to prediction sources
|
|
62
|
+
agnostic_nms: False # (bool) class-agnostic NMS
|
|
63
|
+
classes: # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3]
|
|
64
|
+
retina_masks: False # (bool) use high-resolution segmentation masks
|
|
65
|
+
embed: # (list[int], optional) return feature vectors/embeddings from given layers
|
|
66
66
|
|
|
67
67
|
# Visualize settings ---------------------------------------------------------------------------------------------------
|
|
68
|
-
show: False
|
|
69
|
-
save_frames: False
|
|
70
|
-
save_txt: False
|
|
71
|
-
save_conf: False
|
|
72
|
-
save_crop: False
|
|
73
|
-
show_labels: True
|
|
74
|
-
show_conf: True
|
|
75
|
-
show_boxes: True
|
|
76
|
-
line_width:
|
|
68
|
+
show: False # (bool) show predicted images and videos if environment allows
|
|
69
|
+
save_frames: False # (bool) save predicted individual video frames
|
|
70
|
+
save_txt: False # (bool) save results as .txt file
|
|
71
|
+
save_conf: False # (bool) save results with confidence scores
|
|
72
|
+
save_crop: False # (bool) save cropped images with results
|
|
73
|
+
show_labels: True # (bool) show prediction labels, i.e. 'person'
|
|
74
|
+
show_conf: True # (bool) show prediction confidence, i.e. '0.99'
|
|
75
|
+
show_boxes: True # (bool) show prediction boxes
|
|
76
|
+
line_width: # (int, optional) line width of the bounding boxes. Scaled to image size if None.
|
|
77
77
|
|
|
78
78
|
# Export settings ------------------------------------------------------------------------------------------------------
|
|
79
|
-
format: torchscript
|
|
80
|
-
keras: False
|
|
81
|
-
optimize: False
|
|
82
|
-
int8: False
|
|
83
|
-
dynamic: False
|
|
84
|
-
simplify: False
|
|
85
|
-
opset:
|
|
86
|
-
workspace: 4
|
|
87
|
-
nms: False
|
|
79
|
+
format: torchscript # (str) format to export to, choices at https://docs.ultralytics.com/modes/export/#export-formats
|
|
80
|
+
keras: False # (bool) use Keras
|
|
81
|
+
optimize: False # (bool) TorchScript: optimize for mobile
|
|
82
|
+
int8: False # (bool) CoreML/TF INT8 quantization
|
|
83
|
+
dynamic: False # (bool) ONNX/TF/TensorRT: dynamic axes
|
|
84
|
+
simplify: False # (bool) ONNX: simplify model
|
|
85
|
+
opset: # (int, optional) ONNX: opset version
|
|
86
|
+
workspace: 4 # (int) TensorRT: workspace size (GB)
|
|
87
|
+
nms: False # (bool) CoreML: add NMS
|
|
88
88
|
|
|
89
89
|
# Hyperparameters ------------------------------------------------------------------------------------------------------
|
|
90
|
-
lr0: 0.01
|
|
91
|
-
lrf: 0.01
|
|
92
|
-
momentum: 0.937
|
|
93
|
-
weight_decay: 0.0005
|
|
94
|
-
warmup_epochs: 3.0
|
|
95
|
-
warmup_momentum: 0.8
|
|
96
|
-
warmup_bias_lr: 0.1
|
|
97
|
-
box: 7.5
|
|
98
|
-
cls: 0.5
|
|
99
|
-
dfl: 1.5
|
|
100
|
-
pose: 12.0
|
|
101
|
-
kobj: 1.0
|
|
102
|
-
label_smoothing: 0.0
|
|
103
|
-
nbs: 64
|
|
104
|
-
hsv_h: 0.015
|
|
105
|
-
hsv_s: 0.7
|
|
106
|
-
hsv_v: 0.4
|
|
107
|
-
degrees: 0.0
|
|
108
|
-
translate: 0.1
|
|
109
|
-
scale: 0.5
|
|
110
|
-
shear: 0.0
|
|
111
|
-
perspective: 0.0
|
|
112
|
-
flipud: 0.0
|
|
113
|
-
fliplr: 0.5
|
|
114
|
-
mosaic: 1.0
|
|
115
|
-
mixup: 0.0
|
|
116
|
-
copy_paste: 0.0
|
|
117
|
-
auto_augment: randaugment
|
|
118
|
-
erasing: 0.4
|
|
119
|
-
crop_fraction: 1.0
|
|
90
|
+
lr0: 0.01 # (float) initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
|
|
91
|
+
lrf: 0.01 # (float) final learning rate (lr0 * lrf)
|
|
92
|
+
momentum: 0.937 # (float) SGD momentum/Adam beta1
|
|
93
|
+
weight_decay: 0.0005 # (float) optimizer weight decay 5e-4
|
|
94
|
+
warmup_epochs: 3.0 # (float) warmup epochs (fractions ok)
|
|
95
|
+
warmup_momentum: 0.8 # (float) warmup initial momentum
|
|
96
|
+
warmup_bias_lr: 0.1 # (float) warmup initial bias lr
|
|
97
|
+
box: 7.5 # (float) box loss gain
|
|
98
|
+
cls: 0.5 # (float) cls loss gain (scale with pixels)
|
|
99
|
+
dfl: 1.5 # (float) dfl loss gain
|
|
100
|
+
pose: 12.0 # (float) pose loss gain
|
|
101
|
+
kobj: 1.0 # (float) keypoint obj loss gain
|
|
102
|
+
label_smoothing: 0.0 # (float) label smoothing (fraction)
|
|
103
|
+
nbs: 64 # (int) nominal batch size
|
|
104
|
+
hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction)
|
|
105
|
+
hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction)
|
|
106
|
+
hsv_v: 0.4 # (float) image HSV-Value augmentation (fraction)
|
|
107
|
+
degrees: 0.0 # (float) image rotation (+/- deg)
|
|
108
|
+
translate: 0.1 # (float) image translation (+/- fraction)
|
|
109
|
+
scale: 0.5 # (float) image scale (+/- gain)
|
|
110
|
+
shear: 0.0 # (float) image shear (+/- deg)
|
|
111
|
+
perspective: 0.0 # (float) image perspective (+/- fraction), range 0-0.001
|
|
112
|
+
flipud: 0.0 # (float) image flip up-down (probability)
|
|
113
|
+
fliplr: 0.5 # (float) image flip left-right (probability)
|
|
114
|
+
mosaic: 1.0 # (float) image mosaic (probability)
|
|
115
|
+
mixup: 0.0 # (float) image mixup (probability)
|
|
116
|
+
copy_paste: 0.0 # (float) segment copy-paste (probability)
|
|
117
|
+
auto_augment: randaugment # (str) auto augmentation policy for classification (randaugment, autoaugment, augmix)
|
|
118
|
+
erasing: 0.4 # (float) probability of random erasing during classification training (0-1)
|
|
119
|
+
crop_fraction: 1.0 # (float) image crop fraction for classification evaluation/inference (0-1)
|
|
120
120
|
|
|
121
121
|
# Custom config.yaml ---------------------------------------------------------------------------------------------------
|
|
122
|
-
cfg:
|
|
122
|
+
cfg: # (str, optional) for overriding defaults.yaml
|
|
123
123
|
|
|
124
124
|
# Tracker settings ------------------------------------------------------------------------------------------------------
|
|
125
|
-
tracker: botsort.yaml
|
|
125
|
+
tracker: botsort.yaml # (str) tracker type, choices=[botsort.yaml, bytetrack.yaml]
|
|
@@ -2,49 +2,49 @@
|
|
|
2
2
|
# RT-DETR-l object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr
|
|
3
3
|
|
|
4
4
|
# Parameters
|
|
5
|
-
nc: 80
|
|
5
|
+
nc: 80 # number of classes
|
|
6
6
|
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
|
|
7
7
|
# [depth, width, max_channels]
|
|
8
8
|
l: [1.00, 1.00, 1024]
|
|
9
9
|
|
|
10
10
|
backbone:
|
|
11
11
|
# [from, repeats, module, args]
|
|
12
|
-
- [-1, 1, HGStem, [32, 48]]
|
|
13
|
-
- [-1, 6, HGBlock, [48, 128, 3]]
|
|
12
|
+
- [-1, 1, HGStem, [32, 48]] # 0-P2/4
|
|
13
|
+
- [-1, 6, HGBlock, [48, 128, 3]] # stage 1
|
|
14
14
|
|
|
15
|
-
- [-1, 1, DWConv, [128, 3, 2, 1, False]]
|
|
16
|
-
- [-1, 6, HGBlock, [96, 512, 3]]
|
|
15
|
+
- [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
|
|
16
|
+
- [-1, 6, HGBlock, [96, 512, 3]] # stage 2
|
|
17
17
|
|
|
18
|
-
- [-1, 1, DWConv, [512, 3, 2, 1, False]]
|
|
19
|
-
- [-1, 6, HGBlock, [192, 1024, 5, True, False]]
|
|
18
|
+
- [-1, 1, DWConv, [512, 3, 2, 1, False]] # 4-P3/16
|
|
19
|
+
- [-1, 6, HGBlock, [192, 1024, 5, True, False]] # cm, c2, k, light, shortcut
|
|
20
20
|
- [-1, 6, HGBlock, [192, 1024, 5, True, True]]
|
|
21
|
-
- [-1, 6, HGBlock, [192, 1024, 5, True, True]]
|
|
21
|
+
- [-1, 6, HGBlock, [192, 1024, 5, True, True]] # stage 3
|
|
22
22
|
|
|
23
|
-
- [-1, 1, DWConv, [1024, 3, 2, 1, False]]
|
|
24
|
-
- [-1, 6, HGBlock, [384, 2048, 5, True, False]]
|
|
23
|
+
- [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 8-P4/32
|
|
24
|
+
- [-1, 6, HGBlock, [384, 2048, 5, True, False]] # stage 4
|
|
25
25
|
|
|
26
26
|
head:
|
|
27
|
-
- [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]]
|
|
27
|
+
- [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 10 input_proj.2
|
|
28
28
|
- [-1, 1, AIFI, [1024, 8]]
|
|
29
|
-
- [-1, 1, Conv, [256, 1, 1]]
|
|
29
|
+
- [-1, 1, Conv, [256, 1, 1]] # 12, Y5, lateral_convs.0
|
|
30
30
|
|
|
31
|
-
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
|
|
32
|
-
- [7, 1, Conv, [256, 1, 1, None, 1, 1, False]]
|
|
31
|
+
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
|
|
32
|
+
- [7, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14 input_proj.1
|
|
33
33
|
- [[-2, -1], 1, Concat, [1]]
|
|
34
|
-
- [-1, 3, RepC3, [256]]
|
|
35
|
-
- [-1, 1, Conv, [256, 1, 1]]
|
|
34
|
+
- [-1, 3, RepC3, [256]] # 16, fpn_blocks.0
|
|
35
|
+
- [-1, 1, Conv, [256, 1, 1]] # 17, Y4, lateral_convs.1
|
|
36
36
|
|
|
37
|
-
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
|
|
38
|
-
- [3, 1, Conv, [256, 1, 1, None, 1, 1, False]]
|
|
39
|
-
- [[-2, -1], 1, Concat, [1]]
|
|
40
|
-
- [-1, 3, RepC3, [256]]
|
|
37
|
+
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
|
|
38
|
+
- [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 19 input_proj.0
|
|
39
|
+
- [[-2, -1], 1, Concat, [1]] # cat backbone P4
|
|
40
|
+
- [-1, 3, RepC3, [256]] # X3 (21), fpn_blocks.1
|
|
41
41
|
|
|
42
|
-
- [-1, 1, Conv, [256, 3, 2]]
|
|
43
|
-
- [[-1, 17], 1, Concat, [1]]
|
|
44
|
-
- [-1, 3, RepC3, [256]]
|
|
42
|
+
- [-1, 1, Conv, [256, 3, 2]] # 22, downsample_convs.0
|
|
43
|
+
- [[-1, 17], 1, Concat, [1]] # cat Y4
|
|
44
|
+
- [-1, 3, RepC3, [256]] # F4 (24), pan_blocks.0
|
|
45
45
|
|
|
46
|
-
- [-1, 1, Conv, [256, 3, 2]]
|
|
47
|
-
- [[-1, 12], 1, Concat, [1]]
|
|
48
|
-
- [-1, 3, RepC3, [256]]
|
|
46
|
+
- [-1, 1, Conv, [256, 3, 2]] # 25, downsample_convs.1
|
|
47
|
+
- [[-1, 12], 1, Concat, [1]] # cat Y5
|
|
48
|
+
- [-1, 3, RepC3, [256]] # F5 (27), pan_blocks.1
|
|
49
49
|
|
|
50
|
-
- [[21, 24, 27], 1, RTDETRDecoder, [nc]]
|
|
50
|
+
- [[21, 24, 27], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
|
|
@@ -2,41 +2,41 @@
|
|
|
2
2
|
# RT-DETR-ResNet101 object detection model with P3-P5 outputs.
|
|
3
3
|
|
|
4
4
|
# Parameters
|
|
5
|
-
nc: 80
|
|
5
|
+
nc: 80 # number of classes
|
|
6
6
|
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
|
|
7
7
|
# [depth, width, max_channels]
|
|
8
8
|
l: [1.00, 1.00, 1024]
|
|
9
9
|
|
|
10
10
|
backbone:
|
|
11
11
|
# [from, repeats, module, args]
|
|
12
|
-
- [-1, 1, ResNetLayer, [3, 64, 1, True, 1]]
|
|
13
|
-
- [-1, 1, ResNetLayer, [64, 64, 1, False, 3]]
|
|
14
|
-
- [-1, 1, ResNetLayer, [256, 128, 2, False, 4]]
|
|
15
|
-
- [-1, 1, ResNetLayer, [512, 256, 2, False, 23]]
|
|
16
|
-
- [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]]
|
|
12
|
+
- [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
|
|
13
|
+
- [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
|
|
14
|
+
- [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
|
|
15
|
+
- [-1, 1, ResNetLayer, [512, 256, 2, False, 23]] # 3
|
|
16
|
+
- [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4
|
|
17
17
|
|
|
18
18
|
head:
|
|
19
|
-
- [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]]
|
|
19
|
+
- [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
|
|
20
20
|
- [-1, 1, AIFI, [1024, 8]]
|
|
21
|
-
- [-1, 1, Conv, [256, 1, 1]]
|
|
21
|
+
- [-1, 1, Conv, [256, 1, 1]] # 7
|
|
22
22
|
|
|
23
|
-
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
|
|
24
|
-
- [3, 1, Conv, [256, 1, 1, None, 1, 1, False]]
|
|
23
|
+
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
|
|
24
|
+
- [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
|
|
25
25
|
- [[-2, -1], 1, Concat, [1]]
|
|
26
|
-
- [-1, 3, RepC3, [256]]
|
|
27
|
-
- [-1, 1, Conv, [256, 1, 1]]
|
|
26
|
+
- [-1, 3, RepC3, [256]] # 11
|
|
27
|
+
- [-1, 1, Conv, [256, 1, 1]] # 12
|
|
28
28
|
|
|
29
|
-
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
|
|
30
|
-
- [2, 1, Conv, [256, 1, 1, None, 1, 1, False]]
|
|
31
|
-
- [[-2, -1], 1, Concat, [1]]
|
|
32
|
-
- [-1, 3, RepC3, [256]]
|
|
29
|
+
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
|
|
30
|
+
- [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
|
|
31
|
+
- [[-2, -1], 1, Concat, [1]] # cat backbone P4
|
|
32
|
+
- [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1
|
|
33
33
|
|
|
34
|
-
- [-1, 1, Conv, [256, 3, 2]]
|
|
35
|
-
- [[-1, 12], 1, Concat, [1]]
|
|
36
|
-
- [-1, 3, RepC3, [256]]
|
|
34
|
+
- [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
|
|
35
|
+
- [[-1, 12], 1, Concat, [1]] # cat Y4
|
|
36
|
+
- [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0
|
|
37
37
|
|
|
38
|
-
- [-1, 1, Conv, [256, 3, 2]]
|
|
39
|
-
- [[-1, 7], 1, Concat, [1]]
|
|
40
|
-
- [-1, 3, RepC3, [256]]
|
|
38
|
+
- [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
|
|
39
|
+
- [[-1, 7], 1, Concat, [1]] # cat Y5
|
|
40
|
+
- [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1
|
|
41
41
|
|
|
42
|
-
- [[16, 19, 22], 1, RTDETRDecoder, [nc]]
|
|
42
|
+
- [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
|
|
@@ -2,41 +2,41 @@
|
|
|
2
2
|
# RT-DETR-ResNet50 object detection model with P3-P5 outputs.
|
|
3
3
|
|
|
4
4
|
# Parameters
|
|
5
|
-
nc: 80
|
|
5
|
+
nc: 80 # number of classes
|
|
6
6
|
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
|
|
7
7
|
# [depth, width, max_channels]
|
|
8
8
|
l: [1.00, 1.00, 1024]
|
|
9
9
|
|
|
10
10
|
backbone:
|
|
11
11
|
# [from, repeats, module, args]
|
|
12
|
-
- [-1, 1, ResNetLayer, [3, 64, 1, True, 1]]
|
|
13
|
-
- [-1, 1, ResNetLayer, [64, 64, 1, False, 3]]
|
|
14
|
-
- [-1, 1, ResNetLayer, [256, 128, 2, False, 4]]
|
|
15
|
-
- [-1, 1, ResNetLayer, [512, 256, 2, False, 6]]
|
|
16
|
-
- [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]]
|
|
12
|
+
- [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
|
|
13
|
+
- [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
|
|
14
|
+
- [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
|
|
15
|
+
- [-1, 1, ResNetLayer, [512, 256, 2, False, 6]] # 3
|
|
16
|
+
- [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4
|
|
17
17
|
|
|
18
18
|
head:
|
|
19
|
-
- [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]]
|
|
19
|
+
- [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
|
|
20
20
|
- [-1, 1, AIFI, [1024, 8]]
|
|
21
|
-
- [-1, 1, Conv, [256, 1, 1]]
|
|
21
|
+
- [-1, 1, Conv, [256, 1, 1]] # 7
|
|
22
22
|
|
|
23
|
-
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
|
|
24
|
-
- [3, 1, Conv, [256, 1, 1, None, 1, 1, False]]
|
|
23
|
+
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
|
|
24
|
+
- [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
|
|
25
25
|
- [[-2, -1], 1, Concat, [1]]
|
|
26
|
-
- [-1, 3, RepC3, [256]]
|
|
27
|
-
- [-1, 1, Conv, [256, 1, 1]]
|
|
26
|
+
- [-1, 3, RepC3, [256]] # 11
|
|
27
|
+
- [-1, 1, Conv, [256, 1, 1]] # 12
|
|
28
28
|
|
|
29
|
-
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
|
|
30
|
-
- [2, 1, Conv, [256, 1, 1, None, 1, 1, False]]
|
|
31
|
-
- [[-2, -1], 1, Concat, [1]]
|
|
32
|
-
- [-1, 3, RepC3, [256]]
|
|
29
|
+
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
|
|
30
|
+
- [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
|
|
31
|
+
- [[-2, -1], 1, Concat, [1]] # cat backbone P4
|
|
32
|
+
- [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1
|
|
33
33
|
|
|
34
|
-
- [-1, 1, Conv, [256, 3, 2]]
|
|
35
|
-
- [[-1, 12], 1, Concat, [1]]
|
|
36
|
-
- [-1, 3, RepC3, [256]]
|
|
34
|
+
- [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
|
|
35
|
+
- [[-1, 12], 1, Concat, [1]] # cat Y4
|
|
36
|
+
- [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0
|
|
37
37
|
|
|
38
|
-
- [-1, 1, Conv, [256, 3, 2]]
|
|
39
|
-
- [[-1, 7], 1, Concat, [1]]
|
|
40
|
-
- [-1, 3, RepC3, [256]]
|
|
38
|
+
- [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
|
|
39
|
+
- [[-1, 7], 1, Concat, [1]] # cat Y5
|
|
40
|
+
- [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1
|
|
41
41
|
|
|
42
|
-
- [[16, 19, 22], 1, RTDETRDecoder, [nc]]
|
|
42
|
+
- [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
|
|
@@ -2,53 +2,53 @@
|
|
|
2
2
|
# RT-DETR-x object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr
|
|
3
3
|
|
|
4
4
|
# Parameters
|
|
5
|
-
nc: 80
|
|
5
|
+
nc: 80 # number of classes
|
|
6
6
|
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
|
|
7
7
|
# [depth, width, max_channels]
|
|
8
8
|
x: [1.00, 1.00, 2048]
|
|
9
9
|
|
|
10
10
|
backbone:
|
|
11
11
|
# [from, repeats, module, args]
|
|
12
|
-
- [-1, 1, HGStem, [32, 64]]
|
|
13
|
-
- [-1, 6, HGBlock, [64, 128, 3]]
|
|
12
|
+
- [-1, 1, HGStem, [32, 64]] # 0-P2/4
|
|
13
|
+
- [-1, 6, HGBlock, [64, 128, 3]] # stage 1
|
|
14
14
|
|
|
15
|
-
- [-1, 1, DWConv, [128, 3, 2, 1, False]]
|
|
15
|
+
- [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
|
|
16
16
|
- [-1, 6, HGBlock, [128, 512, 3]]
|
|
17
|
-
- [-1, 6, HGBlock, [128, 512, 3, False, True]]
|
|
17
|
+
- [-1, 6, HGBlock, [128, 512, 3, False, True]] # 4-stage 2
|
|
18
18
|
|
|
19
|
-
- [-1, 1, DWConv, [512, 3, 2, 1, False]]
|
|
20
|
-
- [-1, 6, HGBlock, [256, 1024, 5, True, False]]
|
|
19
|
+
- [-1, 1, DWConv, [512, 3, 2, 1, False]] # 5-P3/16
|
|
20
|
+
- [-1, 6, HGBlock, [256, 1024, 5, True, False]] # cm, c2, k, light, shortcut
|
|
21
21
|
- [-1, 6, HGBlock, [256, 1024, 5, True, True]]
|
|
22
22
|
- [-1, 6, HGBlock, [256, 1024, 5, True, True]]
|
|
23
23
|
- [-1, 6, HGBlock, [256, 1024, 5, True, True]]
|
|
24
|
-
- [-1, 6, HGBlock, [256, 1024, 5, True, True]]
|
|
24
|
+
- [-1, 6, HGBlock, [256, 1024, 5, True, True]] # 10-stage 3
|
|
25
25
|
|
|
26
|
-
- [-1, 1, DWConv, [1024, 3, 2, 1, False]]
|
|
26
|
+
- [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 11-P4/32
|
|
27
27
|
- [-1, 6, HGBlock, [512, 2048, 5, True, False]]
|
|
28
|
-
- [-1, 6, HGBlock, [512, 2048, 5, True, True]]
|
|
28
|
+
- [-1, 6, HGBlock, [512, 2048, 5, True, True]] # 13-stage 4
|
|
29
29
|
|
|
30
30
|
head:
|
|
31
|
-
- [-1, 1, Conv, [384, 1, 1, None, 1, 1, False]]
|
|
31
|
+
- [-1, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 14 input_proj.2
|
|
32
32
|
- [-1, 1, AIFI, [2048, 8]]
|
|
33
|
-
- [-1, 1, Conv, [384, 1, 1]]
|
|
33
|
+
- [-1, 1, Conv, [384, 1, 1]] # 16, Y5, lateral_convs.0
|
|
34
34
|
|
|
35
|
-
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
|
|
36
|
-
- [10, 1, Conv, [384, 1, 1, None, 1, 1, False]]
|
|
35
|
+
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
|
|
36
|
+
- [10, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 18 input_proj.1
|
|
37
37
|
- [[-2, -1], 1, Concat, [1]]
|
|
38
|
-
- [-1, 3, RepC3, [384]]
|
|
39
|
-
- [-1, 1, Conv, [384, 1, 1]]
|
|
38
|
+
- [-1, 3, RepC3, [384]] # 20, fpn_blocks.0
|
|
39
|
+
- [-1, 1, Conv, [384, 1, 1]] # 21, Y4, lateral_convs.1
|
|
40
40
|
|
|
41
|
-
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
|
|
42
|
-
- [4, 1, Conv, [384, 1, 1, None, 1, 1, False]]
|
|
43
|
-
- [[-2, -1], 1, Concat, [1]]
|
|
44
|
-
- [-1, 3, RepC3, [384]]
|
|
41
|
+
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
|
|
42
|
+
- [4, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 23 input_proj.0
|
|
43
|
+
- [[-2, -1], 1, Concat, [1]] # cat backbone P4
|
|
44
|
+
- [-1, 3, RepC3, [384]] # X3 (25), fpn_blocks.1
|
|
45
45
|
|
|
46
|
-
- [-1, 1, Conv, [384, 3, 2]]
|
|
47
|
-
- [[-1, 21], 1, Concat, [1]]
|
|
48
|
-
- [-1, 3, RepC3, [384]]
|
|
46
|
+
- [-1, 1, Conv, [384, 3, 2]] # 26, downsample_convs.0
|
|
47
|
+
- [[-1, 21], 1, Concat, [1]] # cat Y4
|
|
48
|
+
- [-1, 3, RepC3, [384]] # F4 (28), pan_blocks.0
|
|
49
49
|
|
|
50
|
-
- [-1, 1, Conv, [384, 3, 2]]
|
|
51
|
-
- [[-1, 16], 1, Concat, [1]]
|
|
52
|
-
- [-1, 3, RepC3, [384]]
|
|
50
|
+
- [-1, 1, Conv, [384, 3, 2]] # 29, downsample_convs.1
|
|
51
|
+
- [[-1, 16], 1, Concat, [1]] # cat Y5
|
|
52
|
+
- [-1, 3, RepC3, [384]] # F5 (31), pan_blocks.1
|
|
53
53
|
|
|
54
|
-
- [[25, 28, 31], 1, RTDETRDecoder, [nc]]
|
|
54
|
+
- [[25, 28, 31], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
|
|
@@ -2,24 +2,24 @@
|
|
|
2
2
|
# YOLOv3-SPP object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3
|
|
3
3
|
|
|
4
4
|
# Parameters
|
|
5
|
-
nc: 80
|
|
6
|
-
depth_multiple: 1.0
|
|
7
|
-
width_multiple: 1.0
|
|
5
|
+
nc: 80 # number of classes
|
|
6
|
+
depth_multiple: 1.0 # model depth multiple
|
|
7
|
+
width_multiple: 1.0 # layer channel multiple
|
|
8
8
|
|
|
9
9
|
# darknet53 backbone
|
|
10
10
|
backbone:
|
|
11
11
|
# [from, number, module, args]
|
|
12
|
-
- [-1, 1, Conv, [32, 3, 1]]
|
|
13
|
-
- [-1, 1, Conv, [64, 3, 2]]
|
|
12
|
+
- [-1, 1, Conv, [32, 3, 1]] # 0
|
|
13
|
+
- [-1, 1, Conv, [64, 3, 2]] # 1-P1/2
|
|
14
14
|
- [-1, 1, Bottleneck, [64]]
|
|
15
|
-
- [-1, 1, Conv, [128, 3, 2]]
|
|
15
|
+
- [-1, 1, Conv, [128, 3, 2]] # 3-P2/4
|
|
16
16
|
- [-1, 2, Bottleneck, [128]]
|
|
17
|
-
- [-1, 1, Conv, [256, 3, 2]]
|
|
17
|
+
- [-1, 1, Conv, [256, 3, 2]] # 5-P3/8
|
|
18
18
|
- [-1, 8, Bottleneck, [256]]
|
|
19
|
-
- [-1, 1, Conv, [512, 3, 2]]
|
|
19
|
+
- [-1, 1, Conv, [512, 3, 2]] # 7-P4/16
|
|
20
20
|
- [-1, 8, Bottleneck, [512]]
|
|
21
|
-
- [-1, 1, Conv, [1024, 3, 2]]
|
|
22
|
-
- [-1, 4, Bottleneck, [1024]]
|
|
21
|
+
- [-1, 1, Conv, [1024, 3, 2]] # 9-P5/32
|
|
22
|
+
- [-1, 4, Bottleneck, [1024]] # 10
|
|
23
23
|
|
|
24
24
|
# YOLOv3-SPP head
|
|
25
25
|
head:
|
|
@@ -27,20 +27,20 @@ head:
|
|
|
27
27
|
- [-1, 1, SPP, [512, [5, 9, 13]]]
|
|
28
28
|
- [-1, 1, Conv, [1024, 3, 1]]
|
|
29
29
|
- [-1, 1, Conv, [512, 1, 1]]
|
|
30
|
-
- [-1, 1, Conv, [1024, 3, 1]]
|
|
30
|
+
- [-1, 1, Conv, [1024, 3, 1]] # 15 (P5/32-large)
|
|
31
31
|
|
|
32
32
|
- [-2, 1, Conv, [256, 1, 1]]
|
|
33
|
-
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
|
|
34
|
-
- [[-1, 8], 1, Concat, [1]]
|
|
33
|
+
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
|
|
34
|
+
- [[-1, 8], 1, Concat, [1]] # cat backbone P4
|
|
35
35
|
- [-1, 1, Bottleneck, [512, False]]
|
|
36
36
|
- [-1, 1, Bottleneck, [512, False]]
|
|
37
37
|
- [-1, 1, Conv, [256, 1, 1]]
|
|
38
|
-
- [-1, 1, Conv, [512, 3, 1]]
|
|
38
|
+
- [-1, 1, Conv, [512, 3, 1]] # 22 (P4/16-medium)
|
|
39
39
|
|
|
40
40
|
- [-2, 1, Conv, [128, 1, 1]]
|
|
41
|
-
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
|
|
42
|
-
- [[-1, 6], 1, Concat, [1]]
|
|
41
|
+
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
|
|
42
|
+
- [[-1, 6], 1, Concat, [1]] # cat backbone P3
|
|
43
43
|
- [-1, 1, Bottleneck, [256, False]]
|
|
44
|
-
- [-1, 2, Bottleneck, [256, False]]
|
|
44
|
+
- [-1, 2, Bottleneck, [256, False]] # 27 (P3/8-small)
|
|
45
45
|
|
|
46
|
-
- [[27, 22, 15], 1, Detect, [nc]]
|
|
46
|
+
- [[27, 22, 15], 1, Detect, [nc]] # Detect(P3, P4, P5)
|