ultralytics-8.1.2-py3-none-any.whl → ultralytics-8.1.4-py3-none-any.whl

This diff compares the contents of two package versions as published to a supported public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.

Note: this version of ultralytics has been flagged as potentially problematic.

Files changed (64)
  1. ultralytics/__init__.py +15 -3
  2. ultralytics/cfg/datasets/Argoverse.yaml +7 -7
  3. ultralytics/cfg/datasets/DOTAv1.5.yaml +4 -4
  4. ultralytics/cfg/datasets/DOTAv1.yaml +4 -4
  5. ultralytics/cfg/datasets/GlobalWheat2020.yaml +1 -3
  6. ultralytics/cfg/datasets/ImageNet.yaml +4 -6
  7. ultralytics/cfg/datasets/Objects365.yaml +3 -5
  8. ultralytics/cfg/datasets/SKU-110K.yaml +4 -6
  9. ultralytics/cfg/datasets/VOC.yaml +0 -2
  10. ultralytics/cfg/datasets/VisDrone.yaml +4 -6
  11. ultralytics/cfg/datasets/coco-pose.yaml +5 -6
  12. ultralytics/cfg/datasets/coco.yaml +4 -6
  13. ultralytics/cfg/datasets/coco128-seg.yaml +4 -6
  14. ultralytics/cfg/datasets/coco128.yaml +4 -6
  15. ultralytics/cfg/datasets/coco8-pose.yaml +5 -6
  16. ultralytics/cfg/datasets/coco8-seg.yaml +4 -6
  17. ultralytics/cfg/datasets/coco8.yaml +4 -6
  18. ultralytics/cfg/datasets/dota8.yaml +3 -3
  19. ultralytics/cfg/datasets/open-images-v7.yaml +4 -6
  20. ultralytics/cfg/datasets/tiger-pose.yaml +4 -5
  21. ultralytics/cfg/datasets/xView.yaml +3 -5
  22. ultralytics/cfg/default.yaml +103 -103
  23. ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +27 -27
  24. ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +23 -23
  25. ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +23 -23
  26. ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +27 -27
  27. ultralytics/cfg/models/v3/yolov3-spp.yaml +18 -18
  28. ultralytics/cfg/models/v3/yolov3-tiny.yaml +16 -16
  29. ultralytics/cfg/models/v3/yolov3.yaml +18 -18
  30. ultralytics/cfg/models/v5/yolov5-p6.yaml +24 -24
  31. ultralytics/cfg/models/v5/yolov5.yaml +18 -19
  32. ultralytics/cfg/models/v6/yolov6.yaml +17 -17
  33. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +25 -0
  34. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +25 -0
  35. ultralytics/cfg/models/v8/yolov8-cls.yaml +7 -7
  36. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +26 -26
  37. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +27 -27
  38. ultralytics/cfg/models/v8/yolov8-ghost.yaml +23 -23
  39. ultralytics/cfg/models/v8/yolov8-obb.yaml +23 -23
  40. ultralytics/cfg/models/v8/yolov8-p2.yaml +23 -23
  41. ultralytics/cfg/models/v8/yolov8-p6.yaml +24 -24
  42. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +25 -25
  43. ultralytics/cfg/models/v8/yolov8-pose.yaml +19 -19
  44. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +23 -23
  45. ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +24 -24
  46. ultralytics/cfg/models/v8/yolov8-seg.yaml +18 -18
  47. ultralytics/cfg/models/v8/yolov8.yaml +23 -23
  48. ultralytics/cfg/trackers/botsort.yaml +7 -7
  49. ultralytics/cfg/trackers/bytetrack.yaml +6 -6
  50. ultralytics/data/build.py +1 -1
  51. ultralytics/engine/model.py +11 -7
  52. ultralytics/engine/trainer.py +1 -4
  53. ultralytics/hub/session.py +1 -1
  54. ultralytics/nn/modules/head.py +1 -1
  55. ultralytics/nn/modules/transformer.py +3 -3
  56. ultralytics/utils/callbacks/tensorboard.py +38 -15
  57. ultralytics/utils/ops.py +2 -2
  58. ultralytics/utils/plotting.py +1 -1
  59. {ultralytics-8.1.2.dist-info → ultralytics-8.1.4.dist-info}/METADATA +2 -2
  60. {ultralytics-8.1.2.dist-info → ultralytics-8.1.4.dist-info}/RECORD +64 -62
  61. {ultralytics-8.1.2.dist-info → ultralytics-8.1.4.dist-info}/LICENSE +0 -0
  62. {ultralytics-8.1.2.dist-info → ultralytics-8.1.4.dist-info}/WHEEL +0 -0
  63. {ultralytics-8.1.2.dist-info → ultralytics-8.1.4.dist-info}/entry_points.txt +0 -0
  64. {ultralytics-8.1.2.dist-info → ultralytics-8.1.4.dist-info}/top_level.txt +0 -0
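
Wheels are plain zip archives, so a report like this can be reproduced locally. Below is a minimal sketch, assuming both wheels have already been downloaded (e.g. via "pip download ultralytics==8.1.2 --no-deps"); the file names and the chosen archive member are illustrative:

    import difflib
    import zipfile

    OLD_WHEEL = "ultralytics-8.1.2-py3-none-any.whl"  # assumed local download
    NEW_WHEEL = "ultralytics-8.1.4-py3-none-any.whl"  # assumed local download
    MEMBER = "ultralytics/cfg/default.yaml"  # any file from the list above

    def read_member(wheel_path: str, member: str) -> list[str]:
        # A wheel is a zip archive, so zipfile can read any member directly.
        with zipfile.ZipFile(wheel_path) as zf:
            return zf.read(member).decode("utf-8").splitlines(keepends=True)

    # Unified diff of one member between the two releases.
    diff = difflib.unified_diff(
        read_member(OLD_WHEEL, MEMBER),
        read_member(NEW_WHEEL, MEMBER),
        fromfile="8.1.2/" + MEMBER,
        tofile="8.1.4/" + MEMBER,
    )
    print("".join(diff))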
The YAML hunks below are formatting-only: inline-comment spacing is normalized (two spaces before # become one) and single-quoted strings become double-quoted ('nearest' → "nearest"). No option names or values change.

ultralytics/cfg/default.yaml
@@ -1,125 +1,125 @@
  # Ultralytics YOLO 🚀, AGPL-3.0 license
  # Default training settings and hyperparameters for medium-augmentation COCO training

- task: detect  # (str) YOLO task, i.e. detect, segment, classify, pose
- mode: train  # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark
+ task: detect # (str) YOLO task, i.e. detect, segment, classify, pose
+ mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark

  # Train settings -------------------------------------------------------------------------------------------------------
- model:  # (str, optional) path to model file, i.e. yolov8n.pt, yolov8n.yaml
- data:  # (str, optional) path to data file, i.e. coco128.yaml
- epochs: 100  # (int) number of epochs to train for
- time:  # (float, optional) number of hours to train for, overrides epochs if supplied
- patience: 50  # (int) epochs to wait for no observable improvement for early stopping of training
- batch: 16  # (int) number of images per batch (-1 for AutoBatch)
- imgsz: 640  # (int | list) input images size as int for train and val modes, or list[w,h] for predict and export modes
- save: True  # (bool) save train checkpoints and predict results
+ model: # (str, optional) path to model file, i.e. yolov8n.pt, yolov8n.yaml
+ data: # (str, optional) path to data file, i.e. coco128.yaml
+ epochs: 100 # (int) number of epochs to train for
+ time: # (float, optional) number of hours to train for, overrides epochs if supplied
+ patience: 50 # (int) epochs to wait for no observable improvement for early stopping of training
+ batch: 16 # (int) number of images per batch (-1 for AutoBatch)
+ imgsz: 640 # (int | list) input images size as int for train and val modes, or list[w,h] for predict and export modes
+ save: True # (bool) save train checkpoints and predict results
  save_period: -1 # (int) Save checkpoint every x epochs (disabled if < 1)
- cache: False  # (bool) True/ram, disk or False. Use cache for data loading
- device:  # (int | str | list, optional) device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
- workers: 8  # (int) number of worker threads for data loading (per RANK if DDP)
- project:  # (str, optional) project name
- name:  # (str, optional) experiment name, results saved to 'project/name' directory
- exist_ok: False  # (bool) whether to overwrite existing experiment
- pretrained: True  # (bool | str) whether to use a pretrained model (bool) or a model to load weights from (str)
- optimizer: auto  # (str) optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto]
- verbose: True  # (bool) whether to print verbose output
- seed: 0  # (int) random seed for reproducibility
- deterministic: True  # (bool) whether to enable deterministic mode
- single_cls: False  # (bool) train multi-class data as single-class
- rect: False  # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
- cos_lr: False  # (bool) use cosine learning rate scheduler
- close_mosaic: 10  # (int) disable mosaic augmentation for final epochs (0 to disable)
- resume: False  # (bool) resume training from last checkpoint
- amp: True  # (bool) Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
- fraction: 1.0  # (float) dataset fraction to train on (default is 1.0, all images in train set)
- profile: False  # (bool) profile ONNX and TensorRT speeds during training for loggers
- freeze: None  # (int | list, optional) freeze first n layers, or freeze list of layer indices during training
- multi_scale: False  # (bool) Whether to use multi-scale during training
+ cache: False # (bool) True/ram, disk or False. Use cache for data loading
+ device: # (int | str | list, optional) device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
+ workers: 8 # (int) number of worker threads for data loading (per RANK if DDP)
+ project: # (str, optional) project name
+ name: # (str, optional) experiment name, results saved to 'project/name' directory
+ exist_ok: False # (bool) whether to overwrite existing experiment
+ pretrained: True # (bool | str) whether to use a pretrained model (bool) or a model to load weights from (str)
+ optimizer: auto # (str) optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto]
+ verbose: True # (bool) whether to print verbose output
+ seed: 0 # (int) random seed for reproducibility
+ deterministic: True # (bool) whether to enable deterministic mode
+ single_cls: False # (bool) train multi-class data as single-class
+ rect: False # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
+ cos_lr: False # (bool) use cosine learning rate scheduler
+ close_mosaic: 10 # (int) disable mosaic augmentation for final epochs (0 to disable)
+ resume: False # (bool) resume training from last checkpoint
+ amp: True # (bool) Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
+ fraction: 1.0 # (float) dataset fraction to train on (default is 1.0, all images in train set)
+ profile: False # (bool) profile ONNX and TensorRT speeds during training for loggers
+ freeze: None # (int | list, optional) freeze first n layers, or freeze list of layer indices during training
+ multi_scale: False # (bool) Whether to use multi-scale during training
  # Segmentation
- overlap_mask: True  # (bool) masks should overlap during training (segment train only)
- mask_ratio: 4  # (int) mask downsample ratio (segment train only)
+ overlap_mask: True # (bool) masks should overlap during training (segment train only)
+ mask_ratio: 4 # (int) mask downsample ratio (segment train only)
  # Classification
- dropout: 0.0  # (float) use dropout regularization (classify train only)
+ dropout: 0.0 # (float) use dropout regularization (classify train only)

  # Val/Test settings ----------------------------------------------------------------------------------------------------
- val: True  # (bool) validate/test during training
- split: val  # (str) dataset split to use for validation, i.e. 'val', 'test' or 'train'
- save_json: False  # (bool) save results to JSON file
- save_hybrid: False  # (bool) save hybrid version of labels (labels + additional predictions)
- conf:  # (float, optional) object confidence threshold for detection (default 0.25 predict, 0.001 val)
- iou: 0.7  # (float) intersection over union (IoU) threshold for NMS
- max_det: 300  # (int) maximum number of detections per image
- half: False  # (bool) use half precision (FP16)
- dnn: False  # (bool) use OpenCV DNN for ONNX inference
- plots: True  # (bool) save plots and images during train/val
+ val: True # (bool) validate/test during training
+ split: val # (str) dataset split to use for validation, i.e. 'val', 'test' or 'train'
+ save_json: False # (bool) save results to JSON file
+ save_hybrid: False # (bool) save hybrid version of labels (labels + additional predictions)
+ conf: # (float, optional) object confidence threshold for detection (default 0.25 predict, 0.001 val)
+ iou: 0.7 # (float) intersection over union (IoU) threshold for NMS
+ max_det: 300 # (int) maximum number of detections per image
+ half: False # (bool) use half precision (FP16)
+ dnn: False # (bool) use OpenCV DNN for ONNX inference
+ plots: True # (bool) save plots and images during train/val

  # Predict settings -----------------------------------------------------------------------------------------------------
- source:  # (str, optional) source directory for images or videos
- vid_stride: 1  # (int) video frame-rate stride
- stream_buffer: False  # (bool) buffer all streaming frames (True) or return the most recent frame (False)
- visualize: False  # (bool) visualize model features
- augment: False  # (bool) apply image augmentation to prediction sources
- agnostic_nms: False  # (bool) class-agnostic NMS
- classes:  # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3]
- retina_masks: False  # (bool) use high-resolution segmentation masks
- embed:  # (list[int], optional) return feature vectors/embeddings from given layers
+ source: # (str, optional) source directory for images or videos
+ vid_stride: 1 # (int) video frame-rate stride
+ stream_buffer: False # (bool) buffer all streaming frames (True) or return the most recent frame (False)
+ visualize: False # (bool) visualize model features
+ augment: False # (bool) apply image augmentation to prediction sources
+ agnostic_nms: False # (bool) class-agnostic NMS
+ classes: # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3]
+ retina_masks: False # (bool) use high-resolution segmentation masks
+ embed: # (list[int], optional) return feature vectors/embeddings from given layers

  # Visualize settings ---------------------------------------------------------------------------------------------------
- show: False  # (bool) show predicted images and videos if environment allows
- save_frames: False  # (bool) save predicted individual video frames
- save_txt: False  # (bool) save results as .txt file
- save_conf: False  # (bool) save results with confidence scores
- save_crop: False  # (bool) save cropped images with results
- show_labels: True  # (bool) show prediction labels, i.e. 'person'
- show_conf: True  # (bool) show prediction confidence, i.e. '0.99'
- show_boxes: True  # (bool) show prediction boxes
- line_width:  # (int, optional) line width of the bounding boxes. Scaled to image size if None.
+ show: False # (bool) show predicted images and videos if environment allows
+ save_frames: False # (bool) save predicted individual video frames
+ save_txt: False # (bool) save results as .txt file
+ save_conf: False # (bool) save results with confidence scores
+ save_crop: False # (bool) save cropped images with results
+ show_labels: True # (bool) show prediction labels, i.e. 'person'
+ show_conf: True # (bool) show prediction confidence, i.e. '0.99'
+ show_boxes: True # (bool) show prediction boxes
+ line_width: # (int, optional) line width of the bounding boxes. Scaled to image size if None.

  # Export settings ------------------------------------------------------------------------------------------------------
- format: torchscript  # (str) format to export to, choices at https://docs.ultralytics.com/modes/export/#export-formats
- keras: False  # (bool) use Kera=s
- optimize: False  # (bool) TorchScript: optimize for mobile
- int8: False  # (bool) CoreML/TF INT8 quantization
- dynamic: False  # (bool) ONNX/TF/TensorRT: dynamic axes
- simplify: False  # (bool) ONNX: simplify model
- opset:  # (int, optional) ONNX: opset version
- workspace: 4  # (int) TensorRT: workspace size (GB)
- nms: False  # (bool) CoreML: add NMS
+ format: torchscript # (str) format to export to, choices at https://docs.ultralytics.com/modes/export/#export-formats
+ keras: False # (bool) use Kera=s
+ optimize: False # (bool) TorchScript: optimize for mobile
+ int8: False # (bool) CoreML/TF INT8 quantization
+ dynamic: False # (bool) ONNX/TF/TensorRT: dynamic axes
+ simplify: False # (bool) ONNX: simplify model
+ opset: # (int, optional) ONNX: opset version
+ workspace: 4 # (int) TensorRT: workspace size (GB)
+ nms: False # (bool) CoreML: add NMS

  # Hyperparameters ------------------------------------------------------------------------------------------------------
- lr0: 0.01  # (float) initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
- lrf: 0.01  # (float) final learning rate (lr0 * lrf)
- momentum: 0.937  # (float) SGD momentum/Adam beta1
- weight_decay: 0.0005  # (float) optimizer weight decay 5e-4
- warmup_epochs: 3.0  # (float) warmup epochs (fractions ok)
- warmup_momentum: 0.8  # (float) warmup initial momentum
- warmup_bias_lr: 0.1  # (float) warmup initial bias lr
- box: 7.5  # (float) box loss gain
- cls: 0.5  # (float) cls loss gain (scale with pixels)
- dfl: 1.5  # (float) dfl loss gain
- pose: 12.0  # (float) pose loss gain
- kobj: 1.0  # (float) keypoint obj loss gain
- label_smoothing: 0.0  # (float) label smoothing (fraction)
- nbs: 64  # (int) nominal batch size
- hsv_h: 0.015  # (float) image HSV-Hue augmentation (fraction)
- hsv_s: 0.7  # (float) image HSV-Saturation augmentation (fraction)
- hsv_v: 0.4  # (float) image HSV-Value augmentation (fraction)
- degrees: 0.0  # (float) image rotation (+/- deg)
- translate: 0.1  # (float) image translation (+/- fraction)
- scale: 0.5  # (float) image scale (+/- gain)
- shear: 0.0  # (float) image shear (+/- deg)
- perspective: 0.0  # (float) image perspective (+/- fraction), range 0-0.001
- flipud: 0.0  # (float) image flip up-down (probability)
- fliplr: 0.5  # (float) image flip left-right (probability)
- mosaic: 1.0  # (float) image mosaic (probability)
- mixup: 0.0  # (float) image mixup (probability)
- copy_paste: 0.0  # (float) segment copy-paste (probability)
- auto_augment: randaugment  # (str) auto augmentation policy for classification (randaugment, autoaugment, augmix)
- erasing: 0.4  # (float) probability of random erasing during classification training (0-1)
- crop_fraction: 1.0  # (float) image crop fraction for classification evaluation/inference (0-1)
+ lr0: 0.01 # (float) initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
+ lrf: 0.01 # (float) final learning rate (lr0 * lrf)
+ momentum: 0.937 # (float) SGD momentum/Adam beta1
+ weight_decay: 0.0005 # (float) optimizer weight decay 5e-4
+ warmup_epochs: 3.0 # (float) warmup epochs (fractions ok)
+ warmup_momentum: 0.8 # (float) warmup initial momentum
+ warmup_bias_lr: 0.1 # (float) warmup initial bias lr
+ box: 7.5 # (float) box loss gain
+ cls: 0.5 # (float) cls loss gain (scale with pixels)
+ dfl: 1.5 # (float) dfl loss gain
+ pose: 12.0 # (float) pose loss gain
+ kobj: 1.0 # (float) keypoint obj loss gain
+ label_smoothing: 0.0 # (float) label smoothing (fraction)
+ nbs: 64 # (int) nominal batch size
+ hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction)
+ hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction)
+ hsv_v: 0.4 # (float) image HSV-Value augmentation (fraction)
+ degrees: 0.0 # (float) image rotation (+/- deg)
+ translate: 0.1 # (float) image translation (+/- fraction)
+ scale: 0.5 # (float) image scale (+/- gain)
+ shear: 0.0 # (float) image shear (+/- deg)
+ perspective: 0.0 # (float) image perspective (+/- fraction), range 0-0.001
+ flipud: 0.0 # (float) image flip up-down (probability)
+ fliplr: 0.5 # (float) image flip left-right (probability)
+ mosaic: 1.0 # (float) image mosaic (probability)
+ mixup: 0.0 # (float) image mixup (probability)
+ copy_paste: 0.0 # (float) segment copy-paste (probability)
+ auto_augment: randaugment # (str) auto augmentation policy for classification (randaugment, autoaugment, augmix)
+ erasing: 0.4 # (float) probability of random erasing during classification training (0-1)
+ crop_fraction: 1.0 # (float) image crop fraction for classification evaluation/inference (0-1)

  # Custom config.yaml ---------------------------------------------------------------------------------------------------
- cfg:  # (str, optional) for overriding defaults.yaml
+ cfg: # (str, optional) for overriding defaults.yaml

  # Tracker settings ------------------------------------------------------------------------------------------------------
- tracker: botsort.yaml  # (str) tracker type, choices=[botsort.yaml, bytetrack.yaml]
+ tracker: botsort.yaml # (str) tracker type, choices=[botsort.yaml, bytetrack.yaml]
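
Every key above is an overridable default rather than a fixed value: the trainer reads default.yaml and then applies whatever keyword arguments the caller passes. A brief sketch with the standard ultralytics Python API (the model and dataset names are just examples; the keyword values mirror the defaults shown above):

    from ultralytics import YOLO

    model = YOLO("yolov8n.pt")  # example model; a .pt or .yaml path works here
    model.train(
        data="coco128.yaml",  # 'data' has no default, so it must be supplied
        epochs=100,  # same as the 'epochs: 100' default above
        imgsz=640,  # same as the 'imgsz: 640' default above
        optimizer="auto",  # same as the 'optimizer: auto' default above
    )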
ultralytics/cfg/models/rt-detr/rtdetr-l.yaml
@@ -2,49 +2,49 @@
  # RT-DETR-l object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr

  # Parameters
- nc: 80  # number of classes
+ nc: 80 # number of classes
  scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  l: [1.00, 1.00, 1024]

  backbone:
  # [from, repeats, module, args]
- - [-1, 1, HGStem, [32, 48]]  # 0-P2/4
- - [-1, 6, HGBlock, [48, 128, 3]]  # stage 1
+ - [-1, 1, HGStem, [32, 48]] # 0-P2/4
+ - [-1, 6, HGBlock, [48, 128, 3]] # stage 1

- - [-1, 1, DWConv, [128, 3, 2, 1, False]]  # 2-P3/8
- - [-1, 6, HGBlock, [96, 512, 3]]  # stage 2
+ - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
+ - [-1, 6, HGBlock, [96, 512, 3]] # stage 2

- - [-1, 1, DWConv, [512, 3, 2, 1, False]]  # 4-P3/16
- - [-1, 6, HGBlock, [192, 1024, 5, True, False]]  # cm, c2, k, light, shortcut
+ - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 4-P3/16
+ - [-1, 6, HGBlock, [192, 1024, 5, True, False]] # cm, c2, k, light, shortcut
  - [-1, 6, HGBlock, [192, 1024, 5, True, True]]
- - [-1, 6, HGBlock, [192, 1024, 5, True, True]]  # stage 3
+ - [-1, 6, HGBlock, [192, 1024, 5, True, True]] # stage 3

- - [-1, 1, DWConv, [1024, 3, 2, 1, False]]  # 8-P4/32
- - [-1, 6, HGBlock, [384, 2048, 5, True, False]]  # stage 4
+ - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 8-P4/32
+ - [-1, 6, HGBlock, [384, 2048, 5, True, False]] # stage 4

  head:
- - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]]  # 10 input_proj.2
+ - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 10 input_proj.2
  - [-1, 1, AIFI, [1024, 8]]
- - [-1, 1, Conv, [256, 1, 1]]  # 12, Y5, lateral_convs.0
+ - [-1, 1, Conv, [256, 1, 1]] # 12, Y5, lateral_convs.0

- - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- - [7, 1, Conv, [256, 1, 1, None, 1, 1, False]]  # 14 input_proj.1
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [7, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14 input_proj.1
  - [[-2, -1], 1, Concat, [1]]
- - [-1, 3, RepC3, [256]]  # 16, fpn_blocks.0
- - [-1, 1, Conv, [256, 1, 1]]  # 17, Y4, lateral_convs.1
+ - [-1, 3, RepC3, [256]] # 16, fpn_blocks.0
+ - [-1, 1, Conv, [256, 1, 1]] # 17, Y4, lateral_convs.1

- - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]]  # 19 input_proj.0
- - [[-2, -1], 1, Concat, [1]]  # cat backbone P4
- - [-1, 3, RepC3, [256]]  # X3 (21), fpn_blocks.1
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 19 input_proj.0
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
+ - [-1, 3, RepC3, [256]] # X3 (21), fpn_blocks.1

- - [-1, 1, Conv, [256, 3, 2]]  # 22, downsample_convs.0
- - [[-1, 17], 1, Concat, [1]]  # cat Y4
- - [-1, 3, RepC3, [256]]  # F4 (24), pan_blocks.0
+ - [-1, 1, Conv, [256, 3, 2]] # 22, downsample_convs.0
+ - [[-1, 17], 1, Concat, [1]] # cat Y4
+ - [-1, 3, RepC3, [256]] # F4 (24), pan_blocks.0

- - [-1, 1, Conv, [256, 3, 2]]  # 25, downsample_convs.1
- - [[-1, 12], 1, Concat, [1]]  # cat Y5
- - [-1, 3, RepC3, [256]]  # F5 (27), pan_blocks.1
+ - [-1, 1, Conv, [256, 3, 2]] # 25, downsample_convs.1
+ - [[-1, 12], 1, Concat, [1]] # cat Y5
+ - [-1, 3, RepC3, [256]] # F5 (27), pan_blocks.1

- - [[21, 24, 27], 1, RTDETRDecoder, [nc]]  # Detect(P3, P4, P5)
+ - [[21, 24, 27], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml
@@ -2,41 +2,41 @@
  # RT-DETR-ResNet101 object detection model with P3-P5 outputs.

  # Parameters
- nc: 80  # number of classes
+ nc: 80 # number of classes
  scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  l: [1.00, 1.00, 1024]

  backbone:
  # [from, repeats, module, args]
- - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]]  # 0
- - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]]  # 1
- - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]]  # 2
- - [-1, 1, ResNetLayer, [512, 256, 2, False, 23]]  # 3
- - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]]  # 4
+ - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
+ - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
+ - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
+ - [-1, 1, ResNetLayer, [512, 256, 2, False, 23]] # 3
+ - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4

  head:
- - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]]  # 5
+ - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
  - [-1, 1, AIFI, [1024, 8]]
- - [-1, 1, Conv, [256, 1, 1]]  # 7
+ - [-1, 1, Conv, [256, 1, 1]] # 7

- - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]]  # 9
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
  - [[-2, -1], 1, Concat, [1]]
- - [-1, 3, RepC3, [256]]  # 11
- - [-1, 1, Conv, [256, 1, 1]]  # 12
+ - [-1, 3, RepC3, [256]] # 11
+ - [-1, 1, Conv, [256, 1, 1]] # 12

- - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]]  # 14
- - [[-2, -1], 1, Concat, [1]]  # cat backbone P4
- - [-1, 3, RepC3, [256]]  # X3 (16), fpn_blocks.1
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
+ - [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1

- - [-1, 1, Conv, [256, 3, 2]]  # 17, downsample_convs.0
- - [[-1, 12], 1, Concat, [1]]  # cat Y4
- - [-1, 3, RepC3, [256]]  # F4 (19), pan_blocks.0
+ - [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
+ - [[-1, 12], 1, Concat, [1]] # cat Y4
+ - [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0

- - [-1, 1, Conv, [256, 3, 2]]  # 20, downsample_convs.1
- - [[-1, 7], 1, Concat, [1]]  # cat Y5
- - [-1, 3, RepC3, [256]]  # F5 (22), pan_blocks.1
+ - [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
+ - [[-1, 7], 1, Concat, [1]] # cat Y5
+ - [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1

- - [[16, 19, 22], 1, RTDETRDecoder, [nc]]  # Detect(P3, P4, P5)
+ - [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml
@@ -2,41 +2,41 @@
  # RT-DETR-ResNet50 object detection model with P3-P5 outputs.

  # Parameters
- nc: 80  # number of classes
+ nc: 80 # number of classes
  scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  l: [1.00, 1.00, 1024]

  backbone:
  # [from, repeats, module, args]
- - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]]  # 0
- - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]]  # 1
- - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]]  # 2
- - [-1, 1, ResNetLayer, [512, 256, 2, False, 6]]  # 3
- - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]]  # 4
+ - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
+ - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
+ - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
+ - [-1, 1, ResNetLayer, [512, 256, 2, False, 6]] # 3
+ - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4

  head:
- - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]]  # 5
+ - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
  - [-1, 1, AIFI, [1024, 8]]
- - [-1, 1, Conv, [256, 1, 1]]  # 7
+ - [-1, 1, Conv, [256, 1, 1]] # 7

- - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]]  # 9
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
  - [[-2, -1], 1, Concat, [1]]
- - [-1, 3, RepC3, [256]]  # 11
- - [-1, 1, Conv, [256, 1, 1]]  # 12
+ - [-1, 3, RepC3, [256]] # 11
+ - [-1, 1, Conv, [256, 1, 1]] # 12

- - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]]  # 14
- - [[-2, -1], 1, Concat, [1]]  # cat backbone P4
- - [-1, 3, RepC3, [256]]  # X3 (16), fpn_blocks.1
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
+ - [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1

- - [-1, 1, Conv, [256, 3, 2]]  # 17, downsample_convs.0
- - [[-1, 12], 1, Concat, [1]]  # cat Y4
- - [-1, 3, RepC3, [256]]  # F4 (19), pan_blocks.0
+ - [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
+ - [[-1, 12], 1, Concat, [1]] # cat Y4
+ - [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0

- - [-1, 1, Conv, [256, 3, 2]]  # 20, downsample_convs.1
- - [[-1, 7], 1, Concat, [1]]  # cat Y5
- - [-1, 3, RepC3, [256]]  # F5 (22), pan_blocks.1
+ - [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
+ - [[-1, 7], 1, Concat, [1]] # cat Y5
+ - [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1

- - [[16, 19, 22], 1, RTDETRDecoder, [nc]]  # Detect(P3, P4, P5)
+ - [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/rt-detr/rtdetr-x.yaml
@@ -2,53 +2,53 @@
  # RT-DETR-x object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr

  # Parameters
- nc: 80  # number of classes
+ nc: 80 # number of classes
  scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  x: [1.00, 1.00, 2048]

  backbone:
  # [from, repeats, module, args]
- - [-1, 1, HGStem, [32, 64]]  # 0-P2/4
- - [-1, 6, HGBlock, [64, 128, 3]]  # stage 1
+ - [-1, 1, HGStem, [32, 64]] # 0-P2/4
+ - [-1, 6, HGBlock, [64, 128, 3]] # stage 1

- - [-1, 1, DWConv, [128, 3, 2, 1, False]]  # 2-P3/8
+ - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
  - [-1, 6, HGBlock, [128, 512, 3]]
- - [-1, 6, HGBlock, [128, 512, 3, False, True]]  # 4-stage 2
+ - [-1, 6, HGBlock, [128, 512, 3, False, True]] # 4-stage 2

- - [-1, 1, DWConv, [512, 3, 2, 1, False]]  # 5-P3/16
- - [-1, 6, HGBlock, [256, 1024, 5, True, False]]  # cm, c2, k, light, shortcut
+ - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 5-P3/16
+ - [-1, 6, HGBlock, [256, 1024, 5, True, False]] # cm, c2, k, light, shortcut
  - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
  - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
  - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
- - [-1, 6, HGBlock, [256, 1024, 5, True, True]]  # 10-stage 3
+ - [-1, 6, HGBlock, [256, 1024, 5, True, True]] # 10-stage 3

- - [-1, 1, DWConv, [1024, 3, 2, 1, False]]  # 11-P4/32
+ - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 11-P4/32
  - [-1, 6, HGBlock, [512, 2048, 5, True, False]]
- - [-1, 6, HGBlock, [512, 2048, 5, True, True]]  # 13-stage 4
+ - [-1, 6, HGBlock, [512, 2048, 5, True, True]] # 13-stage 4

  head:
- - [-1, 1, Conv, [384, 1, 1, None, 1, 1, False]]  # 14 input_proj.2
+ - [-1, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 14 input_proj.2
  - [-1, 1, AIFI, [2048, 8]]
- - [-1, 1, Conv, [384, 1, 1]]  # 16, Y5, lateral_convs.0
+ - [-1, 1, Conv, [384, 1, 1]] # 16, Y5, lateral_convs.0

- - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- - [10, 1, Conv, [384, 1, 1, None, 1, 1, False]]  # 18 input_proj.1
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [10, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 18 input_proj.1
  - [[-2, -1], 1, Concat, [1]]
- - [-1, 3, RepC3, [384]]  # 20, fpn_blocks.0
- - [-1, 1, Conv, [384, 1, 1]]  # 21, Y4, lateral_convs.1
+ - [-1, 3, RepC3, [384]] # 20, fpn_blocks.0
+ - [-1, 1, Conv, [384, 1, 1]] # 21, Y4, lateral_convs.1

- - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- - [4, 1, Conv, [384, 1, 1, None, 1, 1, False]]  # 23 input_proj.0
- - [[-2, -1], 1, Concat, [1]]  # cat backbone P4
- - [-1, 3, RepC3, [384]]  # X3 (25), fpn_blocks.1
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [4, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 23 input_proj.0
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
+ - [-1, 3, RepC3, [384]] # X3 (25), fpn_blocks.1

- - [-1, 1, Conv, [384, 3, 2]]  # 26, downsample_convs.0
- - [[-1, 21], 1, Concat, [1]]  # cat Y4
- - [-1, 3, RepC3, [384]]  # F4 (28), pan_blocks.0
+ - [-1, 1, Conv, [384, 3, 2]] # 26, downsample_convs.0
+ - [[-1, 21], 1, Concat, [1]] # cat Y4
+ - [-1, 3, RepC3, [384]] # F4 (28), pan_blocks.0

- - [-1, 1, Conv, [384, 3, 2]]  # 29, downsample_convs.1
- - [[-1, 16], 1, Concat, [1]]  # cat Y5
- - [-1, 3, RepC3, [384]]  # F5 (31), pan_blocks.1
+ - [-1, 1, Conv, [384, 3, 2]] # 29, downsample_convs.1
+ - [[-1, 16], 1, Concat, [1]] # cat Y5
+ - [-1, 3, RepC3, [384]] # F5 (31), pan_blocks.1

- - [[25, 28, 31], 1, RTDETRDecoder, [nc]]  # Detect(P3, P4, P5)
+ - [[25, 28, 31], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
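
All four RT-DETR configs above are loaded through the RTDETR class that the package exports: a .yaml path builds the architecture from the config, while a .pt path loads pretrained weights. A minimal sketch (the weight file name follows the project's usual naming and is an assumption here):

    from ultralytics import RTDETR

    # Build RT-DETR-l from the YAML above (random weights) and summarize it.
    model = RTDETR("rtdetr-l.yaml")
    model.info()

    # Or load pretrained weights and run inference on an example image:
    # model = RTDETR("rtdetr-l.pt")
    # results = model("https://ultralytics.com/images/bus.jpg")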
ultralytics/cfg/models/v3/yolov3-spp.yaml
@@ -2,24 +2,24 @@
  # YOLOv3-SPP object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3

  # Parameters
- nc: 80  # number of classes
- depth_multiple: 1.0  # model depth multiple
- width_multiple: 1.0  # layer channel multiple
+ nc: 80 # number of classes
+ depth_multiple: 1.0 # model depth multiple
+ width_multiple: 1.0 # layer channel multiple

  # darknet53 backbone
  backbone:
  # [from, number, module, args]
- - [-1, 1, Conv, [32, 3, 1]]  # 0
- - [-1, 1, Conv, [64, 3, 2]]  # 1-P1/2
+ - [-1, 1, Conv, [32, 3, 1]] # 0
+ - [-1, 1, Conv, [64, 3, 2]] # 1-P1/2
  - [-1, 1, Bottleneck, [64]]
- - [-1, 1, Conv, [128, 3, 2]]  # 3-P2/4
+ - [-1, 1, Conv, [128, 3, 2]] # 3-P2/4
  - [-1, 2, Bottleneck, [128]]
- - [-1, 1, Conv, [256, 3, 2]]  # 5-P3/8
+ - [-1, 1, Conv, [256, 3, 2]] # 5-P3/8
  - [-1, 8, Bottleneck, [256]]
- - [-1, 1, Conv, [512, 3, 2]]  # 7-P4/16
+ - [-1, 1, Conv, [512, 3, 2]] # 7-P4/16
  - [-1, 8, Bottleneck, [512]]
- - [-1, 1, Conv, [1024, 3, 2]]  # 9-P5/32
- - [-1, 4, Bottleneck, [1024]]  # 10
+ - [-1, 1, Conv, [1024, 3, 2]] # 9-P5/32
+ - [-1, 4, Bottleneck, [1024]] # 10

  # YOLOv3-SPP head
  head:
@@ -27,20 +27,20 @@ head:
  - [-1, 1, SPP, [512, [5, 9, 13]]]
  - [-1, 1, Conv, [1024, 3, 1]]
  - [-1, 1, Conv, [512, 1, 1]]
- - [-1, 1, Conv, [1024, 3, 1]]  # 15 (P5/32-large)
+ - [-1, 1, Conv, [1024, 3, 1]] # 15 (P5/32-large)

  - [-2, 1, Conv, [256, 1, 1]]
- - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- - [[-1, 8], 1, Concat, [1]]  # cat backbone P4
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P4
  - [-1, 1, Bottleneck, [512, False]]
  - [-1, 1, Bottleneck, [512, False]]
  - [-1, 1, Conv, [256, 1, 1]]
- - [-1, 1, Conv, [512, 3, 1]]  # 22 (P4/16-medium)
+ - [-1, 1, Conv, [512, 3, 1]] # 22 (P4/16-medium)

  - [-2, 1, Conv, [128, 1, 1]]
- - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- - [[-1, 6], 1, Concat, [1]]  # cat backbone P3
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P3
  - [-1, 1, Bottleneck, [256, False]]
- - [-1, 2, Bottleneck, [256, False]]  # 27 (P3/8-small)
+ - [-1, 2, Bottleneck, [256, False]] # 27 (P3/8-small)

- - [[27, 22, 15], 1, Detect, [nc]]  # Detect(P3, P4, P5)
+ - [[27, 22, 15], 1, Detect, [nc]] # Detect(P3, P4, P5)
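
The legacy YOLOv3 configs ship in the same package and load through the generic YOLO class, the same entry point used for the YOLOv8 configs. A short sketch:

    from ultralytics import YOLO

    # Assemble YOLOv3-SPP from the config diffed above and print a layer summary.
    model = YOLO("yolov3-spp.yaml")
    model.info()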