ultralytics 8.0.196__py3-none-any.whl → 8.0.198__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ultralytics might be problematic.
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +4 -5
- ultralytics/data/augment.py +2 -2
- ultralytics/data/converter.py +12 -13
- ultralytics/data/dataset.py +1 -1
- ultralytics/engine/__init__.py +1 -0
- ultralytics/engine/exporter.py +1 -1
- ultralytics/engine/trainer.py +2 -1
- ultralytics/hub/session.py +1 -1
- ultralytics/models/fastsam/predict.py +33 -2
- ultralytics/models/fastsam/prompt.py +38 -1
- ultralytics/models/fastsam/utils.py +5 -5
- ultralytics/models/fastsam/val.py +27 -1
- ultralytics/models/nas/model.py +20 -0
- ultralytics/models/nas/predict.py +23 -0
- ultralytics/models/nas/val.py +24 -0
- ultralytics/models/rtdetr/val.py +17 -5
- ultralytics/models/sam/modules/decoders.py +26 -1
- ultralytics/models/sam/modules/encoders.py +31 -3
- ultralytics/models/sam/modules/sam.py +22 -7
- ultralytics/models/sam/modules/tiny_encoder.py +147 -45
- ultralytics/models/sam/modules/transformer.py +47 -2
- ultralytics/models/sam/predict.py +19 -2
- ultralytics/models/utils/loss.py +20 -2
- ultralytics/models/utils/ops.py +5 -5
- ultralytics/nn/modules/block.py +33 -10
- ultralytics/nn/modules/conv.py +16 -4
- ultralytics/nn/modules/head.py +48 -17
- ultralytics/nn/modules/transformer.py +2 -2
- ultralytics/nn/tasks.py +7 -7
- ultralytics/utils/__init__.py +2 -1
- ultralytics/utils/benchmarks.py +13 -0
- ultralytics/utils/callbacks/mlflow.py +76 -36
- ultralytics/utils/callbacks/wb.py +92 -1
- ultralytics/utils/checks.py +4 -4
- ultralytics/utils/errors.py +12 -0
- ultralytics/utils/files.py +1 -1
- ultralytics/utils/instance.py +41 -3
- ultralytics/utils/loss.py +22 -19
- ultralytics/utils/metrics.py +106 -24
- ultralytics/utils/tal.py +1 -1
- ultralytics/utils/torch_utils.py +4 -2
- ultralytics/utils/tuner.py +10 -4
- {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/METADATA +1 -1
- {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/RECORD +49 -49
- {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/LICENSE +0 -0
- {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/WHEEL +0 -0
- {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/top_level.txt +0 -0
ultralytics/__init__.py
CHANGED
ultralytics/cfg/__init__.py
CHANGED
@@ -7,9 +7,9 @@ from pathlib import Path
 from types import SimpleNamespace
 from typing import Dict, List, Union
 
-from ultralytics.utils import (ASSETS, DEFAULT_CFG, DEFAULT_CFG_DICT, DEFAULT_CFG_PATH, LOGGER, RANK, ROOT,
-                               SETTINGS_YAML, TESTS_RUNNING, IterableSimpleNamespace, __version__, checks,
-                               deprecation_warn, yaml_load, yaml_print)
+from ultralytics.utils import (ASSETS, DEFAULT_CFG, DEFAULT_CFG_DICT, DEFAULT_CFG_PATH, LOGGER, RANK, ROOT, RUNS_DIR,
+                               SETTINGS, SETTINGS_YAML, TESTS_RUNNING, IterableSimpleNamespace, __version__, checks,
+                               colorstr, deprecation_warn, yaml_load, yaml_print)
 
 # Define valid tasks and modes
 MODES = 'train', 'val', 'predict', 'export', 'track', 'benchmark'
@@ -153,8 +153,7 @@ def get_save_dir(args, name=None):
     else:
         from ultralytics.utils.files import increment_path
 
-        project = args.project or (ROOT /
-                                    '../tests/tmp/runs' if TESTS_RUNNING else Path(SETTINGS['runs_dir'])) / args.task
+        project = args.project or (ROOT.parent / 'tests/tmp/runs' if TESTS_RUNNING else RUNS_DIR) / args.task
         name = name or args.name or f'{args.mode}'
         save_dir = increment_path(Path(project) / name, exist_ok=args.exist_ok if RANK in (-1, 0) else True)
 
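The `get_save_dir` change above swaps the `Path(SETTINGS['runs_dir'])` lookup for the new `RUNS_DIR` constant and simplifies the test-run path. A minimal sketch of how the updated helper resolves an output directory; the argument values below are hypothetical and cover only the fields the function reads:

```python
from types import SimpleNamespace

from ultralytics.cfg import get_save_dir

# Hypothetical arguments; project=None means the base directory falls back to
# RUNS_DIR (or tests/tmp/runs when running under the test suite).
args = SimpleNamespace(save_dir=None, project=None, task='detect', mode='train', name=None, exist_ok=False)
save_dir = get_save_dir(args)  # e.g. <RUNS_DIR>/detect/train, incremented to train2, train3, ... if it exists
print(save_dir)
```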
ultralytics/data/augment.py
CHANGED
@@ -491,7 +491,7 @@ class RandomPerspective:
         border = labels.pop('mosaic_border', self.border)
         self.size = img.shape[1] + border[1] * 2, img.shape[0] + border[0] * 2  # w, h
         # M is affine matrix
-        #
+        # Scale for func:`box_candidates`
         img, M, scale = self.affine_transform(img, border)
 
         bboxes = self.apply_bboxes(instances.bboxes, M)
@@ -894,7 +894,7 @@ class Format:
         return labels
 
     def _format_img(self, img):
-        """Format the image for
+        """Format the image for YOLO from Numpy array to PyTorch tensor."""
         if len(img.shape) < 3:
             img = np.expand_dims(img, -1)
         img = np.ascontiguousarray(img.transpose(2, 0, 1)[::-1])
ultralytics/data/converter.py
CHANGED
@@ -1,14 +1,14 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
 import json
-import shutil
 from collections import defaultdict
 from pathlib import Path
 
 import cv2
 import numpy as np
 
-from ultralytics.utils import TQDM
+from ultralytics.utils import LOGGER, TQDM
+from ultralytics.utils.files import increment_path
 
 
 def coco91_to_coco80_class():
@@ -48,12 +48,12 @@ def coco80_to_coco91_class():
 
 
 def convert_coco(labels_dir='../coco/annotations/',
-                 save_dir='
+                 save_dir='coco_converted/',
                  use_segments=False,
                  use_keypoints=False,
                  cls91to80=True):
     """
-    Converts COCO dataset annotations to a format
+    Converts COCO dataset annotations to a YOLO annotation format suitable for training YOLO models.
 
     Args:
         labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
@@ -74,9 +74,7 @@ def convert_coco(labels_dir='../coco/annotations/',
     """
 
     # Create dataset directory
-    save_dir =
-    if save_dir.exists():
-        shutil.rmtree(save_dir)  # delete dir
+    save_dir = increment_path(save_dir)  # increment if save directory already exists
     for p in save_dir / 'labels', save_dir / 'images':
         p.mkdir(parents=True, exist_ok=True)  # make dir
 
@@ -147,6 +145,8 @@ def convert_coco(labels_dir='../coco/annotations/',
                              if use_segments and len(segments[i]) > 0 else bboxes[i]),  # cls, box or segments
                 file.write(('%g ' * len(line)).rstrip() % line + '\n')
 
+    LOGGER.info(f'COCO data converted successfully.\nResults saved to {save_dir.resolve()}')
+
 
 def convert_dota_to_yolo_obb(dota_root_path: str):
     """
@@ -271,26 +271,25 @@ def merge_multi_segment(segments):
     segments = [np.array(i).reshape(-1, 2) for i in segments]
     idx_list = [[] for _ in range(len(segments))]
 
-    #
+    # Record the indexes with min distance between each segment
    for i in range(1, len(segments)):
        idx1, idx2 = min_index(segments[i - 1], segments[i])
        idx_list[i - 1].append(idx1)
        idx_list[i].append(idx2)
 
-    #
+    # Use two round to connect all the segments
    for k in range(2):
-        #
+        # Forward connection
        if k == 0:
            for i, idx in enumerate(idx_list):
-                #
-                # reverse the index of middle segments
+                # Middle segments have two indexes, reverse the index of middle segments
                if len(idx) == 2 and idx[0] > idx[1]:
                    idx = idx[::-1]
                    segments[i] = segments[i][::-1, :]
 
                segments[i] = np.roll(segments[i], -idx[0], axis=0)
                segments[i] = np.concatenate([segments[i], segments[i][:1]])
-                #
+                # Deal with the first segment and the last one
                if i in [0, len(idx_list) - 1]:
                    s.append(segments[i])
                else:
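The `convert_coco` changes replace the destructive `shutil.rmtree` of an existing output directory with `increment_path`, give `save_dir` a `'coco_converted/'` default, and log a summary on completion. A usage sketch based on the signature shown in this diff; the annotation path below is a placeholder:

```python
from ultralytics.data.converter import convert_coco

# Placeholder path; point labels_dir at a folder of COCO-style *.json annotation files.
convert_coco(labels_dir='path/to/coco/annotations/', save_dir='coco_converted/', use_segments=True, cls91to80=True)
# If coco_converted/ already exists, output now goes to coco_converted2/, coco_converted3/, ...
# and a "COCO data converted successfully" message is logged with the resolved save directory.
```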
ultralytics/data/dataset.py
CHANGED
@@ -162,7 +162,7 @@ class YOLODataset(BaseDataset):
     def update_labels_info(self, label):
         """Custom your label format here."""
         # NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label
-        #
+        # We can make it also support classification and semantic segmentation by add or remove some dict keys there.
         bboxes = label.pop('bboxes')
         segments = label.pop('segments')
         keypoints = label.pop('keypoints', None)
ultralytics/engine/__init__.py
CHANGED
@@ -0,0 +1 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
ultralytics/engine/exporter.py
CHANGED
@@ -140,7 +140,7 @@ class Exporter:
         Args:
             cfg (str, optional): Path to a configuration file. Defaults to DEFAULT_CFG.
             overrides (dict, optional): Configuration overrides. Defaults to None.
-            _callbacks (
+            _callbacks (dict, optional): Dictionary of callback functions. Defaults to None.
         """
         self.args = get_cfg(cfg, overrides)
         if self.args.format.lower() in ('coreml', 'mlmodel'):  # fix attempt for protobuf<3.20.x errors
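The Exporter change here only completes the `_callbacks` docstring entry. For reference, the Exporter is normally driven through the model-level export API rather than instantiated directly; a short sketch, where the weights file name is just an example:

```python
from ultralytics import YOLO

model = YOLO('yolov8n.pt')          # example pretrained weights
path = model.export(format='onnx')  # runs Exporter under the hood; returns the exported file path
print(path)
```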
ultralytics/engine/trainer.py
CHANGED
@@ -91,6 +91,7 @@ class BaseTrainer:
 
         # Dirs
         self.save_dir = get_save_dir(self.args)
+        self.args.name = self.save_dir.name  # update name for loggers
         self.wdir = self.save_dir / 'weights'  # weights dir
         if RANK in (-1, 0):
             self.wdir.mkdir(parents=True, exist_ok=True)  # make dir
@@ -526,7 +527,7 @@ class BaseTrainer:
 
     # TODO: may need to put these following functions into callback
     def plot_training_samples(self, batch, ni):
-        """Plots training samples during
+        """Plots training samples during YOLO training."""
         pass
 
     def plot_training_labels(self):
ultralytics/hub/session.py
CHANGED
@@ -23,7 +23,7 @@ class HUBTrainingSession:
 
     Attributes:
         agent_id (str): Identifier for the instance communicating with the server.
-        model_id (str): Identifier for the
+        model_id (str): Identifier for the YOLO model being trained.
         model_url (str): URL for the model in Ultralytics HUB.
         api_url (str): API URL for the model in Ultralytics HUB.
         auth_header (dict): Authentication header for the Ultralytics HUB API requests.
ultralytics/models/fastsam/predict.py
CHANGED
@@ -9,14 +9,45 @@ from ultralytics.utils import DEFAULT_CFG, ops
 
 
 class FastSAMPredictor(DetectionPredictor):
+    """
+    FastSAMPredictor is specialized for fast SAM (Segment Anything Model) segmentation prediction tasks in Ultralytics
+    YOLO framework.
+
+    This class extends the DetectionPredictor, customizing the prediction pipeline specifically for fast SAM.
+    It adjusts post-processing steps to incorporate mask prediction and non-max suppression while optimizing
+    for single-class segmentation.
+
+    Attributes:
+        cfg (dict): Configuration parameters for prediction.
+        overrides (dict, optional): Optional parameter overrides for custom behavior.
+        _callbacks (dict, optional): Optional list of callback functions to be invoked during prediction.
+    """
 
     def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
-        """
+        """
+        Initializes the FastSAMPredictor class, inheriting from DetectionPredictor and setting the task to 'segment'.
+
+        Args:
+            cfg (dict): Configuration parameters for prediction.
+            overrides (dict, optional): Optional parameter overrides for custom behavior.
+            _callbacks (dict, optional): Optional list of callback functions to be invoked during prediction.
+        """
         super().__init__(cfg, overrides, _callbacks)
         self.args.task = 'segment'
 
     def postprocess(self, preds, img, orig_imgs):
-        """
+        """
+        Perform post-processing steps on predictions, including non-max suppression and scaling boxes to original image
+        size, and returns the final results.
+
+        Args:
+            preds (list): The raw output predictions from the model.
+            img (torch.Tensor): The processed image tensor.
+            orig_imgs (list | torch.Tensor): The original image or list of images.
+
+        Returns:
+            (list): A list of Results objects, each containing processed boxes, masks, and other metadata.
+        """
         p = ops.non_max_suppression(
             preds[0],
             self.args.conf,

ultralytics/models/fastsam/prompt.py
CHANGED
@@ -13,6 +13,15 @@ from ultralytics.utils import TQDM
 
 
 class FastSAMPrompt:
+    """
+    Fast Segment Anything Model class for image annotation and visualization.
+
+    Attributes:
+        device (str): Computing device ('cuda' or 'cpu').
+        results: Object detection or segmentation results.
+        source: Source image or image path.
+        clip: CLIP model for linear assignment.
+    """
 
     def __init__(self, source, results, device='cuda') -> None:
         """Initializes FastSAMPrompt with given source, results and device, and assigns clip for linear assignment."""
@@ -92,12 +101,26 @@ class FastSAMPrompt:
              better_quality=True,
              retina=False,
              with_contours=True):
+        """
+        Plots annotations, bounding boxes, and points on images and saves the output.
+
+        Args:
+            annotations (list): Annotations to be plotted.
+            output (str or Path): Output directory for saving the plots.
+            bbox (list, optional): Bounding box coordinates [x1, y1, x2, y2]. Defaults to None.
+            points (list, optional): Points to be plotted. Defaults to None.
+            point_label (list, optional): Labels for the points. Defaults to None.
+            mask_random_color (bool, optional): Whether to use random color for masks. Defaults to True.
+            better_quality (bool, optional): Whether to apply morphological transformations for better mask quality. Defaults to True.
+            retina (bool, optional): Whether to use retina mask. Defaults to False.
+            with_contours (bool, optional): Whether to plot contours. Defaults to True.
+        """
         pbar = TQDM(annotations, total=len(annotations))
         for ann in pbar:
             result_name = os.path.basename(ann.path)
             image = ann.orig_img[..., ::-1]  # BGR to RGB
             original_h, original_w = ann.orig_shape
-            #
+            # For macOS only
             # plt.switch_backend('TkAgg')
             plt.figure(figsize=(original_w / 100, original_h / 100))
             # Add subplot with no margin.
@@ -160,6 +183,20 @@ class FastSAMPrompt:
                          target_height=960,
                          target_width=960,
                          ):
+        """
+        Quickly shows the mask annotations on the given matplotlib axis.
+
+        Args:
+            annotation (array-like): Mask annotation.
+            ax (matplotlib.axes.Axes): Matplotlib axis.
+            random_color (bool, optional): Whether to use random color for masks. Defaults to False.
+            bbox (list, optional): Bounding box coordinates [x1, y1, x2, y2]. Defaults to None.
+            points (list, optional): Points to be plotted. Defaults to None.
+            pointlabel (list, optional): Labels for the points. Defaults to None.
+            retinamask (bool, optional): Whether to use retina mask. Defaults to True.
+            target_height (int, optional): Target height for resizing. Defaults to 960.
+            target_width (int, optional): Target width for resizing. Defaults to 960.
+        """
         n, h, w = annotation.shape  # batch, height, width
 
         areas = np.sum(annotation, axis=(1, 2))

ultralytics/models/fastsam/utils.py
CHANGED
@@ -42,23 +42,23 @@ def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False):
         high_iou_indices (torch.Tensor): Indices of boxes with IoU > thres
     """
     boxes = adjust_bboxes_to_image_border(boxes, image_shape)
-    #
+    # Obtain coordinates for intersections
     x1 = torch.max(box1[0], boxes[:, 0])
     y1 = torch.max(box1[1], boxes[:, 1])
     x2 = torch.min(box1[2], boxes[:, 2])
     y2 = torch.min(box1[3], boxes[:, 3])
 
-    #
+    # Compute the area of intersection
     intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
 
-    #
+    # Compute the area of both individual boxes
     box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
     box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
 
-    #
+    # Compute the area of union
     union = box1_area + box2_area - intersection
 
-    #
+    # Compute the IoU
     iou = intersection / union  # Should be shape (n, )
     if raw_output:
         return 0 if iou.numel() == 0 else iou

ultralytics/models/fastsam/val.py
CHANGED
@@ -5,9 +5,35 @@ from ultralytics.utils.metrics import SegmentMetrics
 
 
 class FastSAMValidator(SegmentationValidator):
+    """
+    Custom validation class for fast SAM (Segment Anything Model) segmentation in Ultralytics YOLO framework.
+
+    Extends the SegmentationValidator class, customizing the validation process specifically for fast SAM. This class
+    sets the task to 'segment' and uses the SegmentMetrics for evaluation. Additionally, plotting features are disabled
+    to avoid errors during validation.
+
+    Attributes:
+        dataloader: The data loader object used for validation.
+        save_dir (str): The directory where validation results will be saved.
+        pbar: A progress bar object.
+        args: Additional arguments for customization.
+        _callbacks: List of callback functions to be invoked during validation.
+    """
 
     def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
-        """
+        """
+        Initialize the FastSAMValidator class, setting the task to 'segment' and metrics to SegmentMetrics.
+
+        Args:
+            dataloader (torch.utils.data.DataLoader): Dataloader to be used for validation.
+            save_dir (Path, optional): Directory to save results.
+            pbar (tqdm.tqdm): Progress bar for displaying progress.
+            args (SimpleNamespace): Configuration for the validator.
+            _callbacks (dict): Dictionary to store various callback functions.
+
+        Notes:
+            Plots for ConfusionMatrix and other related metrics are disabled in this class to avoid errors.
+        """
         super().__init__(dataloader, save_dir, pbar, args, _callbacks)
         self.args.task = 'segment'
         self.args.plots = False  # disable ConfusionMatrix and other plots to avoid errors
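The FastSAM changes in this release are documentation-only (class and method docstrings plus comment text). For context, a typical end-to-end use of these classes looks roughly like the following sketch; the FastSAM-s.pt weights name and image path are assumptions, not part of this diff:

```python
from ultralytics import FastSAM
from ultralytics.models.fastsam import FastSAMPrompt

model = FastSAM('FastSAM-s.pt')  # assumed local weights file
everything = model('ultralytics/assets/bus.jpg', device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)

# FastSAMPrompt post-processes the "segment everything" results with optional prompts.
prompt = FastSAMPrompt('ultralytics/assets/bus.jpg', everything, device='cpu')
ann = prompt.everything_prompt()
prompt.plot(annotations=ann, output='./fastsam_output/')  # plot() signature as documented in the diff above
```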
ultralytics/models/nas/model.py
CHANGED
@@ -23,6 +23,26 @@ from .val import NASValidator
 
 
 class NAS(Model):
+    """
+    YOLO NAS model for object detection.
+
+    This class provides an interface for the YOLO-NAS models and extends the `Model` class from Ultralytics engine.
+    It is designed to facilitate the task of object detection using pre-trained or custom-trained YOLO-NAS models.
+
+    Example:
+        ```python
+        from ultralytics import NAS
+
+        model = NAS('yolo_nas_s')
+        results = model.predict('ultralytics/assets/bus.jpg')
+        ```
+
+    Attributes:
+        model (str): Path to the pre-trained model or model name. Defaults to 'yolo_nas_s.pt'.
+
+    Note:
+        YOLO-NAS models only support pre-trained models. Do not provide YAML configuration files.
+    """
 
     def __init__(self, model='yolo_nas_s.pt') -> None:
         """Initializes the NAS model with the provided or default 'yolo_nas_s.pt' model."""

ultralytics/models/nas/predict.py
CHANGED
@@ -8,6 +8,29 @@ from ultralytics.utils import ops
 
 
 class NASPredictor(BasePredictor):
+    """
+    Ultralytics YOLO NAS Predictor for object detection.
+
+    This class extends the `BasePredictor` from Ultralytics engine and is responsible for post-processing the
+    raw predictions generated by the YOLO NAS models. It applies operations like non-maximum suppression and
+    scaling the bounding boxes to fit the original image dimensions.
+
+    Attributes:
+        args (Namespace): Namespace containing various configurations for post-processing.
+
+    Example:
+        ```python
+        from ultralytics import NAS
+
+        model = NAS('yolo_nas_s')
+        predictor = model.predictor
+        # Assumes that raw_preds, img, orig_imgs are available
+        results = predictor.postprocess(raw_preds, img, orig_imgs)
+        ```
+
+    Note:
+        Typically, this class is not instantiated directly. It is used internally within the `NAS` class.
+    """
 
     def postprocess(self, preds_in, img, orig_imgs):
         """Postprocess predictions and returns a list of Results objects."""
ultralytics/models/nas/val.py
CHANGED
@@ -9,6 +9,30 @@ __all__ = ['NASValidator']
 
 
 class NASValidator(DetectionValidator):
+    """
+    Ultralytics YOLO NAS Validator for object detection.
+
+    Extends `DetectionValidator` from the Ultralytics models package and is designed to post-process the raw predictions
+    generated by YOLO NAS models. It performs non-maximum suppression to remove overlapping and low-confidence boxes,
+    ultimately producing the final detections.
+
+    Attributes:
+        args (Namespace): Namespace containing various configurations for post-processing, such as confidence and IoU thresholds.
+        lb (torch.Tensor): Optional tensor for multilabel NMS.
+
+    Example:
+        ```python
+        from ultralytics import NAS
+
+        model = NAS('yolo_nas_s')
+        validator = model.validator
+        # Assumes that raw_preds are available
+        final_preds = validator.postprocess(raw_preds)
+        ```
+
+    Note:
+        This class is generally not instantiated directly but is used internally within the `NAS` class.
+    """
 
     def postprocess(self, preds_in):
         """Apply Non-maximum suppression to prediction outputs."""
ultralytics/models/rtdetr/val.py
CHANGED
@@ -12,14 +12,19 @@ from ultralytics.utils import colorstr, ops
 __all__ = 'RTDETRValidator',  # tuple or list
 
 
-# TODO: Temporarily RT-DETR does not need padding.
 class RTDETRDataset(YOLODataset):
+    """
+    Real-Time DEtection and TRacking (RT-DETR) dataset class extending the base YOLODataset class.
+
+    This specialized dataset class is designed for use with the RT-DETR object detection model and is optimized for
+    real-time detection and tracking tasks.
+    """
 
     def __init__(self, *args, data=None, **kwargs):
         """Initialize the RTDETRDataset class by inheriting from the YOLODataset class."""
         super().__init__(*args, data=data, use_segments=False, use_keypoints=False, **kwargs)
 
-    # NOTE: add stretch version load_image for
+    # NOTE: add stretch version load_image for RTDETR mosaic
     def load_image(self, i, rect_mode=False):
         """Loads 1 image from dataset index 'i', returns (im, resized hw)."""
         return super().load_image(i=i, rect_mode=rect_mode)
@@ -46,7 +51,11 @@ class RTDETRDataset(YOLODataset):
 
 class RTDETRValidator(DetectionValidator):
     """
-
+    RTDETRValidator extends the DetectionValidator class to provide validation capabilities specifically tailored for
+    the RT-DETR (Real-Time DETR) object detection model.
+
+    The class allows building of an RTDETR-specific dataset for validation, applies Non-maximum suppression for
+    post-processing, and updates evaluation metrics accordingly.
 
     Example:
         ```python
@@ -56,6 +65,9 @@ class RTDETRValidator(DetectionValidator):
         validator = RTDETRValidator(args=args)
         validator()
         ```
+
+    Note:
+        For further details on the attributes and methods, refer to the parent DetectionValidator class.
     """
 
     def build_dataset(self, img_path, mode='val', batch=None):
@@ -87,10 +99,10 @@ class RTDETRValidator(DetectionValidator):
         for i, bbox in enumerate(bboxes):  # (300, 4)
             bbox = ops.xywh2xyxy(bbox)
             score, cls = scores[i].max(-1)  # (300, )
-            # Do not need threshold for evaluation as only got 300 boxes here
+            # Do not need threshold for evaluation as only got 300 boxes here
             # idx = score > self.args.conf
             pred = torch.cat([bbox, score[..., None], cls[..., None]], dim=-1)  # filter
-            #
+            # Sort by confidence to correctly get internal metrics
             pred = pred[score.argsort(descending=True)]
             outputs[i] = pred  # [idx]

ultralytics/models/sam/modules/decoders.py
CHANGED
@@ -10,6 +10,21 @@ from ultralytics.nn.modules import LayerNorm2d
 
 
 class MaskDecoder(nn.Module):
+    """
+    Decoder module for generating masks and their associated quality scores, using a transformer architecture to predict
+    masks given image and prompt embeddings.
+
+    Attributes:
+        transformer_dim (int): Channel dimension for the transformer module.
+        transformer (nn.Module): The transformer module used for mask prediction.
+        num_multimask_outputs (int): Number of masks to predict for disambiguating masks.
+        iou_token (nn.Embedding): Embedding for the IoU token.
+        num_mask_tokens (int): Number of mask tokens.
+        mask_tokens (nn.Embedding): Embedding for the mask tokens.
+        output_upscaling (nn.Sequential): Neural network sequence for upscaling the output.
+        output_hypernetworks_mlps (nn.ModuleList): Hypernetwork MLPs for generating masks.
+        iou_prediction_head (nn.Module): MLP for predicting mask quality.
+    """
 
     def __init__(
         self,
@@ -136,7 +151,7 @@ class MaskDecoder(nn.Module):
 
 class MLP(nn.Module):
     """
-
+    MLP (Multi-Layer Perceptron) model lightly adapted from
     https://github.com/facebookresearch/MaskFormer/blob/main/mask_former/modeling/transformer/transformer_predictor.py
     """
 
@@ -148,6 +163,16 @@ class MLP(nn.Module):
         num_layers: int,
         sigmoid_output: bool = False,
     ) -> None:
+        """
+        Initializes the MLP (Multi-Layer Perceptron) model.
+
+        Args:
+            input_dim (int): The dimensionality of the input features.
+            hidden_dim (int): The dimensionality of the hidden layers.
+            output_dim (int): The dimensionality of the output layer.
+            num_layers (int): The number of hidden layers.
+            sigmoid_output (bool, optional): Whether to apply a sigmoid activation to the output layer. Defaults to False.
+        """
         super().__init__()
         self.num_layers = num_layers
         h = [hidden_dim] * (num_layers - 1)

ultralytics/models/sam/modules/encoders.py
CHANGED
@@ -12,6 +12,18 @@ from ultralytics.nn.modules import LayerNorm2d, MLPBlock
 
 # This class and its supporting functions below lightly adapted from the ViTDet backbone available at: https://github.com/facebookresearch/detectron2/blob/main/detectron2/modeling/backbone/vit.py # noqa
 class ImageEncoderViT(nn.Module):
+    """
+    An image encoder using Vision Transformer (ViT) architecture for encoding an image into a compact latent space. The
+    encoder takes an image, splits it into patches, and processes these patches through a series of transformer blocks.
+    The encoded patches are then processed through a neck to generate the final encoded representation.
+
+    Attributes:
+        img_size (int): Dimension of input images, assumed to be square.
+        patch_embed (PatchEmbed): Module for patch embedding.
+        pos_embed (nn.Parameter, optional): Absolute positional embedding for patches.
+        blocks (nn.ModuleList): List of transformer blocks for processing patch embeddings.
+        neck (nn.Sequential): Neck module to further process the output.
+    """
 
     def __init__(
         self,
@@ -112,6 +124,22 @@ class ImageEncoderViT(nn.Module):
 
 
 class PromptEncoder(nn.Module):
+    """
+    Encodes different types of prompts, including points, boxes, and masks, for input to SAM's mask decoder. The encoder
+    produces both sparse and dense embeddings for the input prompts.
+
+    Attributes:
+        embed_dim (int): Dimension of the embeddings.
+        input_image_size (Tuple[int, int]): Size of the input image as (H, W).
+        image_embedding_size (Tuple[int, int]): Spatial size of the image embedding as (H, W).
+        pe_layer (PositionEmbeddingRandom): Module for random position embedding.
+        num_point_embeddings (int): Number of point embeddings for different types of points.
+        point_embeddings (nn.ModuleList): List of point embeddings.
+        not_a_point_embed (nn.Embedding): Embedding for points that are not a part of any label.
+        mask_input_size (Tuple[int, int]): Size of the input mask.
+        mask_downscaling (nn.Sequential): Neural network for downscaling the mask.
+        no_mask_embed (nn.Embedding): Embedding for cases where no mask is provided.
+    """
 
     def __init__(
         self,
@@ -276,11 +304,11 @@ class PositionEmbeddingRandom(nn.Module):
 
     def _pe_encoding(self, coords: torch.Tensor) -> torch.Tensor:
         """Positionally encode points that are normalized to [0,1]."""
-        #
+        # Assuming coords are in [0, 1]^2 square and have d_1 x ... x d_n x 2 shape
        coords = 2 * coords - 1
        coords = coords @ self.positional_encoding_gaussian_matrix
        coords = 2 * np.pi * coords
-        #
+        # Outputs d_1 x ... x d_n x C shape
        return torch.cat([torch.sin(coords), torch.cos(coords)], dim=-1)
 
     def forward(self, size: Tuple[int, int]) -> torch.Tensor:
@@ -401,7 +429,7 @@ class Attention(nn.Module):
         self.use_rel_pos = use_rel_pos
         if self.use_rel_pos:
             assert (input_size is not None), 'Input size must be provided if using relative positional encoding.'
-            #
+            # Initialize relative positional embeddings
             self.rel_pos_h = nn.Parameter(torch.zeros(2 * input_size[0] - 1, head_dim))
             self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim))
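The SAM module diffs above (decoders, encoders, and the remaining SAM files listed at the top) add docstrings rather than behavior. For orientation, these modules sit behind the high-level SAM interface, which is typically driven with point or box prompts; a brief sketch, where the sam_b.pt weights name and image path are assumptions:

```python
from ultralytics import SAM

model = SAM('sam_b.pt')  # assumed pretrained SAM weights
# Segment with a point prompt (x, y) labeled as foreground...
results = model('ultralytics/assets/bus.jpg', points=[900, 370], labels=[1])
# ...or with a box prompt [x1, y1, x2, y2].
results = model('ultralytics/assets/bus.jpg', bboxes=[439, 437, 524, 709])
```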