ultralytics 8.0.237__py3-none-any.whl → 8.0.239__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ultralytics might be problematic. Click here for more details.
- ultralytics/__init__.py +2 -2
- ultralytics/cfg/__init__.py +241 -138
- ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
- ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
- ultralytics/cfg/datasets/dota8.yaml +34 -0
- ultralytics/data/__init__.py +9 -2
- ultralytics/data/annotator.py +4 -4
- ultralytics/data/augment.py +186 -169
- ultralytics/data/base.py +54 -48
- ultralytics/data/build.py +34 -23
- ultralytics/data/converter.py +242 -70
- ultralytics/data/dataset.py +117 -95
- ultralytics/data/explorer/__init__.py +5 -0
- ultralytics/data/explorer/explorer.py +170 -97
- ultralytics/data/explorer/gui/__init__.py +1 -0
- ultralytics/data/explorer/gui/dash.py +146 -76
- ultralytics/data/explorer/utils.py +87 -25
- ultralytics/data/loaders.py +75 -62
- ultralytics/data/split_dota.py +44 -36
- ultralytics/data/utils.py +160 -142
- ultralytics/engine/exporter.py +348 -292
- ultralytics/engine/model.py +102 -66
- ultralytics/engine/predictor.py +74 -55
- ultralytics/engine/results.py +63 -40
- ultralytics/engine/trainer.py +192 -144
- ultralytics/engine/tuner.py +66 -59
- ultralytics/engine/validator.py +31 -26
- ultralytics/hub/__init__.py +54 -31
- ultralytics/hub/auth.py +28 -25
- ultralytics/hub/session.py +282 -133
- ultralytics/hub/utils.py +64 -42
- ultralytics/models/__init__.py +1 -1
- ultralytics/models/fastsam/__init__.py +1 -1
- ultralytics/models/fastsam/model.py +6 -6
- ultralytics/models/fastsam/predict.py +3 -2
- ultralytics/models/fastsam/prompt.py +55 -48
- ultralytics/models/fastsam/val.py +1 -1
- ultralytics/models/nas/__init__.py +1 -1
- ultralytics/models/nas/model.py +9 -8
- ultralytics/models/nas/predict.py +8 -6
- ultralytics/models/nas/val.py +11 -9
- ultralytics/models/rtdetr/__init__.py +1 -1
- ultralytics/models/rtdetr/model.py +11 -9
- ultralytics/models/rtdetr/train.py +18 -16
- ultralytics/models/rtdetr/val.py +25 -19
- ultralytics/models/sam/__init__.py +1 -1
- ultralytics/models/sam/amg.py +13 -14
- ultralytics/models/sam/build.py +44 -42
- ultralytics/models/sam/model.py +6 -6
- ultralytics/models/sam/modules/decoders.py +6 -4
- ultralytics/models/sam/modules/encoders.py +37 -35
- ultralytics/models/sam/modules/sam.py +5 -4
- ultralytics/models/sam/modules/tiny_encoder.py +95 -73
- ultralytics/models/sam/modules/transformer.py +3 -2
- ultralytics/models/sam/predict.py +39 -27
- ultralytics/models/utils/loss.py +99 -95
- ultralytics/models/utils/ops.py +34 -31
- ultralytics/models/yolo/__init__.py +1 -1
- ultralytics/models/yolo/classify/__init__.py +1 -1
- ultralytics/models/yolo/classify/predict.py +8 -6
- ultralytics/models/yolo/classify/train.py +37 -31
- ultralytics/models/yolo/classify/val.py +26 -24
- ultralytics/models/yolo/detect/__init__.py +1 -1
- ultralytics/models/yolo/detect/predict.py +8 -6
- ultralytics/models/yolo/detect/train.py +47 -37
- ultralytics/models/yolo/detect/val.py +100 -82
- ultralytics/models/yolo/model.py +31 -25
- ultralytics/models/yolo/obb/__init__.py +1 -1
- ultralytics/models/yolo/obb/predict.py +13 -12
- ultralytics/models/yolo/obb/train.py +3 -3
- ultralytics/models/yolo/obb/val.py +80 -58
- ultralytics/models/yolo/pose/__init__.py +1 -1
- ultralytics/models/yolo/pose/predict.py +17 -12
- ultralytics/models/yolo/pose/train.py +28 -25
- ultralytics/models/yolo/pose/val.py +91 -64
- ultralytics/models/yolo/segment/__init__.py +1 -1
- ultralytics/models/yolo/segment/predict.py +10 -8
- ultralytics/models/yolo/segment/train.py +16 -15
- ultralytics/models/yolo/segment/val.py +90 -68
- ultralytics/nn/__init__.py +26 -6
- ultralytics/nn/autobackend.py +144 -112
- ultralytics/nn/modules/__init__.py +96 -13
- ultralytics/nn/modules/block.py +28 -7
- ultralytics/nn/modules/conv.py +41 -23
- ultralytics/nn/modules/head.py +67 -59
- ultralytics/nn/modules/transformer.py +49 -32
- ultralytics/nn/modules/utils.py +20 -15
- ultralytics/nn/tasks.py +215 -141
- ultralytics/solutions/ai_gym.py +59 -47
- ultralytics/solutions/distance_calculation.py +22 -15
- ultralytics/solutions/heatmap.py +76 -54
- ultralytics/solutions/object_counter.py +46 -39
- ultralytics/solutions/speed_estimation.py +13 -16
- ultralytics/trackers/__init__.py +1 -1
- ultralytics/trackers/basetrack.py +1 -0
- ultralytics/trackers/bot_sort.py +2 -1
- ultralytics/trackers/byte_tracker.py +10 -7
- ultralytics/trackers/track.py +7 -7
- ultralytics/trackers/utils/gmc.py +25 -25
- ultralytics/trackers/utils/kalman_filter.py +85 -42
- ultralytics/trackers/utils/matching.py +8 -7
- ultralytics/utils/__init__.py +173 -151
- ultralytics/utils/autobatch.py +10 -10
- ultralytics/utils/benchmarks.py +76 -86
- ultralytics/utils/callbacks/__init__.py +1 -1
- ultralytics/utils/callbacks/base.py +29 -29
- ultralytics/utils/callbacks/clearml.py +51 -43
- ultralytics/utils/callbacks/comet.py +81 -66
- ultralytics/utils/callbacks/dvc.py +33 -26
- ultralytics/utils/callbacks/hub.py +44 -26
- ultralytics/utils/callbacks/mlflow.py +31 -24
- ultralytics/utils/callbacks/neptune.py +35 -25
- ultralytics/utils/callbacks/raytune.py +9 -4
- ultralytics/utils/callbacks/tensorboard.py +16 -11
- ultralytics/utils/callbacks/wb.py +39 -33
- ultralytics/utils/checks.py +189 -141
- ultralytics/utils/dist.py +15 -12
- ultralytics/utils/downloads.py +112 -96
- ultralytics/utils/errors.py +1 -1
- ultralytics/utils/files.py +11 -11
- ultralytics/utils/instance.py +22 -22
- ultralytics/utils/loss.py +117 -67
- ultralytics/utils/metrics.py +224 -158
- ultralytics/utils/ops.py +39 -29
- ultralytics/utils/patches.py +3 -3
- ultralytics/utils/plotting.py +217 -120
- ultralytics/utils/tal.py +19 -13
- ultralytics/utils/torch_utils.py +138 -109
- ultralytics/utils/triton.py +12 -10
- ultralytics/utils/tuner.py +49 -47
- {ultralytics-8.0.237.dist-info → ultralytics-8.0.239.dist-info}/METADATA +5 -4
- ultralytics-8.0.239.dist-info/RECORD +188 -0
- ultralytics-8.0.237.dist-info/RECORD +0 -187
- {ultralytics-8.0.237.dist-info → ultralytics-8.0.239.dist-info}/LICENSE +0 -0
- {ultralytics-8.0.237.dist-info → ultralytics-8.0.239.dist-info}/WHEEL +0 -0
- {ultralytics-8.0.237.dist-info → ultralytics-8.0.239.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.0.237.dist-info → ultralytics-8.0.239.dist-info}/top_level.txt +0 -0
ultralytics/utils/tal.py
CHANGED
|
@@ -7,7 +7,7 @@ from .checks import check_version
|
|
|
7
7
|
from .metrics import bbox_iou, probiou
|
|
8
8
|
from .ops import xywhr2xyxyxyxy
|
|
9
9
|
|
|
10
|
-
TORCH_1_10 = check_version(torch.__version__,
|
|
10
|
+
TORCH_1_10 = check_version(torch.__version__, "1.10.0")
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class TaskAlignedAssigner(nn.Module):
|
|
@@ -61,12 +61,17 @@ class TaskAlignedAssigner(nn.Module):
|
|
|
61
61
|
|
|
62
62
|
if self.n_max_boxes == 0:
|
|
63
63
|
device = gt_bboxes.device
|
|
64
|
-
return (
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
64
|
+
return (
|
|
65
|
+
torch.full_like(pd_scores[..., 0], self.bg_idx).to(device),
|
|
66
|
+
torch.zeros_like(pd_bboxes).to(device),
|
|
67
|
+
torch.zeros_like(pd_scores).to(device),
|
|
68
|
+
torch.zeros_like(pd_scores[..., 0]).to(device),
|
|
69
|
+
torch.zeros_like(pd_scores[..., 0]).to(device),
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
mask_pos, align_metric, overlaps = self.get_pos_mask(
|
|
73
|
+
pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt
|
|
74
|
+
)
|
|
70
75
|
|
|
71
76
|
target_gt_idx, fg_mask, mask_pos = self.select_highest_overlaps(mask_pos, overlaps, self.n_max_boxes)
|
|
72
77
|
|
|
@@ -148,7 +153,7 @@ class TaskAlignedAssigner(nn.Module):
|
|
|
148
153
|
ones = torch.ones_like(topk_idxs[:, :, :1], dtype=torch.int8, device=topk_idxs.device)
|
|
149
154
|
for k in range(self.topk):
|
|
150
155
|
# Expand topk_idxs for each value of k and add 1 at the specified positions
|
|
151
|
-
count_tensor.scatter_add_(-1, topk_idxs[:, :, k:k + 1], ones)
|
|
156
|
+
count_tensor.scatter_add_(-1, topk_idxs[:, :, k : k + 1], ones)
|
|
152
157
|
# count_tensor.scatter_add_(-1, topk_idxs, torch.ones_like(topk_idxs, dtype=torch.int8, device=topk_idxs.device))
|
|
153
158
|
# Filter invalid bboxes
|
|
154
159
|
count_tensor.masked_fill_(count_tensor > 1, 0)
|
|
@@ -192,9 +197,11 @@ class TaskAlignedAssigner(nn.Module):
|
|
|
192
197
|
target_labels.clamp_(0)
|
|
193
198
|
|
|
194
199
|
# 10x faster than F.one_hot()
|
|
195
|
-
target_scores = torch.zeros(
|
|
196
|
-
|
|
197
|
-
|
|
200
|
+
target_scores = torch.zeros(
|
|
201
|
+
(target_labels.shape[0], target_labels.shape[1], self.num_classes),
|
|
202
|
+
dtype=torch.int64,
|
|
203
|
+
device=target_labels.device,
|
|
204
|
+
) # (b, h*w, 80)
|
|
198
205
|
target_scores.scatter_(2, target_labels.unsqueeze(-1), 1)
|
|
199
206
|
|
|
200
207
|
fg_scores_mask = fg_mask[:, :, None].repeat(1, 1, self.num_classes) # (b, h*w, 80)
|
|
@@ -252,7 +259,6 @@ class TaskAlignedAssigner(nn.Module):
|
|
|
252
259
|
|
|
253
260
|
|
|
254
261
|
class RotatedTaskAlignedAssigner(TaskAlignedAssigner):
|
|
255
|
-
|
|
256
262
|
def iou_calculation(self, gt_bboxes, pd_bboxes):
|
|
257
263
|
"""Iou calculation for rotated bounding boxes."""
|
|
258
264
|
return probiou(gt_bboxes, pd_bboxes).squeeze(-1).clamp_(0)
|
|
@@ -295,7 +301,7 @@ def make_anchors(feats, strides, grid_cell_offset=0.5):
|
|
|
295
301
|
_, _, h, w = feats[i].shape
|
|
296
302
|
sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset # shift x
|
|
297
303
|
sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset # shift y
|
|
298
|
-
sy, sx = torch.meshgrid(sy, sx, indexing=
|
|
304
|
+
sy, sx = torch.meshgrid(sy, sx, indexing="ij") if TORCH_1_10 else torch.meshgrid(sy, sx)
|
|
299
305
|
anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2))
|
|
300
306
|
stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device))
|
|
301
307
|
return torch.cat(anchor_points), torch.cat(stride_tensor)
|
ultralytics/utils/torch_utils.py
CHANGED
|
@@ -25,11 +25,11 @@ try:
|
|
|
25
25
|
except ImportError:
|
|
26
26
|
thop = None
|
|
27
27
|
|
|
28
|
-
TORCH_1_9 = check_version(torch.__version__,
|
|
29
|
-
TORCH_2_0 = check_version(torch.__version__,
|
|
30
|
-
TORCHVISION_0_10 = check_version(torchvision.__version__,
|
|
31
|
-
TORCHVISION_0_11 = check_version(torchvision.__version__,
|
|
32
|
-
TORCHVISION_0_13 = check_version(torchvision.__version__,
|
|
28
|
+
TORCH_1_9 = check_version(torch.__version__, "1.9.0")
|
|
29
|
+
TORCH_2_0 = check_version(torch.__version__, "2.0.0")
|
|
30
|
+
TORCHVISION_0_10 = check_version(torchvision.__version__, "0.10.0")
|
|
31
|
+
TORCHVISION_0_11 = check_version(torchvision.__version__, "0.11.0")
|
|
32
|
+
TORCHVISION_0_13 = check_version(torchvision.__version__, "0.13.0")
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
@contextmanager
|
|
@@ -60,13 +60,13 @@ def get_cpu_info():
|
|
|
60
60
|
"""Return a string with system CPU information, i.e. 'Apple M2'."""
|
|
61
61
|
import cpuinfo # pip install py-cpuinfo
|
|
62
62
|
|
|
63
|
-
k =
|
|
63
|
+
k = "brand_raw", "hardware_raw", "arch_string_raw" # info keys sorted by preference (not all keys always available)
|
|
64
64
|
info = cpuinfo.get_cpu_info() # info dict
|
|
65
|
-
string = info.get(k[0] if k[0] in info else k[1] if k[1] in info else k[2],
|
|
66
|
-
return string.replace(
|
|
65
|
+
string = info.get(k[0] if k[0] in info else k[1] if k[1] in info else k[2], "unknown")
|
|
66
|
+
return string.replace("(R)", "").replace("CPU ", "").replace("@ ", "")
|
|
67
67
|
|
|
68
68
|
|
|
69
|
-
def select_device(device=
|
|
69
|
+
def select_device(device="", batch=0, newline=False, verbose=True):
|
|
70
70
|
"""
|
|
71
71
|
Selects the appropriate PyTorch device based on the provided arguments.
|
|
72
72
|
|
|
@@ -103,49 +103,57 @@ def select_device(device='', batch=0, newline=False, verbose=True):
|
|
|
103
103
|
if isinstance(device, torch.device):
|
|
104
104
|
return device
|
|
105
105
|
|
|
106
|
-
s = f
|
|
106
|
+
s = f"Ultralytics YOLOv{__version__} 🚀 Python-{platform.python_version()} torch-{torch.__version__} "
|
|
107
107
|
device = str(device).lower()
|
|
108
|
-
for remove in
|
|
109
|
-
device = device.replace(remove,
|
|
110
|
-
cpu = device ==
|
|
111
|
-
mps = device in (
|
|
108
|
+
for remove in "cuda:", "none", "(", ")", "[", "]", "'", " ":
|
|
109
|
+
device = device.replace(remove, "") # to string, 'cuda:0' -> '0' and '(0, 1)' -> '0,1'
|
|
110
|
+
cpu = device == "cpu"
|
|
111
|
+
mps = device in ("mps", "mps:0") # Apple Metal Performance Shaders (MPS)
|
|
112
112
|
if cpu or mps:
|
|
113
|
-
os.environ[
|
|
113
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # force torch.cuda.is_available() = False
|
|
114
114
|
elif device: # non-cpu device requested
|
|
115
|
-
if device ==
|
|
116
|
-
device =
|
|
117
|
-
visible = os.environ.get(
|
|
118
|
-
os.environ[
|
|
119
|
-
if not (torch.cuda.is_available() and torch.cuda.device_count() >= len(device.replace(
|
|
115
|
+
if device == "cuda":
|
|
116
|
+
device = "0"
|
|
117
|
+
visible = os.environ.get("CUDA_VISIBLE_DEVICES", None)
|
|
118
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = device # set environment variable - must be before assert is_available()
|
|
119
|
+
if not (torch.cuda.is_available() and torch.cuda.device_count() >= len(device.replace(",", ""))):
|
|
120
120
|
LOGGER.info(s)
|
|
121
|
-
install =
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
121
|
+
install = (
|
|
122
|
+
"See https://pytorch.org/get-started/locally/ for up-to-date torch install instructions if no "
|
|
123
|
+
"CUDA devices are seen by torch.\n"
|
|
124
|
+
if torch.cuda.device_count() == 0
|
|
125
|
+
else ""
|
|
126
|
+
)
|
|
127
|
+
raise ValueError(
|
|
128
|
+
f"Invalid CUDA 'device={device}' requested."
|
|
129
|
+
f" Use 'device=cpu' or pass valid CUDA device(s) if available,"
|
|
130
|
+
f" i.e. 'device=0' or 'device=0,1,2,3' for Multi-GPU.\n"
|
|
131
|
+
f"\ntorch.cuda.is_available(): {torch.cuda.is_available()}"
|
|
132
|
+
f"\ntorch.cuda.device_count(): {torch.cuda.device_count()}"
|
|
133
|
+
f"\nos.environ['CUDA_VISIBLE_DEVICES']: {visible}\n"
|
|
134
|
+
f"{install}"
|
|
135
|
+
)
|
|
130
136
|
|
|
131
137
|
if not cpu and not mps and torch.cuda.is_available(): # prefer GPU if available
|
|
132
|
-
devices = device.split(
|
|
138
|
+
devices = device.split(",") if device else "0" # range(torch.cuda.device_count()) # i.e. 0,1,6,7
|
|
133
139
|
n = len(devices) # device count
|
|
134
140
|
if n > 1 and batch > 0 and batch % n != 0: # check batch_size is divisible by device_count
|
|
135
|
-
raise ValueError(
|
|
136
|
-
|
|
137
|
-
|
|
141
|
+
raise ValueError(
|
|
142
|
+
f"'batch={batch}' must be a multiple of GPU count {n}. Try 'batch={batch // n * n}' or "
|
|
143
|
+
f"'batch={batch // n * n + n}', the nearest batch sizes evenly divisible by {n}."
|
|
144
|
+
)
|
|
145
|
+
space = " " * (len(s) + 1)
|
|
138
146
|
for i, d in enumerate(devices):
|
|
139
147
|
p = torch.cuda.get_device_properties(i)
|
|
140
148
|
s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n" # bytes to MB
|
|
141
|
-
arg =
|
|
149
|
+
arg = "cuda:0"
|
|
142
150
|
elif mps and TORCH_2_0 and torch.backends.mps.is_available():
|
|
143
151
|
# Prefer MPS if available
|
|
144
|
-
s += f
|
|
145
|
-
arg =
|
|
152
|
+
s += f"MPS ({get_cpu_info()})\n"
|
|
153
|
+
arg = "mps"
|
|
146
154
|
else: # revert to CPU
|
|
147
|
-
s += f
|
|
148
|
-
arg =
|
|
155
|
+
s += f"CPU ({get_cpu_info()})\n"
|
|
156
|
+
arg = "cpu"
|
|
149
157
|
|
|
150
158
|
if verbose:
|
|
151
159
|
LOGGER.info(s if newline else s.rstrip())
|
|
@@ -161,14 +169,20 @@ def time_sync():
|
|
|
161
169
|
|
|
162
170
|
def fuse_conv_and_bn(conv, bn):
|
|
163
171
|
"""Fuse Conv2d() and BatchNorm2d() layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/."""
|
|
164
|
-
fusedconv =
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
+
fusedconv = (
|
|
173
|
+
nn.Conv2d(
|
|
174
|
+
conv.in_channels,
|
|
175
|
+
conv.out_channels,
|
|
176
|
+
kernel_size=conv.kernel_size,
|
|
177
|
+
stride=conv.stride,
|
|
178
|
+
padding=conv.padding,
|
|
179
|
+
dilation=conv.dilation,
|
|
180
|
+
groups=conv.groups,
|
|
181
|
+
bias=True,
|
|
182
|
+
)
|
|
183
|
+
.requires_grad_(False)
|
|
184
|
+
.to(conv.weight.device)
|
|
185
|
+
)
|
|
172
186
|
|
|
173
187
|
# Prepare filters
|
|
174
188
|
w_conv = conv.weight.clone().view(conv.out_channels, -1)
|
|
@@ -185,15 +199,21 @@ def fuse_conv_and_bn(conv, bn):
|
|
|
185
199
|
|
|
186
200
|
def fuse_deconv_and_bn(deconv, bn):
|
|
187
201
|
"""Fuse ConvTranspose2d() and BatchNorm2d() layers."""
|
|
188
|
-
fuseddconv =
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
202
|
+
fuseddconv = (
|
|
203
|
+
nn.ConvTranspose2d(
|
|
204
|
+
deconv.in_channels,
|
|
205
|
+
deconv.out_channels,
|
|
206
|
+
kernel_size=deconv.kernel_size,
|
|
207
|
+
stride=deconv.stride,
|
|
208
|
+
padding=deconv.padding,
|
|
209
|
+
output_padding=deconv.output_padding,
|
|
210
|
+
dilation=deconv.dilation,
|
|
211
|
+
groups=deconv.groups,
|
|
212
|
+
bias=True,
|
|
213
|
+
)
|
|
214
|
+
.requires_grad_(False)
|
|
215
|
+
.to(deconv.weight.device)
|
|
216
|
+
)
|
|
197
217
|
|
|
198
218
|
# Prepare filters
|
|
199
219
|
w_deconv = deconv.weight.clone().view(deconv.out_channels, -1)
|
|
@@ -221,18 +241,21 @@ def model_info(model, detailed=False, verbose=True, imgsz=640):
|
|
|
221
241
|
n_l = len(list(model.modules())) # number of layers
|
|
222
242
|
if detailed:
|
|
223
243
|
LOGGER.info(
|
|
224
|
-
f"{'layer':>5} {'name':>40} {'gradient':>9} {'parameters':>12} {'shape':>20} {'mu':>10} {'sigma':>10}"
|
|
244
|
+
f"{'layer':>5} {'name':>40} {'gradient':>9} {'parameters':>12} {'shape':>20} {'mu':>10} {'sigma':>10}"
|
|
245
|
+
)
|
|
225
246
|
for i, (name, p) in enumerate(model.named_parameters()):
|
|
226
|
-
name = name.replace(
|
|
227
|
-
LOGGER.info(
|
|
228
|
-
|
|
247
|
+
name = name.replace("module_list.", "")
|
|
248
|
+
LOGGER.info(
|
|
249
|
+
"%5g %40s %9s %12g %20s %10.3g %10.3g %10s"
|
|
250
|
+
% (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std(), p.dtype)
|
|
251
|
+
)
|
|
229
252
|
|
|
230
253
|
flops = get_flops(model, imgsz)
|
|
231
|
-
fused =
|
|
232
|
-
fs = f
|
|
233
|
-
yaml_file = getattr(model,
|
|
234
|
-
model_name = Path(yaml_file).stem.replace(
|
|
235
|
-
LOGGER.info(f
|
|
254
|
+
fused = " (fused)" if getattr(model, "is_fused", lambda: False)() else ""
|
|
255
|
+
fs = f", {flops:.1f} GFLOPs" if flops else ""
|
|
256
|
+
yaml_file = getattr(model, "yaml_file", "") or getattr(model, "yaml", {}).get("yaml_file", "")
|
|
257
|
+
model_name = Path(yaml_file).stem.replace("yolo", "YOLO") or "Model"
|
|
258
|
+
LOGGER.info(f"{model_name} summary{fused}: {n_l} layers, {n_p} parameters, {n_g} gradients{fs}")
|
|
236
259
|
return n_l, n_p, n_g, flops
|
|
237
260
|
|
|
238
261
|
|
|
@@ -262,13 +285,15 @@ def model_info_for_loggers(trainer):
|
|
|
262
285
|
"""
|
|
263
286
|
if trainer.args.profile: # profile ONNX and TensorRT times
|
|
264
287
|
from ultralytics.utils.benchmarks import ProfileModels
|
|
288
|
+
|
|
265
289
|
results = ProfileModels([trainer.last], device=trainer.device).profile()[0]
|
|
266
|
-
results.pop(
|
|
290
|
+
results.pop("model/name")
|
|
267
291
|
else: # only return PyTorch times from most recent validation
|
|
268
292
|
results = {
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
293
|
+
"model/parameters": get_num_params(trainer.model),
|
|
294
|
+
"model/GFLOPs": round(get_flops(trainer.model), 3),
|
|
295
|
+
}
|
|
296
|
+
results["model/speed_PyTorch(ms)"] = round(trainer.validator.speed["inference"], 3)
|
|
272
297
|
return results
|
|
273
298
|
|
|
274
299
|
|
|
@@ -284,14 +309,14 @@ def get_flops(model, imgsz=640):
|
|
|
284
309
|
imgsz = [imgsz, imgsz] # expand if int/float
|
|
285
310
|
try:
|
|
286
311
|
# Use stride size for input tensor
|
|
287
|
-
stride = max(int(model.stride.max()), 32) if hasattr(model,
|
|
312
|
+
stride = max(int(model.stride.max()), 32) if hasattr(model, "stride") else 32 # max stride
|
|
288
313
|
im = torch.empty((1, p.shape[1], stride, stride), device=p.device) # input image in BCHW format
|
|
289
|
-
flops = thop.profile(deepcopy(model), inputs=[im], verbose=False)[0] /
|
|
314
|
+
flops = thop.profile(deepcopy(model), inputs=[im], verbose=False)[0] / 1e9 * 2 # stride GFLOPs
|
|
290
315
|
return flops * imgsz[0] / stride * imgsz[1] / stride # imgsz GFLOPs
|
|
291
316
|
except Exception:
|
|
292
317
|
# Use actual image size for input tensor (i.e. required for RTDETR models)
|
|
293
318
|
im = torch.empty((1, p.shape[1], *imgsz), device=p.device) # input image in BCHW format
|
|
294
|
-
return thop.profile(deepcopy(model), inputs=[im], verbose=False)[0] /
|
|
319
|
+
return thop.profile(deepcopy(model), inputs=[im], verbose=False)[0] / 1e9 * 2 # imgsz GFLOPs
|
|
295
320
|
except Exception:
|
|
296
321
|
return 0.0
|
|
297
322
|
|
|
@@ -301,11 +326,11 @@ def get_flops_with_torch_profiler(model, imgsz=640):
|
|
|
301
326
|
if TORCH_2_0:
|
|
302
327
|
model = de_parallel(model)
|
|
303
328
|
p = next(model.parameters())
|
|
304
|
-
stride = (max(int(model.stride.max()), 32) if hasattr(model,
|
|
329
|
+
stride = (max(int(model.stride.max()), 32) if hasattr(model, "stride") else 32) * 2 # max stride
|
|
305
330
|
im = torch.zeros((1, p.shape[1], stride, stride), device=p.device) # input image in BCHW format
|
|
306
331
|
with torch.profiler.profile(with_flops=True) as prof:
|
|
307
332
|
model(im)
|
|
308
|
-
flops = sum(x.flops for x in prof.key_averages()) /
|
|
333
|
+
flops = sum(x.flops for x in prof.key_averages()) / 1e9
|
|
309
334
|
imgsz = imgsz if isinstance(imgsz, list) else [imgsz, imgsz] # expand if int/float
|
|
310
335
|
flops = flops * imgsz[0] / stride * imgsz[1] / stride # 640x640 GFLOPs
|
|
311
336
|
return flops
|
|
@@ -333,7 +358,7 @@ def scale_img(img, ratio=1.0, same_shape=False, gs=32):
|
|
|
333
358
|
return img
|
|
334
359
|
h, w = img.shape[2:]
|
|
335
360
|
s = (int(h * ratio), int(w * ratio)) # new size
|
|
336
|
-
img = F.interpolate(img, size=s, mode=
|
|
361
|
+
img = F.interpolate(img, size=s, mode="bilinear", align_corners=False) # resize
|
|
337
362
|
if not same_shape: # pad/crop img
|
|
338
363
|
h, w = (math.ceil(x * ratio / gs) * gs for x in (h, w))
|
|
339
364
|
return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean
|
|
@@ -349,7 +374,7 @@ def make_divisible(x, divisor):
|
|
|
349
374
|
def copy_attr(a, b, include=(), exclude=()):
|
|
350
375
|
"""Copies attributes from object 'b' to object 'a', with options to include/exclude certain attributes."""
|
|
351
376
|
for k, v in b.__dict__.items():
|
|
352
|
-
if (len(include) and k not in include) or k.startswith(
|
|
377
|
+
if (len(include) and k not in include) or k.startswith("_") or k in exclude:
|
|
353
378
|
continue
|
|
354
379
|
else:
|
|
355
380
|
setattr(a, k, v)
|
|
@@ -357,7 +382,7 @@ def copy_attr(a, b, include=(), exclude=()):
|
|
|
357
382
|
|
|
358
383
|
def get_latest_opset():
|
|
359
384
|
"""Return second-most (for maturity) recently supported ONNX opset by this version of torch."""
|
|
360
|
-
return max(int(k[14:]) for k in vars(torch.onnx) if
|
|
385
|
+
return max(int(k[14:]) for k in vars(torch.onnx) if "symbolic_opset" in k) - 1 # opset
|
|
361
386
|
|
|
362
387
|
|
|
363
388
|
def intersect_dicts(da, db, exclude=()):
|
|
@@ -392,10 +417,10 @@ def init_seeds(seed=0, deterministic=False):
|
|
|
392
417
|
if TORCH_2_0:
|
|
393
418
|
torch.use_deterministic_algorithms(True, warn_only=True) # warn if deterministic is not possible
|
|
394
419
|
torch.backends.cudnn.deterministic = True
|
|
395
|
-
os.environ[
|
|
396
|
-
os.environ[
|
|
420
|
+
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
|
|
421
|
+
os.environ["PYTHONHASHSEED"] = str(seed)
|
|
397
422
|
else:
|
|
398
|
-
LOGGER.warning(
|
|
423
|
+
LOGGER.warning("WARNING ⚠️ Upgrade to torch>=2.0.0 for deterministic training.")
|
|
399
424
|
else:
|
|
400
425
|
torch.use_deterministic_algorithms(False)
|
|
401
426
|
torch.backends.cudnn.deterministic = False
|
|
@@ -430,13 +455,13 @@ class ModelEMA:
|
|
|
430
455
|
v += (1 - d) * msd[k].detach()
|
|
431
456
|
# assert v.dtype == msd[k].dtype == torch.float32, f'{k}: EMA {v.dtype}, model {msd[k].dtype}'
|
|
432
457
|
|
|
433
|
-
def update_attr(self, model, include=(), exclude=(
|
|
458
|
+
def update_attr(self, model, include=(), exclude=("process_group", "reducer")):
|
|
434
459
|
"""Updates attributes and saves stripped model with optimizer removed."""
|
|
435
460
|
if self.enabled:
|
|
436
461
|
copy_attr(self.ema, model, include, exclude)
|
|
437
462
|
|
|
438
463
|
|
|
439
|
-
def strip_optimizer(f: Union[str, Path] =
|
|
464
|
+
def strip_optimizer(f: Union[str, Path] = "best.pt", s: str = "") -> None:
|
|
440
465
|
"""
|
|
441
466
|
Strip optimizer from 'f' to finalize training, optionally save as 's'.
|
|
442
467
|
|
|
@@ -456,26 +481,26 @@ def strip_optimizer(f: Union[str, Path] = 'best.pt', s: str = '') -> None:
|
|
|
456
481
|
strip_optimizer(f)
|
|
457
482
|
```
|
|
458
483
|
"""
|
|
459
|
-
x = torch.load(f, map_location=torch.device(
|
|
460
|
-
if
|
|
461
|
-
LOGGER.info(f
|
|
484
|
+
x = torch.load(f, map_location=torch.device("cpu"))
|
|
485
|
+
if "model" not in x:
|
|
486
|
+
LOGGER.info(f"Skipping {f}, not a valid Ultralytics model.")
|
|
462
487
|
return
|
|
463
488
|
|
|
464
|
-
if hasattr(x[
|
|
465
|
-
x[
|
|
466
|
-
args = {**DEFAULT_CFG_DICT, **x[
|
|
467
|
-
if x.get(
|
|
468
|
-
x[
|
|
469
|
-
for k in
|
|
489
|
+
if hasattr(x["model"], "args"):
|
|
490
|
+
x["model"].args = dict(x["model"].args) # convert from IterableSimpleNamespace to dict
|
|
491
|
+
args = {**DEFAULT_CFG_DICT, **x["train_args"]} if "train_args" in x else None # combine args
|
|
492
|
+
if x.get("ema"):
|
|
493
|
+
x["model"] = x["ema"] # replace model with ema
|
|
494
|
+
for k in "optimizer", "best_fitness", "ema", "updates": # keys
|
|
470
495
|
x[k] = None
|
|
471
|
-
x[
|
|
472
|
-
x[
|
|
473
|
-
for p in x[
|
|
496
|
+
x["epoch"] = -1
|
|
497
|
+
x["model"].half() # to FP16
|
|
498
|
+
for p in x["model"].parameters():
|
|
474
499
|
p.requires_grad = False
|
|
475
|
-
x[
|
|
500
|
+
x["train_args"] = {k: v for k, v in args.items() if k in DEFAULT_CFG_KEYS} # strip non-default keys
|
|
476
501
|
# x['model'].args = x['train_args']
|
|
477
502
|
torch.save(x, s or f)
|
|
478
|
-
mb = os.path.getsize(s or f) /
|
|
503
|
+
mb = os.path.getsize(s or f) / 1e6 # file size
|
|
479
504
|
LOGGER.info(f"Optimizer stripped from {f},{f' saved as {s},' if s else ''} {mb:.1f}MB")
|
|
480
505
|
|
|
481
506
|
|
|
@@ -496,18 +521,20 @@ def profile(input, ops, n=10, device=None):
|
|
|
496
521
|
results = []
|
|
497
522
|
if not isinstance(device, torch.device):
|
|
498
523
|
device = select_device(device)
|
|
499
|
-
LOGGER.info(
|
|
500
|
-
|
|
524
|
+
LOGGER.info(
|
|
525
|
+
f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}"
|
|
526
|
+
f"{'input':>24s}{'output':>24s}"
|
|
527
|
+
)
|
|
501
528
|
|
|
502
529
|
for x in input if isinstance(input, list) else [input]:
|
|
503
530
|
x = x.to(device)
|
|
504
531
|
x.requires_grad = True
|
|
505
532
|
for m in ops if isinstance(ops, list) else [ops]:
|
|
506
|
-
m = m.to(device) if hasattr(m,
|
|
507
|
-
m = m.half() if hasattr(m,
|
|
533
|
+
m = m.to(device) if hasattr(m, "to") else m # device
|
|
534
|
+
m = m.half() if hasattr(m, "half") and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m
|
|
508
535
|
tf, tb, t = 0, 0, [0, 0, 0] # dt forward, backward
|
|
509
536
|
try:
|
|
510
|
-
flops = thop.profile(m, inputs=[x], verbose=False)[0] /
|
|
537
|
+
flops = thop.profile(m, inputs=[x], verbose=False)[0] / 1e9 * 2 if thop else 0 # GFLOPs
|
|
511
538
|
except Exception:
|
|
512
539
|
flops = 0
|
|
513
540
|
|
|
@@ -521,13 +548,13 @@ def profile(input, ops, n=10, device=None):
|
|
|
521
548
|
t[2] = time_sync()
|
|
522
549
|
except Exception: # no backward method
|
|
523
550
|
# print(e) # for debug
|
|
524
|
-
t[2] = float(
|
|
551
|
+
t[2] = float("nan")
|
|
525
552
|
tf += (t[1] - t[0]) * 1000 / n # ms per op forward
|
|
526
553
|
tb += (t[2] - t[1]) * 1000 / n # ms per op backward
|
|
527
|
-
mem = torch.cuda.memory_reserved() /
|
|
528
|
-
s_in, s_out = (tuple(x.shape) if isinstance(x, torch.Tensor) else
|
|
554
|
+
mem = torch.cuda.memory_reserved() / 1e9 if torch.cuda.is_available() else 0 # (GB)
|
|
555
|
+
s_in, s_out = (tuple(x.shape) if isinstance(x, torch.Tensor) else "list" for x in (x, y)) # shapes
|
|
529
556
|
p = sum(x.numel() for x in m.parameters()) if isinstance(m, nn.Module) else 0 # parameters
|
|
530
|
-
LOGGER.info(f
|
|
557
|
+
LOGGER.info(f"{p:12}{flops:12.4g}{mem:>14.3f}{tf:14.4g}{tb:14.4g}{str(s_in):>24s}{str(s_out):>24s}")
|
|
531
558
|
results.append([p, flops, mem, tf, tb, s_in, s_out])
|
|
532
559
|
except Exception as e:
|
|
533
560
|
LOGGER.info(e)
|
|
@@ -548,7 +575,7 @@ class EarlyStopping:
|
|
|
548
575
|
"""
|
|
549
576
|
self.best_fitness = 0.0 # i.e. mAP
|
|
550
577
|
self.best_epoch = 0
|
|
551
|
-
self.patience = patience or float(
|
|
578
|
+
self.patience = patience or float("inf") # epochs to wait after fitness stops improving to stop
|
|
552
579
|
self.possible_stop = False # possible stop may occur next epoch
|
|
553
580
|
|
|
554
581
|
def __call__(self, epoch, fitness):
|
|
@@ -572,8 +599,10 @@ class EarlyStopping:
|
|
|
572
599
|
self.possible_stop = delta >= (self.patience - 1) # possible stop may occur next epoch
|
|
573
600
|
stop = delta >= self.patience # stop training if patience exceeded
|
|
574
601
|
if stop:
|
|
575
|
-
LOGGER.info(
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
602
|
+
LOGGER.info(
|
|
603
|
+
f"Stopping training early as no improvement observed in last {self.patience} epochs. "
|
|
604
|
+
f"Best results observed at epoch {self.best_epoch}, best model saved as best.pt.\n"
|
|
605
|
+
f"To update EarlyStopping(patience={self.patience}) pass a new patience value, "
|
|
606
|
+
f"i.e. `patience=300` or use `patience=0` to disable EarlyStopping."
|
|
607
|
+
)
|
|
579
608
|
return stop
|
ultralytics/utils/triton.py
CHANGED
|
@@ -22,7 +22,7 @@ class TritonRemoteModel:
|
|
|
22
22
|
output_names (List[str]): The names of the model outputs.
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
|
-
def __init__(self, url: str, endpoint: str =
|
|
25
|
+
def __init__(self, url: str, endpoint: str = "", scheme: str = ""):
|
|
26
26
|
"""
|
|
27
27
|
Initialize the TritonRemoteModel.
|
|
28
28
|
|
|
@@ -36,7 +36,7 @@ class TritonRemoteModel:
|
|
|
36
36
|
"""
|
|
37
37
|
if not endpoint and not scheme: # Parse all args from URL string
|
|
38
38
|
splits = urlsplit(url)
|
|
39
|
-
endpoint = splits.path.strip(
|
|
39
|
+
endpoint = splits.path.strip("/").split("/")[0]
|
|
40
40
|
scheme = splits.scheme
|
|
41
41
|
url = splits.netloc
|
|
42
42
|
|
|
@@ -44,26 +44,28 @@ class TritonRemoteModel:
|
|
|
44
44
|
self.url = url
|
|
45
45
|
|
|
46
46
|
# Choose the Triton client based on the communication scheme
|
|
47
|
-
if scheme ==
|
|
47
|
+
if scheme == "http":
|
|
48
48
|
import tritonclient.http as client # noqa
|
|
49
|
+
|
|
49
50
|
self.triton_client = client.InferenceServerClient(url=self.url, verbose=False, ssl=False)
|
|
50
51
|
config = self.triton_client.get_model_config(endpoint)
|
|
51
52
|
else:
|
|
52
53
|
import tritonclient.grpc as client # noqa
|
|
54
|
+
|
|
53
55
|
self.triton_client = client.InferenceServerClient(url=self.url, verbose=False, ssl=False)
|
|
54
|
-
config = self.triton_client.get_model_config(endpoint, as_json=True)[
|
|
56
|
+
config = self.triton_client.get_model_config(endpoint, as_json=True)["config"]
|
|
55
57
|
|
|
56
58
|
# Sort output names alphabetically, i.e. 'output0', 'output1', etc.
|
|
57
|
-
config[
|
|
59
|
+
config["output"] = sorted(config["output"], key=lambda x: x.get("name"))
|
|
58
60
|
|
|
59
61
|
# Define model attributes
|
|
60
|
-
type_map = {
|
|
62
|
+
type_map = {"TYPE_FP32": np.float32, "TYPE_FP16": np.float16, "TYPE_UINT8": np.uint8}
|
|
61
63
|
self.InferRequestedOutput = client.InferRequestedOutput
|
|
62
64
|
self.InferInput = client.InferInput
|
|
63
|
-
self.input_formats = [x[
|
|
65
|
+
self.input_formats = [x["data_type"] for x in config["input"]]
|
|
64
66
|
self.np_input_formats = [type_map[x] for x in self.input_formats]
|
|
65
|
-
self.input_names = [x[
|
|
66
|
-
self.output_names = [x[
|
|
67
|
+
self.input_names = [x["name"] for x in config["input"]]
|
|
68
|
+
self.output_names = [x["name"] for x in config["output"]]
|
|
67
69
|
|
|
68
70
|
def __call__(self, *inputs: np.ndarray) -> List[np.ndarray]:
|
|
69
71
|
"""
|
|
@@ -80,7 +82,7 @@ class TritonRemoteModel:
|
|
|
80
82
|
for i, x in enumerate(inputs):
|
|
81
83
|
if x.dtype != self.np_input_formats[i]:
|
|
82
84
|
x = x.astype(self.np_input_formats[i])
|
|
83
|
-
infer_input = self.InferInput(self.input_names[i], [*x.shape], self.input_formats[i].replace(
|
|
85
|
+
infer_input = self.InferInput(self.input_names[i], [*x.shape], self.input_formats[i].replace("TYPE_", ""))
|
|
84
86
|
infer_input.set_data_from_numpy(x)
|
|
85
87
|
infer_inputs.append(infer_input)
|
|
86
88
|
|