ultralytics 8.0.196__py3-none-any.whl → 8.0.198__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ultralytics might be problematic.
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +4 -5
- ultralytics/data/augment.py +2 -2
- ultralytics/data/converter.py +12 -13
- ultralytics/data/dataset.py +1 -1
- ultralytics/engine/__init__.py +1 -0
- ultralytics/engine/exporter.py +1 -1
- ultralytics/engine/trainer.py +2 -1
- ultralytics/hub/session.py +1 -1
- ultralytics/models/fastsam/predict.py +33 -2
- ultralytics/models/fastsam/prompt.py +38 -1
- ultralytics/models/fastsam/utils.py +5 -5
- ultralytics/models/fastsam/val.py +27 -1
- ultralytics/models/nas/model.py +20 -0
- ultralytics/models/nas/predict.py +23 -0
- ultralytics/models/nas/val.py +24 -0
- ultralytics/models/rtdetr/val.py +17 -5
- ultralytics/models/sam/modules/decoders.py +26 -1
- ultralytics/models/sam/modules/encoders.py +31 -3
- ultralytics/models/sam/modules/sam.py +22 -7
- ultralytics/models/sam/modules/tiny_encoder.py +147 -45
- ultralytics/models/sam/modules/transformer.py +47 -2
- ultralytics/models/sam/predict.py +19 -2
- ultralytics/models/utils/loss.py +20 -2
- ultralytics/models/utils/ops.py +5 -5
- ultralytics/nn/modules/block.py +33 -10
- ultralytics/nn/modules/conv.py +16 -4
- ultralytics/nn/modules/head.py +48 -17
- ultralytics/nn/modules/transformer.py +2 -2
- ultralytics/nn/tasks.py +7 -7
- ultralytics/utils/__init__.py +2 -1
- ultralytics/utils/benchmarks.py +13 -0
- ultralytics/utils/callbacks/mlflow.py +76 -36
- ultralytics/utils/callbacks/wb.py +92 -1
- ultralytics/utils/checks.py +4 -4
- ultralytics/utils/errors.py +12 -0
- ultralytics/utils/files.py +1 -1
- ultralytics/utils/instance.py +41 -3
- ultralytics/utils/loss.py +22 -19
- ultralytics/utils/metrics.py +106 -24
- ultralytics/utils/tal.py +1 -1
- ultralytics/utils/torch_utils.py +4 -2
- ultralytics/utils/tuner.py +10 -4
- {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/METADATA +1 -1
- {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/RECORD +49 -49
- {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/LICENSE +0 -0
- {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/WHEEL +0 -0
- {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/top_level.txt +0 -0
ultralytics/nn/modules/block.py
CHANGED
@@ -37,7 +37,12 @@ class DFL(nn.Module):
 class Proto(nn.Module):
     """YOLOv8 mask Proto module for segmentation models."""
 
-    def __init__(self, c1, c_=256, c2=32):
+    def __init__(self, c1, c_=256, c2=32):
+        """
+        Initializes the YOLOv8 mask Proto module with specified number of protos and masks.
+
+        Input arguments are ch_in, number of protos, number of masks.
+        """
         super().__init__()
         self.cv1 = Conv(c1, c_, k=3)
         self.upsample = nn.ConvTranspose2d(c_, c_, 2, 2, 0, bias=True)  # nn.Upsample(scale_factor=2, mode='nearest')
@@ -124,7 +129,12 @@ class SPP(nn.Module):
 class SPPF(nn.Module):
     """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""
 
-    def __init__(self, c1, c2, k=5):
+    def __init__(self, c1, c2, k=5):
+        """
+        Initializes the SPPF layer with given input/output channels and kernel size.
+
+        This module is equivalent to SPP(k=(5, 9, 13)).
+        """
         super().__init__()
         c_ = c1 // 2  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
@@ -142,7 +152,8 @@ class SPPF(nn.Module):
 class C1(nn.Module):
     """CSP Bottleneck with 1 convolution."""
 
-    def __init__(self, c1, c2, n=1):
+    def __init__(self, c1, c2, n=1):
+        """Initializes the CSP Bottleneck with configurations for 1 convolution with arguments ch_in, ch_out, number."""
         super().__init__()
         self.cv1 = Conv(c1, c2, 1, 1)
         self.m = nn.Sequential(*(Conv(c2, c2, 3) for _ in range(n)))
@@ -156,7 +167,10 @@ class C1(nn.Module):
 class C2(nn.Module):
     """CSP Bottleneck with 2 convolutions."""
 
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+        """Initializes the CSP Bottleneck with 2 convolutions module with arguments ch_in, ch_out, number, shortcut,
+        groups, expansion.
+        """
         super().__init__()
         self.c = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, 2 * self.c, 1, 1)
@@ -173,7 +187,10 @@ class C2(nn.Module):
 class C2f(nn.Module):
     """Faster Implementation of CSP Bottleneck with 2 convolutions."""
 
-    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
+    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
+        """Initialize CSP bottleneck layer with two convolutions with arguments ch_in, ch_out, number, shortcut, groups,
+        expansion.
+        """
         super().__init__()
         self.c = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, 2 * self.c, 1, 1)
@@ -196,7 +213,8 @@ class C2f(nn.Module):
 class C3(nn.Module):
     """CSP Bottleneck with 3 convolutions."""
 
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+        """Initialize the CSP Bottleneck with given channels, number, shortcut, groups, and expansion values."""
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
@@ -259,7 +277,8 @@ class C3Ghost(C3):
 class GhostBottleneck(nn.Module):
     """Ghost Bottleneck https://github.com/huawei-noah/ghostnet."""
 
-    def __init__(self, c1, c2, k=3, s=1):
+    def __init__(self, c1, c2, k=3, s=1):
+        """Initializes GhostBottleneck module with arguments ch_in, ch_out, kernel, stride."""
         super().__init__()
         c_ = c2 // 2
         self.conv = nn.Sequential(
@@ -277,7 +296,10 @@ class GhostBottleneck(nn.Module):
 class Bottleneck(nn.Module):
     """Standard bottleneck."""
 
-    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
+    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
+        """Initializes a bottleneck module with given input/output channels, shortcut option, group, kernels, and
+        expansion.
+        """
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, c_, k[0], 1)
@@ -285,14 +307,15 @@ class Bottleneck(nn.Module):
         self.add = shortcut and c1 == c2
 
     def forward(self, x):
-        """'forward()' applies the
+        """'forward()' applies the YOLO FPN to input data."""
         return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
 
 
 class BottleneckCSP(nn.Module):
     """CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks."""
 
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+        """Initializes the CSP Bottleneck given arguments for ch_in, ch_out, number, shortcut, groups, expansion."""
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
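The new SPPF docstring above states that the module is equivalent to SPP(k=(5, 9, 13)). A minimal sketch to check that claim numerically, assuming ultralytics is installed; the weight-copying lines are illustrative and not part of the package:

    import torch
    from ultralytics.nn.modules.block import SPP, SPPF

    spp, sppf = SPP(64, 64, k=(5, 9, 13)).eval(), SPPF(64, 64, k=5).eval()
    sppf.cv1.load_state_dict(spp.cv1.state_dict())  # copy weights so the two outputs are comparable
    sppf.cv2.load_state_dict(spp.cv2.state_dict())  # both cv2 layers take 4 * (c1 // 2) input channels
    x = torch.randn(1, 64, 32, 32)
    with torch.no_grad():
        print(torch.allclose(spp(x), sppf(x), atol=1e-6))  # True: chained 5x5 max-pools reproduce 5/9/13 pooling

The equivalence holds because three stride-1 5x5 max-pools in sequence have effective kernel sizes 5, 9, and 13, so SPPF reproduces SPP's outputs while avoiding the larger pooling windows.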
ultralytics/nn/modules/conv.py
CHANGED
@@ -88,6 +88,7 @@ class DWConv(Conv):
     """Depth-wise convolution."""
 
     def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, stride, dilation, activation
+        """Initialize Depth-wise convolution with given parameters."""
         super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
 
 
@@ -95,6 +96,7 @@ class DWConvTranspose2d(nn.ConvTranspose2d):
     """Depth-wise transpose convolution."""
 
     def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
+        """Initialize DWConvTranspose2d class with given parameters."""
         super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
 
 
@@ -121,12 +123,18 @@ class ConvTranspose(nn.Module):
 class Focus(nn.Module):
     """Focus wh information into c-space."""
 
-    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
+        """Initializes Focus object with user defined channel, convolution, padding, group and activation values."""
         super().__init__()
         self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
         # self.contract = Contract(gain=2)
 
-    def forward(self, x):
+    def forward(self, x):
+        """
+        Applies convolution to concatenated tensor and returns the output.
+
+        Input shape is (b,c,w,h) and output shape is (b,4c,w/2,h/2).
+        """
         return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
         # return self.conv(self.contract(x))
 
@@ -134,7 +142,10 @@ class Focus(nn.Module):
 class GhostConv(nn.Module):
     """Ghost Convolution https://github.com/huawei-noah/ghostnet."""
 
-    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
+    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
+        """Initializes the GhostConv object with input channels, output channels, kernel size, stride, groups and
+        activation.
+        """
         super().__init__()
         c_ = c2 // 2  # hidden channels
         self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
@@ -280,7 +291,8 @@ class SpatialAttention(nn.Module):
 class CBAM(nn.Module):
     """Convolutional Block Attention Module."""
 
-    def __init__(self, c1, kernel_size=7):
+    def __init__(self, c1, kernel_size=7):
+        """Initialize CBAM with given input channel (c1) and kernel size."""
         super().__init__()
         self.channel_attention = ChannelAttention(c1)
         self.spatial_attention = SpatialAttention(kernel_size)
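The Focus.forward docstring added above describes the space-to-depth step. A small self-contained sketch of just the slicing expression from the diff (tensor sizes are arbitrary examples):

    import torch

    x = torch.randn(1, 3, 8, 8)  # (b, c, h, w)
    patches = torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1)
    print(patches.shape)  # torch.Size([1, 12, 4, 4]): 4x the channels at half the resolution, before the conv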
ultralytics/nn/modules/head.py
CHANGED
@@ -25,7 +25,8 @@ class Detect(nn.Module):
     anchors = torch.empty(0)  # init
     strides = torch.empty(0)  # init
 
-    def __init__(self, nc=80, ch=()):
+    def __init__(self, nc=80, ch=()):
+        """Initializes the YOLOv8 detection layer with specified number of classes and channels."""
         super().__init__()
         self.nc = nc  # number of classes
         self.nl = len(ch)  # number of detection layers
@@ -149,7 +150,10 @@ class Pose(Detect):
 class Classify(nn.Module):
     """YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)."""
 
-    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):
+        """Initializes YOLOv8 classification head with specified input and output channels, kernel size, stride,
+        padding, and groups.
+        """
         super().__init__()
         c_ = 1280  # efficientnet_b0 size
         self.conv = Conv(c1, c_, k, s, p, g)
@@ -166,6 +170,13 @@ class Classify(nn.Module):
 
 
 class RTDETRDecoder(nn.Module):
+    """
+    Real-Time Deformable Transformer Decoder (RTDETRDecoder) module for object detection.
+
+    This decoder module utilizes Transformer architecture along with deformable convolutions to predict bounding boxes
+    and class labels for objects in an image. It integrates features from multiple layers and runs through a series of
+    Transformer decoder layers to output the final predictions.
+    """
     export = False  # export mode
 
     def __init__(
@@ -181,11 +192,31 @@ class RTDETRDecoder(nn.Module):
             dropout=0.,
             act=nn.ReLU(),
             eval_idx=-1,
-            #
+            # Training args
             nd=100,  # num denoising
             label_noise_ratio=0.5,
             box_noise_scale=1.0,
             learnt_init_query=False):
+        """
+        Initializes the RTDETRDecoder module with the given parameters.
+
+        Args:
+            nc (int): Number of classes. Default is 80.
+            ch (tuple): Channels in the backbone feature maps. Default is (512, 1024, 2048).
+            hd (int): Dimension of hidden layers. Default is 256.
+            nq (int): Number of query points. Default is 300.
+            ndp (int): Number of decoder points. Default is 4.
+            nh (int): Number of heads in multi-head attention. Default is 8.
+            ndl (int): Number of decoder layers. Default is 6.
+            d_ffn (int): Dimension of the feed-forward networks. Default is 1024.
+            dropout (float): Dropout rate. Default is 0.
+            act (nn.Module): Activation function. Default is nn.ReLU.
+            eval_idx (int): Evaluation index. Default is -1.
+            nd (int): Number of denoising. Default is 100.
+            label_noise_ratio (float): Label noise ratio. Default is 0.5.
+            box_noise_scale (float): Box noise scale. Default is 1.0.
+            learnt_init_query (bool): Whether to learn initial query embeddings. Default is False.
+        """
         super().__init__()
         self.hidden_dim = hd
         self.nhead = nh
@@ -194,7 +225,7 @@ class RTDETRDecoder(nn.Module):
         self.num_queries = nq
         self.num_decoder_layers = ndl
 
-        #
+        # Backbone feature projection
         self.input_proj = nn.ModuleList(nn.Sequential(nn.Conv2d(x, hd, 1, bias=False), nn.BatchNorm2d(hd)) for x in ch)
         # NOTE: simplified version but it's not consistent with .pt weights.
         # self.input_proj = nn.ModuleList(Conv(x, hd, act=False) for x in ch)
@@ -203,24 +234,24 @@ class RTDETRDecoder(nn.Module):
         decoder_layer = DeformableTransformerDecoderLayer(hd, nh, d_ffn, dropout, act, self.nl, ndp)
         self.decoder = DeformableTransformerDecoder(hd, decoder_layer, ndl, eval_idx)
 
-        #
+        # Denoising part
         self.denoising_class_embed = nn.Embedding(nc, hd)
         self.num_denoising = nd
         self.label_noise_ratio = label_noise_ratio
         self.box_noise_scale = box_noise_scale
 
-        #
+        # Decoder embedding
         self.learnt_init_query = learnt_init_query
         if learnt_init_query:
             self.tgt_embed = nn.Embedding(nq, hd)
         self.query_pos_head = MLP(4, 2 * hd, hd, num_layers=2)
 
-        #
+        # Encoder head
         self.enc_output = nn.Sequential(nn.Linear(hd, hd), nn.LayerNorm(hd))
         self.enc_score_head = nn.Linear(hd, nc)
         self.enc_bbox_head = MLP(hd, hd, 4, num_layers=3)
 
-        #
+        # Decoder head
         self.dec_score_head = nn.ModuleList([nn.Linear(hd, nc) for _ in range(ndl)])
         self.dec_bbox_head = nn.ModuleList([MLP(hd, hd, 4, num_layers=3) for _ in range(ndl)])
 
@@ -230,10 +261,10 @@ class RTDETRDecoder(nn.Module):
         """Runs the forward pass of the module, returning bounding box and classification scores for the input."""
         from ultralytics.models.utils.ops import get_cdn_group
 
-        #
+        # Input projection and embedding
         feats, shapes = self._get_encoder_input(x)
 
-        #
+        # Prepare denoising training
         dn_embed, dn_bbox, attn_mask, dn_meta = \
             get_cdn_group(batch,
                           self.nc,
@@ -247,7 +278,7 @@ class RTDETRDecoder(nn.Module):
         embed, refer_bbox, enc_bboxes, enc_scores = \
             self._get_decoder_input(feats, shapes, dn_embed, dn_bbox)
 
-        #
+        # Decoder
         dec_bboxes, dec_scores = self.decoder(embed,
                                               refer_bbox,
                                               feats,
@@ -285,9 +316,9 @@ class RTDETRDecoder(nn.Module):
 
     def _get_encoder_input(self, x):
         """Processes and returns encoder inputs by getting projection features from input and concatenating them."""
-        #
+        # Get projection features
         x = [self.input_proj[i](feat) for i, feat in enumerate(x)]
-        #
+        # Get encoder inputs
         feats = []
         shapes = []
         for feat in x:
@@ -304,13 +335,13 @@ class RTDETRDecoder(nn.Module):
     def _get_decoder_input(self, feats, shapes, dn_embed=None, dn_bbox=None):
         """Generates and prepares the input required for the decoder from the provided features and shapes."""
         bs = len(feats)
-        #
+        # Prepare input for decoder
        anchors, valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device)
         features = self.enc_output(valid_mask * feats)  # bs, h*w, 256
 
         enc_outputs_scores = self.enc_score_head(features)  # (bs, h*w, nc)
 
-        #
+        # Query selection
         # (bs, num_queries)
         topk_ind = torch.topk(enc_outputs_scores.max(-1).values, self.num_queries, dim=1).indices.view(-1)
         # (bs, num_queries)
@@ -321,7 +352,7 @@ class RTDETRDecoder(nn.Module):
         # (bs, num_queries, 4)
         top_k_anchors = anchors[:, topk_ind].view(bs, self.num_queries, -1)
 
-        #
+        # Dynamic anchors + static content
         refer_bbox = self.enc_bbox_head(top_k_features) + top_k_anchors
 
         enc_bboxes = refer_bbox.sigmoid()
@@ -342,7 +373,7 @@ class RTDETRDecoder(nn.Module):
     # TODO
     def _reset_parameters(self):
         """Initializes or resets the parameters of the model's various components with predefined weights and biases."""
-        #
+        # Class and bbox head init
         bias_cls = bias_init_with_prob(0.01) / 80 * self.nc
         # NOTE: the weight initialization in `linear_init_` would cause NaN when training with custom datasets.
         # linear_init_(self.enc_score_head)
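The new Args block documents the decoder defaults. A hedged construction-and-inference sketch, assuming an ultralytics install; the feature-map sizes below are arbitrary examples matching the documented ch default:

    import torch
    from ultralytics.nn.modules.head import RTDETRDecoder

    decoder = RTDETRDecoder(nc=80, ch=(512, 1024, 2048), hd=256, nq=300).eval()
    feats = [torch.randn(1, c, s, s) for c, s in zip((512, 1024, 2048), (32, 16, 8))]
    with torch.no_grad():
        y, _ = decoder(feats)  # non-export eval path returns (y, x); y packs xywh boxes with class scores
    print(y.shape)  # expected (bs, nq, 4 + nc)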
ultralytics/nn/modules/transformer.py
CHANGED
@@ -81,7 +81,7 @@ class AIFI(TransformerEncoderLayer):
         """Forward pass for the AIFI transformer layer."""
         c, h, w = x.shape[1:]
         pos_embed = self.build_2d_sincos_position_embedding(w, h, c)
-        #
+        # Flatten [B, C, H, W] to [B, HxW, C]
         x = super().forward(x.flatten(2).permute(0, 2, 1), pos=pos_embed.to(device=x.device, dtype=x.dtype))
         return x.permute(0, 2, 1).view([-1, c, h, w]).contiguous()
 
@@ -213,7 +213,7 @@ class MSDeformAttn(nn.Module):
         if d_model % n_heads != 0:
             raise ValueError(f'd_model must be divisible by n_heads, but got {d_model} and {n_heads}')
         _d_per_head = d_model // n_heads
-        #
+        # Better to set _d_per_head to a power of 2 which is more efficient in a CUDA implementation
         assert _d_per_head * n_heads == d_model, '`d_model` must be divisible by `n_heads`'
 
         self.im2col_step = 64
ultralytics/nn/tasks.py
CHANGED
@@ -277,7 +277,7 @@ class DetectionModel(BaseModel):
         return torch.cat((x, y, wh, cls), dim)
 
     def _clip_augmented(self, y):
-        """Clip
+        """Clip YOLO augmented inference tails."""
         nl = self.model[-1].nl  # number of detection layers (P3-P5)
         g = sum(4 ** x for x in range(nl))  # grid points
         e = 1  # exclude layer count
@@ -375,9 +375,9 @@ class RTDETRDetectionModel(DetectionModel):
     """
     RTDETR (Real-time DEtection and Tracking using Transformers) Detection Model class.
 
-    This class is responsible for constructing the RTDETR architecture, defining loss functions, and
+    This class is responsible for constructing the RTDETR architecture, defining loss functions, and facilitating both
+    the training and inference processes. RTDETR is an object detection and tracking model that extends from the
+    DetectionModel base class.
 
     Attributes:
         cfg (str): The configuration file path or preset string. Default is 'rtdetr-l.yaml'.
@@ -418,7 +418,7 @@ class RTDETRDetectionModel(DetectionModel):
             preds (torch.Tensor, optional): Precomputed model predictions. Defaults to None.
 
         Returns:
-            tuple: A tuple containing the total loss and main three losses in a tensor.
+            (tuple): A tuple containing the total loss and main three losses in a tensor.
         """
         if not hasattr(self, 'criterion'):
             self.criterion = self.init_criterion()
@@ -466,7 +466,7 @@ class RTDETRDetectionModel(DetectionModel):
             augment (bool, optional): If True, perform data augmentation during inference. Defaults to False.
 
         Returns:
-            torch.Tensor: Model's output tensor.
+            (torch.Tensor): Model's output tensor.
         """
         y, dt = [], []  # outputs
         for m in self.model[:-1]:  # except the head part
@@ -491,7 +491,7 @@ class Ensemble(nn.ModuleList):
         super().__init__()
 
     def forward(self, x, augment=False, profile=False, visualize=False):
-        """Function generates the
+        """Function generates the YOLO network's final layer."""
         y = [module(x, augment, profile, visualize)[0] for module in self]
         # y = torch.stack(y).max(0)[0]  # max ensemble
         # y = torch.stack(y).mean(0)  # mean ensemble
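The RTDETRDetectionModel docstring above covers construction, training, and inference. A hedged end-to-end sketch through the public API (assumes ultralytics is installed; the weight file and image URL are the stock examples):

    from ultralytics import RTDETR

    model = RTDETR('rtdetr-l.pt')  # builds the RTDETRDetectionModel described above
    results = model('https://ultralytics.com/images/bus.jpg')  # inference; model.train(...) uses the same class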
ultralytics/utils/__init__.py
CHANGED
@@ -930,7 +930,8 @@ def url2file(url):
 PREFIX = colorstr('Ultralytics: ')
 SETTINGS = SettingsManager()  # initialize settings
 DATASETS_DIR = Path(SETTINGS['datasets_dir'])  # global datasets directory
-WEIGHTS_DIR = Path(SETTINGS['weights_dir'])
+WEIGHTS_DIR = Path(SETTINGS['weights_dir'])  # global weights directory
+RUNS_DIR = Path(SETTINGS['runs_dir'])  # global runs directory
 ENVIRONMENT = 'Colab' if is_colab() else 'Kaggle' if is_kaggle() else 'Jupyter' if is_jupyter() else \
     'Docker' if is_docker() else platform.system()
 TESTS_RUNNING = is_pytest_running() or is_github_actions_ci()
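A quick sketch to inspect the new global directories (RUNS_DIR is what the MLflow callback below uses for its default tracking URI); assumes ultralytics is installed:

    from ultralytics.utils import DATASETS_DIR, RUNS_DIR, WEIGHTS_DIR

    print(DATASETS_DIR)  # Path(SETTINGS['datasets_dir'])
    print(WEIGHTS_DIR)   # Path(SETTINGS['weights_dir'])
    print(RUNS_DIR)      # Path(SETTINGS['runs_dir']), newly exported in this release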
ultralytics/utils/benchmarks.py
CHANGED
@@ -184,6 +184,19 @@ class ProfileModels:
                  half=True,
                  trt=True,
                  device=None):
+        """
+        Initialize the ProfileModels class for profiling models.
+
+        Args:
+            paths (list): List of paths of the models to be profiled.
+            num_timed_runs (int, optional): Number of timed runs for the profiling. Default is 100.
+            num_warmup_runs (int, optional): Number of warmup runs before the actual profiling starts. Default is 10.
+            min_time (float, optional): Minimum time in seconds for profiling a model. Default is 60.
+            imgsz (int, optional): Size of the image used during profiling. Default is 640.
+            half (bool, optional): Flag to indicate whether to use half-precision floating point for profiling. Default is True.
+            trt (bool, optional): Flag to indicate whether to profile using TensorRT. Default is True.
+            device (torch.device, optional): Device used for profiling. If None, it is determined automatically. Default is None.
+        """
         self.paths = paths
         self.num_timed_runs = num_timed_runs
         self.num_warmup_runs = num_warmup_runs
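A hedged usage sketch for the constructor documented above (assumes ultralytics plus the ONNX toolchain are installed and 'yolov8n.pt' is available; profile() is the class's public entry point):

    from ultralytics.utils.benchmarks import ProfileModels

    profiler = ProfileModels(['yolov8n.pt'], num_timed_runs=100, num_warmup_runs=10, imgsz=640, half=True, trt=False)
    profiler.profile()  # exports and times each model; trt=False skips the TensorRT engine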
ultralytics/utils/callbacks/mlflow.py
CHANGED
@@ -1,64 +1,104 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
+"""
+MLflow Logging for Ultralytics YOLO.
 
+This module enables MLflow logging for Ultralytics YOLO. It logs metrics, parameters, and model artifacts.
+For setting up, a tracking URI should be specified. The logging can be customized using environment variables.
+
+Commands:
+    1. To set a project name:
+        `export MLFLOW_EXPERIMENT_NAME=<your_experiment_name>` or use the project=<project> argument
+
+    2. To set a run name:
+        `export MLFLOW_RUN=<your_run_name>` or use the name=<name> argument
+
+    3. To start a local MLflow server:
+        mlflow server --backend-store-uri runs/mlflow
+        It will by default start a local server at http://127.0.0.1:5000.
+        To specify a different URI, set the MLFLOW_TRACKING_URI environment variable.
+
+    4. To kill all running MLflow server instances:
+        ps aux | grep 'mlflow' | grep -v 'grep' | awk '{print $2}' | xargs kill -9
+"""
+
+from ultralytics.utils import LOGGER, RUNS_DIR, SETTINGS, TESTS_RUNNING, colorstr
 
 try:
+    import os
+
+    assert not TESTS_RUNNING or 'test_mlflow' in os.environ.get('PYTEST_CURRENT_TEST', '')  # do not log pytest
     assert SETTINGS['mlflow'] is True  # verify integration is enabled
     import mlflow
 
     assert hasattr(mlflow, '__version__')  # verify package is not directory
-    import re
+    from pathlib import Path
+    PREFIX = colorstr('MLflow: ')
 
 except (ImportError, AssertionError):
     mlflow = None
 
 
 def on_pretrain_routine_end(trainer):
-    """
+    """
+    Log training parameters to MLflow at the end of the pretraining routine.
+
+    This function sets up MLflow logging based on environment variables and trainer arguments. It sets the tracking URI,
+    experiment name, and run name, then starts the MLflow run if not already active. It finally logs the parameters
+    from the trainer.
+
+    Args:
+        trainer (ultralytics.engine.trainer.BaseTrainer): The training object with arguments and parameters to log.
+
+    Global:
+        mlflow: The imported mlflow module to use for logging.
+
+    Environment Variables:
+        MLFLOW_TRACKING_URI: The URI for MLflow tracking. If not set, defaults to 'runs/mlflow'.
+        MLFLOW_EXPERIMENT_NAME: The name of the MLflow experiment. If not set, defaults to trainer.args.project.
+        MLFLOW_RUN: The name of the MLflow run. If not set, defaults to trainer.args.name.
+    """
+    global mlflow
+
+    uri = os.environ.get('MLFLOW_TRACKING_URI') or str(RUNS_DIR / 'mlflow')
+    LOGGER.debug(f'{PREFIX} tracking uri: {uri}')
+    mlflow.set_tracking_uri(uri)
+
+    # Set experiment and run names
+    experiment_name = os.environ.get('MLFLOW_EXPERIMENT_NAME') or trainer.args.project or '/Shared/YOLOv8'
+    run_name = os.environ.get('MLFLOW_RUN') or trainer.args.name
+    mlflow.set_experiment(experiment_name)
+
+    mlflow.autolog()
+    try:
+        active_run = mlflow.active_run() or mlflow.start_run(run_name=run_name)
+        LOGGER.info(f'{PREFIX}logging run_id({active_run.info.run_id}) to {uri}')
+        if Path(uri).is_dir():
+            LOGGER.info(f"{PREFIX}view at http://127.0.0.1:5000 with 'mlflow server --backend-store-uri {uri}'")
+        LOGGER.info(f"{PREFIX}disable with 'yolo settings mlflow=False'")
+        mlflow.log_params(dict(trainer.args))
+    except Exception as e:
+        LOGGER.warning(f'{PREFIX}WARNING ⚠️ Failed to initialize: {e}\n'
+                       f'{PREFIX}WARNING ⚠️ Not tracking this run')
 
 
 def on_fit_epoch_end(trainer):
-    """
+    """Log training metrics at the end of each fit epoch to MLflow."""
     if mlflow:
+        sanitized_metrics = {k.replace('(', '').replace(')', ''): float(v) for k, v in trainer.metrics.items()}
+        mlflow.log_metrics(metrics=sanitized_metrics, step=trainer.epoch)
 
 
 def on_train_end(trainer):
-    """
+    """Log model artifacts at the end of the training."""
     if mlflow:
+        mlflow.log_artifact(str(trainer.best.parent))  # log save_dir/weights directory with best.pt and last.pt
+        for f in trainer.save_dir.glob('*'):  # log all other files in save_dir
+            if f.suffix in {'.png', '.jpg', '.csv', '.pt', '.yaml'}:
+                mlflow.log_artifact(str(f))
+
         mlflow.end_run()
-        LOGGER.
+        LOGGER.info(f'{PREFIX}results logged to {mlflow.get_tracking_uri()}\n'
+                    f"{PREFIX}disable with 'yolo settings mlflow=False'")
 
 
 callbacks = {
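A hedged training sketch wiring up the environment variables documented in the new module docstring (assumes mlflow and ultralytics are installed; model and dataset names are the stock examples):

    import os

    os.environ['MLFLOW_TRACKING_URI'] = 'runs/mlflow'     # local file store, as in the docstring's server command
    os.environ['MLFLOW_EXPERIMENT_NAME'] = 'yolov8-demo'  # or pass project='yolov8-demo' to train()
    os.environ['MLFLOW_RUN'] = 'baseline'                 # or pass name='baseline' to train()

    from ultralytics import YOLO

    YOLO('yolov8n.pt').train(data='coco8.yaml', epochs=1, imgsz=640)  # callbacks log params, per-epoch metrics, artifacts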
|