ultralytics 8.0.196__py3-none-any.whl → 8.0.198__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ultralytics might be problematic.

Files changed (49)
  1. ultralytics/__init__.py +1 -1
  2. ultralytics/cfg/__init__.py +4 -5
  3. ultralytics/data/augment.py +2 -2
  4. ultralytics/data/converter.py +12 -13
  5. ultralytics/data/dataset.py +1 -1
  6. ultralytics/engine/__init__.py +1 -0
  7. ultralytics/engine/exporter.py +1 -1
  8. ultralytics/engine/trainer.py +2 -1
  9. ultralytics/hub/session.py +1 -1
  10. ultralytics/models/fastsam/predict.py +33 -2
  11. ultralytics/models/fastsam/prompt.py +38 -1
  12. ultralytics/models/fastsam/utils.py +5 -5
  13. ultralytics/models/fastsam/val.py +27 -1
  14. ultralytics/models/nas/model.py +20 -0
  15. ultralytics/models/nas/predict.py +23 -0
  16. ultralytics/models/nas/val.py +24 -0
  17. ultralytics/models/rtdetr/val.py +17 -5
  18. ultralytics/models/sam/modules/decoders.py +26 -1
  19. ultralytics/models/sam/modules/encoders.py +31 -3
  20. ultralytics/models/sam/modules/sam.py +22 -7
  21. ultralytics/models/sam/modules/tiny_encoder.py +147 -45
  22. ultralytics/models/sam/modules/transformer.py +47 -2
  23. ultralytics/models/sam/predict.py +19 -2
  24. ultralytics/models/utils/loss.py +20 -2
  25. ultralytics/models/utils/ops.py +5 -5
  26. ultralytics/nn/modules/block.py +33 -10
  27. ultralytics/nn/modules/conv.py +16 -4
  28. ultralytics/nn/modules/head.py +48 -17
  29. ultralytics/nn/modules/transformer.py +2 -2
  30. ultralytics/nn/tasks.py +7 -7
  31. ultralytics/utils/__init__.py +2 -1
  32. ultralytics/utils/benchmarks.py +13 -0
  33. ultralytics/utils/callbacks/mlflow.py +76 -36
  34. ultralytics/utils/callbacks/wb.py +92 -1
  35. ultralytics/utils/checks.py +4 -4
  36. ultralytics/utils/errors.py +12 -0
  37. ultralytics/utils/files.py +1 -1
  38. ultralytics/utils/instance.py +41 -3
  39. ultralytics/utils/loss.py +22 -19
  40. ultralytics/utils/metrics.py +106 -24
  41. ultralytics/utils/tal.py +1 -1
  42. ultralytics/utils/torch_utils.py +4 -2
  43. ultralytics/utils/tuner.py +10 -4
  44. {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/METADATA +1 -1
  45. {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/RECORD +49 -49
  46. {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/LICENSE +0 -0
  47. {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/WHEEL +0 -0
  48. {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/entry_points.txt +0 -0
  49. {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/top_level.txt +0 -0
ultralytics/nn/modules/block.py CHANGED
@@ -37,7 +37,12 @@ class DFL(nn.Module):
 class Proto(nn.Module):
     """YOLOv8 mask Proto module for segmentation models."""
 
-    def __init__(self, c1, c_=256, c2=32):  # ch_in, number of protos, number of masks
+    def __init__(self, c1, c_=256, c2=32):
+        """
+        Initializes the YOLOv8 mask Proto module with specified number of protos and masks.
+
+        Input arguments are ch_in, number of protos, number of masks.
+        """
         super().__init__()
         self.cv1 = Conv(c1, c_, k=3)
         self.upsample = nn.ConvTranspose2d(c_, c_, 2, 2, 0, bias=True)  # nn.Upsample(scale_factor=2, mode='nearest')
@@ -124,7 +129,12 @@ class SPP(nn.Module):
 class SPPF(nn.Module):
     """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""
 
-    def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
+    def __init__(self, c1, c2, k=5):
+        """
+        Initializes the SPPF layer with given input/output channels and kernel size.
+
+        This module is equivalent to SPP(k=(5, 9, 13)).
+        """
         super().__init__()
         c_ = c1 // 2  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
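
The equivalence to SPP(k=(5, 9, 13)) noted in the new docstring follows from composing stride-1 max-pools: two chained k=5 pools act like one k=9 pool, three like one k=13. A minimal standalone sketch of this (not part of the diff; relies on PyTorch's implicit -inf padding in nn.MaxPool2d):

import torch
import torch.nn as nn

x = torch.randn(1, 8, 20, 20)
pool = nn.MaxPool2d(kernel_size=5, stride=1, padding=2)

# Sequential pooling as in SPPF: each pass widens the effective kernel
y1 = pool(x)   # effective kernel 5
y2 = pool(y1)  # effective kernel 9
y3 = pool(y2)  # effective kernel 13

# Parallel pooling as in SPP with k=(5, 9, 13)
spp = [nn.MaxPool2d(k, stride=1, padding=k // 2)(x) for k in (5, 9, 13)]

assert all(torch.equal(a, b) for a, b in zip((y1, y2, y3), spp))
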
@@ -142,7 +152,8 @@ class SPPF(nn.Module):
 class C1(nn.Module):
     """CSP Bottleneck with 1 convolution."""
 
-    def __init__(self, c1, c2, n=1):  # ch_in, ch_out, number
+    def __init__(self, c1, c2, n=1):
+        """Initializes the CSP Bottleneck with configurations for 1 convolution with arguments ch_in, ch_out, number."""
         super().__init__()
         self.cv1 = Conv(c1, c2, 1, 1)
         self.m = nn.Sequential(*(Conv(c2, c2, 3) for _ in range(n)))
@@ -156,7 +167,10 @@ class C1(nn.Module):
 class C2(nn.Module):
     """CSP Bottleneck with 2 convolutions."""
 
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+        """Initializes the CSP Bottleneck with 2 convolutions module with arguments ch_in, ch_out, number, shortcut,
+        groups, expansion.
+        """
         super().__init__()
         self.c = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, 2 * self.c, 1, 1)
@@ -173,7 +187,10 @@ class C2(nn.Module):
 class C2f(nn.Module):
     """Faster Implementation of CSP Bottleneck with 2 convolutions."""
 
-    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
+        """Initialize CSP bottleneck layer with two convolutions with arguments ch_in, ch_out, number, shortcut, groups,
+        expansion.
+        """
         super().__init__()
         self.c = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, 2 * self.c, 1, 1)
@@ -196,7 +213,8 @@ class C2f(nn.Module):
 class C3(nn.Module):
     """CSP Bottleneck with 3 convolutions."""
 
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+        """Initialize the CSP Bottleneck with given channels, number, shortcut, groups, and expansion values."""
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
@@ -259,7 +277,8 @@ class C3Ghost(C3):
 class GhostBottleneck(nn.Module):
     """Ghost Bottleneck https://github.com/huawei-noah/ghostnet."""
 
-    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
+    def __init__(self, c1, c2, k=3, s=1):
+        """Initializes GhostBottleneck module with arguments ch_in, ch_out, kernel, stride."""
         super().__init__()
         c_ = c2 // 2
         self.conv = nn.Sequential(
@@ -277,7 +296,10 @@ class GhostBottleneck(nn.Module):
 class Bottleneck(nn.Module):
     """Standard bottleneck."""
 
-    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):  # ch_in, ch_out, shortcut, groups, kernels, expand
+    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
+        """Initializes a bottleneck module with given input/output channels, shortcut option, group, kernels, and
+        expansion.
+        """
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, c_, k[0], 1)
@@ -285,14 +307,15 @@ class Bottleneck(nn.Module):
         self.add = shortcut and c1 == c2
 
     def forward(self, x):
-        """'forward()' applies the YOLOv5 FPN to input data."""
+        """'forward()' applies the YOLO FPN to input data."""
         return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
 
 
 class BottleneckCSP(nn.Module):
     """CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks."""
 
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+        """Initializes the CSP Bottleneck given arguments for ch_in, ch_out, number, shortcut, groups, expansion."""
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
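
As the Bottleneck code above shows, the residual add is gated by `self.add = shortcut and c1 == c2`, so the skip connection only fires when input and output channels match. A quick standalone check (illustrative usage, not part of the diff):

import torch
from ultralytics.nn.modules.block import Bottleneck

m = Bottleneck(64, 64, shortcut=True)   # residual add active: c1 == c2
n = Bottleneck(64, 128, shortcut=True)  # residual add skipped: channel mismatch
x = torch.randn(1, 64, 32, 32)
print(m(x).shape, n(x).shape)  # torch.Size([1, 64, 32, 32]) torch.Size([1, 128, 32, 32])
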
ultralytics/nn/modules/conv.py CHANGED
@@ -88,6 +88,7 @@ class DWConv(Conv):
     """Depth-wise convolution."""
 
     def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, stride, dilation, activation
+        """Initialize Depth-wise convolution with given parameters."""
         super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
 
 
@@ -95,6 +96,7 @@ class DWConvTranspose2d(nn.ConvTranspose2d):
     """Depth-wise transpose convolution."""
 
    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
+        """Initialize DWConvTranspose2d class with given parameters."""
        super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
 
 
@@ -121,12 +123,18 @@ class ConvTranspose(nn.Module):
 class Focus(nn.Module):
     """Focus wh information into c-space."""
 
-    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
+        """Initializes Focus object with user defined channel, convolution, padding, group and activation values."""
         super().__init__()
         self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
         # self.contract = Contract(gain=2)
 
-    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
+    def forward(self, x):
+        """
+        Applies convolution to concatenated tensor and returns the output.
+
+        Input shape is (b,c,w,h) and output shape is (b,4c,w/2,h/2).
+        """
         return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
         # return self.conv(self.contract(x))
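
The new forward() docstring documents the space-to-depth contract: the four interleaved slices halve the spatial dimensions and quadruple the channels before the convolution. A shape check of just the slicing step (standalone sketch, not part of the diff):

import torch

x = torch.randn(2, 3, 64, 64)  # (b, c, w, h)
patches = torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1)
print(patches.shape)  # torch.Size([2, 12, 32, 32]), i.e. (b, 4c, w/2, h/2)
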
@@ -134,7 +142,10 @@ class Focus(nn.Module):
 class GhostConv(nn.Module):
     """Ghost Convolution https://github.com/huawei-noah/ghostnet."""
 
-    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
+    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
+        """Initializes the GhostConv object with input channels, output channels, kernel size, stride, groups and
+        activation.
+        """
         super().__init__()
         c_ = c2 // 2  # hidden channels
         self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
@@ -280,7 +291,8 @@ class SpatialAttention(nn.Module):
 class CBAM(nn.Module):
     """Convolutional Block Attention Module."""
 
-    def __init__(self, c1, kernel_size=7):  # ch_in, kernels
+    def __init__(self, c1, kernel_size=7):
+        """Initialize CBAM with given input channel (c1) and kernel size."""
         super().__init__()
         self.channel_attention = ChannelAttention(c1)
         self.spatial_attention = SpatialAttention(kernel_size)
ultralytics/nn/modules/head.py CHANGED
@@ -25,7 +25,8 @@ class Detect(nn.Module):
     anchors = torch.empty(0)  # init
     strides = torch.empty(0)  # init
 
-    def __init__(self, nc=80, ch=()):  # detection layer
+    def __init__(self, nc=80, ch=()):
+        """Initializes the YOLOv8 detection layer with specified number of classes and channels."""
         super().__init__()
         self.nc = nc  # number of classes
         self.nl = len(ch)  # number of detection layers
@@ -149,7 +150,10 @@ class Pose(Detect):
 class Classify(nn.Module):
     """YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)."""
 
-    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):
+        """Initializes YOLOv8 classification head with specified input and output channels, kernel size, stride,
+        padding, and groups.
+        """
         super().__init__()
         c_ = 1280  # efficientnet_b0 size
         self.conv = Conv(c1, c_, k, s, p, g)
@@ -166,6 +170,13 @@ class Classify(nn.Module):
 
 
 class RTDETRDecoder(nn.Module):
+    """
+    Real-Time Deformable Transformer Decoder (RTDETRDecoder) module for object detection.
+
+    This decoder module utilizes Transformer architecture along with deformable convolutions to predict bounding boxes
+    and class labels for objects in an image. It integrates features from multiple layers and runs through a series of
+    Transformer decoder layers to output the final predictions.
+    """
     export = False  # export mode
 
     def __init__(
@@ -181,11 +192,31 @@ class RTDETRDecoder(nn.Module):
             dropout=0.,
             act=nn.ReLU(),
             eval_idx=-1,
-            # training args
+            # Training args
             nd=100,  # num denoising
             label_noise_ratio=0.5,
             box_noise_scale=1.0,
             learnt_init_query=False):
+        """
+        Initializes the RTDETRDecoder module with the given parameters.
+
+        Args:
+            nc (int): Number of classes. Default is 80.
+            ch (tuple): Channels in the backbone feature maps. Default is (512, 1024, 2048).
+            hd (int): Dimension of hidden layers. Default is 256.
+            nq (int): Number of query points. Default is 300.
+            ndp (int): Number of decoder points. Default is 4.
+            nh (int): Number of heads in multi-head attention. Default is 8.
+            ndl (int): Number of decoder layers. Default is 6.
+            d_ffn (int): Dimension of the feed-forward networks. Default is 1024.
+            dropout (float): Dropout rate. Default is 0.
+            act (nn.Module): Activation function. Default is nn.ReLU.
+            eval_idx (int): Evaluation index. Default is -1.
+            nd (int): Number of denoising. Default is 100.
+            label_noise_ratio (float): Label noise ratio. Default is 0.5.
+            box_noise_scale (float): Box noise scale. Default is 1.0.
+            learnt_init_query (bool): Whether to learn initial query embeddings. Default is False.
+        """
         super().__init__()
         self.hidden_dim = hd
         self.nhead = nh
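
The documented defaults make the decoder directly constructible; a hedged construction sketch (assumes the public import path ultralytics.nn.modules.head and backbone feature maps with the listed channel counts):

from ultralytics.nn.modules.head import RTDETRDecoder

# ch must match the channels of the feature maps passed to forward();
# all other arguments follow the defaults documented above.
decoder = RTDETRDecoder(nc=80, ch=(512, 1024, 2048), hd=256, nq=300)
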
@@ -194,7 +225,7 @@ class RTDETRDecoder(nn.Module):
         self.num_queries = nq
         self.num_decoder_layers = ndl
 
-        # backbone feature projection
+        # Backbone feature projection
         self.input_proj = nn.ModuleList(nn.Sequential(nn.Conv2d(x, hd, 1, bias=False), nn.BatchNorm2d(hd)) for x in ch)
         # NOTE: simplified version but it's not consistent with .pt weights.
         # self.input_proj = nn.ModuleList(Conv(x, hd, act=False) for x in ch)
@@ -203,24 +234,24 @@ class RTDETRDecoder(nn.Module):
         decoder_layer = DeformableTransformerDecoderLayer(hd, nh, d_ffn, dropout, act, self.nl, ndp)
         self.decoder = DeformableTransformerDecoder(hd, decoder_layer, ndl, eval_idx)
 
-        # denoising part
+        # Denoising part
         self.denoising_class_embed = nn.Embedding(nc, hd)
         self.num_denoising = nd
         self.label_noise_ratio = label_noise_ratio
         self.box_noise_scale = box_noise_scale
 
-        # decoder embedding
+        # Decoder embedding
         self.learnt_init_query = learnt_init_query
         if learnt_init_query:
             self.tgt_embed = nn.Embedding(nq, hd)
         self.query_pos_head = MLP(4, 2 * hd, hd, num_layers=2)
 
-        # encoder head
+        # Encoder head
         self.enc_output = nn.Sequential(nn.Linear(hd, hd), nn.LayerNorm(hd))
         self.enc_score_head = nn.Linear(hd, nc)
         self.enc_bbox_head = MLP(hd, hd, 4, num_layers=3)
 
-        # decoder head
+        # Decoder head
         self.dec_score_head = nn.ModuleList([nn.Linear(hd, nc) for _ in range(ndl)])
         self.dec_bbox_head = nn.ModuleList([MLP(hd, hd, 4, num_layers=3) for _ in range(ndl)])
 
@@ -230,10 +261,10 @@ class RTDETRDecoder(nn.Module):
         """Runs the forward pass of the module, returning bounding box and classification scores for the input."""
         from ultralytics.models.utils.ops import get_cdn_group
 
-        # input projection and embedding
+        # Input projection and embedding
         feats, shapes = self._get_encoder_input(x)
 
-        # prepare denoising training
+        # Prepare denoising training
         dn_embed, dn_bbox, attn_mask, dn_meta = \
             get_cdn_group(batch,
                           self.nc,
@@ -285,9 +316,9 @@ class RTDETRDecoder(nn.Module):
 
     def _get_encoder_input(self, x):
         """Processes and returns encoder inputs by getting projection features from input and concatenating them."""
-        # get projection features
+        # Get projection features
         x = [self.input_proj[i](feat) for i, feat in enumerate(x)]
-        # get encoder inputs
+        # Get encoder inputs
         feats = []
         shapes = []
         for feat in x:
@@ -304,13 +335,13 @@ class RTDETRDecoder(nn.Module):
     def _get_decoder_input(self, feats, shapes, dn_embed=None, dn_bbox=None):
         """Generates and prepares the input required for the decoder from the provided features and shapes."""
         bs = len(feats)
-        # prepare input for decoder
+        # Prepare input for decoder
         anchors, valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device)
         features = self.enc_output(valid_mask * feats)  # bs, h*w, 256
 
         enc_outputs_scores = self.enc_score_head(features)  # (bs, h*w, nc)
 
-        # query selection
+        # Query selection
         # (bs, num_queries)
         topk_ind = torch.topk(enc_outputs_scores.max(-1).values, self.num_queries, dim=1).indices.view(-1)
         # (bs, num_queries)
@@ -321,7 +352,7 @@ class RTDETRDecoder(nn.Module):
         # (bs, num_queries, 4)
         top_k_anchors = anchors[:, topk_ind].view(bs, self.num_queries, -1)
 
-        # dynamic anchors + static content
+        # Dynamic anchors + static content
         refer_bbox = self.enc_bbox_head(top_k_features) + top_k_anchors
 
         enc_bboxes = refer_bbox.sigmoid()
@@ -342,7 +373,7 @@ class RTDETRDecoder(nn.Module):
     # TODO
     def _reset_parameters(self):
         """Initializes or resets the parameters of the model's various components with predefined weights and biases."""
-        # class and bbox head init
+        # Class and bbox head init
         bias_cls = bias_init_with_prob(0.01) / 80 * self.nc
         # NOTE: the weight initialization in `linear_init_` would cause NaN when training with custom datasets.
         # linear_init_(self.enc_score_head)
ultralytics/nn/modules/transformer.py CHANGED
@@ -81,7 +81,7 @@ class AIFI(TransformerEncoderLayer):
         """Forward pass for the AIFI transformer layer."""
         c, h, w = x.shape[1:]
         pos_embed = self.build_2d_sincos_position_embedding(w, h, c)
-        # flatten [B, C, H, W] to [B, HxW, C]
+        # Flatten [B, C, H, W] to [B, HxW, C]
         x = super().forward(x.flatten(2).permute(0, 2, 1), pos=pos_embed.to(device=x.device, dtype=x.dtype))
         return x.permute(0, 2, 1).view([-1, c, h, w]).contiguous()
 
@@ -213,7 +213,7 @@ class MSDeformAttn(nn.Module):
         if d_model % n_heads != 0:
             raise ValueError(f'd_model must be divisible by n_heads, but got {d_model} and {n_heads}')
         _d_per_head = d_model // n_heads
-        # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation
+        # Better to set _d_per_head to a power of 2 which is more efficient in a CUDA implementation
         assert _d_per_head * n_heads == d_model, '`d_model` must be divisible by `n_heads`'
 
         self.im2col_step = 64
ultralytics/nn/tasks.py CHANGED
@@ -277,7 +277,7 @@ class DetectionModel(BaseModel):
         return torch.cat((x, y, wh, cls), dim)
 
     def _clip_augmented(self, y):
-        """Clip YOLOv5 augmented inference tails."""
+        """Clip YOLO augmented inference tails."""
         nl = self.model[-1].nl  # number of detection layers (P3-P5)
         g = sum(4 ** x for x in range(nl))  # grid points
         e = 1  # exclude layer count
@@ -375,9 +375,9 @@ class RTDETRDetectionModel(DetectionModel):
     """
     RTDETR (Real-time DEtection and Tracking using Transformers) Detection Model class.
 
-    This class is responsible for constructing the RTDETR architecture, defining loss functions, and
-    facilitating both the training and inference processes. RTDETR is an object detection and tracking model
-    that extends from the DetectionModel base class.
+    This class is responsible for constructing the RTDETR architecture, defining loss functions, and facilitating both
+    the training and inference processes. RTDETR is an object detection and tracking model that extends from the
+    DetectionModel base class.
 
     Attributes:
         cfg (str): The configuration file path or preset string. Default is 'rtdetr-l.yaml'.
@@ -418,7 +418,7 @@ class RTDETRDetectionModel(DetectionModel):
             preds (torch.Tensor, optional): Precomputed model predictions. Defaults to None.
 
         Returns:
-            tuple: A tuple containing the total loss and main three losses in a tensor.
+            (tuple): A tuple containing the total loss and main three losses in a tensor.
         """
         if not hasattr(self, 'criterion'):
             self.criterion = self.init_criterion()
@@ -466,7 +466,7 @@ class RTDETRDetectionModel(DetectionModel):
             augment (bool, optional): If True, perform data augmentation during inference. Defaults to False.
 
         Returns:
-            torch.Tensor: Model's output tensor.
+            (torch.Tensor): Model's output tensor.
         """
         y, dt = [], []  # outputs
         for m in self.model[:-1]:  # except the head part
@@ -491,7 +491,7 @@ class Ensemble(nn.ModuleList):
         super().__init__()
 
     def forward(self, x, augment=False, profile=False, visualize=False):
-        """Function generates the YOLOv5 network's final layer."""
+        """Function generates the YOLO network's final layer."""
         y = [module(x, augment, profile, visualize)[0] for module in self]
         # y = torch.stack(y).max(0)[0]  # max ensemble
         # y = torch.stack(y).mean(0)  # mean ensemble
ultralytics/utils/__init__.py CHANGED
@@ -930,7 +930,8 @@ def url2file(url):
 PREFIX = colorstr('Ultralytics: ')
 SETTINGS = SettingsManager()  # initialize settings
 DATASETS_DIR = Path(SETTINGS['datasets_dir'])  # global datasets directory
-WEIGHTS_DIR = Path(SETTINGS['weights_dir'])
+WEIGHTS_DIR = Path(SETTINGS['weights_dir'])  # global weights directory
+RUNS_DIR = Path(SETTINGS['runs_dir'])  # global runs directory
 ENVIRONMENT = 'Colab' if is_colab() else 'Kaggle' if is_kaggle() else 'Jupyter' if is_jupyter() else \
     'Docker' if is_docker() else platform.system()
 TESTS_RUNNING = is_pytest_running() or is_github_actions_ci()
ultralytics/utils/benchmarks.py CHANGED
@@ -184,6 +184,19 @@ class ProfileModels:
                  half=True,
                  trt=True,
                  device=None):
+        """
+        Initialize the ProfileModels class for profiling models.
+
+        Args:
+            paths (list): List of paths of the models to be profiled.
+            num_timed_runs (int, optional): Number of timed runs for the profiling. Default is 100.
+            num_warmup_runs (int, optional): Number of warmup runs before the actual profiling starts. Default is 10.
+            min_time (float, optional): Minimum time in seconds for profiling a model. Default is 60.
+            imgsz (int, optional): Size of the image used during profiling. Default is 640.
+            half (bool, optional): Flag to indicate whether to use half-precision floating point for profiling. Default is True.
+            trt (bool, optional): Flag to indicate whether to profile using TensorRT. Default is True.
+            device (torch.device, optional): Device used for profiling. If None, it is determined automatically. Default is None.
+        """
         self.paths = paths
         self.num_timed_runs = num_timed_runs
         self.num_warmup_runs = num_warmup_runs
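
The documented arguments map directly onto typical usage; a sketch assuming the public ProfileModels.profile() entry point of ultralytics.utils.benchmarks:

from ultralytics.utils.benchmarks import ProfileModels

# Profiles each model with num_warmup_runs warmup passes followed by
# num_timed_runs timed passes (or until min_time elapses) at imgsz.
ProfileModels(['yolov8n.pt'], imgsz=640, half=True, trt=True).profile()
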
ultralytics/utils/callbacks/mlflow.py CHANGED
@@ -1,64 +1,104 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
+"""
+MLflow Logging for Ultralytics YOLO.
 
-from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING, colorstr
+This module enables MLflow logging for Ultralytics YOLO. It logs metrics, parameters, and model artifacts.
+For setting up, a tracking URI should be specified. The logging can be customized using environment variables.
+
+Commands:
+    1. To set a project name:
+        `export MLFLOW_EXPERIMENT_NAME=<your_experiment_name>` or use the project=<project> argument
+
+    2. To set a run name:
+        `export MLFLOW_RUN=<your_run_name>` or use the name=<name> argument
+
+    3. To start a local MLflow server:
+        mlflow server --backend-store-uri runs/mlflow
+        It will by default start a local server at http://127.0.0.1:5000.
+        To specify a different URI, set the MLFLOW_TRACKING_URI environment variable.
+
+    4. To kill all running MLflow server instances:
+        ps aux | grep 'mlflow' | grep -v 'grep' | awk '{print $2}' | xargs kill -9
+"""
+
+from ultralytics.utils import LOGGER, RUNS_DIR, SETTINGS, TESTS_RUNNING, colorstr
 
 try:
-    assert not TESTS_RUNNING  # do not log pytest
+    import os
+
+    assert not TESTS_RUNNING or 'test_mlflow' in os.environ.get('PYTEST_CURRENT_TEST', '')  # do not log pytest
     assert SETTINGS['mlflow'] is True  # verify integration is enabled
     import mlflow
 
     assert hasattr(mlflow, '__version__')  # verify package is not directory
-    PREFIX = colorstr('MLFlow:')
-    import os
-    import re
+    from pathlib import Path
+    PREFIX = colorstr('MLflow: ')
 
 except (ImportError, AssertionError):
     mlflow = None
 
 
 def on_pretrain_routine_end(trainer):
-    """Logs training parameters to MLflow."""
-    global mlflow, run, experiment_name
+    """
+    Log training parameters to MLflow at the end of the pretraining routine.
 
-    if os.environ.get('MLFLOW_TRACKING_URI') is None:
-        mlflow = None
+    This function sets up MLflow logging based on environment variables and trainer arguments. It sets the tracking URI,
+    experiment name, and run name, then starts the MLflow run if not already active. It finally logs the parameters
+    from the trainer.
 
-    if mlflow:
-        mlflow_location = os.environ['MLFLOW_TRACKING_URI']  # "http://192.168.xxx.xxx:5000"
-        LOGGER.debug(f'{PREFIX} tracking uri: {mlflow_location}')
-        mlflow.set_tracking_uri(mlflow_location)
-        experiment_name = os.environ.get('MLFLOW_EXPERIMENT_NAME') or trainer.args.project or '/Shared/YOLOv8'
-        run_name = os.environ.get('MLFLOW_RUN') or trainer.args.name
-        experiment = mlflow.set_experiment(experiment_name)  # change since mlflow does this now by default
-
-        mlflow.autolog()
-        prefix = colorstr('MLFlow: ')
-        try:
-            run, active_run = mlflow, mlflow.active_run()
-            if not active_run:
-                active_run = mlflow.start_run(experiment_id=experiment.experiment_id, run_name=run_name)
-            LOGGER.info(f'{prefix}Using run_id({active_run.info.run_id}) at {mlflow_location}')
-            run.log_params(vars(trainer.model.args))
-        except Exception as err:
-            LOGGER.error(f'{prefix}Failing init - {repr(err)}')
-            LOGGER.warning(f'{prefix}Continuing without Mlflow')
+    Args:
+        trainer (ultralytics.engine.trainer.BaseTrainer): The training object with arguments and parameters to log.
+
+    Global:
+        mlflow: The imported mlflow module to use for logging.
+
+    Environment Variables:
+        MLFLOW_TRACKING_URI: The URI for MLflow tracking. If not set, defaults to 'runs/mlflow'.
+        MLFLOW_EXPERIMENT_NAME: The name of the MLflow experiment. If not set, defaults to trainer.args.project.
+        MLFLOW_RUN: The name of the MLflow run. If not set, defaults to trainer.args.name.
+    """
+    global mlflow
+
+    uri = os.environ.get('MLFLOW_TRACKING_URI') or str(RUNS_DIR / 'mlflow')
+    LOGGER.debug(f'{PREFIX} tracking uri: {uri}')
+    mlflow.set_tracking_uri(uri)
+
+    # Set experiment and run names
+    experiment_name = os.environ.get('MLFLOW_EXPERIMENT_NAME') or trainer.args.project or '/Shared/YOLOv8'
+    run_name = os.environ.get('MLFLOW_RUN') or trainer.args.name
+    mlflow.set_experiment(experiment_name)
+
+    mlflow.autolog()
+    try:
+        active_run = mlflow.active_run() or mlflow.start_run(run_name=run_name)
+        LOGGER.info(f'{PREFIX}logging run_id({active_run.info.run_id}) to {uri}')
+        if Path(uri).is_dir():
+            LOGGER.info(f"{PREFIX}view at http://127.0.0.1:5000 with 'mlflow server --backend-store-uri {uri}'")
+        LOGGER.info(f"{PREFIX}disable with 'yolo settings mlflow=False'")
+        mlflow.log_params(dict(trainer.args))
+    except Exception as e:
+        LOGGER.warning(f'{PREFIX}WARNING ⚠️ Failed to initialize: {e}\n'
+                       f'{PREFIX}WARNING ⚠️ Not tracking this run')
 
 
 def on_fit_epoch_end(trainer):
-    """Logs training metrics to Mlflow."""
+    """Log training metrics at the end of each fit epoch to MLflow."""
     if mlflow:
-        metrics_dict = {f"{re.sub('[()]', '', k)}": float(v) for k, v in trainer.metrics.items()}
-        run.log_metrics(metrics=metrics_dict, step=trainer.epoch)
+        sanitized_metrics = {k.replace('(', '').replace(')', ''): float(v) for k, v in trainer.metrics.items()}
+        mlflow.log_metrics(metrics=sanitized_metrics, step=trainer.epoch)
 
 
 def on_train_end(trainer):
-    """Called at end of train loop to log model artifact info."""
+    """Log model artifacts at the end of the training."""
     if mlflow:
-        run.log_artifact(trainer.last)
-        run.log_artifact(trainer.best)
-        run.log_artifact(trainer.save_dir)
+        mlflow.log_artifact(str(trainer.best.parent))  # log save_dir/weights directory with best.pt and last.pt
+        for f in trainer.save_dir.glob('*'):  # log all other files in save_dir
+            if f.suffix in {'.png', '.jpg', '.csv', '.pt', '.yaml'}:
+                mlflow.log_artifact(str(f))
+
         mlflow.end_run()
-        LOGGER.debug(f'{PREFIX} ending run')
+        LOGGER.info(f'{PREFIX}results logged to {mlflow.get_tracking_uri()}\n'
+                    f"{PREFIX}disable with 'yolo settings mlflow=False'")
 
 
 callbacks = {
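
Taken together, the rewritten callbacks mean MLflow logging now works without a pre-set MLFLOW_TRACKING_URI, falling back to RUNS_DIR / 'mlflow'. A hedged end-to-end sketch (assumes mlflow is installed and the integration enabled via `yolo settings mlflow=True`; the experiment and run names below are illustrative placeholders):

import os
from ultralytics import YOLO

os.environ['MLFLOW_EXPERIMENT_NAME'] = 'my-experiment'  # or pass project=...
os.environ['MLFLOW_RUN'] = 'run-1'                      # or pass name=...

# on_pretrain_routine_end logs params, on_fit_epoch_end logs per-epoch
# metrics, and on_train_end logs weights and result files as artifacts.
YOLO('yolov8n.pt').train(data='coco128.yaml', epochs=3)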