ultralytics 8.0.194__py3-none-any.whl → 8.0.196__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ultralytics might be problematic.

Files changed (84)
  1. ultralytics/__init__.py +1 -1
  2. ultralytics/cfg/__init__.py +5 -6
  3. ultralytics/data/augment.py +234 -29
  4. ultralytics/data/base.py +2 -1
  5. ultralytics/data/build.py +9 -3
  6. ultralytics/data/converter.py +5 -2
  7. ultralytics/data/dataset.py +16 -2
  8. ultralytics/data/loaders.py +111 -7
  9. ultralytics/data/utils.py +3 -3
  10. ultralytics/engine/exporter.py +1 -3
  11. ultralytics/engine/model.py +16 -9
  12. ultralytics/engine/predictor.py +10 -6
  13. ultralytics/engine/results.py +18 -8
  14. ultralytics/engine/trainer.py +19 -31
  15. ultralytics/engine/tuner.py +20 -20
  16. ultralytics/engine/validator.py +3 -4
  17. ultralytics/hub/__init__.py +2 -2
  18. ultralytics/hub/auth.py +18 -3
  19. ultralytics/hub/session.py +1 -0
  20. ultralytics/hub/utils.py +1 -3
  21. ultralytics/models/fastsam/model.py +2 -1
  22. ultralytics/models/fastsam/predict.py +10 -7
  23. ultralytics/models/fastsam/prompt.py +15 -1
  24. ultralytics/models/nas/model.py +3 -1
  25. ultralytics/models/rtdetr/model.py +4 -6
  26. ultralytics/models/rtdetr/predict.py +2 -1
  27. ultralytics/models/rtdetr/train.py +2 -1
  28. ultralytics/models/rtdetr/val.py +1 -0
  29. ultralytics/models/sam/amg.py +12 -6
  30. ultralytics/models/sam/model.py +5 -6
  31. ultralytics/models/sam/modules/decoders.py +5 -1
  32. ultralytics/models/sam/modules/encoders.py +15 -12
  33. ultralytics/models/sam/modules/tiny_encoder.py +38 -2
  34. ultralytics/models/sam/modules/transformer.py +2 -4
  35. ultralytics/models/sam/predict.py +8 -4
  36. ultralytics/models/utils/loss.py +35 -8
  37. ultralytics/models/utils/ops.py +14 -18
  38. ultralytics/models/yolo/classify/predict.py +1 -0
  39. ultralytics/models/yolo/classify/train.py +4 -2
  40. ultralytics/models/yolo/classify/val.py +1 -0
  41. ultralytics/models/yolo/detect/train.py +4 -3
  42. ultralytics/models/yolo/model.py +2 -4
  43. ultralytics/models/yolo/pose/predict.py +1 -0
  44. ultralytics/models/yolo/segment/predict.py +2 -0
  45. ultralytics/models/yolo/segment/val.py +1 -1
  46. ultralytics/nn/autobackend.py +54 -43
  47. ultralytics/nn/modules/__init__.py +13 -9
  48. ultralytics/nn/modules/block.py +11 -5
  49. ultralytics/nn/modules/conv.py +16 -7
  50. ultralytics/nn/modules/head.py +6 -3
  51. ultralytics/nn/modules/transformer.py +47 -15
  52. ultralytics/nn/modules/utils.py +6 -4
  53. ultralytics/nn/tasks.py +61 -21
  54. ultralytics/trackers/bot_sort.py +53 -6
  55. ultralytics/trackers/byte_tracker.py +71 -15
  56. ultralytics/trackers/track.py +0 -1
  57. ultralytics/trackers/utils/gmc.py +23 -0
  58. ultralytics/trackers/utils/kalman_filter.py +6 -6
  59. ultralytics/utils/__init__.py +32 -19
  60. ultralytics/utils/autobatch.py +1 -3
  61. ultralytics/utils/benchmarks.py +14 -1
  62. ultralytics/utils/callbacks/base.py +1 -3
  63. ultralytics/utils/callbacks/comet.py +11 -3
  64. ultralytics/utils/callbacks/dvc.py +9 -0
  65. ultralytics/utils/callbacks/neptune.py +5 -6
  66. ultralytics/utils/callbacks/wb.py +1 -0
  67. ultralytics/utils/checks.py +13 -9
  68. ultralytics/utils/dist.py +2 -1
  69. ultralytics/utils/downloads.py +7 -3
  70. ultralytics/utils/files.py +3 -3
  71. ultralytics/utils/instance.py +12 -3
  72. ultralytics/utils/loss.py +97 -22
  73. ultralytics/utils/metrics.py +35 -34
  74. ultralytics/utils/ops.py +10 -9
  75. ultralytics/utils/patches.py +9 -7
  76. ultralytics/utils/plotting.py +4 -3
  77. ultralytics/utils/torch_utils.py +8 -6
  78. ultralytics/utils/triton.py +87 -0
  79. {ultralytics-8.0.194.dist-info → ultralytics-8.0.196.dist-info}/METADATA +1 -1
  80. {ultralytics-8.0.194.dist-info → ultralytics-8.0.196.dist-info}/RECORD +84 -83
  81. {ultralytics-8.0.194.dist-info → ultralytics-8.0.196.dist-info}/LICENSE +0 -0
  82. {ultralytics-8.0.194.dist-info → ultralytics-8.0.196.dist-info}/WHEEL +0 -0
  83. {ultralytics-8.0.194.dist-info → ultralytics-8.0.196.dist-info}/entry_points.txt +0 -0
  84. {ultralytics-8.0.194.dist-info → ultralytics-8.0.196.dist-info}/top_level.txt +0 -0
ultralytics/nn/modules/head.py CHANGED
@@ -1,7 +1,5 @@
  # Ultralytics YOLO 🚀, AGPL-3.0 license
- """
- Model head modules
- """
+ """Model head modules."""

  import math

@@ -229,6 +227,7 @@ class RTDETRDecoder(nn.Module):
  self._reset_parameters()

  def forward(self, x, batch=None):
+ """Runs the forward pass of the module, returning bounding box and classification scores for the input."""
  from ultralytics.models.utils.ops import get_cdn_group

  # input projection and embedding
@@ -265,6 +264,7 @@ class RTDETRDecoder(nn.Module):
  return y if self.export else (y, x)

  def _generate_anchors(self, shapes, grid_size=0.05, dtype=torch.float32, device='cpu', eps=1e-2):
+ """Generates anchor bounding boxes for given shapes with specific grid size and validates them."""
  anchors = []
  for i, (h, w) in enumerate(shapes):
  sy = torch.arange(end=h, dtype=dtype, device=device)
@@ -284,6 +284,7 @@ class RTDETRDecoder(nn.Module):
  return anchors, valid_mask

  def _get_encoder_input(self, x):
+ """Processes and returns encoder inputs by getting projection features from input and concatenating them."""
  # get projection features
  x = [self.input_proj[i](feat) for i, feat in enumerate(x)]
  # get encoder inputs
@@ -301,6 +302,7 @@ class RTDETRDecoder(nn.Module):
  return feats, shapes

  def _get_decoder_input(self, feats, shapes, dn_embed=None, dn_bbox=None):
+ """Generates and prepares the input required for the decoder from the provided features and shapes."""
  bs = len(feats)
  # prepare input for decoder
  anchors, valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device)
@@ -339,6 +341,7 @@ class RTDETRDecoder(nn.Module):

  # TODO
  def _reset_parameters(self):
+ """Initializes or resets the parameters of the model's various components with predefined weights and biases."""
  # class and bbox head init
  bias_cls = bias_init_with_prob(0.01) / 80 * self.nc
  # NOTE: the weight initialization in `linear_init_` would cause NaN when training with custom datasets.
ultralytics/nn/modules/transformer.py CHANGED
@@ -1,7 +1,5 @@
  # Ultralytics YOLO 🚀, AGPL-3.0 license
- """
- Transformer modules
- """
+ """Transformer modules."""

  import math

@@ -18,9 +16,10 @@ __all__ = ('TransformerEncoderLayer', 'TransformerLayer', 'TransformerBlock', 'M


  class TransformerEncoderLayer(nn.Module):
- """Transformer Encoder."""
+ """Defines a single layer of the transformer encoder."""

  def __init__(self, c1, cm=2048, num_heads=8, dropout=0.0, act=nn.GELU(), normalize_before=False):
+ """Initialize the TransformerEncoderLayer with specified parameters."""
  super().__init__()
  from ...utils.torch_utils import TORCH_1_9
  if not TORCH_1_9:
@@ -41,10 +40,11 @@ class TransformerEncoderLayer(nn.Module):
  self.normalize_before = normalize_before

  def with_pos_embed(self, tensor, pos=None):
- """Add position embeddings if given."""
+ """Add position embeddings to the tensor if provided."""
  return tensor if pos is None else tensor + pos

  def forward_post(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
+ """Performs forward pass with post-normalization."""
  q = k = self.with_pos_embed(src, pos)
  src2 = self.ma(q, k, value=src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0]
  src = src + self.dropout1(src2)
@@ -54,6 +54,7 @@ class TransformerEncoderLayer(nn.Module):
  return self.norm2(src)

  def forward_pre(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
+ """Performs forward pass with pre-normalization."""
  src2 = self.norm1(src)
  q = k = self.with_pos_embed(src2, pos)
  src2 = self.ma(q, k, value=src2, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0]
@@ -70,11 +71,14 @@ class TransformerEncoderLayer(nn.Module):


  class AIFI(TransformerEncoderLayer):
+ """Defines the AIFI transformer layer."""

  def __init__(self, c1, cm=2048, num_heads=8, dropout=0, act=nn.GELU(), normalize_before=False):
+ """Initialize the AIFI instance with specified parameters."""
  super().__init__(c1, cm, num_heads, dropout, act, normalize_before)

  def forward(self, x):
+ """Forward pass for the AIFI transformer layer."""
  c, h, w = x.shape[1:]
  pos_embed = self.build_2d_sincos_position_embedding(w, h, c)
  # flatten [B, C, H, W] to [B, HxW, C]
@@ -82,7 +86,8 @@ class AIFI(TransformerEncoderLayer):
  return x.permute(0, 2, 1).view([-1, c, h, w]).contiguous()

  @staticmethod
- def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.):
+ def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.0):
+ """Builds 2D sine-cosine position embedding."""
  grid_w = torch.arange(int(w), dtype=torch.float32)
  grid_h = torch.arange(int(h), dtype=torch.float32)
  grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing='ij')
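For readers unfamiliar with the positional encoding touched above: `build_2d_sincos_position_embedding` gives each (x, y) location of the flattened feature map a fixed sine/cosine code that AIFI adds before attention. A minimal standalone sketch of the same construction (not the library's exact code; shapes are illustrative):

    import torch

    def sincos_pos_embed_2d(w, h, embed_dim=256, temperature=10000.0):
        """Sketch: 2D sine-cosine position embedding of shape [1, w*h, embed_dim]."""
        assert embed_dim % 4 == 0, 'embed_dim must be divisible by 4 (sin/cos per axis)'
        grid_w = torch.arange(int(w), dtype=torch.float32)
        grid_h = torch.arange(int(h), dtype=torch.float32)
        grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing='ij')
        pos_dim = embed_dim // 4
        omega = 1.0 / temperature ** (torch.arange(pos_dim, dtype=torch.float32) / pos_dim)
        out_w = grid_w.flatten()[..., None] @ omega[None]  # [w*h, pos_dim]
        out_h = grid_h.flatten()[..., None] @ omega[None]
        return torch.cat([torch.sin(out_w), torch.cos(out_w), torch.sin(out_h), torch.cos(out_h)], 1)[None]

    pe = sincos_pos_embed_2d(20, 20, 256)  # e.g. a 20x20 feature map with 256 channels -> [1, 400, 256]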
@@ -140,27 +145,32 @@ class TransformerBlock(nn.Module):


  class MLPBlock(nn.Module):
+ """Implements a single block of a multi-layer perceptron."""

  def __init__(self, embedding_dim, mlp_dim, act=nn.GELU):
+ """Initialize the MLPBlock with specified embedding dimension, MLP dimension, and activation function."""
  super().__init__()
  self.lin1 = nn.Linear(embedding_dim, mlp_dim)
  self.lin2 = nn.Linear(mlp_dim, embedding_dim)
  self.act = act()

  def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """Forward pass for the MLPBlock."""
  return self.lin2(self.act(self.lin1(x)))


  class MLP(nn.Module):
- """ Very simple multi-layer perceptron (also called FFN)"""
+ """Implements a simple multi-layer perceptron (also called FFN)."""

  def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
+ """Initialize the MLP with specified input, hidden, output dimensions and number of layers."""
  super().__init__()
  self.num_layers = num_layers
  h = [hidden_dim] * (num_layers - 1)
  self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))

  def forward(self, x):
+ """Forward pass for the entire MLP."""
  for i, layer in enumerate(self.layers):
  x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
  return x
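The MLP above chains `num_layers` linear layers and applies ReLU between all but the last, the pattern DETR-style heads use to regress box coordinates. A hedged usage sketch (dimensions chosen only for illustration; the import path follows the file shown in this diff):

    import torch
    from ultralytics.nn.modules.transformer import MLP

    # Three linear layers 256 -> 256 -> 256 -> 4, with ReLU after the first two only.
    bbox_head = MLP(input_dim=256, hidden_dim=256, output_dim=4, num_layers=3)
    out = bbox_head(torch.randn(2, 300, 256))  # -> torch.Size([2, 300, 4])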
@@ -168,17 +178,22 @@ class MLP(nn.Module):

  class LayerNorm2d(nn.Module):
  """
- LayerNorm2d module from https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py
+ 2D Layer Normalization module inspired by Detectron2 and ConvNeXt implementations.
+
+ Original implementation at
+ https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py
  https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119
  """

  def __init__(self, num_channels, eps=1e-6):
+ """Initialize LayerNorm2d with the given parameters."""
  super().__init__()
  self.weight = nn.Parameter(torch.ones(num_channels))
  self.bias = nn.Parameter(torch.zeros(num_channels))
  self.eps = eps

  def forward(self, x):
+ """Perform forward pass for 2D layer normalization."""
  u = x.mean(1, keepdim=True)
  s = (x - u).pow(2).mean(1, keepdim=True)
  x = (x - u) / torch.sqrt(s + self.eps)
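The forward pass above normalizes each pixel's channel vector of an NCHW tensor (mean and variance over dim 1). A small hedged check that this matches torch's built-in layer norm applied channel-last, with shapes chosen only for illustration:

    import torch
    import torch.nn.functional as F

    x = torch.randn(2, 8, 4, 4)                      # [N, C, H, W]
    u = x.mean(1, keepdim=True)                      # per-pixel mean over channels
    s = (x - u).pow(2).mean(1, keepdim=True)         # per-pixel (biased) variance
    manual = (x - u) / torch.sqrt(s + 1e-6)
    reference = F.layer_norm(x.permute(0, 2, 3, 1), (8,), eps=1e-6).permute(0, 3, 1, 2)
    assert torch.allclose(manual, reference, atol=1e-5)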
@@ -187,11 +202,13 @@ class LayerNorm2d(nn.Module):

  class MSDeformAttn(nn.Module):
  """
- Original Multi-Scale Deformable Attention Module.
+ Multi-Scale Deformable Attention Module based on Deformable-DETR and PaddleDetection implementations.
+
  https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/ops/modules/ms_deform_attn.py
  """

  def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4):
+ """Initialize MSDeformAttn with the given parameters."""
  super().__init__()
  if d_model % n_heads != 0:
  raise ValueError(f'd_model must be divisible by n_heads, but got {d_model} and {n_heads}')
@@ -214,6 +231,7 @@ class MSDeformAttn(nn.Module):
  self._reset_parameters()

  def _reset_parameters(self):
+ """Reset module parameters."""
  constant_(self.sampling_offsets.weight.data, 0.)
  thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads)
  grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
@@ -232,7 +250,10 @@ class MSDeformAttn(nn.Module):

  def forward(self, query, refer_bbox, value, value_shapes, value_mask=None):
  """
+ Perform forward pass for multi-scale deformable attention.
+
  https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
+
  Args:
  query (torch.Tensor): [bs, query_length, C]
  refer_bbox (torch.Tensor): [bs, query_length, n_levels, 2], range in [0, 1], top-left (0,0),
@@ -272,24 +293,27 @@ class MSDeformAttn(nn.Module):

  class DeformableTransformerDecoderLayer(nn.Module):
  """
+ Deformable Transformer Decoder Layer inspired by PaddleDetection and Deformable-DETR implementations.
+
  https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
  https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/deformable_transformer.py
  """

  def __init__(self, d_model=256, n_heads=8, d_ffn=1024, dropout=0., act=nn.ReLU(), n_levels=4, n_points=4):
+ """Initialize the DeformableTransformerDecoderLayer with the given parameters."""
  super().__init__()

- # self attention
+ # Self attention
  self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout)
  self.dropout1 = nn.Dropout(dropout)
  self.norm1 = nn.LayerNorm(d_model)

- # cross attention
+ # Cross attention
  self.cross_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points)
  self.dropout2 = nn.Dropout(dropout)
  self.norm2 = nn.LayerNorm(d_model)

- # ffn
+ # FFN
  self.linear1 = nn.Linear(d_model, d_ffn)
  self.act = act
  self.dropout3 = nn.Dropout(dropout)
@@ -299,37 +323,44 @@ class DeformableTransformerDecoderLayer(nn.Module):

  @staticmethod
  def with_pos_embed(tensor, pos):
+ """Add positional embeddings to the input tensor, if provided."""
  return tensor if pos is None else tensor + pos

  def forward_ffn(self, tgt):
+ """Perform forward pass through the Feed-Forward Network part of the layer."""
  tgt2 = self.linear2(self.dropout3(self.act(self.linear1(tgt))))
  tgt = tgt + self.dropout4(tgt2)
  return self.norm3(tgt)

  def forward(self, embed, refer_bbox, feats, shapes, padding_mask=None, attn_mask=None, query_pos=None):
- # self attention
+ """Perform the forward pass through the entire decoder layer."""
+
+ # Self attention
  q = k = self.with_pos_embed(embed, query_pos)
  tgt = self.self_attn(q.transpose(0, 1), k.transpose(0, 1), embed.transpose(0, 1),
  attn_mask=attn_mask)[0].transpose(0, 1)
  embed = embed + self.dropout1(tgt)
  embed = self.norm1(embed)

- # cross attention
+ # Cross attention
  tgt = self.cross_attn(self.with_pos_embed(embed, query_pos), refer_bbox.unsqueeze(2), feats, shapes,
  padding_mask)
  embed = embed + self.dropout2(tgt)
  embed = self.norm2(embed)

- # ffn
+ # FFN
  return self.forward_ffn(embed)


  class DeformableTransformerDecoder(nn.Module):
  """
+ Implementation of Deformable Transformer Decoder based on PaddleDetection.
+
  https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
  """

  def __init__(self, hidden_dim, decoder_layer, num_layers, eval_idx=-1):
+ """Initialize the DeformableTransformerDecoder with the given parameters."""
  super().__init__()
  self.layers = _get_clones(decoder_layer, num_layers)
  self.num_layers = num_layers
@@ -347,6 +378,7 @@ class DeformableTransformerDecoder(nn.Module):
  pos_mlp,
  attn_mask=None,
  padding_mask=None):
+ """Perform the forward pass through the entire decoder."""
  output = embed
  dec_bboxes = []
  dec_cls = []
ultralytics/nn/modules/utils.py CHANGED
@@ -1,7 +1,5 @@
  # Ultralytics YOLO 🚀, AGPL-3.0 license
- """
- Module utils
- """
+ """Module utils."""

  import copy
  import math
@@ -16,15 +14,17 @@ __all__ = 'multi_scale_deformable_attn_pytorch', 'inverse_sigmoid'


  def _get_clones(module, n):
+ """Create a list of cloned modules from the given module."""
  return nn.ModuleList([copy.deepcopy(module) for _ in range(n)])


  def bias_init_with_prob(prior_prob=0.01):
- """initialize conv/fc bias value according to a given probability value."""
+ """Initialize conv/fc bias value according to a given probability value."""
  return float(-np.log((1 - prior_prob) / prior_prob)) # return bias_init


  def linear_init_(module):
+ """Initialize the weights and biases of a linear module."""
  bound = 1 / math.sqrt(module.weight.shape[0])
  uniform_(module.weight, -bound, bound)
  if hasattr(module, 'bias') and module.bias is not None:
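For context on `bias_init_with_prob` above: it solves sigmoid(b) = prior_prob for the bias b, so a freshly initialized classification head starts out predicting the desired prior probability. A quick check of that identity:

    import numpy as np

    prior_prob = 0.01
    b = -np.log((1 - prior_prob) / prior_prob)           # value returned by bias_init_with_prob
    assert np.isclose(1 / (1 + np.exp(-b)), prior_prob)  # sigmoid(b) recovers the prior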
@@ -32,6 +32,7 @@ def linear_init_(module):


  def inverse_sigmoid(x, eps=1e-5):
+ """Calculate the inverse sigmoid function for a tensor."""
  x = x.clamp(min=0, max=1)
  x1 = x.clamp(min=eps)
  x2 = (1 - x).clamp(min=eps)
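`inverse_sigmoid` is the logit function log(x / (1 - x)) with clamping for numerical stability; DETR-style decoders use it to refine reference boxes in logit space. A hedged round-trip sketch, assuming the function returns the log-ratio of the two clamped terms shown above:

    import torch

    x = torch.tensor([0.1, 0.5, 0.9])
    eps = 1e-5
    x1 = x.clamp(min=eps)
    x2 = (1 - x).clamp(min=eps)
    logit = torch.log(x1 / x2)
    assert torch.allclose(torch.sigmoid(logit), x)  # sigmoid undoes it away from the clamp bounds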
@@ -43,6 +44,7 @@ def multi_scale_deformable_attn_pytorch(value: torch.Tensor, value_spatial_shape
  attention_weights: torch.Tensor) -> torch.Tensor:
  """
  Multi-scale deformable attention.
+
  https://github.com/IDEA-Research/detrex/blob/main/detrex/layers/multi_scale_deform_attn.py
  """

ultralytics/nn/tasks.py CHANGED
@@ -25,14 +25,11 @@ except ImportError:


  class BaseModel(nn.Module):
- """
- The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family.
- """
+ """The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family."""

  def forward(self, x, *args, **kwargs):
  """
- Forward pass of the model on a single scale.
- Wrapper for `_forward_once` method.
+ Forward pass of the model on a single scale. Wrapper for `_forward_once` method.

  Args:
  x (torch.Tensor | dict): The input image tensor or a dict including image tensor and gt labels.
@@ -93,8 +90,8 @@ class BaseModel(nn.Module):

  def _profile_one_layer(self, m, x, dt):
  """
- Profile the computation time and FLOPs of a single layer of the model on a given input.
- Appends the results to the provided list.
+ Profile the computation time and FLOPs of a single layer of the model on a given input. Appends the results to
+ the provided list.

  Args:
  m (nn.Module): The layer to be profiled.
@@ -158,7 +155,7 @@ class BaseModel(nn.Module):

  def info(self, detailed=False, verbose=True, imgsz=640):
  """
- Prints model information
+ Prints model information.

  Args:
  detailed (bool): if True, prints out detailed information about the model. Defaults to False
@@ -175,7 +172,7 @@ class BaseModel(nn.Module):
  fn (function): the function to apply to the model

  Returns:
- A model that is a Detect() object.
+ (BaseModel): An updated BaseModel object.
  """
  self = super()._apply(fn)
  m = self.model[-1] # Detect()
@@ -202,7 +199,7 @@ class BaseModel(nn.Module):

  def loss(self, batch, preds=None):
  """
- Compute loss
+ Compute loss.

  Args:
  batch (dict): Batch to compute loss on
@@ -215,6 +212,7 @@ class BaseModel(nn.Module):
  return self.criterion(preds, batch)

  def init_criterion(self):
+ """Initialize the loss criterion for the BaseModel."""
  raise NotImplementedError('compute_loss() needs to be implemented by task heads')


@@ -222,6 +220,7 @@ class DetectionModel(BaseModel):
  """YOLOv8 detection model."""

  def __init__(self, cfg='yolov8n.yaml', ch=3, nc=None, verbose=True): # model, input channels, number of classes
+ """Initialize the YOLOv8 detection model with the given config and parameters."""
  super().__init__()
  self.yaml = cfg if isinstance(cfg, dict) else yaml_model_load(cfg) # cfg dict

@@ -289,6 +288,7 @@ class DetectionModel(BaseModel):
  return y

  def init_criterion(self):
+ """Initialize the loss criterion for the DetectionModel."""
  return v8DetectionLoss(self)


@@ -300,6 +300,7 @@ class SegmentationModel(DetectionModel):
  super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)

  def init_criterion(self):
+ """Initialize the loss criterion for the SegmentationModel."""
  return v8SegmentationLoss(self)


@@ -316,6 +317,7 @@ class PoseModel(DetectionModel):
  super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)

  def init_criterion(self):
+ """Initialize the loss criterion for the PoseModel."""
  return v8PoseLoss(self)


@@ -365,22 +367,59 @@ class ClassificationModel(BaseModel):
  m[i] = nn.Conv2d(m[i].in_channels, nc, m[i].kernel_size, m[i].stride, bias=m[i].bias is not None)

  def init_criterion(self):
- """Compute the classification loss between predictions and true labels."""
+ """Initialize the loss criterion for the ClassificationModel."""
  return v8ClassificationLoss()


  class RTDETRDetectionModel(DetectionModel):
+ """
+ RTDETR (Real-time DEtection and Tracking using Transformers) Detection Model class.
+
+ This class is responsible for constructing the RTDETR architecture, defining loss functions, and
+ facilitating both the training and inference processes. RTDETR is an object detection and tracking model
+ that extends from the DetectionModel base class.
+
+ Attributes:
+ cfg (str): The configuration file path or preset string. Default is 'rtdetr-l.yaml'.
+ ch (int): Number of input channels. Default is 3 (RGB).
+ nc (int, optional): Number of classes for object detection. Default is None.
+ verbose (bool): Specifies if summary statistics are shown during initialization. Default is True.
+
+ Methods:
+ init_criterion: Initializes the criterion used for loss calculation.
+ loss: Computes and returns the loss during training.
+ predict: Performs a forward pass through the network and returns the output.
+ """

  def __init__(self, cfg='rtdetr-l.yaml', ch=3, nc=None, verbose=True):
+ """
+ Initialize the RTDETRDetectionModel.
+
+ Args:
+ cfg (str): Configuration file name or path.
+ ch (int): Number of input channels.
+ nc (int, optional): Number of classes. Defaults to None.
+ verbose (bool, optional): Print additional information during initialization. Defaults to True.
+ """
  super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)

  def init_criterion(self):
- """Compute the classification loss between predictions and true labels."""
+ """Initialize the loss criterion for the RTDETRDetectionModel."""
  from ultralytics.models.utils.loss import RTDETRDetectionLoss

  return RTDETRDetectionLoss(nc=self.nc, use_vfl=True)

  def loss(self, batch, preds=None):
+ """
+ Compute the loss for the given batch of data.
+
+ Args:
+ batch (dict): Dictionary containing image and label data.
+ preds (torch.Tensor, optional): Precomputed model predictions. Defaults to None.
+
+ Returns:
+ tuple: A tuple containing the total loss and main three losses in a tensor.
+ """
  if not hasattr(self, 'criterion'):
  self.criterion = self.init_criterion()

@@ -417,16 +456,17 @@ class RTDETRDetectionModel(DetectionModel):

  def predict(self, x, profile=False, visualize=False, batch=None, augment=False):
  """
- Perform a forward pass through the network.
+ Perform a forward pass through the model.

  Args:
- x (torch.Tensor): The input tensor to the model
- profile (bool): Print the computation time of each layer if True, defaults to False.
- visualize (bool): Save the feature maps of the model if True, defaults to False
- batch (dict): A dict including gt boxes and labels from dataloader.
+ x (torch.Tensor): The input tensor.
+ profile (bool, optional): If True, profile the computation time for each layer. Defaults to False.
+ visualize (bool, optional): If True, save feature maps for visualization. Defaults to False.
+ batch (dict, optional): Ground truth data for evaluation. Defaults to None.
+ augment (bool, optional): If True, perform data augmentation during inference. Defaults to False.

  Returns:
- (torch.Tensor): The last output of the model.
+ torch.Tensor: Model's output tensor.
  """
  y, dt = [], [] # outputs
  for m in self.model[:-1]: # except the head part
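The expanded RTDETRDetectionModel docstrings above describe the task-level model; end users normally reach it through the RTDETR wrapper class exported by the package. A hedged usage sketch (weights and image names are illustrative only):

    from ultralytics import RTDETR

    model = RTDETR('rtdetr-l.pt')          # the wrapper builds an RTDETRDetectionModel internally
    model.info()                           # prints model information
    results = model.predict('bus.jpg', imgsz=640)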
@@ -708,9 +748,9 @@ def yaml_model_load(path):

  def guess_model_scale(model_path):
  """
- Takes a path to a YOLO model's YAML file as input and extracts the size character of the model's scale.
- The function uses regular expression matching to find the pattern of the model scale in the YAML file name,
- which is denoted by n, s, m, l, or x. The function returns the size character of the model scale as a string.
+ Takes a path to a YOLO model's YAML file as input and extracts the size character of the model's scale. The function
+ uses regular expression matching to find the pattern of the model scale in the YAML file name, which is denoted by
+ n, s, m, l, or x. The function returns the size character of the model scale as a string.

  Args:
  model_path (str | Path): The path to the YOLO model's YAML file.
ultralytics/trackers/bot_sort.py CHANGED
@@ -12,6 +12,33 @@ from .utils.kalman_filter import KalmanFilterXYWH


  class BOTrack(STrack):
+ """
+ An extended version of the STrack class for YOLOv8, adding object tracking features.
+
+ Attributes:
+ shared_kalman (KalmanFilterXYWH): A shared Kalman filter for all instances of BOTrack.
+ smooth_feat (np.ndarray): Smoothed feature vector.
+ curr_feat (np.ndarray): Current feature vector.
+ features (deque): A deque to store feature vectors with a maximum length defined by `feat_history`.
+ alpha (float): Smoothing factor for the exponential moving average of features.
+ mean (np.ndarray): The mean state of the Kalman filter.
+ covariance (np.ndarray): The covariance matrix of the Kalman filter.
+
+ Methods:
+ update_features(feat): Update features vector and smooth it using exponential moving average.
+ predict(): Predicts the mean and covariance using Kalman filter.
+ re_activate(new_track, frame_id, new_id): Reactivates a track with updated features and optionally new ID.
+ update(new_track, frame_id): Update the YOLOv8 instance with new track and frame ID.
+ tlwh: Property that gets the current position in tlwh format `(top left x, top left y, width, height)`.
+ multi_predict(stracks): Predicts the mean and covariance of multiple object tracks using shared Kalman filter.
+ convert_coords(tlwh): Converts tlwh bounding box coordinates to xywh format.
+ tlwh_to_xywh(tlwh): Convert bounding box to xywh format `(center x, center y, width, height)`.
+
+ Usage:
+ bo_track = BOTrack(tlwh, score, cls, feat)
+ bo_track.predict()
+ bo_track.update(new_track, frame_id)
+ """
  shared_kalman = KalmanFilterXYWH()

  def __init__(self, tlwh, score, cls, feat=None, feat_history=50):
@@ -59,9 +86,7 @@ class BOTrack(STrack):

  @property
  def tlwh(self):
- """Get current position in bounding box format `(top left x, top left y,
- width, height)`.
- """
+ """Get current position in bounding box format `(top left x, top left y, width, height)`."""
  if self.mean is None:
  return self._tlwh.copy()
  ret = self.mean[:4].copy()
@@ -90,15 +115,37 @@ class BOTrack(STrack):

  @staticmethod
  def tlwh_to_xywh(tlwh):
- """Convert bounding box to format `(center x, center y, width,
- height)`.
- """
+ """Convert bounding box to format `(center x, center y, width, height)`."""
  ret = np.asarray(tlwh).copy()
  ret[:2] += ret[2:] / 2
  return ret


  class BOTSORT(BYTETracker):
+ """
+ An extended version of the BYTETracker class for YOLOv8, designed for object tracking with ReID and GMC algorithm.
+
+ Attributes:
+ proximity_thresh (float): Threshold for spatial proximity (IoU) between tracks and detections.
+ appearance_thresh (float): Threshold for appearance similarity (ReID embeddings) between tracks and detections.
+ encoder (object): Object to handle ReID embeddings, set to None if ReID is not enabled.
+ gmc (GMC): An instance of the GMC algorithm for data association.
+ args (object): Parsed command-line arguments containing tracking parameters.
+
+ Methods:
+ get_kalmanfilter(): Returns an instance of KalmanFilterXYWH for object tracking.
+ init_track(dets, scores, cls, img): Initialize track with detections, scores, and classes.
+ get_dists(tracks, detections): Get distances between tracks and detections using IoU and (optionally) ReID.
+ multi_predict(tracks): Predict and track multiple objects with YOLOv8 model.
+
+ Usage:
+ bot_sort = BOTSORT(args, frame_rate)
+ bot_sort.init_track(dets, scores, cls, img)
+ bot_sort.multi_predict(tracks)
+
+ Note:
+ The class is designed to work with the YOLOv8 object detection model and supports ReID only if enabled via args.
+ """

  def __init__(self, args, frame_rate=30):
  """Initialize YOLOv8 object with ReID module and GMC algorithm."""