ultralytics 8.3.143__py3-none-any.whl → 8.3.144__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/conftest.py +7 -24
- tests/test_cli.py +1 -1
- tests/test_cuda.py +7 -2
- tests/test_engine.py +7 -8
- tests/test_exports.py +16 -16
- tests/test_integrations.py +1 -1
- tests/test_solutions.py +11 -11
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +16 -13
- ultralytics/data/annotator.py +6 -5
- ultralytics/data/augment.py +127 -126
- ultralytics/data/base.py +54 -51
- ultralytics/data/build.py +47 -23
- ultralytics/data/converter.py +47 -43
- ultralytics/data/dataset.py +51 -50
- ultralytics/data/loaders.py +77 -44
- ultralytics/data/split.py +22 -9
- ultralytics/data/split_dota.py +63 -39
- ultralytics/data/utils.py +59 -39
- ultralytics/engine/exporter.py +79 -27
- ultralytics/engine/model.py +39 -39
- ultralytics/engine/predictor.py +37 -28
- ultralytics/engine/results.py +187 -157
- ultralytics/engine/trainer.py +36 -19
- ultralytics/engine/tuner.py +12 -9
- ultralytics/engine/validator.py +7 -9
- ultralytics/hub/__init__.py +11 -13
- ultralytics/hub/auth.py +22 -2
- ultralytics/hub/google/__init__.py +19 -19
- ultralytics/hub/session.py +37 -51
- ultralytics/hub/utils.py +19 -5
- ultralytics/models/fastsam/model.py +30 -12
- ultralytics/models/fastsam/predict.py +5 -6
- ultralytics/models/fastsam/utils.py +3 -3
- ultralytics/models/fastsam/val.py +10 -6
- ultralytics/models/nas/model.py +9 -5
- ultralytics/models/nas/predict.py +6 -6
- ultralytics/models/nas/val.py +3 -3
- ultralytics/models/rtdetr/model.py +7 -6
- ultralytics/models/rtdetr/predict.py +14 -7
- ultralytics/models/rtdetr/train.py +10 -4
- ultralytics/models/rtdetr/val.py +36 -9
- ultralytics/models/sam/amg.py +30 -12
- ultralytics/models/sam/build.py +22 -22
- ultralytics/models/sam/model.py +10 -9
- ultralytics/models/sam/modules/blocks.py +76 -80
- ultralytics/models/sam/modules/decoders.py +6 -8
- ultralytics/models/sam/modules/encoders.py +23 -26
- ultralytics/models/sam/modules/memory_attention.py +13 -1
- ultralytics/models/sam/modules/sam.py +57 -26
- ultralytics/models/sam/modules/tiny_encoder.py +232 -237
- ultralytics/models/sam/modules/transformer.py +13 -13
- ultralytics/models/sam/modules/utils.py +11 -19
- ultralytics/models/sam/predict.py +114 -101
- ultralytics/models/utils/loss.py +98 -77
- ultralytics/models/utils/ops.py +116 -67
- ultralytics/models/yolo/classify/predict.py +5 -5
- ultralytics/models/yolo/classify/train.py +32 -28
- ultralytics/models/yolo/classify/val.py +7 -8
- ultralytics/models/yolo/detect/predict.py +1 -0
- ultralytics/models/yolo/detect/train.py +15 -14
- ultralytics/models/yolo/detect/val.py +37 -36
- ultralytics/models/yolo/model.py +106 -23
- ultralytics/models/yolo/obb/predict.py +3 -4
- ultralytics/models/yolo/obb/train.py +14 -6
- ultralytics/models/yolo/obb/val.py +29 -23
- ultralytics/models/yolo/pose/predict.py +9 -8
- ultralytics/models/yolo/pose/train.py +24 -16
- ultralytics/models/yolo/pose/val.py +44 -26
- ultralytics/models/yolo/segment/predict.py +5 -5
- ultralytics/models/yolo/segment/train.py +11 -7
- ultralytics/models/yolo/segment/val.py +2 -2
- ultralytics/models/yolo/world/train.py +33 -23
- ultralytics/models/yolo/world/train_world.py +11 -3
- ultralytics/models/yolo/yoloe/predict.py +11 -11
- ultralytics/models/yolo/yoloe/train.py +73 -21
- ultralytics/models/yolo/yoloe/train_seg.py +10 -7
- ultralytics/models/yolo/yoloe/val.py +42 -18
- ultralytics/nn/autobackend.py +59 -15
- ultralytics/nn/modules/__init__.py +4 -4
- ultralytics/nn/modules/activation.py +4 -1
- ultralytics/nn/modules/block.py +178 -111
- ultralytics/nn/modules/conv.py +6 -5
- ultralytics/nn/modules/head.py +469 -121
- ultralytics/nn/modules/transformer.py +147 -58
- ultralytics/nn/tasks.py +227 -20
- ultralytics/nn/text_model.py +30 -33
- ultralytics/solutions/ai_gym.py +1 -1
- ultralytics/solutions/analytics.py +7 -4
- ultralytics/solutions/config.py +10 -10
- ultralytics/solutions/distance_calculation.py +11 -10
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +6 -3
- ultralytics/solutions/object_blurrer.py +3 -3
- ultralytics/solutions/object_counter.py +15 -7
- ultralytics/solutions/object_cropper.py +3 -2
- ultralytics/solutions/parking_management.py +29 -28
- ultralytics/solutions/queue_management.py +6 -6
- ultralytics/solutions/region_counter.py +10 -3
- ultralytics/solutions/security_alarm.py +3 -3
- ultralytics/solutions/similarity_search.py +85 -24
- ultralytics/solutions/solutions.py +184 -75
- ultralytics/solutions/speed_estimation.py +28 -22
- ultralytics/solutions/streamlit_inference.py +17 -12
- ultralytics/solutions/trackzone.py +4 -4
- ultralytics/trackers/basetrack.py +16 -23
- ultralytics/trackers/bot_sort.py +30 -20
- ultralytics/trackers/byte_tracker.py +70 -64
- ultralytics/trackers/track.py +4 -8
- ultralytics/trackers/utils/gmc.py +31 -58
- ultralytics/trackers/utils/kalman_filter.py +37 -37
- ultralytics/trackers/utils/matching.py +1 -1
- ultralytics/utils/__init__.py +105 -89
- ultralytics/utils/autobatch.py +16 -3
- ultralytics/utils/autodevice.py +54 -24
- ultralytics/utils/benchmarks.py +42 -28
- ultralytics/utils/callbacks/base.py +3 -3
- ultralytics/utils/callbacks/clearml.py +9 -9
- ultralytics/utils/callbacks/comet.py +67 -25
- ultralytics/utils/callbacks/dvc.py +7 -10
- ultralytics/utils/callbacks/mlflow.py +2 -5
- ultralytics/utils/callbacks/neptune.py +7 -13
- ultralytics/utils/callbacks/raytune.py +1 -1
- ultralytics/utils/callbacks/tensorboard.py +5 -6
- ultralytics/utils/callbacks/wb.py +14 -14
- ultralytics/utils/checks.py +14 -13
- ultralytics/utils/dist.py +5 -5
- ultralytics/utils/downloads.py +94 -67
- ultralytics/utils/errors.py +5 -5
- ultralytics/utils/export.py +61 -47
- ultralytics/utils/files.py +23 -22
- ultralytics/utils/instance.py +48 -52
- ultralytics/utils/loss.py +78 -40
- ultralytics/utils/metrics.py +186 -130
- ultralytics/utils/ops.py +186 -190
- ultralytics/utils/patches.py +15 -17
- ultralytics/utils/plotting.py +71 -27
- ultralytics/utils/tal.py +21 -15
- ultralytics/utils/torch_utils.py +53 -50
- ultralytics/utils/triton.py +5 -4
- ultralytics/utils/tuner.py +5 -5
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/METADATA +1 -1
- ultralytics-8.3.144.dist-info/RECORD +272 -0
- ultralytics-8.3.143.dist-info/RECORD +0 -272
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/licenses/LICENSE +0 -0
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/top_level.txt +0 -0
ultralytics/nn/modules/transformer.py (+147 -58):

```diff
@@ -2,6 +2,7 @@
 """Transformer modules."""
 
 import math
+from typing import List, Optional
 
 import torch
 import torch.nn as nn
@@ -27,7 +28,10 @@ __all__ = (
 
 class TransformerEncoderLayer(nn.Module):
     """
-
+    A single layer of the transformer encoder.
+
+    This class implements a standard transformer encoder layer with multi-head attention and feedforward network,
+    supporting both pre-normalization and post-normalization configurations.
 
     Attributes:
         ma (nn.MultiheadAttention): Multi-head attention module.
@@ -42,7 +46,15 @@ class TransformerEncoderLayer(nn.Module):
         normalize_before (bool): Whether to apply normalization before attention and feedforward.
     """
 
-    def __init__(self, c1, cm=2048, num_heads=8, dropout=0.0, act=nn.GELU(), normalize_before=False):
+    def __init__(
+        self,
+        c1: int,
+        cm: int = 2048,
+        num_heads: int = 8,
+        dropout: float = 0.0,
+        act: nn.Module = nn.GELU(),
+        normalize_before: bool = False,
+    ):
         """
         Initialize the TransformerEncoderLayer with specified parameters.
 
@@ -76,11 +88,17 @@ class TransformerEncoderLayer(nn.Module):
         self.normalize_before = normalize_before
 
     @staticmethod
-    def with_pos_embed(tensor, pos=None):
+    def with_pos_embed(tensor: torch.Tensor, pos: Optional[torch.Tensor] = None) -> torch.Tensor:
         """Add position embeddings to the tensor if provided."""
         return tensor if pos is None else tensor + pos
 
-    def forward_post(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
+    def forward_post(
+        self,
+        src: torch.Tensor,
+        src_mask: Optional[torch.Tensor] = None,
+        src_key_padding_mask: Optional[torch.Tensor] = None,
+        pos: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
         """
         Perform forward pass with post-normalization.
 
@@ -101,7 +119,13 @@ class TransformerEncoderLayer(nn.Module):
         src = src + self.dropout2(src2)
         return self.norm2(src)
 
-    def forward_pre(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
+    def forward_pre(
+        self,
+        src: torch.Tensor,
+        src_mask: Optional[torch.Tensor] = None,
+        src_key_padding_mask: Optional[torch.Tensor] = None,
+        pos: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
         """
         Perform forward pass with pre-normalization.
 
@@ -122,9 +146,15 @@ class TransformerEncoderLayer(nn.Module):
         src2 = self.fc2(self.dropout(self.act(self.fc1(src2))))
         return src + self.dropout2(src2)
 
-    def forward(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
+    def forward(
+        self,
+        src: torch.Tensor,
+        src_mask: Optional[torch.Tensor] = None,
+        src_key_padding_mask: Optional[torch.Tensor] = None,
+        pos: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
         """
-        Forward
+        Forward propagate the input through the encoder module.
 
         Args:
             src (torch.Tensor): Input tensor.
```
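Most of the churn in these hunks is annotation-only, but the pre/post-normalization split that the new docstring calls out is the one behavioral fork in the layer. A minimal standalone sketch of the two orderings, with tensor names and sizes that are illustrative rather than taken from the diff:

```python
import torch
import torch.nn as nn

attn = nn.MultiheadAttention(embed_dim=256, num_heads=8)
norm = nn.LayerNorm(256)
src = torch.randn(100, 2, 256)  # (seq_len, batch, channels), sizes are arbitrary

# Post-norm (normalize_before=False): attention, residual add, then LayerNorm.
post = norm(src + attn(src, src, src)[0])

# Pre-norm (normalize_before=True): LayerNorm first, then attention plus residual.
s = norm(src)
pre = src + attn(s, s, s)[0]
```

The same ordering choice repeats for the feedforward half of the layer, which is why the class keeps separate forward_post and forward_pre paths.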
```diff
@@ -142,12 +172,21 @@ class TransformerEncoderLayer(nn.Module):
 
 class AIFI(TransformerEncoderLayer):
     """
-
+    AIFI transformer layer for 2D data with positional embeddings.
 
-    This class extends TransformerEncoderLayer to work with 2D
+    This class extends TransformerEncoderLayer to work with 2D feature maps by adding 2D sine-cosine positional
+    embeddings and handling the spatial dimensions appropriately.
     """
 
-    def __init__(self, c1, cm=2048, num_heads=8, dropout=0, act=nn.GELU(), normalize_before=False):
+    def __init__(
+        self,
+        c1: int,
+        cm: int = 2048,
+        num_heads: int = 8,
+        dropout: float = 0,
+        act: nn.Module = nn.GELU(),
+        normalize_before: bool = False,
+    ):
         """
         Initialize the AIFI instance with specified parameters.
 
@@ -161,7 +200,7 @@ class AIFI(TransformerEncoderLayer):
         """
         super().__init__(c1, cm, num_heads, dropout, act, normalize_before)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Forward pass for the AIFI transformer layer.
 
@@ -178,7 +217,9 @@ class AIFI(TransformerEncoderLayer):
         return x.permute(0, 2, 1).view([-1, c, h, w]).contiguous()
 
     @staticmethod
-    def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.0):
+    def build_2d_sincos_position_embedding(
+        w: int, h: int, embed_dim: int = 256, temperature: float = 10000.0
+    ) -> torch.Tensor:
         """
         Build 2D sine-cosine position embedding.
 
```
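The hunk above only retypes build_2d_sincos_position_embedding, but AIFI's new summary leans on it, so a rough sketch of the standard 2D sine-cosine construction is useful context. This is a paraphrase of the common recipe, not the Ultralytics body, and it assumes embed_dim is divisible by 4:

```python
import torch


def sincos_2d(w: int, h: int, embed_dim: int = 256, temperature: float = 10000.0) -> torch.Tensor:
    """Return [1, w*h, embed_dim] sine-cosine position embeddings for a w x h grid."""
    grid_w, grid_h = torch.meshgrid(
        torch.arange(w, dtype=torch.float32), torch.arange(h, dtype=torch.float32), indexing="ij"
    )
    pos_dim = embed_dim // 4  # a quarter of the channels for each of sin/cos x w/h
    omega = 1.0 / temperature ** (torch.arange(pos_dim, dtype=torch.float32) / pos_dim)
    out_w = grid_w.flatten()[:, None] * omega[None]  # [w*h, pos_dim]
    out_h = grid_h.flatten()[:, None] * omega[None]
    return torch.cat([out_w.sin(), out_w.cos(), out_h.sin(), out_h.cos()], dim=1)[None]
```

AIFI flattens the [B, C, H, W] feature map to a sequence, adds such an embedding, and restores the spatial layout, which is what the forward hunk's closing permute(0, 2, 1).view([-1, c, h, w]) line does.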
```diff
@@ -208,7 +249,7 @@ class AIFI(TransformerEncoderLayer):
 class TransformerLayer(nn.Module):
     """Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)."""
 
-    def __init__(self, c, num_heads):
+    def __init__(self, c: int, num_heads: int):
         """
         Initialize a self-attention mechanism using linear transformations and multi-head attention.
 
@@ -224,7 +265,7 @@ class TransformerLayer(nn.Module):
         self.fc1 = nn.Linear(c, c, bias=False)
         self.fc2 = nn.Linear(c, c, bias=False)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Apply a transformer block to the input x and return the output.
 
@@ -240,7 +281,10 @@ class TransformerLayer(nn.Module):
 
 class TransformerBlock(nn.Module):
     """
-    Vision Transformer https://arxiv.org/abs/2010.11929.
+    Vision Transformer block based on https://arxiv.org/abs/2010.11929.
+
+    This class implements a complete transformer block with optional convolution layer for channel adjustment,
+    learnable position embedding, and multiple transformer layers.
 
     Attributes:
         conv (Conv, optional): Convolution layer if input and output channels differ.
@@ -249,7 +293,7 @@ class TransformerBlock(nn.Module):
         c2 (int): Output channel dimension.
     """
 
-    def __init__(self, c1, c2, num_heads, num_layers):
+    def __init__(self, c1: int, c2: int, num_heads: int, num_layers: int):
         """
         Initialize a Transformer module with position embedding and specified number of heads and layers.
 
@@ -267,9 +311,9 @@ class TransformerBlock(nn.Module):
         self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
         self.c2 = c2
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
-        Forward
+        Forward propagate the input through the transformer block.
 
         Args:
             x (torch.Tensor): Input tensor with shape [b, c1, w, h].
```
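TransformerBlock's forward consumes [b, c1, w, h]; the standard pattern that shape implies is flattening the spatial grid into a token sequence for the stacked TransformerLayer modules and folding it back afterwards. A shape-only sketch under that assumption:

```python
import torch

b, c, w, h = 2, 256, 20, 20  # illustrative sizes
x = torch.randn(b, c, w, h)

# Flatten the spatial grid into a sequence: [b, c, w, h] -> [w*h, b, c]
seq = x.flatten(2).permute(2, 0, 1)

# ... the num_layers TransformerLayer modules would operate on `seq` here ...

# Fold the sequence back into a feature map: [w*h, b, c] -> [b, c, w, h]
y = seq.permute(1, 2, 0).reshape(b, c, w, h)
assert y.shape == x.shape
```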
```diff
@@ -285,9 +329,9 @@ class TransformerBlock(nn.Module):
 
 
 class MLPBlock(nn.Module):
-    """
+    """A single block of a multi-layer perceptron."""
 
-    def __init__(self, embedding_dim, mlp_dim, act=nn.GELU):
+    def __init__(self, embedding_dim: int, mlp_dim: int, act=nn.GELU):
         """
         Initialize the MLPBlock with specified embedding dimension, MLP dimension, and activation function.
 
@@ -316,7 +360,10 @@ class MLPBlock(nn.Module):
 
 class MLP(nn.Module):
     """
-
+    A simple multi-layer perceptron (also called FFN).
+
+    This class implements a configurable MLP with multiple linear layers, activation functions, and optional
+    sigmoid output activation.
 
     Attributes:
         num_layers (int): Number of layers in the MLP.
@@ -325,7 +372,9 @@ class MLP(nn.Module):
         act (nn.Module): Activation function.
     """
 
-    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, act=nn.ReLU, sigmoid=False):
+    def __init__(
+        self, input_dim: int, hidden_dim: int, output_dim: int, num_layers: int, act=nn.ReLU, sigmoid: bool = False
+    ):
         """
         Initialize the MLP with specified input, hidden, output dimensions and number of layers.
 
@@ -344,7 +393,7 @@ class MLP(nn.Module):
         self.sigmoid = sigmoid
         self.act = act()
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Forward pass for the entire MLP.
 
```
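The MLP docstring describes num_layers linear layers chained input -> hidden -> output. A compact sketch of that wiring in the DETR FFN style; an illustration, not the verbatim Ultralytics initializer:

```python
import torch.nn as nn


def build_mlp_layers(input_dim: int, hidden_dim: int, output_dim: int, num_layers: int) -> nn.ModuleList:
    """Chain Linear layers: input_dim -> hidden_dim x (num_layers - 1) -> output_dim."""
    h = [hidden_dim] * (num_layers - 1)
    return nn.ModuleList(nn.Linear(i, o) for i, o in zip([input_dim] + h, h + [output_dim]))


layers = build_mlp_layers(256, 256, 4, num_layers=3)  # e.g. a 3-layer box-regression head
```

The forward pass then applies the activation between every pair of layers and, per the new sigmoid flag, optionally squashes the final output.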
```diff
@@ -363,18 +412,20 @@ class LayerNorm2d(nn.Module):
     """
     2D Layer Normalization module inspired by Detectron2 and ConvNeXt implementations.
 
-
-    https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py
-    and
-    https://github.com/facebookresearch/ConvNeXt/blob/main/models/convnext.py.
+    This class implements layer normalization for 2D feature maps, normalizing across the channel dimension
+    while preserving spatial dimensions.
 
     Attributes:
         weight (nn.Parameter): Learnable scale parameter.
         bias (nn.Parameter): Learnable bias parameter.
         eps (float): Small constant for numerical stability.
+
+    References:
+        https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py
+        https://github.com/facebookresearch/ConvNeXt/blob/main/models/convnext.py
     """
 
-    def __init__(self, num_channels, eps=1e-6):
+    def __init__(self, num_channels: int, eps: float = 1e-6):
         """
         Initialize LayerNorm2d with the given parameters.
 
@@ -387,7 +438,7 @@ class LayerNorm2d(nn.Module):
         self.bias = nn.Parameter(torch.zeros(num_channels))
         self.eps = eps
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Perform forward pass for 2D layer normalization.
 
```
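The forward body is not shown in these hunks, but channel-wise 2D layer normalization as the docstring describes it (normalize across C at every spatial position, then apply the learnable affine) reduces to a few tensor ops. A functional sketch that should match those semantics:

```python
import torch


def layer_norm_2d(x: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    """Normalize a [N, C, H, W] tensor across C, then scale and shift per channel."""
    u = x.mean(1, keepdim=True)  # per-position mean over channels
    s = (x - u).pow(2).mean(1, keepdim=True)  # per-position variance over channels
    x = (x - u) / torch.sqrt(s + eps)
    return weight[:, None, None] * x + bias[:, None, None]
```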
```diff
@@ -407,7 +458,8 @@ class MSDeformAttn(nn.Module):
     """
     Multiscale Deformable Attention Module based on Deformable-DETR and PaddleDetection implementations.
 
-
+    This module implements multiscale deformable attention that can attend to features at multiple scales
+    with learnable sampling locations and attention weights.
 
     Attributes:
         im2col_step (int): Step size for im2col operations.
@@ -419,9 +471,12 @@ class MSDeformAttn(nn.Module):
         attention_weights (nn.Linear): Linear layer for generating attention weights.
         value_proj (nn.Linear): Linear layer for projecting values.
         output_proj (nn.Linear): Linear layer for projecting output.
+
+    References:
+        https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/ops/modules/ms_deform_attn.py
     """
 
-    def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4):
+    def __init__(self, d_model: int = 256, n_levels: int = 4, n_heads: int = 8, n_points: int = 4):
         """
         Initialize MSDeformAttn with the given parameters.
 
@@ -473,23 +528,31 @@ class MSDeformAttn(nn.Module):
         xavier_uniform_(self.output_proj.weight.data)
         constant_(self.output_proj.bias.data, 0.0)
 
-    def forward(self, query, refer_bbox, value, value_shapes, value_mask=None):
+    def forward(
+        self,
+        query: torch.Tensor,
+        refer_bbox: torch.Tensor,
+        value: torch.Tensor,
+        value_shapes: List,
+        value_mask: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
         """
         Perform forward pass for multiscale deformable attention.
 
-        https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
-
         Args:
-            query (torch.Tensor):
-            refer_bbox (torch.Tensor):
-                top-left (0,0), bottom-right (1, 1), including padding area.
-            value (torch.Tensor):
+            query (torch.Tensor): Query tensor with shape [bs, query_length, C].
+            refer_bbox (torch.Tensor): Reference bounding boxes with shape [bs, query_length, n_levels, 2],
+                range in [0, 1], top-left (0,0), bottom-right (1, 1), including padding area.
+            value (torch.Tensor): Value tensor with shape [bs, value_length, C].
             value_shapes (list): List with shape [n_levels, 2], [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})].
-            value_mask (torch.Tensor, optional):
-                False for padding elements.
+            value_mask (torch.Tensor, optional): Mask tensor with shape [bs, value_length], True for non-padding
+                elements, False for padding elements.
 
         Returns:
             (torch.Tensor): Output tensor with shape [bs, Length_{query}, C].
+
+        References:
+            https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
         """
         bs, len_q = query.shape[:2]
         len_v = value.shape[1]
```
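With the shapes now spelled out in the docstring, the call pattern is easy to exercise. A hypothetical smoke test under those shapes; the import path follows the file being diffed, and all sizes are made up:

```python
import torch

from ultralytics.nn.modules.transformer import MSDeformAttn

attn = MSDeformAttn(d_model=256, n_levels=2, n_heads=8, n_points=4)
shapes = [(32, 32), (16, 16)]  # (H_i, W_i) for each feature level
value = torch.randn(2, sum(hh * ww for hh, ww in shapes), 256)  # [bs, value_length, C]
query = torch.randn(2, 300, 256)  # [bs, query_length, C]
refer_bbox = torch.rand(2, 300, len(shapes), 2)  # normalized reference points in [0, 1]
out = attn(query, refer_bbox, value, shapes)  # expected: [2, 300, 256]
```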
```diff
@@ -521,8 +584,8 @@ class DeformableTransformerDecoderLayer(nn.Module):
     """
     Deformable Transformer Decoder Layer inspired by PaddleDetection and Deformable-DETR implementations.
 
-
-
+    This class implements a single decoder layer with self-attention, cross-attention using multiscale deformable
+    attention, and a feedforward network.
 
     Attributes:
         self_attn (nn.MultiheadAttention): Self-attention module.
@@ -537,9 +600,22 @@ class DeformableTransformerDecoderLayer(nn.Module):
         linear2 (nn.Linear): Second linear layer in the feedforward network.
         dropout4 (nn.Dropout): Dropout after the feedforward network.
         norm3 (nn.LayerNorm): Layer normalization after the feedforward network.
+
+    References:
+        https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
+        https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/deformable_transformer.py
     """
 
-    def __init__(self, d_model=256, n_heads=8, d_ffn=1024, dropout=0.0, act=nn.ReLU(), n_levels=4, n_points=4):
+    def __init__(
+        self,
+        d_model: int = 256,
+        n_heads: int = 8,
+        d_ffn: int = 1024,
+        dropout: float = 0.0,
+        act: nn.Module = nn.ReLU(),
+        n_levels: int = 4,
+        n_points: int = 4,
+    ):
         """
         Initialize the DeformableTransformerDecoderLayer with the given parameters.
 
@@ -573,11 +649,11 @@ class DeformableTransformerDecoderLayer(nn.Module):
         self.norm3 = nn.LayerNorm(d_model)
 
     @staticmethod
-    def with_pos_embed(tensor, pos):
+    def with_pos_embed(tensor: torch.Tensor, pos: Optional[torch.Tensor]) -> torch.Tensor:
         """Add positional embeddings to the input tensor, if provided."""
         return tensor if pos is None else tensor + pos
 
-    def forward_ffn(self, tgt):
+    def forward_ffn(self, tgt: torch.Tensor) -> torch.Tensor:
         """
         Perform forward pass through the Feed-Forward Network part of the layer.
 
@@ -591,7 +667,16 @@ class DeformableTransformerDecoderLayer(nn.Module):
         tgt = tgt + self.dropout4(tgt2)
         return self.norm3(tgt)
 
-    def forward(self, embed, refer_bbox, feats, shapes, padding_mask=None, attn_mask=None, query_pos=None):
+    def forward(
+        self,
+        embed: torch.Tensor,
+        refer_bbox: torch.Tensor,
+        feats: torch.Tensor,
+        shapes: List,
+        padding_mask: Optional[torch.Tensor] = None,
+        attn_mask: Optional[torch.Tensor] = None,
+        query_pos: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
         """
         Perform the forward pass through the entire decoder layer.
 
@@ -628,18 +713,22 @@ class DeformableTransformerDecoderLayer(nn.Module):
 
 class DeformableTransformerDecoder(nn.Module):
     """
-
+    Deformable Transformer Decoder based on PaddleDetection implementation.
 
-
+    This class implements a complete deformable transformer decoder with multiple decoder layers and prediction
+    heads for bounding box regression and classification.
 
     Attributes:
         layers (nn.ModuleList): List of decoder layers.
         num_layers (int): Number of decoder layers.
        hidden_dim (int): Hidden dimension.
         eval_idx (int): Index of the layer to use during evaluation.
+
+    References:
+        https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
     """
 
-    def __init__(self, hidden_dim, decoder_layer, num_layers, eval_idx=-1):
+    def __init__(self, hidden_dim: int, decoder_layer: nn.Module, num_layers: int, eval_idx: int = -1):
         """
         Initialize the DeformableTransformerDecoder with the given parameters.
 
@@ -657,15 +746,15 @@ class DeformableTransformerDecoder(nn.Module):
 
     def forward(
         self,
-        embed,  # decoder embeddings
-        refer_bbox,  # anchor
-        feats,  # image features
-        shapes,  # feature shapes
-        bbox_head,
-        score_head,
-        pos_mlp,
-        attn_mask=None,
-        padding_mask=None,
+        embed: torch.Tensor,  # decoder embeddings
+        refer_bbox: torch.Tensor,  # anchor
+        feats: torch.Tensor,  # image features
+        shapes: List,  # feature shapes
+        bbox_head: nn.Module,
+        score_head: nn.Module,
+        pos_mlp: nn.Module,
+        attn_mask: Optional[torch.Tensor] = None,
+        padding_mask: Optional[torch.Tensor] = None,
     ):
         """
         Perform the forward pass through the entire decoder.
```
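The decoder forward's bbox_head, score_head, and pos_mlp arguments imply Deformable-DETR-style iterative refinement: each layer predicts a logit-space delta that nudges refer_bbox, and eval_idx picks which layer's predictions are used at inference. A toy sketch of one refinement step, with inverse_sigmoid written inline and a random tensor standing in for a bbox head output:

```python
import torch


def inverse_sigmoid(x: torch.Tensor, eps: float = 1e-5) -> torch.Tensor:
    """Clamped logit, the inverse of sigmoid."""
    x = x.clamp(min=eps, max=1 - eps)
    return torch.log(x / (1 - x))


refer_bbox = torch.rand(2, 300, 4)  # illustrative [bs, queries, box] in [0, 1]
delta = 0.1 * torch.randn(2, 300, 4)  # stand-in for bbox_head[i](embed)
refined = torch.sigmoid(delta + inverse_sigmoid(refer_bbox))  # next layer's reference
```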
|