PyPI - ultralytics - Versions diffs - 8.3.89__py3-none-any.whl → 8.3.90__py3-none-any.whl - Mend

ultralytics 8.3.89__py3-none-any.whl → 8.3.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (155)

tests/conftest.py +2 -2
tests/test_cli.py +13 -11
tests/test_cuda.py +10 -1
tests/test_integrations.py +1 -5
tests/test_python.py +16 -16
tests/test_solutions.py +9 -9
ultralytics/__init__.py +1 -1
ultralytics/cfg/__init__.py +3 -1
ultralytics/cfg/models/11/yolo11-cls.yaml +5 -5
ultralytics/cfg/models/11/yolo11-obb.yaml +5 -5
ultralytics/cfg/models/11/yolo11-pose.yaml +5 -5
ultralytics/cfg/models/11/yolo11-seg.yaml +5 -5
ultralytics/cfg/models/11/yolo11.yaml +5 -5
ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +5 -5
ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +5 -5
ultralytics/cfg/models/v8/yolov8-ghost.yaml +5 -5
ultralytics/cfg/models/v8/yolov8-obb.yaml +5 -5
ultralytics/cfg/models/v8/yolov8-p6.yaml +5 -5
ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +5 -5
ultralytics/cfg/models/v8/yolov8-world.yaml +5 -5
ultralytics/cfg/models/v8/yolov8-worldv2.yaml +5 -5
ultralytics/cfg/models/v8/yolov8.yaml +5 -5
ultralytics/cfg/models/v9/yolov9c-seg.yaml +1 -1
ultralytics/cfg/models/v9/yolov9c.yaml +1 -1
ultralytics/cfg/models/v9/yolov9e-seg.yaml +1 -1
ultralytics/cfg/models/v9/yolov9e.yaml +1 -1
ultralytics/cfg/models/v9/yolov9m.yaml +1 -1
ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
ultralytics/cfg/models/v9/yolov9t.yaml +1 -1
ultralytics/data/annotator.py +9 -14
ultralytics/data/base.py +118 -30
ultralytics/data/build.py +63 -24
ultralytics/data/converter.py +5 -5
ultralytics/data/dataset.py +207 -53
ultralytics/data/loaders.py +1 -0
ultralytics/data/split_dota.py +39 -12
ultralytics/data/utils.py +13 -19
ultralytics/engine/exporter.py +19 -17
ultralytics/engine/model.py +67 -88
ultralytics/engine/predictor.py +106 -21
ultralytics/engine/trainer.py +32 -23
ultralytics/engine/tuner.py +21 -18
ultralytics/engine/validator.py +75 -41
ultralytics/hub/__init__.py +12 -13
ultralytics/hub/auth.py +9 -12
ultralytics/hub/session.py +76 -21
ultralytics/hub/utils.py +19 -17
ultralytics/models/fastsam/model.py +20 -11
ultralytics/models/fastsam/predict.py +36 -16
ultralytics/models/fastsam/utils.py +5 -5
ultralytics/models/fastsam/val.py +6 -6
ultralytics/models/nas/model.py +22 -11
ultralytics/models/nas/predict.py +9 -4
ultralytics/models/nas/val.py +5 -5
ultralytics/models/rtdetr/model.py +20 -11
ultralytics/models/rtdetr/predict.py +18 -15
ultralytics/models/rtdetr/train.py +20 -16
ultralytics/models/rtdetr/val.py +42 -6
ultralytics/models/sam/__init__.py +1 -1
ultralytics/models/sam/amg.py +50 -4
ultralytics/models/sam/model.py +8 -14
ultralytics/models/sam/modules/decoders.py +18 -21
ultralytics/models/sam/modules/encoders.py +25 -46
ultralytics/models/sam/modules/memory_attention.py +19 -15
ultralytics/models/sam/modules/sam.py +18 -25
ultralytics/models/sam/modules/tiny_encoder.py +19 -29
ultralytics/models/sam/modules/transformer.py +35 -57
ultralytics/models/sam/modules/utils.py +15 -15
ultralytics/models/sam/predict.py +0 -3
ultralytics/models/utils/loss.py +87 -36
ultralytics/models/utils/ops.py +26 -31
ultralytics/models/yolo/classify/predict.py +24 -3
ultralytics/models/yolo/classify/train.py +77 -10
ultralytics/models/yolo/classify/val.py +40 -15
ultralytics/models/yolo/detect/predict.py +23 -10
ultralytics/models/yolo/detect/train.py +85 -15
ultralytics/models/yolo/detect/val.py +145 -21
ultralytics/models/yolo/model.py +1 -2
ultralytics/models/yolo/obb/predict.py +12 -4
ultralytics/models/yolo/obb/train.py +7 -0
ultralytics/models/yolo/obb/val.py +25 -7
ultralytics/models/yolo/pose/predict.py +22 -6
ultralytics/models/yolo/pose/train.py +17 -1
ultralytics/models/yolo/pose/val.py +46 -21
ultralytics/models/yolo/segment/predict.py +22 -8
ultralytics/models/yolo/segment/train.py +6 -0
ultralytics/models/yolo/segment/val.py +100 -14
ultralytics/models/yolo/world/train.py +38 -8
ultralytics/models/yolo/world/train_world.py +39 -10
ultralytics/nn/autobackend.py +28 -14
ultralytics/nn/modules/__init__.py +3 -0
ultralytics/nn/modules/activation.py +12 -3
ultralytics/nn/modules/block.py +587 -84
ultralytics/nn/modules/conv.py +418 -54
ultralytics/nn/modules/head.py +3 -4
ultralytics/nn/modules/transformer.py +320 -34
ultralytics/nn/modules/utils.py +17 -3
ultralytics/nn/tasks.py +221 -69
ultralytics/solutions/ai_gym.py +2 -2
ultralytics/solutions/analytics.py +4 -4
ultralytics/solutions/heatmap.py +4 -4
ultralytics/solutions/instance_segmentation.py +10 -4
ultralytics/solutions/object_blurrer.py +2 -2
ultralytics/solutions/object_counter.py +2 -2
ultralytics/solutions/object_cropper.py +2 -2
ultralytics/solutions/parking_management.py +9 -9
ultralytics/solutions/queue_management.py +1 -1
ultralytics/solutions/region_counter.py +2 -2
ultralytics/solutions/security_alarm.py +7 -7
ultralytics/solutions/solutions.py +7 -4
ultralytics/solutions/speed_estimation.py +2 -2
ultralytics/solutions/streamlit_inference.py +6 -6
ultralytics/solutions/trackzone.py +9 -2
ultralytics/solutions/vision_eye.py +4 -4
ultralytics/trackers/basetrack.py +1 -1
ultralytics/trackers/bot_sort.py +23 -22
ultralytics/trackers/byte_tracker.py +4 -4
ultralytics/trackers/track.py +2 -1
ultralytics/trackers/utils/gmc.py +26 -27
ultralytics/trackers/utils/kalman_filter.py +31 -29
ultralytics/trackers/utils/matching.py +7 -7
ultralytics/utils/__init__.py +32 -27
ultralytics/utils/autobatch.py +5 -5
ultralytics/utils/benchmarks.py +111 -18
ultralytics/utils/callbacks/base.py +3 -3
ultralytics/utils/callbacks/clearml.py +11 -11
ultralytics/utils/callbacks/comet.py +35 -22
ultralytics/utils/callbacks/dvc.py +11 -10
ultralytics/utils/callbacks/hub.py +8 -8
ultralytics/utils/callbacks/mlflow.py +1 -1
ultralytics/utils/callbacks/neptune.py +12 -10
ultralytics/utils/callbacks/raytune.py +1 -1
ultralytics/utils/callbacks/tensorboard.py +6 -6
ultralytics/utils/callbacks/wb.py +16 -16
ultralytics/utils/checks.py +116 -35
ultralytics/utils/dist.py +15 -2
ultralytics/utils/downloads.py +13 -9
ultralytics/utils/files.py +12 -13
ultralytics/utils/instance.py +112 -45
ultralytics/utils/loss.py +28 -33
ultralytics/utils/metrics.py +246 -181
ultralytics/utils/ops.py +61 -53
ultralytics/utils/patches.py +8 -6
ultralytics/utils/plotting.py +64 -45
ultralytics/utils/tal.py +88 -57
ultralytics/utils/torch_utils.py +181 -33
ultralytics/utils/triton.py +13 -3
ultralytics/utils/tuner.py +8 -16
{ultralytics-8.3.89.dist-info → ultralytics-8.3.90.dist-info}/METADATA +1 -1
ultralytics-8.3.90.dist-info/RECORD +250 -0
ultralytics-8.3.89.dist-info/RECORD +0 -250
{ultralytics-8.3.89.dist-info → ultralytics-8.3.90.dist-info}/LICENSE +0 -0
{ultralytics-8.3.89.dist-info → ultralytics-8.3.90.dist-info}/WHEEL +0 -0
{ultralytics-8.3.89.dist-info → ultralytics-8.3.90.dist-info}/entry_points.txt +0 -0
{ultralytics-8.3.89.dist-info → ultralytics-8.3.90.dist-info}/top_level.txt +0 -0

ultralytics/models/utils/loss.py CHANGED Viewed

@@ -12,21 +12,22 @@ from .ops import HungarianMatcher
 class DETRLoss(nn.Module):
     """
-    DETR (DEtection TRansformer) Loss class. This class calculates and returns the different loss components for the
-    DETR object detection model. It computes classification loss, bounding box loss, GIoU loss, and optionally auxiliary
-    losses.
+    DETR (DEtection TRansformer) Loss class for calculating various loss components.
+    This class computes classification loss, bounding box loss, GIoU loss, and optionally auxiliary losses for the
+    DETR object detection model.
     Attributes:
-        nc (int): The number of classes.
-        loss_gain (dict): Coefficients for different loss components.
+        nc (int): Number of classes.
+        loss_gain (Dict): Coefficients for different loss components.
         aux_loss (bool): Whether to compute auxiliary losses.
-        use_fl (bool): Use FocalLoss or not.
-        use_vfl (bool): Use VarifocalLoss or not.
-        use_uni_match (bool): Whether to use a fixed layer to assign labels for the auxiliary branch.
-        uni_match_ind (int): The fixed indices of a layer to use if `use_uni_match` is True.
+        use_fl (bool): Whether to use FocalLoss.
+        use_vfl (bool): Whether to use VarifocalLoss.
+        use_uni_match (bool): Whether to use a fixed layer for auxiliary branch label assignment.
+        uni_match_ind (int): Index of fixed layer to use if use_uni_match is True.
         matcher (HungarianMatcher): Object to compute matching cost and indices.
-        fl (FocalLoss or None): Focal Loss object if `use_fl` is True, otherwise None.
-        vfl (VarifocalLoss or None): Varifocal Loss object if `use_vfl` is True, otherwise None.
+        fl (FocalLoss | None): Focal Loss object if use_fl is True, otherwise None.
+        vfl (VarifocalLoss | None): Varifocal Loss object if use_vfl is True, otherwise None.
         device (torch.device): Device on which tensors are stored.
     """
@@ -36,16 +37,16 @@ class DETRLoss(nn.Module):
         """
         Initialize DETR loss function with customizable components and gains.
-        Uses default loss_gain if not provided. Initializes HungarianMatcher with
-        preset cost gains. Supports auxiliary losses and various loss types.
+        Uses default loss_gain if not provided. Initializes HungarianMatcher with preset cost gains. Supports auxiliary
+        losses and various loss types.
         Args:
             nc (int): Number of classes.
-            loss_gain (dict): Coefficients for different loss components.
-            aux_loss (bool): Use auxiliary losses from each decoder layer.
-            use_fl (bool): Use FocalLoss.
-            use_vfl (bool): Use VarifocalLoss.
-            use_uni_match (bool): Use fixed layer for auxiliary branch label assignment.
+            loss_gain (Dict): Coefficients for different loss components.
+            aux_loss (bool): Whether to use auxiliary losses from each decoder layer.
+            use_fl (bool): Whether to use FocalLoss.
+            use_vfl (bool): Whether to use VarifocalLoss.
+            use_uni_match (bool): Whether to use fixed layer for auxiliary branch label assignment.
             uni_match_ind (int): Index of fixed layer for uni_match.
         """
         super().__init__()
@@ -64,7 +65,7 @@ class DETRLoss(nn.Module):
         self.device = None
     def _get_loss_class(self, pred_scores, targets, gt_scores, num_gts, postfix=""):
-        """Computes the classification loss based on predictions, target values, and ground truth scores."""
+        """Compute classification loss based on predictions, target values, and ground truth scores."""
         # Logits: [b, query, num_classes], gt_class: list[[n, 1]]
         name_class = f"loss_class{postfix}"
         bs, nq = pred_scores.shape[:2]
@@ -86,7 +87,7 @@ class DETRLoss(nn.Module):
         return {name_class: loss_cls.squeeze() * self.loss_gain["class"]}
     def _get_loss_bbox(self, pred_bboxes, gt_bboxes, postfix=""):
-        """Computes bounding box and GIoU losses for predicted and ground truth bounding boxes."""
+        """Compute bounding box and GIoU losses for predicted and ground truth bounding boxes."""
         # Boxes: [b, query, 4], gt_bbox: list[[n, 4]]
         name_bbox = f"loss_bbox{postfix}"
         name_giou = f"loss_giou{postfix}"
@@ -146,7 +147,23 @@ class DETRLoss(nn.Module):
         masks=None,
         gt_mask=None,
     ):
-        """Get auxiliary losses."""
+        """
+        Get auxiliary losses for intermediate decoder layers.
+        Args:
+            pred_bboxes (torch.Tensor): Predicted bounding boxes from auxiliary layers.
+            pred_scores (torch.Tensor): Predicted scores from auxiliary layers.
+            gt_bboxes (torch.Tensor): Ground truth bounding boxes.
+            gt_cls (torch.Tensor): Ground truth classes.
+            gt_groups (List[int]): Number of ground truths per image.
+            match_indices (List[tuple], optional): Pre-computed matching indices.
+            postfix (str): String to append to loss names.
+            masks (torch.Tensor, optional): Predicted masks if using segmentation.
+            gt_mask (torch.Tensor, optional): Ground truth masks if using segmentation.
+        Returns:
+            (Dict): Dictionary of auxiliary losses.
+        """
         # NOTE: loss class, bbox, giou, mask, dice
         loss = torch.zeros(5 if masks is not None else 3, device=pred_bboxes.device)
         if match_indices is None and self.use_uni_match:
@@ -192,14 +209,32 @@ class DETRLoss(nn.Module):
     @staticmethod
     def _get_index(match_indices):
-        """Returns batch indices, source indices, and destination indices from provided match indices."""
+        """
+        Extract batch indices, source indices, and destination indices from match indices.
+        Args:
+            match_indices (List[tuple]): List of tuples containing matched indices.
+        Returns:
+            (tuple): Tuple containing (batch_idx, src_idx) and dst_idx.
+        """
         batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(match_indices)])
         src_idx = torch.cat([src for (src, _) in match_indices])
         dst_idx = torch.cat([dst for (_, dst) in match_indices])
         return (batch_idx, src_idx), dst_idx
     def _get_assigned_bboxes(self, pred_bboxes, gt_bboxes, match_indices):
-        """Assigns predicted bounding boxes to ground truth bounding boxes based on the match indices."""
+        """
+        Assign predicted bounding boxes to ground truth bounding boxes based on match indices.
+        Args:
+            pred_bboxes (torch.Tensor): Predicted bounding boxes.
+            gt_bboxes (torch.Tensor): Ground truth bounding boxes.
+            match_indices (List[tuple]): List of tuples containing matched indices.
+        Returns:
+            (tuple): Tuple containing assigned predictions and ground truths.
+        """
         pred_assigned = torch.cat(
             [
                 t[i] if len(i) > 0 else torch.zeros(0, t.shape[-1], device=self.device)
@@ -226,7 +261,23 @@ class DETRLoss(nn.Module):
         postfix="",
         match_indices=None,
     ):
-        """Get losses."""
+        """
+        Calculate losses for a single prediction layer.
+        Args:
+            pred_bboxes (torch.Tensor): Predicted bounding boxes.
+            pred_scores (torch.Tensor): Predicted class scores.
+            gt_bboxes (torch.Tensor): Ground truth bounding boxes.
+            gt_cls (torch.Tensor): Ground truth classes.
+            gt_groups (List[int]): Number of ground truths per image.
+            masks (torch.Tensor, optional): Predicted masks if using segmentation.
+            gt_mask (torch.Tensor, optional): Ground truth masks if using segmentation.
+            postfix (str): String to append to loss names.
+            match_indices (List[tuple], optional): Pre-computed matching indices.
+        Returns:
+            (Dict): Dictionary of losses.
+        """
         if match_indices is None:
             match_indices = self.matcher(
                 pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups, masks=masks, gt_mask=gt_mask
@@ -256,7 +307,7 @@ class DETRLoss(nn.Module):
         Args:
             pred_bboxes (torch.Tensor): Predicted bounding boxes, shape [l, b, query, 4].
             pred_scores (torch.Tensor): Predicted class scores, shape [l, b, query, num_classes].
-            batch (dict): Batch information containing:
+            batch (Dict): Batch information containing:
                 cls (torch.Tensor): Ground truth classes, shape [num_gts].
                 bboxes (torch.Tensor): Ground truth bounding boxes, shape [num_gts, 4].
                 gt_groups (List[int]): Number of ground truths for each image in the batch.
@@ -264,9 +315,9 @@ class DETRLoss(nn.Module):
             **kwargs (Any): Additional arguments, may include 'match_indices'.
         Returns:
-            (dict): Computed losses, including main and auxiliary (if enabled).
+            (Dict): Computed losses, including main and auxiliary (if enabled).
-        Note:
+        Notes:
             Uses last elements of pred_bboxes and pred_scores for main loss, and the rest for auxiliary losses if
             self.aux_loss is True.
         """
@@ -298,17 +349,17 @@ class RTDETRDetectionLoss(DETRLoss):
     def forward(self, preds, batch, dn_bboxes=None, dn_scores=None, dn_meta=None):
         """
-        Forward pass to compute the detection loss.
+        Forward pass to compute detection loss with optional denoising loss.
         Args:
-            preds (tuple): Predicted bounding boxes and scores.
-            batch (dict): Batch data containing ground truth information.
-            dn_bboxes (torch.Tensor, optional): Denoising bounding boxes. Default is None.
-            dn_scores (torch.Tensor, optional): Denoising scores. Default is None.
-            dn_meta (dict, optional): Metadata for denoising. Default is None.
+            preds (tuple): Tuple containing predicted bounding boxes and scores.
+            batch (Dict): Batch data containing ground truth information.
+            dn_bboxes (torch.Tensor, optional): Denoising bounding boxes.
+            dn_scores (torch.Tensor, optional): Denoising scores.
+            dn_meta (Dict, optional): Metadata for denoising.
         Returns:
-            (dict): Dictionary containing the total loss and, if applicable, the denoising loss.
+            (Dict): Dictionary containing total loss and denoising loss if applicable.
         """
         pred_bboxes, pred_scores = preds
         total_loss = super().forward(pred_bboxes, pred_scores, batch)
@@ -333,12 +384,12 @@ class RTDETRDetectionLoss(DETRLoss):
     @staticmethod
     def get_dn_match_indices(dn_pos_idx, dn_num_group, gt_groups):
         """
-        Get the match indices for denoising.
+        Get match indices for denoising.
         Args:
             dn_pos_idx (List[torch.Tensor]): List of tensors containing positive indices for denoising.
             dn_num_group (int): Number of denoising groups.
-            gt_groups (List[int]): List of integers representing the number of ground truths for each image.
+            gt_groups (List[int]): List of integers representing number of ground truths per image.
         Returns:
             (List[tuple]): List of tuples containing matched indices for denoising.

ultralytics/models/utils/ops.py CHANGED Viewed

@@ -18,7 +18,7 @@ class HungarianMatcher(nn.Module):
     function that considers classification scores, bounding box coordinates, and optionally, mask predictions.
     Attributes:
-        cost_gain (dict): Dictionary of cost coefficients: 'class', 'bbox', 'giou', 'mask', and 'dice'.
+        cost_gain (Dict): Dictionary of cost coefficients: 'class', 'bbox', 'giou', 'mask', and 'dice'.
         use_fl (bool): Indicates whether to use Focal Loss for the classification cost calculation.
         with_mask (bool): Indicates whether the model makes mask predictions.
         num_sample_points (int): The number of sample points used in mask cost calculation.
@@ -26,13 +26,12 @@ class HungarianMatcher(nn.Module):
         gamma (float): The gamma factor in Focal Loss calculation.
     Methods:
-        forward(pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups, masks=None, gt_mask=None): Computes the
-            assignment between predictions and ground truths for a batch.
-        _cost_mask(bs, num_gts, masks=None, gt_mask=None): Computes the mask cost and dice cost if masks are predicted.
+        forward: Computes the assignment between predictions and ground truths for a batch.
+        _cost_mask: Computes the mask cost and dice cost if masks are predicted.
     """
     def __init__(self, cost_gain=None, use_fl=True, with_mask=False, num_sample_points=12544, alpha=0.25, gamma=2.0):
-        """Initializes a HungarianMatcher module for optimal assignment of predicted and ground truth bounding boxes."""
+        """Initialize a HungarianMatcher module for optimal assignment of predicted and ground truth bounding boxes."""
         super().__init__()
         if cost_gain is None:
             cost_gain = {"class": 1, "bbox": 5, "giou": 2, "mask": 1, "dice": 1}
@@ -45,24 +44,21 @@ class HungarianMatcher(nn.Module):
     def forward(self, pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups, masks=None, gt_mask=None):
         """
-        Forward pass for HungarianMatcher. This function computes costs based on prediction and ground truth
-        (classification cost, L1 cost between boxes and GIoU cost between boxes) and finds the optimal matching between
-        predictions and ground truth based on these costs.
+        Forward pass for HungarianMatcher. Computes costs based on prediction and ground truth and finds the optimal
+        matching between predictions and ground truth based on these costs.
         Args:
-            pred_bboxes (Tensor): Predicted bounding boxes with shape [batch_size, num_queries, 4].
-            pred_scores (Tensor): Predicted scores with shape [batch_size, num_queries, num_classes].
-            gt_cls (torch.Tensor): Ground truth classes with shape [num_gts, ].
-            gt_bboxes (torch.Tensor): Ground truth bounding boxes with shape [num_gts, 4].
+            pred_bboxes (torch.Tensor): Predicted bounding boxes with shape (batch_size, num_queries, 4).
+            pred_scores (torch.Tensor): Predicted scores with shape (batch_size, num_queries, num_classes).
+            gt_cls (torch.Tensor): Ground truth classes with shape (num_gts, ).
+            gt_bboxes (torch.Tensor): Ground truth bounding boxes with shape (num_gts, 4).
             gt_groups (List[int]): List of length equal to batch size, containing the number of ground truths for
                 each image.
-            masks (Tensor, optional): Predicted masks with shape [batch_size, num_queries, height, width].
-                Defaults to None.
-            gt_mask (List[Tensor], optional): List of ground truth masks, each with shape [num_masks, Height, Width].
-                Defaults to None.
+            masks (torch.Tensor, optional): Predicted masks with shape (batch_size, num_queries, height, width).
+            gt_mask (List[torch.Tensor], optional): List of ground truth masks, each with shape (num_masks, Height, Width).
         Returns:
-            (List[Tuple[Tensor, Tensor]]): A list of size batch_size, each element is a tuple (index_i, index_j), where:
+            (List[Tuple[torch.Tensor, torch.Tensor]]): A list of size batch_size, each element is a tuple (index_i, index_j), where:
                 - index_i is the tensor of indices of the selected predictions (in order)
                 - index_j is the tensor of indices of the corresponding selected ground truth targets (in order)
                 For each batch element, it holds:
@@ -74,10 +70,10 @@ class HungarianMatcher(nn.Module):
             return [(torch.tensor([], dtype=torch.long), torch.tensor([], dtype=torch.long)) for _ in range(bs)]
         # We flatten to compute the cost matrices in a batch
-        # [batch_size * num_queries, num_classes]
+        # (batch_size * num_queries, num_classes)
         pred_scores = pred_scores.detach().view(-1, nc)
         pred_scores = F.sigmoid(pred_scores) if self.use_fl else F.softmax(pred_scores, dim=-1)
-        # [batch_size * num_queries, 4]
+        # (batch_size * num_queries, 4)
         pred_bboxes = pred_bboxes.detach().view(-1, 4)
         # Compute the classification cost
@@ -151,26 +147,25 @@ def get_cdn_group(
     batch, num_classes, num_queries, class_embed, num_dn=100, cls_noise_ratio=0.5, box_noise_scale=1.0, training=False
 ):
     """
-    Get contrastive denoising training group. This function creates a contrastive denoising training group with positive
-    and negative samples from the ground truths (gt). It applies noise to the class labels and bounding box coordinates,
-    and returns the modified labels, bounding boxes, attention mask and meta information.
+    Get contrastive denoising training group with positive and negative samples from ground truths.
     Args:
-        batch (dict): A dict that includes 'gt_cls' (torch.Tensor with shape [num_gts, ]), 'gt_bboxes'
-            (torch.Tensor with shape [num_gts, 4]), 'gt_groups' (List(int)) which is a list of batch size length
+        batch (Dict): A dict that includes 'gt_cls' (torch.Tensor with shape (num_gts, )), 'gt_bboxes'
+            (torch.Tensor with shape (num_gts, 4)), 'gt_groups' (List[int]) which is a list of batch size length
             indicating the number of gts of each image.
         num_classes (int): Number of classes.
         num_queries (int): Number of queries.
         class_embed (torch.Tensor): Embedding weights to map class labels to embedding space.
-        num_dn (int, optional): Number of denoising. Defaults to 100.
-        cls_noise_ratio (float, optional): Noise ratio for class labels. Defaults to 0.5.
-        box_noise_scale (float, optional): Noise scale for bounding box coordinates. Defaults to 1.0.
-        training (bool, optional): If it's in training mode. Defaults to False.
+        num_dn (int, optional): Number of denoising queries.
+        cls_noise_ratio (float, optional): Noise ratio for class labels.
+        box_noise_scale (float, optional): Noise scale for bounding box coordinates.
+        training (bool, optional): If it's in training mode.
     Returns:
-        (Tuple[Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Dict]]): The modified class embeddings,
-            bounding boxes, attention mask and meta information for denoising. If not in training mode or 'num_dn'
-            is less than or equal to 0, the function returns None for all elements in the tuple.
+        padding_cls (Optional[torch.Tensor]): The modified class embeddings for denoising.
+        padding_bbox (Optional[torch.Tensor]): The modified bounding boxes for denoising.
+        attn_mask (Optional[torch.Tensor]): The attention mask for denoising.
+        dn_meta (Optional[Dict]): Meta information for denoising.
     """
     if (not training) or num_dn <= 0 or batch is None:
         return None, None, None, None

ultralytics/models/yolo/classify/predict.py CHANGED Viewed

@@ -13,6 +13,17 @@ class ClassificationPredictor(BasePredictor):
     """
     A class extending the BasePredictor class for prediction based on a classification model.
+    This predictor handles the specific requirements of classification models, including preprocessing images
+    and postprocessing predictions to generate classification results.
+    Attributes:
+        args (Dict): Configuration arguments for the predictor.
+        _legacy_transform_name (str): Name of the legacy transform class for backward compatibility.
+    Methods:
+        preprocess: Convert input images to model-compatible format.
+        postprocess: Process model predictions into Results objects.
     Notes:
         - Torchvision classification models can also be passed to the 'model' argument, i.e. model='resnet18'.
@@ -25,13 +36,13 @@ class ClassificationPredictor(BasePredictor):
     """
     def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
-        """Initializes ClassificationPredictor setting the task to 'classify'."""
+        """Initialize the ClassificationPredictor with the specified configuration and set task to 'classify'."""
         super().__init__(cfg, overrides, _callbacks)
         self.args.task = "classify"
         self._legacy_transform_name = "ultralytics.yolo.data.augment.ToTensor"
     def preprocess(self, img):
-        """Converts input image to model-compatible data type."""
+        """Convert input images to model-compatible tensor format with appropriate normalization."""
         if not isinstance(img, torch.Tensor):
             is_legacy_transform = any(
                 self._legacy_transform_name in str(transform) for transform in self.transforms.transforms
@@ -46,7 +57,17 @@ class ClassificationPredictor(BasePredictor):
         return img.half() if self.model.fp16 else img.float()  # uint8 to fp16/32
     def postprocess(self, preds, img, orig_imgs):
-        """Post-processes predictions to return Results objects."""
+        """
+        Process predictions to return Results objects with classification probabilities.
+        Args:
+            preds (torch.Tensor): Raw predictions from the model.
+            img (torch.Tensor): Input images after preprocessing.
+            orig_imgs (List[np.ndarray] | torch.Tensor): Original images before preprocessing.
+        Returns:
+            (List[Results]): List of Results objects containing classification results for each image.
+        """
         if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
             orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

ultralytics/models/yolo/classify/train.py CHANGED Viewed

@@ -17,8 +17,28 @@ class ClassificationTrainer(BaseTrainer):
     """
     A class extending the BaseTrainer class for training based on a classification model.
-    Notes:
-        - Torchvision classification models can also be passed to the 'model' argument, i.e. model='resnet18'.
+    This trainer handles the training process for image classification tasks, supporting both YOLO classification models
+    and torchvision models.
+    Attributes:
+        model (ClassificationModel): The classification model to be trained.
+        data (Dict): Dictionary containing dataset information including class names and number of classes.
+        loss_names (List[str]): Names of the loss functions used during training.
+        validator (ClassificationValidator): Validator instance for model evaluation.
+    Methods:
+        set_model_attributes: Set the model's class names from the loaded dataset.
+        get_model: Return a modified PyTorch model configured for training.
+        setup_model: Load, create or download model for classification.
+        build_dataset: Create a ClassificationDataset instance.
+        get_dataloader: Return PyTorch DataLoader with transforms for image preprocessing.
+        preprocess_batch: Preprocess a batch of images and classes.
+        progress_string: Return a formatted string showing training progress.
+        get_validator: Return an instance of ClassificationValidator.
+        label_loss_items: Return a loss dict with labelled training loss items.
+        plot_metrics: Plot metrics from a CSV file.
+        final_eval: Evaluate trained model and save validation results.
+        plot_training_samples: Plot training samples with their annotations.
     Examples:
         >>> from ultralytics.models.yolo.classify import ClassificationTrainer
@@ -41,7 +61,17 @@ class ClassificationTrainer(BaseTrainer):
         self.model.names = self.data["names"]
     def get_model(self, cfg=None, weights=None, verbose=True):
-        """Returns a modified PyTorch model configured for training YOLO."""
+        """
+        Return a modified PyTorch model configured for training YOLO.
+        Args:
+            cfg (Any): Model configuration.
+            weights (Any): Pre-trained model weights.
+            verbose (bool): Whether to display model information.
+        Returns:
+            (ClassificationModel): Configured PyTorch model for classification.
+        """
         model = ClassificationModel(cfg, nc=self.data["nc"], verbose=verbose and RANK == -1)
         if weights:
             model.load(weights)
@@ -56,7 +86,12 @@ class ClassificationTrainer(BaseTrainer):
         return model
     def setup_model(self):
-        """Load, create or download model for any task."""
+        """
+        Load, create or download model for classification tasks.
+        Returns:
+            (Any): Model checkpoint if applicable, otherwise None.
+        """
         import torchvision  # scope for faster 'import ultralytics'
         if str(self.model) in torchvision.models.__dict__:
@@ -70,11 +105,32 @@ class ClassificationTrainer(BaseTrainer):
         return ckpt
     def build_dataset(self, img_path, mode="train", batch=None):
-        """Creates a ClassificationDataset instance given an image path, and mode (train/test etc.)."""
+        """
+        Create a ClassificationDataset instance given an image path and mode.
+        Args:
+            img_path (str): Path to the dataset images.
+            mode (str): Dataset mode ('train', 'val', or 'test').
+            batch (Any): Batch information (unused in this implementation).
+        Returns:
+            (ClassificationDataset): Dataset for the specified mode.
+        """
         return ClassificationDataset(root=img_path, args=self.args, augment=mode == "train", prefix=mode)
     def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode="train"):
-        """Returns PyTorch DataLoader with transforms to preprocess images for inference."""
+        """
+        Return PyTorch DataLoader with transforms to preprocess images.
+        Args:
+            dataset_path (str): Path to the dataset.
+            batch_size (int): Number of images per batch.
+            rank (int): Process rank for distributed training.
+            mode (str): 'train', 'val', or 'test' mode.
+        Returns:
+            (torch.utils.data.DataLoader): DataLoader for the specified dataset and mode.
+        """
         with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
             dataset = self.build_dataset(dataset_path, mode)
@@ -112,9 +168,14 @@ class ClassificationTrainer(BaseTrainer):
     def label_loss_items(self, loss_items=None, prefix="train"):
         """
-        Returns a loss dict with labelled training loss items tensor.
+        Return a loss dict with labelled training loss items tensor.
-        Not needed for classification but necessary for segmentation & detection
+        Args:
+            loss_items (torch.Tensor, optional): Loss tensor items.
+            prefix (str): Prefix to prepend to loss names.
+        Returns:
+            (Dict[str, float] | List[str]): Dictionary of loss items or list of loss keys if loss_items is None.
         """
         keys = [f"{prefix}/{x}" for x in self.loss_names]
         if loss_items is None:
@@ -123,7 +184,7 @@ class ClassificationTrainer(BaseTrainer):
         return dict(zip(keys, loss_items))
     def plot_metrics(self):
-        """Plots metrics from a CSV file."""
+        """Plot metrics from a CSV file."""
         plot_results(file=self.csv, classify=True, on_plot=self.on_plot)  # save results.png
     def final_eval(self):
@@ -140,7 +201,13 @@ class ClassificationTrainer(BaseTrainer):
                     self.run_callbacks("on_fit_epoch_end")
     def plot_training_samples(self, batch, ni):
-        """Plots training samples with their annotations."""
+        """
+        Plot training samples with their annotations.
+        Args:
+            batch (Dict[str, torch.Tensor]): Batch containing images and class labels.
+            ni (int): Number of iterations.
+        """
         plot_images(
             images=batch["img"],
             batch_idx=torch.arange(len(batch["img"])),

ultralytics 8.3.89__py3-none-any.whl → 8.3.90__py3-none-any.whl

ultralytics 8.3.89__py3-none-any.whl → 8.3.90__py3-none-any.whl