ultralytics 8.0.196__py3-none-any.whl → 8.0.198__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ultralytics might be problematic.
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +4 -5
- ultralytics/data/augment.py +2 -2
- ultralytics/data/converter.py +12 -13
- ultralytics/data/dataset.py +1 -1
- ultralytics/engine/__init__.py +1 -0
- ultralytics/engine/exporter.py +1 -1
- ultralytics/engine/trainer.py +2 -1
- ultralytics/hub/session.py +1 -1
- ultralytics/models/fastsam/predict.py +33 -2
- ultralytics/models/fastsam/prompt.py +38 -1
- ultralytics/models/fastsam/utils.py +5 -5
- ultralytics/models/fastsam/val.py +27 -1
- ultralytics/models/nas/model.py +20 -0
- ultralytics/models/nas/predict.py +23 -0
- ultralytics/models/nas/val.py +24 -0
- ultralytics/models/rtdetr/val.py +17 -5
- ultralytics/models/sam/modules/decoders.py +26 -1
- ultralytics/models/sam/modules/encoders.py +31 -3
- ultralytics/models/sam/modules/sam.py +22 -7
- ultralytics/models/sam/modules/tiny_encoder.py +147 -45
- ultralytics/models/sam/modules/transformer.py +47 -2
- ultralytics/models/sam/predict.py +19 -2
- ultralytics/models/utils/loss.py +20 -2
- ultralytics/models/utils/ops.py +5 -5
- ultralytics/nn/modules/block.py +33 -10
- ultralytics/nn/modules/conv.py +16 -4
- ultralytics/nn/modules/head.py +48 -17
- ultralytics/nn/modules/transformer.py +2 -2
- ultralytics/nn/tasks.py +7 -7
- ultralytics/utils/__init__.py +2 -1
- ultralytics/utils/benchmarks.py +13 -0
- ultralytics/utils/callbacks/mlflow.py +76 -36
- ultralytics/utils/callbacks/wb.py +92 -1
- ultralytics/utils/checks.py +4 -4
- ultralytics/utils/errors.py +12 -0
- ultralytics/utils/files.py +1 -1
- ultralytics/utils/instance.py +41 -3
- ultralytics/utils/loss.py +22 -19
- ultralytics/utils/metrics.py +106 -24
- ultralytics/utils/tal.py +1 -1
- ultralytics/utils/torch_utils.py +4 -2
- ultralytics/utils/tuner.py +10 -4
- {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/METADATA +1 -1
- {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/RECORD +49 -49
- {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/LICENSE +0 -0
- {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/WHEEL +0 -0
- {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.0.196.dist-info → ultralytics-8.0.198.dist-info}/top_level.txt +0 -0
ultralytics/__init__.py
CHANGED
ultralytics/cfg/__init__.py
CHANGED
@@ -7,9 +7,9 @@ from pathlib import Path
 from types import SimpleNamespace
 from typing import Dict, List, Union
 
-from ultralytics.utils import (ASSETS, DEFAULT_CFG, DEFAULT_CFG_DICT, DEFAULT_CFG_PATH, LOGGER, RANK, ROOT,
-                               SETTINGS_YAML, TESTS_RUNNING, IterableSimpleNamespace, __version__, checks,
-                               deprecation_warn, yaml_load, yaml_print)
+from ultralytics.utils import (ASSETS, DEFAULT_CFG, DEFAULT_CFG_DICT, DEFAULT_CFG_PATH, LOGGER, RANK, ROOT, RUNS_DIR,
+                               SETTINGS, SETTINGS_YAML, TESTS_RUNNING, IterableSimpleNamespace, __version__, checks,
+                               colorstr, deprecation_warn, yaml_load, yaml_print)
 
 # Define valid tasks and modes
 MODES = 'train', 'val', 'predict', 'export', 'track', 'benchmark'
@@ -153,8 +153,7 @@ def get_save_dir(args, name=None):
     else:
         from ultralytics.utils.files import increment_path
 
-        project = args.project or (ROOT /
-                                    '../tests/tmp/runs' if TESTS_RUNNING else Path(SETTINGS['runs_dir'])) / args.task
+        project = args.project or (ROOT.parent / 'tests/tmp/runs' if TESTS_RUNNING else RUNS_DIR) / args.task
         name = name or args.name or f'{args.mode}'
         save_dir = increment_path(Path(project) / name, exist_ok=args.exist_ok if RANK in (-1, 0) else True)
 
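The `get_save_dir` change above swaps the `Path(SETTINGS['runs_dir'])` lookup for the new `RUNS_DIR` constant and simplifies the test-run path. A minimal sketch of how the updated helper resolves an output directory; the argument values below are hypothetical and cover only the fields the function reads:

```python
from types import SimpleNamespace

from ultralytics.cfg import get_save_dir

# Hypothetical arguments; project=None means the base directory falls back to
# RUNS_DIR (or tests/tmp/runs when running under the test suite).
args = SimpleNamespace(save_dir=None, project=None, task='detect', mode='train', name=None, exist_ok=False)
save_dir = get_save_dir(args)  # e.g. <RUNS_DIR>/detect/train, incremented to train2, train3, ... if it exists
print(save_dir)
```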
ultralytics/data/augment.py
CHANGED
@@ -491,7 +491,7 @@ class RandomPerspective:
         border = labels.pop('mosaic_border', self.border)
         self.size = img.shape[1] + border[1] * 2, img.shape[0] + border[0] * 2  # w, h
         # M is affine matrix
-        #
+        # Scale for func:`box_candidates`
         img, M, scale = self.affine_transform(img, border)
 
         bboxes = self.apply_bboxes(instances.bboxes, M)
@@ -894,7 +894,7 @@ class Format:
         return labels
 
     def _format_img(self, img):
-        """Format the image for
+        """Format the image for YOLO from Numpy array to PyTorch tensor."""
         if len(img.shape) < 3:
             img = np.expand_dims(img, -1)
         img = np.ascontiguousarray(img.transpose(2, 0, 1)[::-1])
ultralytics/data/converter.py
CHANGED
@@ -1,14 +1,14 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
 import json
-import shutil
 from collections import defaultdict
 from pathlib import Path
 
 import cv2
 import numpy as np
 
-from ultralytics.utils import TQDM
+from ultralytics.utils import LOGGER, TQDM
+from ultralytics.utils.files import increment_path
 
 
 def coco91_to_coco80_class():
@@ -48,12 +48,12 @@ def coco80_to_coco91_class():
 
 
 def convert_coco(labels_dir='../coco/annotations/',
-                 save_dir='
+                 save_dir='coco_converted/',
                  use_segments=False,
                  use_keypoints=False,
                  cls91to80=True):
     """
-    Converts COCO dataset annotations to a format
+    Converts COCO dataset annotations to a YOLO annotation format suitable for training YOLO models.
 
     Args:
         labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
@@ -74,9 +74,7 @@ def convert_coco(labels_dir='../coco/annotations/',
     """
 
     # Create dataset directory
-    save_dir =
-    if save_dir.exists():
-        shutil.rmtree(save_dir)  # delete dir
+    save_dir = increment_path(save_dir)  # increment if save directory already exists
     for p in save_dir / 'labels', save_dir / 'images':
         p.mkdir(parents=True, exist_ok=True)  # make dir
 
@@ -147,6 +145,8 @@ def convert_coco(labels_dir='../coco/annotations/',
                              if use_segments and len(segments[i]) > 0 else bboxes[i]),  # cls, box or segments
                 file.write(('%g ' * len(line)).rstrip() % line + '\n')
 
+    LOGGER.info(f'COCO data converted successfully.\nResults saved to {save_dir.resolve()}')
+
 
 def convert_dota_to_yolo_obb(dota_root_path: str):
     """
@@ -271,26 +271,25 @@ def merge_multi_segment(segments):
     segments = [np.array(i).reshape(-1, 2) for i in segments]
     idx_list = [[] for _ in range(len(segments))]
 
-    #
+    # Record the indexes with min distance between each segment
    for i in range(1, len(segments)):
        idx1, idx2 = min_index(segments[i - 1], segments[i])
        idx_list[i - 1].append(idx1)
        idx_list[i].append(idx2)
 
-    #
+    # Use two round to connect all the segments
    for k in range(2):
-        #
+        # Forward connection
        if k == 0:
            for i, idx in enumerate(idx_list):
-                #
-                # reverse the index of middle segments
+                # Middle segments have two indexes, reverse the index of middle segments
                if len(idx) == 2 and idx[0] > idx[1]:
                    idx = idx[::-1]
                    segments[i] = segments[i][::-1, :]
 
                segments[i] = np.roll(segments[i], -idx[0], axis=0)
                segments[i] = np.concatenate([segments[i], segments[i][:1]])
-                #
+                # Deal with the first segment and the last one
                if i in [0, len(idx_list) - 1]:
                    s.append(segments[i])
                else:
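The `convert_coco` changes replace the destructive `shutil.rmtree` of an existing output directory with `increment_path`, give `save_dir` a `'coco_converted/'` default, and log a summary on completion. A usage sketch based on the signature shown in this diff; the annotation path below is a placeholder:

```python
from ultralytics.data.converter import convert_coco

# Placeholder path; point labels_dir at a folder of COCO-style *.json annotation files.
convert_coco(labels_dir='path/to/coco/annotations/', save_dir='coco_converted/', use_segments=True, cls91to80=True)
# If coco_converted/ already exists, output now goes to coco_converted2/, coco_converted3/, ...
# and a "COCO data converted successfully" message is logged with the resolved save directory.
```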
ultralytics/data/dataset.py
CHANGED
@@ -162,7 +162,7 @@ class YOLODataset(BaseDataset):
     def update_labels_info(self, label):
         """Custom your label format here."""
         # NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label
-        #
+        # We can make it also support classification and semantic segmentation by add or remove some dict keys there.
         bboxes = label.pop('bboxes')
         segments = label.pop('segments')
         keypoints = label.pop('keypoints', None)
ultralytics/engine/__init__.py
CHANGED
@@ -0,0 +1 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
ultralytics/engine/exporter.py
CHANGED
@@ -140,7 +140,7 @@ class Exporter:
         Args:
             cfg (str, optional): Path to a configuration file. Defaults to DEFAULT_CFG.
             overrides (dict, optional): Configuration overrides. Defaults to None.
-            _callbacks (
+            _callbacks (dict, optional): Dictionary of callback functions. Defaults to None.
         """
         self.args = get_cfg(cfg, overrides)
         if self.args.format.lower() in ('coreml', 'mlmodel'):  # fix attempt for protobuf<3.20.x errors
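The Exporter change here only completes the `_callbacks` docstring entry. For reference, the Exporter is normally driven through the model-level export API rather than instantiated directly; a short sketch, where the weights file name is just an example:

```python
from ultralytics import YOLO

model = YOLO('yolov8n.pt')          # example pretrained weights
path = model.export(format='onnx')  # runs Exporter under the hood; returns the exported file path
print(path)
```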
ultralytics/engine/trainer.py
CHANGED
@@ -91,6 +91,7 @@ class BaseTrainer:
 
         # Dirs
         self.save_dir = get_save_dir(self.args)
+        self.args.name = self.save_dir.name  # update name for loggers
         self.wdir = self.save_dir / 'weights'  # weights dir
         if RANK in (-1, 0):
             self.wdir.mkdir(parents=True, exist_ok=True)  # make dir
@@ -526,7 +527,7 @@ class BaseTrainer:
 
     # TODO: may need to put these following functions into callback
     def plot_training_samples(self, batch, ni):
-        """Plots training samples during
+        """Plots training samples during YOLO training."""
         pass
 
     def plot_training_labels(self):
ultralytics/hub/session.py
CHANGED
@@ -23,7 +23,7 @@ class HUBTrainingSession:
 
     Attributes:
         agent_id (str): Identifier for the instance communicating with the server.
-        model_id (str): Identifier for the
+        model_id (str): Identifier for the YOLO model being trained.
         model_url (str): URL for the model in Ultralytics HUB.
         api_url (str): API URL for the model in Ultralytics HUB.
         auth_header (dict): Authentication header for the Ultralytics HUB API requests.
ultralytics/models/fastsam/predict.py
CHANGED
@@ -9,14 +9,45 @@ from ultralytics.utils import DEFAULT_CFG, ops
 
 
 class FastSAMPredictor(DetectionPredictor):
+    """
+    FastSAMPredictor is specialized for fast SAM (Segment Anything Model) segmentation prediction tasks in Ultralytics
+    YOLO framework.
+
+    This class extends the DetectionPredictor, customizing the prediction pipeline specifically for fast SAM.
+    It adjusts post-processing steps to incorporate mask prediction and non-max suppression while optimizing
+    for single-class segmentation.
+
+    Attributes:
+        cfg (dict): Configuration parameters for prediction.
+        overrides (dict, optional): Optional parameter overrides for custom behavior.
+        _callbacks (dict, optional): Optional list of callback functions to be invoked during prediction.
+    """
 
     def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
-        """
+        """
+        Initializes the FastSAMPredictor class, inheriting from DetectionPredictor and setting the task to 'segment'.
+
+        Args:
+            cfg (dict): Configuration parameters for prediction.
+            overrides (dict, optional): Optional parameter overrides for custom behavior.
+            _callbacks (dict, optional): Optional list of callback functions to be invoked during prediction.
+        """
         super().__init__(cfg, overrides, _callbacks)
         self.args.task = 'segment'
 
     def postprocess(self, preds, img, orig_imgs):
-        """
+        """
+        Perform post-processing steps on predictions, including non-max suppression and scaling boxes to original image
+        size, and returns the final results.
+
+        Args:
+            preds (list): The raw output predictions from the model.
+            img (torch.Tensor): The processed image tensor.
+            orig_imgs (list | torch.Tensor): The original image or list of images.
+
+        Returns:
+            (list): A list of Results objects, each containing processed boxes, masks, and other metadata.
+        """
         p = ops.non_max_suppression(
             preds[0],
             self.args.conf,

ultralytics/models/fastsam/prompt.py
CHANGED
@@ -13,6 +13,15 @@ from ultralytics.utils import TQDM
 
 
 class FastSAMPrompt:
+    """
+    Fast Segment Anything Model class for image annotation and visualization.
+
+    Attributes:
+        device (str): Computing device ('cuda' or 'cpu').
+        results: Object detection or segmentation results.
+        source: Source image or image path.
+        clip: CLIP model for linear assignment.
+    """
 
     def __init__(self, source, results, device='cuda') -> None:
         """Initializes FastSAMPrompt with given source, results and device, and assigns clip for linear assignment."""
@@ -92,12 +101,26 @@ class FastSAMPrompt:
              better_quality=True,
              retina=False,
              with_contours=True):
+        """
+        Plots annotations, bounding boxes, and points on images and saves the output.
+
+        Args:
+            annotations (list): Annotations to be plotted.
+            output (str or Path): Output directory for saving the plots.
+            bbox (list, optional): Bounding box coordinates [x1, y1, x2, y2]. Defaults to None.
+            points (list, optional): Points to be plotted. Defaults to None.
+            point_label (list, optional): Labels for the points. Defaults to None.
+            mask_random_color (bool, optional): Whether to use random color for masks. Defaults to True.
+            better_quality (bool, optional): Whether to apply morphological transformations for better mask quality. Defaults to True.
+            retina (bool, optional): Whether to use retina mask. Defaults to False.
+            with_contours (bool, optional): Whether to plot contours. Defaults to True.
+        """
         pbar = TQDM(annotations, total=len(annotations))
         for ann in pbar:
             result_name = os.path.basename(ann.path)
             image = ann.orig_img[..., ::-1]  # BGR to RGB
             original_h, original_w = ann.orig_shape
-            #
+            # For macOS only
             # plt.switch_backend('TkAgg')
             plt.figure(figsize=(original_w / 100, original_h / 100))
             # Add subplot with no margin.
@@ -160,6 +183,20 @@ class FastSAMPrompt:
                          target_height=960,
                          target_width=960,
                          ):
+        """
+        Quickly shows the mask annotations on the given matplotlib axis.
+
+        Args:
+            annotation (array-like): Mask annotation.
+            ax (matplotlib.axes.Axes): Matplotlib axis.
+            random_color (bool, optional): Whether to use random color for masks. Defaults to False.
+            bbox (list, optional): Bounding box coordinates [x1, y1, x2, y2]. Defaults to None.
+            points (list, optional): Points to be plotted. Defaults to None.
+            pointlabel (list, optional): Labels for the points. Defaults to None.
+            retinamask (bool, optional): Whether to use retina mask. Defaults to True.
+            target_height (int, optional): Target height for resizing. Defaults to 960.
+            target_width (int, optional): Target width for resizing. Defaults to 960.
+        """
         n, h, w = annotation.shape  # batch, height, width
 
         areas = np.sum(annotation, axis=(1, 2))

ultralytics/models/fastsam/utils.py
CHANGED
@@ -42,23 +42,23 @@ def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False):
         high_iou_indices (torch.Tensor): Indices of boxes with IoU > thres
     """
     boxes = adjust_bboxes_to_image_border(boxes, image_shape)
-    #
+    # Obtain coordinates for intersections
     x1 = torch.max(box1[0], boxes[:, 0])
     y1 = torch.max(box1[1], boxes[:, 1])
     x2 = torch.min(box1[2], boxes[:, 2])
     y2 = torch.min(box1[3], boxes[:, 3])
 
-    #
+    # Compute the area of intersection
     intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
 
-    #
+    # Compute the area of both individual boxes
     box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
     box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
 
-    #
+    # Compute the area of union
     union = box1_area + box2_area - intersection
 
-    #
+    # Compute the IoU
     iou = intersection / union  # Should be shape (n, )
     if raw_output:
         return 0 if iou.numel() == 0 else iou

ultralytics/models/fastsam/val.py
CHANGED
@@ -5,9 +5,35 @@ from ultralytics.utils.metrics import SegmentMetrics
 
 
 class FastSAMValidator(SegmentationValidator):
+    """
+    Custom validation class for fast SAM (Segment Anything Model) segmentation in Ultralytics YOLO framework.
+
+    Extends the SegmentationValidator class, customizing the validation process specifically for fast SAM. This class
+    sets the task to 'segment' and uses the SegmentMetrics for evaluation. Additionally, plotting features are disabled
+    to avoid errors during validation.
+
+    Attributes:
+        dataloader: The data loader object used for validation.
+        save_dir (str): The directory where validation results will be saved.
+        pbar: A progress bar object.
+        args: Additional arguments for customization.
+        _callbacks: List of callback functions to be invoked during validation.
+    """
 
     def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
-        """
+        """
+        Initialize the FastSAMValidator class, setting the task to 'segment' and metrics to SegmentMetrics.
+
+        Args:
+            dataloader (torch.utils.data.DataLoader): Dataloader to be used for validation.
+            save_dir (Path, optional): Directory to save results.
+            pbar (tqdm.tqdm): Progress bar for displaying progress.
+            args (SimpleNamespace): Configuration for the validator.
+            _callbacks (dict): Dictionary to store various callback functions.
+
+        Notes:
+            Plots for ConfusionMatrix and other related metrics are disabled in this class to avoid errors.
+        """
         super().__init__(dataloader, save_dir, pbar, args, _callbacks)
         self.args.task = 'segment'
         self.args.plots = False  # disable ConfusionMatrix and other plots to avoid errors
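The FastSAM changes in this release are documentation-only (class and method docstrings plus comment text). For context, a typical end-to-end use of these classes looks roughly like the following sketch; the FastSAM-s.pt weights name and image path are assumptions, not part of this diff:

```python
from ultralytics import FastSAM
from ultralytics.models.fastsam import FastSAMPrompt

model = FastSAM('FastSAM-s.pt')  # assumed local weights file
everything = model('ultralytics/assets/bus.jpg', device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)

# FastSAMPrompt post-processes the "segment everything" results with optional prompts.
prompt = FastSAMPrompt('ultralytics/assets/bus.jpg', everything, device='cpu')
ann = prompt.everything_prompt()
prompt.plot(annotations=ann, output='./fastsam_output/')  # plot() signature as documented in the diff above
```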
ultralytics/models/nas/model.py
CHANGED
@@ -23,6 +23,26 @@ from .val import NASValidator
 
 
 class NAS(Model):
+    """
+    YOLO NAS model for object detection.
+
+    This class provides an interface for the YOLO-NAS models and extends the `Model` class from Ultralytics engine.
+    It is designed to facilitate the task of object detection using pre-trained or custom-trained YOLO-NAS models.
+
+    Example:
+        ```python
+        from ultralytics import NAS
+
+        model = NAS('yolo_nas_s')
+        results = model.predict('ultralytics/assets/bus.jpg')
+        ```
+
+    Attributes:
+        model (str): Path to the pre-trained model or model name. Defaults to 'yolo_nas_s.pt'.
+
+    Note:
+        YOLO-NAS models only support pre-trained models. Do not provide YAML configuration files.
+    """
 
     def __init__(self, model='yolo_nas_s.pt') -> None:
         """Initializes the NAS model with the provided or default 'yolo_nas_s.pt' model."""

ultralytics/models/nas/predict.py
CHANGED
@@ -8,6 +8,29 @@ from ultralytics.utils import ops
 
 
 class NASPredictor(BasePredictor):
+    """
+    Ultralytics YOLO NAS Predictor for object detection.
+
+    This class extends the `BasePredictor` from Ultralytics engine and is responsible for post-processing the
+    raw predictions generated by the YOLO NAS models. It applies operations like non-maximum suppression and
+    scaling the bounding boxes to fit the original image dimensions.
+
+    Attributes:
+        args (Namespace): Namespace containing various configurations for post-processing.
+
+    Example:
+        ```python
+        from ultralytics import NAS
+
+        model = NAS('yolo_nas_s')
+        predictor = model.predictor
+        # Assumes that raw_preds, img, orig_imgs are available
+        results = predictor.postprocess(raw_preds, img, orig_imgs)
+        ```
+
+    Note:
+        Typically, this class is not instantiated directly. It is used internally within the `NAS` class.
+    """
 
     def postprocess(self, preds_in, img, orig_imgs):
         """Postprocess predictions and returns a list of Results objects."""
ultralytics/models/nas/val.py
CHANGED
@@ -9,6 +9,30 @@ __all__ = ['NASValidator']
 
 
 class NASValidator(DetectionValidator):
+    """
+    Ultralytics YOLO NAS Validator for object detection.
+
+    Extends `DetectionValidator` from the Ultralytics models package and is designed to post-process the raw predictions
+    generated by YOLO NAS models. It performs non-maximum suppression to remove overlapping and low-confidence boxes,
+    ultimately producing the final detections.
+
+    Attributes:
+        args (Namespace): Namespace containing various configurations for post-processing, such as confidence and IoU thresholds.
+        lb (torch.Tensor): Optional tensor for multilabel NMS.
+
+    Example:
+        ```python
+        from ultralytics import NAS
+
+        model = NAS('yolo_nas_s')
+        validator = model.validator
+        # Assumes that raw_preds are available
+        final_preds = validator.postprocess(raw_preds)
+        ```
+
+    Note:
+        This class is generally not instantiated directly but is used internally within the `NAS` class.
+    """
 
     def postprocess(self, preds_in):
         """Apply Non-maximum suppression to prediction outputs."""
ultralytics/models/rtdetr/val.py
CHANGED
@@ -12,14 +12,19 @@ from ultralytics.utils import colorstr, ops
 __all__ = 'RTDETRValidator',  # tuple or list
 
 
-# TODO: Temporarily RT-DETR does not need padding.
 class RTDETRDataset(YOLODataset):
+    """
+    Real-Time DEtection and TRacking (RT-DETR) dataset class extending the base YOLODataset class.
+
+    This specialized dataset class is designed for use with the RT-DETR object detection model and is optimized for
+    real-time detection and tracking tasks.
+    """
 
     def __init__(self, *args, data=None, **kwargs):
         """Initialize the RTDETRDataset class by inheriting from the YOLODataset class."""
         super().__init__(*args, data=data, use_segments=False, use_keypoints=False, **kwargs)
 
-    # NOTE: add stretch version load_image for
+    # NOTE: add stretch version load_image for RTDETR mosaic
     def load_image(self, i, rect_mode=False):
         """Loads 1 image from dataset index 'i', returns (im, resized hw)."""
         return super().load_image(i=i, rect_mode=rect_mode)
@@ -46,7 +51,11 @@ class RTDETRDataset(YOLODataset):
 
 class RTDETRValidator(DetectionValidator):
     """
-
+    RTDETRValidator extends the DetectionValidator class to provide validation capabilities specifically tailored for
+    the RT-DETR (Real-Time DETR) object detection model.
+
+    The class allows building of an RTDETR-specific dataset for validation, applies Non-maximum suppression for
+    post-processing, and updates evaluation metrics accordingly.
 
     Example:
         ```python
@@ -56,6 +65,9 @@ class RTDETRValidator(DetectionValidator):
         validator = RTDETRValidator(args=args)
         validator()
         ```
+
+    Note:
+        For further details on the attributes and methods, refer to the parent DetectionValidator class.
     """
 
     def build_dataset(self, img_path, mode='val', batch=None):
@@ -87,10 +99,10 @@ class RTDETRValidator(DetectionValidator):
         for i, bbox in enumerate(bboxes):  # (300, 4)
             bbox = ops.xywh2xyxy(bbox)
             score, cls = scores[i].max(-1)  # (300, )
-            # Do not need threshold for evaluation as only got 300 boxes here
+            # Do not need threshold for evaluation as only got 300 boxes here
             # idx = score > self.args.conf
             pred = torch.cat([bbox, score[..., None], cls[..., None]], dim=-1)  # filter
-            #
+            # Sort by confidence to correctly get internal metrics
             pred = pred[score.argsort(descending=True)]
             outputs[i] = pred  # [idx]

ultralytics/models/sam/modules/decoders.py
CHANGED
@@ -10,6 +10,21 @@ from ultralytics.nn.modules import LayerNorm2d
 
 
 class MaskDecoder(nn.Module):
+    """
+    Decoder module for generating masks and their associated quality scores, using a transformer architecture to predict
+    masks given image and prompt embeddings.
+
+    Attributes:
+        transformer_dim (int): Channel dimension for the transformer module.
+        transformer (nn.Module): The transformer module used for mask prediction.
+        num_multimask_outputs (int): Number of masks to predict for disambiguating masks.
+        iou_token (nn.Embedding): Embedding for the IoU token.
+        num_mask_tokens (int): Number of mask tokens.
+        mask_tokens (nn.Embedding): Embedding for the mask tokens.
+        output_upscaling (nn.Sequential): Neural network sequence for upscaling the output.
+        output_hypernetworks_mlps (nn.ModuleList): Hypernetwork MLPs for generating masks.
+        iou_prediction_head (nn.Module): MLP for predicting mask quality.
+    """
 
     def __init__(
         self,
@@ -136,7 +151,7 @@ class MaskDecoder(nn.Module):
 
 class MLP(nn.Module):
     """
-
+    MLP (Multi-Layer Perceptron) model lightly adapted from
     https://github.com/facebookresearch/MaskFormer/blob/main/mask_former/modeling/transformer/transformer_predictor.py
     """
 
@@ -148,6 +163,16 @@ class MLP(nn.Module):
         num_layers: int,
         sigmoid_output: bool = False,
     ) -> None:
+        """
+        Initializes the MLP (Multi-Layer Perceptron) model.
+
+        Args:
+            input_dim (int): The dimensionality of the input features.
+            hidden_dim (int): The dimensionality of the hidden layers.
+            output_dim (int): The dimensionality of the output layer.
+            num_layers (int): The number of hidden layers.
+            sigmoid_output (bool, optional): Whether to apply a sigmoid activation to the output layer. Defaults to False.
+        """
         super().__init__()
         self.num_layers = num_layers
         h = [hidden_dim] * (num_layers - 1)

ultralytics/models/sam/modules/encoders.py
CHANGED
@@ -12,6 +12,18 @@ from ultralytics.nn.modules import LayerNorm2d, MLPBlock
 
 # This class and its supporting functions below lightly adapted from the ViTDet backbone available at: https://github.com/facebookresearch/detectron2/blob/main/detectron2/modeling/backbone/vit.py # noqa
 class ImageEncoderViT(nn.Module):
+    """
+    An image encoder using Vision Transformer (ViT) architecture for encoding an image into a compact latent space. The
+    encoder takes an image, splits it into patches, and processes these patches through a series of transformer blocks.
+    The encoded patches are then processed through a neck to generate the final encoded representation.
+
+    Attributes:
+        img_size (int): Dimension of input images, assumed to be square.
+        patch_embed (PatchEmbed): Module for patch embedding.
+        pos_embed (nn.Parameter, optional): Absolute positional embedding for patches.
+        blocks (nn.ModuleList): List of transformer blocks for processing patch embeddings.
+        neck (nn.Sequential): Neck module to further process the output.
+    """
 
     def __init__(
         self,
@@ -112,6 +124,22 @@ class ImageEncoderViT(nn.Module):
 
 
 class PromptEncoder(nn.Module):
+    """
+    Encodes different types of prompts, including points, boxes, and masks, for input to SAM's mask decoder. The encoder
+    produces both sparse and dense embeddings for the input prompts.
+
+    Attributes:
+        embed_dim (int): Dimension of the embeddings.
+        input_image_size (Tuple[int, int]): Size of the input image as (H, W).
+        image_embedding_size (Tuple[int, int]): Spatial size of the image embedding as (H, W).
+        pe_layer (PositionEmbeddingRandom): Module for random position embedding.
+        num_point_embeddings (int): Number of point embeddings for different types of points.
+        point_embeddings (nn.ModuleList): List of point embeddings.
+        not_a_point_embed (nn.Embedding): Embedding for points that are not a part of any label.
+        mask_input_size (Tuple[int, int]): Size of the input mask.
+        mask_downscaling (nn.Sequential): Neural network for downscaling the mask.
+        no_mask_embed (nn.Embedding): Embedding for cases where no mask is provided.
+    """
 
     def __init__(
         self,
@@ -276,11 +304,11 @@ class PositionEmbeddingRandom(nn.Module):
 
     def _pe_encoding(self, coords: torch.Tensor) -> torch.Tensor:
         """Positionally encode points that are normalized to [0,1]."""
-        #
+        # Assuming coords are in [0, 1]^2 square and have d_1 x ... x d_n x 2 shape
        coords = 2 * coords - 1
        coords = coords @ self.positional_encoding_gaussian_matrix
        coords = 2 * np.pi * coords
-        #
+        # Outputs d_1 x ... x d_n x C shape
        return torch.cat([torch.sin(coords), torch.cos(coords)], dim=-1)
 
     def forward(self, size: Tuple[int, int]) -> torch.Tensor:
@@ -401,7 +429,7 @@ class Attention(nn.Module):
         self.use_rel_pos = use_rel_pos
         if self.use_rel_pos:
             assert (input_size is not None), 'Input size must be provided if using relative positional encoding.'
-            #
+            # Initialize relative positional embeddings
             self.rel_pos_h = nn.Parameter(torch.zeros(2 * input_size[0] - 1, head_dim))
             self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim))
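The SAM module diffs above (decoders, encoders, and the remaining SAM files listed at the top) add docstrings rather than behavior. For orientation, these modules sit behind the high-level SAM interface, which is typically driven with point or box prompts; a brief sketch, where the sam_b.pt weights name and image path are assumptions:

```python
from ultralytics import SAM

model = SAM('sam_b.pt')  # assumed pretrained SAM weights
# Segment with a point prompt (x, y) labeled as foreground...
results = model('ultralytics/assets/bus.jpg', points=[900, 370], labels=[1])
# ...or with a box prompt [x1, y1, x2, y2].
results = model('ultralytics/assets/bus.jpg', bboxes=[439, 437, 524, 709])
```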