ultralytics 8.3.101__py3-none-any.whl → 8.3.103__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. tests/test_exports.py +14 -5
  2. tests/test_solutions.py +140 -76
  3. ultralytics/__init__.py +1 -1
  4. ultralytics/cfg/__init__.py +1 -1
  5. ultralytics/engine/exporter.py +23 -8
  6. ultralytics/engine/tuner.py +8 -2
  7. ultralytics/hub/__init__.py +29 -2
  8. ultralytics/hub/google/__init__.py +18 -1
  9. ultralytics/models/fastsam/predict.py +12 -1
  10. ultralytics/models/nas/predict.py +21 -3
  11. ultralytics/models/rtdetr/val.py +26 -2
  12. ultralytics/models/sam/amg.py +22 -1
  13. ultralytics/models/sam/modules/encoders.py +85 -4
  14. ultralytics/models/sam/modules/memory_attention.py +61 -3
  15. ultralytics/models/sam/modules/utils.py +108 -5
  16. ultralytics/models/utils/loss.py +38 -2
  17. ultralytics/models/utils/ops.py +15 -1
  18. ultralytics/models/yolo/classify/predict.py +11 -1
  19. ultralytics/models/yolo/classify/train.py +17 -1
  20. ultralytics/models/yolo/classify/val.py +82 -6
  21. ultralytics/models/yolo/detect/predict.py +20 -1
  22. ultralytics/models/yolo/model.py +55 -4
  23. ultralytics/models/yolo/obb/predict.py +16 -1
  24. ultralytics/models/yolo/obb/train.py +35 -2
  25. ultralytics/models/yolo/obb/val.py +87 -6
  26. ultralytics/models/yolo/pose/predict.py +18 -1
  27. ultralytics/models/yolo/pose/train.py +48 -3
  28. ultralytics/models/yolo/pose/val.py +113 -8
  29. ultralytics/models/yolo/segment/predict.py +27 -2
  30. ultralytics/models/yolo/segment/train.py +61 -3
  31. ultralytics/models/yolo/segment/val.py +10 -1
  32. ultralytics/models/yolo/world/train_world.py +29 -1
  33. ultralytics/models/yolo/yoloe/train.py +47 -3
  34. ultralytics/nn/autobackend.py +9 -8
  35. ultralytics/nn/modules/activation.py +26 -3
  36. ultralytics/nn/modules/block.py +89 -0
  37. ultralytics/nn/modules/head.py +3 -92
  38. ultralytics/nn/modules/utils.py +70 -4
  39. ultralytics/nn/tasks.py +3 -0
  40. ultralytics/nn/text_model.py +93 -17
  41. ultralytics/solutions/instance_segmentation.py +15 -7
  42. ultralytics/solutions/solutions.py +2 -47
  43. ultralytics/utils/benchmarks.py +1 -1
  44. ultralytics/utils/callbacks/base.py +22 -5
  45. ultralytics/utils/callbacks/comet.py +93 -5
  46. ultralytics/utils/callbacks/dvc.py +64 -5
  47. ultralytics/utils/callbacks/neptune.py +25 -2
  48. ultralytics/utils/callbacks/tensorboard.py +30 -2
  49. ultralytics/utils/callbacks/wb.py +16 -1
  50. ultralytics/utils/dist.py +35 -2
  51. ultralytics/utils/errors.py +27 -6
  52. ultralytics/utils/metrics.py +1 -1
  53. ultralytics/utils/patches.py +33 -5
  54. ultralytics/utils/torch_utils.py +14 -6
  55. ultralytics/utils/triton.py +16 -3
  56. ultralytics/utils/tuner.py +17 -9
  57. {ultralytics-8.3.101.dist-info → ultralytics-8.3.103.dist-info}/METADATA +3 -4
  58. {ultralytics-8.3.101.dist-info → ultralytics-8.3.103.dist-info}/RECORD +62 -62
  59. {ultralytics-8.3.101.dist-info → ultralytics-8.3.103.dist-info}/WHEEL +0 -0
  60. {ultralytics-8.3.101.dist-info → ultralytics-8.3.103.dist-info}/entry_points.txt +0 -0
  61. {ultralytics-8.3.101.dist-info → ultralytics-8.3.103.dist-info}/licenses/LICENSE +0 -0
  62. {ultralytics-8.3.101.dist-info → ultralytics-8.3.103.dist-info}/top_level.txt +0 -0
@@ -22,7 +22,24 @@ class YOLO(Model):
     """YOLO (You Only Look Once) object detection model."""

     def __init__(self, model="yolo11n.pt", task=None, verbose=False):
-        """Initialize YOLO model, switching to YOLOWorld/YOLOE if model filename contains '-world'/'yoloe'."""
+        """
+        Initialize a YOLO model.
+
+        This constructor initializes a YOLO model, automatically switching to specialized model types
+        (YOLOWorld or YOLOE) based on the model filename.
+
+        Args:
+            model (str | Path): Model name or path to model file, i.e. 'yolo11n.pt', 'yolov8n.yaml'.
+            task (str | None): YOLO task specification, i.e. 'detect', 'segment', 'classify', 'pose', 'obb'.
+                Defaults to auto-detection based on model.
+            verbose (bool): Display model info on load.
+
+        Examples:
+            >>> from ultralytics import YOLO
+            >>> model = YOLO("yolov8n.pt")  # load a pretrained YOLOv8n detection model
+            >>> model = YOLO("yolov8n-seg.pt")  # load a pretrained YOLOv8n segmentation model
+            >>> model = YOLO("yolo11n.pt")  # load a pretrained YOLO11n detection model
+        """
         path = Path(model)
         if "-world" in path.stem and path.suffix in {".pt", ".yaml", ".yml"}:  # if YOLOWorld PyTorch model
             new_instance = YOLOWorld(path, verbose=verbose)
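The filename-based dispatch above means the object returned by YOLO() may be a different class entirely. A minimal sketch of the observable effect, assuming publicly released '-world' weights and the standard YOLOWorld.set_classes API:

>>> from ultralytics import YOLO
>>> model = YOLO("yolov8s-world.pt")  # stem contains '-world', so construction swaps in YOLOWorld
>>> type(model).__name__
'YOLOWorld'
>>> model.set_classes(["person", "bus"])  # open-vocabulary API only present on the swapped-in class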
@@ -166,12 +183,46 @@ class YOLOE(Model):
         return self.model.get_text_pe(texts)

     def get_visual_pe(self, img, visual):
-        """Get visual positional embeddings for the given image and visual features."""
+        """
+        Get visual positional embeddings for the given image and visual features.
+
+        This method extracts positional embeddings from visual features based on the input image. It requires
+        that the model is an instance of YOLOEModel.
+
+        Args:
+            img (torch.Tensor): Input image tensor.
+            visual (torch.Tensor): Visual features extracted from the image.
+
+        Returns:
+            (torch.Tensor): Visual positional embeddings.
+
+        Examples:
+            >>> model = YOLOE("yoloe-v8s.pt")
+            >>> img = torch.rand(1, 3, 640, 640)
+            >>> visual_features = model.model.backbone(img)
+            >>> pe = model.get_visual_pe(img, visual_features)
+        """
         assert isinstance(self.model, YOLOEModel)
         return self.model.get_visual_pe(img, visual)

     def set_vocab(self, vocab, names):
-        """Set vocabulary and class names for the model."""
+        """
+        Set vocabulary and class names for the YOLOE model.
+
+        This method configures the vocabulary and class names used by the model for text processing and
+        classification tasks. The model must be an instance of YOLOEModel.
+
+        Args:
+            vocab (list): Vocabulary list containing tokens or words used by the model for text processing.
+            names (list): List of class names that the model can detect or classify.
+
+        Raises:
+            AssertionError: If the model is not an instance of YOLOEModel.
+
+        Examples:
+            >>> model = YOLOE("yoloe-v8s.pt")
+            >>> model.set_vocab(["person", "car", "dog"], ["person", "car", "dog"])
+        """
         assert isinstance(self.model, YOLOEModel)
         self.model.set_vocab(vocab, names=names)

@@ -290,7 +341,7 @@ class YOLOE(Model):

         self.predictor.setup_model(model=self.model)

-        if refer_image is None:
+        if refer_image is None and source:
             dataset = load_inference_source(source)
             if dataset.mode in {"video", "stream"}:
                 # NOTE: set the first frame as refer image for videos/streams inference
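The functional change in this last hunk is the added `and source` guard: load_inference_source is only probed for a reference frame when a source is actually supplied. A hedged sketch of the call path this protects, assuming YOLOE's predict accepts a refer_image argument as the surrounding code suggests:

>>> from ultralytics import YOLOE
>>> model = YOLOE("yoloe-v8s.pt")
>>> # With refer_image given, the dataset probe above is skipped entirely,
>>> # so setup no longer touches an empty or missing source
>>> results = model.predict("bus.jpg", refer_image="refer.jpg")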
@@ -27,7 +27,22 @@ class OBBPredictor(DetectionPredictor):
     """

     def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
-        """Initialize OBBPredictor with optional model and data configuration overrides."""
+        """
+        Initialize OBBPredictor with optional model and data configuration overrides.
+
+        This constructor sets up an OBBPredictor instance for oriented bounding box detection tasks.
+
+        Args:
+            cfg (dict, optional): Default configuration for the predictor.
+            overrides (dict, optional): Configuration overrides that take precedence over the default config.
+            _callbacks (list, optional): List of callback functions to be invoked during prediction.
+
+        Examples:
+            >>> from ultralytics.utils import ASSETS
+            >>> from ultralytics.models.yolo.obb import OBBPredictor
+            >>> args = dict(model="yolo11n-obb.pt", source=ASSETS)
+            >>> predictor = OBBPredictor(overrides=args)
+        """
         super().__init__(cfg, overrides, _callbacks)
         self.args.task = "obb"

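The docstring example stops at constructing the predictor. To inspect the oriented boxes themselves, the usual Results API applies; a short sketch, assuming results[0].obb carries boxes in xywhr form as in current releases:

>>> from ultralytics import YOLO
>>> results = YOLO("yolo11n-obb.pt")("https://ultralytics.com/images/boats.jpg")
>>> results[0].obb.xywhr  # (N, 5) tensor: center-x, center-y, width, height, rotation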
@@ -26,14 +26,47 @@ class OBBTrainer(yolo.detect.DetectionTrainer):
     """

     def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
-        """Initialize a OBBTrainer object with given arguments."""
+        """
+        Initialize an OBBTrainer object for training Oriented Bounding Box (OBB) models.
+
+        This trainer extends the DetectionTrainer class to specialize in training models that detect oriented
+        bounding boxes. It automatically sets the task to 'obb' in the configuration.
+
+        Args:
+            cfg (dict, optional): Configuration dictionary for the trainer. Contains training parameters and
+                model configuration.
+            overrides (dict, optional): Dictionary of parameter overrides for the configuration. Any values here
+                will take precedence over those in cfg.
+            _callbacks (list, optional): List of callback functions to be invoked during training.
+
+        Examples:
+            >>> from ultralytics.models.yolo.obb import OBBTrainer
+            >>> args = dict(model="yolo11n-obb.pt", data="dota8.yaml", epochs=3)
+            >>> trainer = OBBTrainer(overrides=args)
+            >>> trainer.train()
+        """
         if overrides is None:
             overrides = {}
         overrides["task"] = "obb"
         super().__init__(cfg, overrides, _callbacks)

     def get_model(self, cfg=None, weights=None, verbose=True):
-        """Return OBBModel initialized with specified config and weights."""
+        """
+        Return OBBModel initialized with specified config and weights.
+
+        Args:
+            cfg (str | dict | None): Model configuration. Can be a path to a YAML config file, a dictionary
+                containing configuration parameters, or None to use default configuration.
+            weights (str | Path | None): Path to pretrained weights file. If None, random initialization is used.
+            verbose (bool): Whether to display model information during initialization.
+
+        Returns:
+            (OBBModel): Initialized OBBModel with the specified configuration and weights.
+
+        Examples:
+            >>> trainer = OBBTrainer()
+            >>> model = trainer.get_model(cfg="yolov8n-obb.yaml", weights="yolov8n-obb.pt")
+        """
         model = OBBModel(cfg, ch=3, nc=self.data["nc"], verbose=verbose and RANK == -1)
         if weights:
             model.load(weights)
@@ -40,7 +40,19 @@ class OBBValidator(DetectionValidator):
     """

     def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
-        """Initialize OBBValidator and set task to 'obb', metrics to OBBMetrics."""
+        """
+        Initialize OBBValidator and set task to 'obb', metrics to OBBMetrics.
+
+        This constructor initializes an OBBValidator instance for validating Oriented Bounding Box (OBB) models.
+        It extends the DetectionValidator class and configures it specifically for the OBB task.
+
+        Args:
+            dataloader (torch.utils.data.DataLoader, optional): Dataloader to be used for validation.
+            save_dir (str | Path, optional): Directory to save results.
+            pbar (bool, optional): Display progress bar during validation.
+            args (dict, optional): Arguments containing validation parameters.
+            _callbacks (list, optional): List of callback functions to be called during validation.
+        """
         super().__init__(dataloader, save_dir, pbar, args, _callbacks)
         self.args.task = "obb"
         self.metrics = OBBMetrics(save_dir=self.save_dir, plot=True)
@@ -79,7 +91,22 @@ class OBBValidator(DetectionValidator):
         return self.match_predictions(detections[:, 5], gt_cls, iou)

     def _prepare_batch(self, si, batch):
-        """Prepare batch data for OBB validation with proper scaling and formatting."""
+        """
+        Prepare batch data for OBB validation with proper scaling and formatting.
+
+        Args:
+            si (int): Batch index to process.
+            batch (dict): Dictionary containing batch data with keys:
+                - batch_idx: Tensor of batch indices
+                - cls: Tensor of class labels
+                - bboxes: Tensor of bounding boxes
+                - ori_shape: Original image shapes
+                - img: Batch of images
+                - ratio_pad: Ratio and padding information
+
+        This method filters the batch data for a specific batch index, extracts class labels and bounding boxes,
+        and scales the bounding boxes to the original image dimensions.
+        """
         idx = batch["batch_idx"] == si
         cls = batch["cls"][idx].squeeze(-1)
         bbox = batch["bboxes"][idx]
@@ -92,7 +119,22 @@ class OBBValidator(DetectionValidator):
         return {"cls": cls, "bbox": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}

     def _prepare_pred(self, pred, pbatch):
-        """Prepare predictions by scaling bounding boxes to original image dimensions."""
+        """
+        Prepare predictions by scaling bounding boxes to original image dimensions.
+
+        This method takes prediction tensors containing bounding box coordinates and scales them from the model's
+        input dimensions to the original image dimensions using the provided batch information.
+
+        Args:
+            pred (torch.Tensor): Prediction tensor containing bounding box coordinates and other information.
+            pbatch (dict): Dictionary containing batch information with keys:
+                - imgsz (tuple): Model input image size.
+                - ori_shape (tuple): Original image shape.
+                - ratio_pad (tuple): Ratio and padding information for scaling.
+
+        Returns:
+            (torch.Tensor): Scaled prediction tensor with bounding boxes in original image dimensions.
+        """
         predn = pred.clone()
         ops.scale_boxes(
             pbatch["imgsz"], predn[:, :4], pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"], xywh=True
@@ -100,7 +142,20 @@ class OBBValidator(DetectionValidator):
         return predn

     def plot_predictions(self, batch, preds, ni):
-        """Plot predicted bounding boxes on input images and save the result."""
+        """
+        Plot predicted bounding boxes on input images and save the result.
+
+        Args:
+            batch (dict): Batch data containing images, file paths, and other metadata.
+            preds (list): List of prediction tensors for each image in the batch.
+            ni (int): Batch index used for naming the output file.
+
+        Examples:
+            >>> validator = OBBValidator()
+            >>> batch = {"img": images, "im_file": paths}
+            >>> preds = [torch.rand(10, 7)]  # Example predictions for one image
+            >>> validator.plot_predictions(batch, preds, 0)
+        """
         plot_images(
             batch["img"],
             *output_to_rotated_target(preds, max_det=self.args.max_det),
@@ -111,7 +166,19 @@ class OBBValidator(DetectionValidator):
         )  # pred

     def pred_to_json(self, predn, filename):
-        """Convert YOLO predictions to COCO JSON format with rotated bounding box information."""
+        """
+        Convert YOLO predictions to COCO JSON format with rotated bounding box information.
+
+        Args:
+            predn (torch.Tensor): Prediction tensor containing bounding box coordinates, confidence scores,
+                class predictions, and rotation angles with shape (N, 6+) where the last column is the angle.
+            filename (str | Path): Path to the image file for which predictions are being processed.
+
+        Notes:
+            This method processes rotated bounding box predictions and converts them to both rbox format
+            (x, y, w, h, angle) and polygon format (x1, y1, x2, y2, x3, y3, x4, y4) before adding them
+            to the JSON dictionary.
+        """
         stem = Path(filename).stem
         image_id = int(stem) if stem.isnumeric() else stem
         rbox = torch.cat([predn[:, :4], predn[:, -1:]], dim=-1)
@@ -128,7 +195,21 @@ class OBBValidator(DetectionValidator):
         )

     def save_one_txt(self, predn, save_conf, shape, file):
-        """Save YOLO detections to a txt file in normalized coordinates using the Results class."""
+        """
+        Save YOLO OBB (Oriented Bounding Box) detections to a text file in normalized coordinates.
+
+        Args:
+            predn (torch.Tensor): Predicted detections with shape (N, 7) containing bounding boxes, confidence
+                scores, class predictions, and angles in format (x, y, w, h, conf, cls, angle).
+            save_conf (bool): Whether to save confidence scores in the text file.
+            shape (tuple): Original image shape in format (height, width).
+            file (Path | str): Output file path to save detections.
+
+        Examples:
+            >>> validator = OBBValidator()
+            >>> predn = torch.tensor([[100, 100, 50, 30, 0.9, 0, 45]])  # One detection: x,y,w,h,conf,cls,angle
+            >>> validator.save_one_txt(predn, True, (640, 480), "detection.txt")
+        """
         import numpy as np

         from ultralytics.engine.results import Results
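The pred_to_json Notes above mention converting rboxes to polygons. That conversion is a pure geometry step, and ultralytics.utils.ops exposes xywhr2xyxyxyxy for it in current releases; a small self-contained check of the shapes involved:

>>> import torch
>>> from ultralytics.utils import ops
>>> rbox = torch.tensor([[100.0, 100.0, 50.0, 30.0, 0.5]])  # x, y, w, h, angle (radians)
>>> poly = ops.xywhr2xyxyxyxy(rbox)  # four corner points per box, the polygon form used in pred_to_json
>>> poly.shape
torch.Size([1, 4, 2])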
@@ -27,7 +27,24 @@ class PosePredictor(DetectionPredictor):
     """

     def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
-        """Initialize PosePredictor, set task to 'pose' and log a warning for using 'mps' as device."""
+        """
+        Initialize PosePredictor, a specialized predictor for pose estimation tasks.
+
+        This initializer sets up a PosePredictor instance, configuring it for pose detection tasks and handling
+        device-specific warnings for Apple MPS.
+
+        Args:
+            cfg (Any): Configuration for the predictor. Default is DEFAULT_CFG.
+            overrides (dict, optional): Configuration overrides that take precedence over cfg.
+            _callbacks (list, optional): List of callback functions to be invoked during prediction.
+
+        Examples:
+            >>> from ultralytics.utils import ASSETS
+            >>> from ultralytics.models.yolo.pose import PosePredictor
+            >>> args = dict(model="yolov8n-pose.pt", source=ASSETS)
+            >>> predictor = PosePredictor(overrides=args)
+            >>> predictor.predict_cli()
+        """
         super().__init__(cfg, overrides, _callbacks)
         self.args.task = "pose"
         if isinstance(self.args.device, str) and self.args.device.lower() == "mps":
@@ -36,7 +36,27 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
     """

     def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
-        """Initialize a PoseTrainer object with specified configurations and overrides."""
+        """
+        Initialize a PoseTrainer object for training YOLO pose estimation models.
+
+        This initializes a trainer specialized for pose estimation tasks, setting the task to 'pose' and
+        handling specific configurations needed for keypoint detection models.
+
+        Args:
+            cfg (dict, optional): Default configuration dictionary containing training parameters.
+            overrides (dict, optional): Dictionary of parameter overrides for the default configuration.
+            _callbacks (list, optional): List of callback functions to be executed during training.
+
+        Notes:
+            This trainer will automatically set the task to 'pose' regardless of what is provided in overrides.
+            A warning is issued when using Apple MPS device due to known bugs with pose models.
+
+        Examples:
+            >>> from ultralytics.models.yolo.pose import PoseTrainer
+            >>> args = dict(model="yolov8n-pose.pt", data="coco8-pose.yaml", epochs=3)
+            >>> trainer = PoseTrainer(overrides=args)
+            >>> trainer.train()
+        """
         if overrides is None:
             overrides = {}
         overrides["task"] = "pose"
@@ -49,7 +69,17 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
         )

     def get_model(self, cfg=None, weights=None, verbose=True):
-        """Get pose estimation model with specified configuration and weights."""
+        """
+        Get pose estimation model with specified configuration and weights.
+
+        Args:
+            cfg (str | Path | dict | None): Model configuration file path or dictionary.
+            weights (str | Path | None): Path to the model weights file.
+            verbose (bool): Whether to display model information.
+
+        Returns:
+            (PoseModel): Initialized pose estimation model.
+        """
         model = PoseModel(cfg, ch=3, nc=self.data["nc"], data_kpt_shape=self.data["kpt_shape"], verbose=verbose)
         if weights:
             model.load(weights)
@@ -69,7 +99,22 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
         )

     def plot_training_samples(self, batch, ni):
-        """Plot a batch of training samples with annotated class labels, bounding boxes, and keypoints."""
+        """
+        Plot a batch of training samples with annotated class labels, bounding boxes, and keypoints.
+
+        Args:
+            batch (dict): Dictionary containing batch data with the following keys:
+                - img (torch.Tensor): Batch of images
+                - keypoints (torch.Tensor): Keypoint coordinates for pose estimation
+                - cls (torch.Tensor): Class labels
+                - bboxes (torch.Tensor): Bounding box coordinates
+                - im_file (list): List of image file paths
+                - batch_idx (torch.Tensor): Batch indices for each instance
+            ni (int): Current training iteration number used for filename
+
+        The function saves the plotted batch as an image in the trainer's save directory with the filename
+        'train_batch{ni}.jpg', where ni is the iteration number.
+        """
         images = batch["img"]
         kpts = batch["keypoints"]
         cls = batch["cls"].squeeze(-1)
@@ -20,7 +20,7 @@ class PoseValidator(DetectionValidator):
     specialized metrics for pose evaluation.

     Attributes:
-        sigma (np.ndarray): Sigma values for OKS calculation, either from OKS_SIGMA or ones divided by number of keypoints.
+        sigma (np.ndarray): Sigma values for OKS calculation, either OKS_SIGMA or ones divided by number of keypoints.
         kpt_shape (List[int]): Shape of the keypoints, typically [17, 3] for COCO format.
         args (dict): Arguments for the validator including task set to "pose".
         metrics (PoseMetrics): Metrics object for pose evaluation.
@@ -47,7 +47,30 @@ class PoseValidator(DetectionValidator):
     """

     def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
-        """Initialize a PoseValidator object with custom parameters and assigned attributes."""
+        """
+        Initialize a PoseValidator object for pose estimation validation.
+
+        This validator is specifically designed for pose estimation tasks, handling keypoints and implementing
+        specialized metrics for pose evaluation.
+
+        Args:
+            dataloader (torch.utils.data.DataLoader, optional): Dataloader to be used for validation.
+            save_dir (Path | str, optional): Directory to save results.
+            pbar (Any, optional): Progress bar for displaying progress.
+            args (dict, optional): Arguments for the validator including task set to "pose".
+            _callbacks (list, optional): List of callback functions to be executed during validation.
+
+        Examples:
+            >>> from ultralytics.models.yolo.pose import PoseValidator
+            >>> args = dict(model="yolov8n-pose.pt", data="coco8-pose.yaml")
+            >>> validator = PoseValidator(args=args)
+            >>> validator()
+
+        Notes:
+            This class extends DetectionValidator with pose-specific functionality. It initializes with sigma
+            values for OKS calculation and sets up PoseMetrics for evaluation. A warning is displayed when using
+            Apple MPS due to a known bug with pose models.
+        """
         super().__init__(dataloader, save_dir, pbar, args, _callbacks)
         self.sigma = None
         self.kpt_shape = None
@@ -91,7 +114,20 @@ class PoseValidator(DetectionValidator):
         self.stats = dict(tp_p=[], tp=[], conf=[], pred_cls=[], target_cls=[], target_img=[])

     def _prepare_batch(self, si, batch):
-        """Prepare a batch for processing by converting keypoints to float and scaling to original dimensions."""
+        """
+        Prepare a batch for processing by converting keypoints to float and scaling to original dimensions.
+
+        Args:
+            si (int): Batch index.
+            batch (dict): Dictionary containing batch data with keys like 'keypoints', 'batch_idx', etc.
+
+        Returns:
+            pbatch (dict): Prepared batch with keypoints scaled to original image dimensions.
+
+        Notes:
+            This method extends the parent class's _prepare_batch method by adding keypoint processing.
+            Keypoints are scaled from normalized coordinates to original image dimensions.
+        """
         pbatch = super()._prepare_batch(si, batch)
         kpts = batch["keypoints"][batch["batch_idx"] == si]
         h, w = pbatch["imgsz"]
@@ -103,7 +139,23 @@ class PoseValidator(DetectionValidator):
         return pbatch

     def _prepare_pred(self, pred, pbatch):
-        """Prepare and scale keypoints in predictions for pose processing."""
+        """
+        Prepare and scale keypoints in predictions for pose processing.
+
+        This method extends the parent class's _prepare_pred method to handle keypoint scaling. It first calls
+        the parent method to get the basic prediction boxes, then extracts and scales the keypoint coordinates
+        to match the original image dimensions.
+
+        Args:
+            pred (torch.Tensor): Raw prediction tensor from the model.
+            pbatch (dict): Processed batch dictionary containing image information including:
+                - imgsz: Image size used for inference
+                - ori_shape: Original image shape
+                - ratio_pad: Ratio and padding information for coordinate scaling
+
+        Returns:
+            predn (torch.Tensor): Processed prediction boxes scaled to original image dimensions.
+        """
         predn = super()._prepare_pred(pred, pbatch)
         nk = pbatch["kpts"].shape[1]
         pred_kpts = predn[:, 6:].view(len(predn), nk, -1)
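The reshape on that last line is easy to sanity-check numerically: with COCO's kpt_shape of [17, 3], each prediction row carries 6 box/score values plus 17 × 3 = 51 keypoint values, so the view recovers one (17, 3) block per detection. A toy example with illustrative sizes:

>>> import torch
>>> predn = torch.rand(8, 6 + 17 * 3)  # 8 detections, COCO keypoints stored as (x, y, visibility)
>>> pred_kpts = predn[:, 6:].view(len(predn), 17, -1)
>>> pred_kpts.shape
torch.Size([8, 17, 3])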
@@ -204,7 +256,19 @@ class PoseValidator(DetectionValidator):
         return self.match_predictions(detections[:, 5], gt_cls, iou)

     def plot_val_samples(self, batch, ni):
-        """Plot and save validation set samples with ground truth bounding boxes and keypoints."""
+        """
+        Plot and save validation set samples with ground truth bounding boxes and keypoints.
+
+        Args:
+            batch (dict): Dictionary containing batch data with keys:
+                - img (torch.Tensor): Batch of images
+                - batch_idx (torch.Tensor): Batch indices for each image
+                - cls (torch.Tensor): Class labels
+                - bboxes (torch.Tensor): Bounding box coordinates
+                - keypoints (torch.Tensor): Keypoint coordinates
+                - im_file (list): List of image file paths
+            ni (int): Batch index used for naming the output file
+        """
         plot_images(
             batch["img"],
             batch["batch_idx"],
@@ -218,7 +282,18 @@ class PoseValidator(DetectionValidator):
         )

     def plot_predictions(self, batch, preds, ni):
-        """Plot and save model predictions with bounding boxes and keypoints."""
+        """
+        Plot and save model predictions with bounding boxes and keypoints.
+
+        Args:
+            batch (dict): Dictionary containing batch data including images, file paths, and other metadata.
+            preds (List[torch.Tensor]): List of prediction tensors from the model, each containing bounding
+                boxes, confidence scores, class predictions, and keypoints.
+            ni (int): Batch index used for naming the output file.
+
+        The function extracts keypoints from predictions, converts predictions to target format, and plots them
+        on the input images. The resulting visualization is saved to the specified save directory.
+        """
         pred_kpts = torch.cat([p[:, 6:].view(-1, *self.kpt_shape) for p in preds], 0)
         plot_images(
             batch["img"],
@@ -231,7 +306,21 @@ class PoseValidator(DetectionValidator):
         )  # pred

     def save_one_txt(self, predn, pred_kpts, save_conf, shape, file):
-        """Save YOLO detections to a txt file in normalized coordinates in a specific format."""
+        """
+        Save YOLO pose detections to a text file in normalized coordinates.
+
+        Args:
+            predn (torch.Tensor): Prediction boxes and scores with shape (N, 6) for (x1, y1, x2, y2, conf, cls).
+            pred_kpts (torch.Tensor): Predicted keypoints with shape (N, K, D) where K is the number of keypoints
+                and D is the dimension (typically 3 for x, y, visibility).
+            save_conf (bool): Whether to save confidence scores.
+            shape (tuple): Original image shape (height, width).
+            file (Path): Output file path to save detections.
+
+        Notes:
+            The output format is: class_id x_center y_center width height confidence keypoints where keypoints
+            are normalized (x, y, visibility) values for each point.
+        """
         from ultralytics.engine.results import Results

         Results(
@@ -243,7 +332,23 @@ class PoseValidator(DetectionValidator):
         ).save_txt(file, save_conf=save_conf)

     def pred_to_json(self, predn, filename):
-        """Convert YOLO predictions to COCO JSON format."""
+        """
+        Convert YOLO predictions to COCO JSON format.
+
+        This method takes prediction tensors and a filename, converts the bounding boxes from YOLO format
+        to COCO format, and appends the results to the internal JSON dictionary (self.jdict).
+
+        Args:
+            predn (torch.Tensor): Prediction tensor containing bounding boxes, confidence scores, class IDs,
+                and keypoints, with shape (N, 6+K) where N is the number of predictions and K is the flattened
+                keypoints dimension.
+            filename (str | Path): Path to the image file for which predictions are being processed.
+
+        Notes:
+            The method extracts the image ID from the filename stem (either as an integer if numeric, or as a
+            string), converts bounding boxes from xyxy to xywh format, and adjusts coordinates from center to
+            top-left corner before saving to the JSON dictionary.
+        """
         stem = Path(filename).stem
         image_id = int(stem) if stem.isnumeric() else stem
         box = ops.xyxy2xywh(predn[:, :4])  # xywh
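The center-to-corner adjustment described in the Notes is the standard COCO convention: COCO xywh boxes are anchored at the top-left corner, while ops.xyxy2xywh produces center coordinates. A worked example of the shift, whose subtraction mirrors what the method does internally:

>>> import torch
>>> from ultralytics.utils import ops
>>> box = ops.xyxy2xywh(torch.tensor([[10.0, 20.0, 50.0, 80.0]]))  # -> center (30, 50), size (40, 60)
>>> box[:, :2] -= box[:, 2:] / 2  # shift center to top-left corner for COCO JSON
>>> box
tensor([[10., 20., 40., 60.]])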
@@ -31,12 +31,37 @@ class SegmentationPredictor(DetectionPredictor):
     """

     def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
-        """Initialize the SegmentationPredictor with configuration, overrides, and callbacks."""
+        """
+        Initialize the SegmentationPredictor with configuration, overrides, and callbacks.
+
+        This class specializes in processing segmentation model outputs, handling both bounding boxes and masks
+        in the prediction results.
+
+        Args:
+            cfg (dict): Configuration for the predictor. Defaults to Ultralytics DEFAULT_CFG.
+            overrides (dict, optional): Configuration overrides that take precedence over cfg.
+            _callbacks (list, optional): List of callback functions to be invoked during prediction.
+        """
         super().__init__(cfg, overrides, _callbacks)
         self.args.task = "segment"

     def postprocess(self, preds, img, orig_imgs):
-        """Apply non-max suppression and process detections for each image in the input batch."""
+        """
+        Apply non-max suppression and process segmentation detections for each image in the input batch.
+
+        Args:
+            preds (tuple): Model predictions, containing bounding boxes, scores, classes, and mask coefficients.
+            img (torch.Tensor): Input image tensor in model format, with shape (B, C, H, W).
+            orig_imgs (list | torch.Tensor | np.ndarray): Original image or batch of images.
+
+        Returns:
+            (list): List of Results objects containing the segmentation predictions for each image in the batch.
+                Each Results object includes both bounding boxes and segmentation masks.
+
+        Examples:
+            >>> predictor = SegmentationPredictor(overrides=dict(model="yolov8n-seg.pt"))
+            >>> results = predictor.postprocess(preds, img, orig_img)
+        """
         # Extract protos - tuple if PyTorch model or array if exported
         protos = preds[1][-1] if isinstance(preds[1], tuple) else preds[1]
         return super().postprocess(preds[0], img, orig_imgs, protos=protos)
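From the caller's side, the protos handling is invisible: the returned Results objects expose masks alongside boxes. A brief usage sketch through the high-level API, assuming the standard Results attributes:

>>> from ultralytics import YOLO
>>> model = YOLO("yolov8n-seg.pt")
>>> results = model("https://ultralytics.com/images/bus.jpg")
>>> masks = results[0].masks  # Masks object; masks.data is an (N, H, W) tensor
>>> boxes = results[0].boxes  # matching Boxes for the same N instances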